feat: add FastAPI skeleton for LLM chat service
- POST /chat endpoint with message and conversation_id support
- GET /health endpoint for Cloud Run health checks
- Local and Remote LLM adapters with async httpx
- Pydantic schemas and environment-based config
- Dockerfile configured for Cloud Run deployment

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1 @@
|
||||
# Tyndale AI Service
|
||||
@@ -0,0 +1,22 @@
|
||||
from typing import Literal
|
||||
|
||||
from pydantic_settings import BaseSettings
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """Application configuration loaded from environment variables.

    Values come from the process environment and from a UTF-8 encoded
    ``.env`` file.
    """

    # Pydantic v2 configuration. The nested ``class Config`` form is
    # deprecated there and emits a deprecation warning; a plain dict is
    # accepted for ``model_config`` and needs no extra import.
    model_config = {
        "env_file": ".env",
        "env_file_encoding": "utf-8",
    }

    # Which adapter serves chat requests.
    llm_mode: Literal["local", "remote"] = "local"
    # URL of the remote LLM service; an empty string means "not configured".
    llm_remote_url: str = ""
    # Bearer token for the remote service; an empty string means "no token".
    llm_remote_token: str = ""
|
||||
|
||||
|
||||
# Constants: upper bound on the length of a single chat message, in characters.
|
||||
MAX_MESSAGE_LENGTH: int = 10_000
|
||||
|
||||
# Global settings instance, created once when this module is imported.
|
||||
settings = Settings()
|
||||
@@ -0,0 +1 @@
|
||||
# LLM adapters
|
||||
@@ -0,0 +1,110 @@
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
import httpx
|
||||
|
||||
from app.config import settings
|
||||
|
||||
|
||||
class LLMAdapter(ABC):
    """Common interface that every LLM backend adapter implements."""

    @abstractmethod
    async def generate(self, conversation_id: str, message: str) -> str:
        """Produce a reply to ``message`` within the given conversation.

        Args:
            conversation_id: Identifier of the conversation the message belongs to.
            message: The text sent by the user.

        Returns:
            The reply text.
        """
        ...
|
||||
|
||||
|
||||
class LocalAdapter(LLMAdapter):
    """Development/testing adapter that answers with a canned stub reply."""

    async def generate(self, conversation_id: str, message: str) -> str:
        """Return a stub reply that echoes the start of the user's message.

        Only the first 100 characters of ``message`` are echoed; a trailing
        ``...`` marks a truncated message. ``conversation_id`` is not used.
        Placeholder until a real local model is wired in.
        """
        preview = message[:100]
        if len(message) > 100:
            preview += "..."

        return (
            "[LOCAL STUB MODE] Acknowledged your message. "
            f'You said: "{preview}". '
            "This is a stub response - local model not yet implemented."
        )
|
||||
|
||||
|
||||
class RemoteAdapter(LLMAdapter):
    """Remote adapter that calls an external LLM service via HTTP.

    The service receives a JSON POST body with ``conversation_id`` and
    ``message`` and is expected to answer with HTTP 200 and a JSON object
    containing a string ``response`` field.
    """

    def __init__(self, url: str, token: str | None = None, timeout: float = 30.0):
        """Initialize the remote adapter.

        Args:
            url: The remote LLM service URL.
            token: Optional bearer token for authentication.
            timeout: Request timeout in seconds.
        """
        self.url = url
        self.token = token
        self.timeout = timeout

    async def generate(self, conversation_id: str, message: str) -> str:
        """Call the remote LLM service to generate a response.

        Failures are not raised: transport errors, non-200 statuses and
        malformed bodies are all reported as a string starting with
        ``[ERROR]``.

        Args:
            conversation_id: The conversation identifier sent to the service.
            message: The user's message sent to the service.

        Returns:
            The service's ``response`` text, or an ``[ERROR] ...`` string.
        """
        headers = {"Content-Type": "application/json"}
        if self.token:
            headers["Authorization"] = f"Bearer {self.token}"

        payload = {
            "conversation_id": conversation_id,
            "message": message,
        }

        # Only the network round-trip can raise httpx errors, so the try
        # block covers just that call.
        try:
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                response = await client.post(self.url, json=payload, headers=headers)
        except httpx.TimeoutException:
            return f"[ERROR] Remote LLM request timed out after {self.timeout} seconds"
        except httpx.ConnectError:
            return f"[ERROR] Could not connect to remote LLM at {self.url}"
        except httpx.RequestError as e:
            return f"[ERROR] Remote LLM request failed: {e}"

        if response.status_code != 200:
            return (
                f"[ERROR] Remote LLM returned status {response.status_code}: "
                f"{response.text[:200] if response.text else 'No response body'}"
            )

        try:
            data = response.json()
        except ValueError:
            return "[ERROR] Remote LLM returned invalid JSON response"

        # Valid JSON is not necessarily an object: null, numbers, strings and
        # arrays would otherwise raise TypeError on the lookups below.
        if not isinstance(data, dict) or "response" not in data:
            return "[ERROR] Remote LLM response missing 'response' field"

        reply = data["response"]
        # Honour the declared ``-> str`` contract instead of leaking
        # None/objects to the caller.
        if not isinstance(reply, str):
            return "[ERROR] Remote LLM 'response' field is not a string"

        return reply
|
||||
|
||||
|
||||
def get_adapter() -> LLMAdapter:
    """Build the adapter selected by the ``llm_mode`` setting.

    Returns:
        A ``RemoteAdapter`` when the mode is ``"remote"``, otherwise a
        ``LocalAdapter``.

    Raises:
        ValueError: If the mode is ``"remote"`` but no remote URL is set.
    """
    if settings.llm_mode != "remote":
        return LocalAdapter()

    remote_url = settings.llm_remote_url
    if not remote_url:
        raise ValueError("LLM_REMOTE_URL must be set when LLM_MODE is 'remote'")

    # An empty token setting is passed on as None, i.e. no auth header.
    bearer_token = settings.llm_remote_token or None
    return RemoteAdapter(url=remote_url, token=bearer_token)
|
||||
+79
@@ -0,0 +1,79 @@
|
||||
import logging
|
||||
import uuid
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
|
||||
from app.config import settings, MAX_MESSAGE_LENGTH
|
||||
from app.llm.adapter import get_adapter
|
||||
from app.schemas import ChatRequest, ChatResponse, HealthResponse
|
||||
|
||||
# Logging setup: INFO level, lines formatted as
# "<time> - <logger name> - <level> - <message>".
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# The FastAPI application object that the route decorators attach to.
app = FastAPI(
    version="0.1.0",
    title="Tyndale AI Service",
    description="LLM Chat Service for algorithmic trading support",
)
|
||||
|
||||
|
||||
@app.get("/health", response_model=HealthResponse)
async def health_check() -> HealthResponse:
    """Report that the service is up; always answers with status "ok"."""
    healthy = HealthResponse(status="ok")
    return healthy
|
||||
|
||||
|
||||
@app.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest) -> ChatResponse:
    """Process a chat message through the LLM adapter.

    - Validates message length
    - Generates conversation_id if not provided
    - Routes to appropriate LLM adapter based on LLM_MODE

    Args:
        request: The parsed request body.

    Returns:
        The adapter's reply together with the conversation ID and mode used.

    Raises:
        HTTPException: 400 if the message is longer than MAX_MESSAGE_LENGTH;
            500 if the configured adapter cannot be created.
    """
    message_length = len(request.message)
    if message_length > MAX_MESSAGE_LENGTH:
        raise HTTPException(
            status_code=400,
            detail=f"Message exceeds maximum length of {MAX_MESSAGE_LENGTH:,} characters. "
            f"Your message has {message_length:,} characters.",
        )

    # Generate or use provided conversation_id
    conversation_id = request.conversation_id or str(uuid.uuid4())

    # Log request metadata (not content). The values are put into the message
    # itself because the configured log format does not render ``extra``
    # fields; ``extra`` is kept for handlers that do. %r escapes control
    # characters in the client-supplied conversation ID.
    logger.info(
        "Chat request received: conversation_id=%r message_length=%d mode=%s",
        conversation_id,
        message_length,
        settings.llm_mode,
        extra={
            "conversation_id": conversation_id,
            "message_length": message_length,
            "mode": settings.llm_mode,
        },
    )

    # get_adapter() raises ValueError when remote mode has no URL configured;
    # report that as an explicit server error rather than an unhandled crash.
    try:
        adapter = get_adapter()
    except ValueError as exc:
        logger.error("LLM adapter could not be created: %s", exc)
        raise HTTPException(
            status_code=500,
            detail="LLM backend is not configured correctly.",
        ) from exc

    response_text = await adapter.generate(conversation_id, request.message)

    # Log response metadata (not content)
    logger.info(
        "Chat response generated: conversation_id=%r response_length=%d mode=%s",
        conversation_id,
        len(response_text),
        settings.llm_mode,
        extra={
            "conversation_id": conversation_id,
            "response_length": len(response_text),
            "mode": settings.llm_mode,
        },
    )

    return ChatResponse(
        conversation_id=conversation_id,
        response=response_text,
        mode=settings.llm_mode,
        sources=[],
    )
|
||||
@@ -0,0 +1,33 @@
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ChatRequest(BaseModel):
    """Body accepted by the /chat endpoint."""

    # Required: the text to send to the LLM.
    message: str = Field(description="The user's message")
    # Optional: omitted or null when the client has no conversation ID yet.
    conversation_id: str | None = Field(
        None,
        description="Optional conversation ID for continuity",
    )
|
||||
|
||||
|
||||
class ChatResponse(BaseModel):
    """Body returned by the /chat endpoint."""

    conversation_id: str = Field(
        description="Conversation ID (generated if not provided)",
    )
    response: str = Field(description="The LLM's response")
    mode: Literal["local", "remote"] = Field(description="Which adapter was used")
    # A fresh empty list per instance when no sources are supplied.
    sources: list = Field(
        description="Source references (empty for now)",
        default_factory=list,
    )
|
||||
|
||||
|
||||
class HealthResponse(BaseModel):
    """Body returned by the /health endpoint."""

    # Defaults to "ok" when no status is given.
    status: str = "ok"
|
||||
|
||||
|
||||
class ErrorResponse(BaseModel):
    """Shape of a standard error body: a single required ``detail`` string."""

    detail: str = Field(description="Error description")
|
||||
Reference in New Issue
Block a user