feat: add FastAPI skeleton for LLM chat service

- POST /chat endpoint with message and conversation_id support
- GET /health endpoint for Cloud Run health checks
- Local and Remote LLM adapters with async httpx
- Pydantic schemas and environment-based config
- Dockerfile configured for Cloud Run deployment

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Danny Garcia
2026-01-07 19:32:57 -06:00
parent 11e7675c52
commit 84de9a02c8
11 changed files with 490 additions and 0 deletions
+1
View File
@@ -0,0 +1 @@
# Tyndale AI Service
+22
View File
@@ -0,0 +1,22 @@
from typing import Literal
from pydantic_settings import BaseSettings
class Settings(BaseSettings):
    """Application configuration loaded from environment variables.

    Values may also be supplied through a UTF-8 encoded ``.env`` file, as
    configured in ``model_config`` below.

    Attributes:
        llm_mode: Which LLM adapter to use, ``"local"`` or ``"remote"``.
        llm_remote_url: URL of the remote LLM service; empty when unset.
        llm_remote_token: Bearer token for the remote LLM service; empty
            when unset.
    """

    llm_mode: Literal["local", "remote"] = "local"
    llm_remote_url: str = ""
    llm_remote_token: str = ""

    # Pydantic v2 configuration. The nested ``class Config`` form is
    # deprecated in v2 (which pydantic-settings requires); a plain dict is
    # accepted for ``model_config``, so no extra import is needed.
    model_config = {
        "env_file": ".env",
        "env_file_encoding": "utf-8",
    }
# Constants
# Upper bound on the length of a single chat message, counted in characters.
MAX_MESSAGE_LENGTH: int = 10_000
# Global settings instance, created once when this module is imported.
settings = Settings()
+1
View File
@@ -0,0 +1 @@
# LLM adapters
+110
View File
@@ -0,0 +1,110 @@
from abc import ABC, abstractmethod
import httpx
from app.config import settings
class LLMAdapter(ABC):
    """Common interface that every LLM backend adapter implements."""

    @abstractmethod
    async def generate(self, conversation_id: str, message: str) -> str:
        """Produce the reply text for one user message.

        Args:
            conversation_id: Identifier of the conversation the message
                belongs to.
            message: The text sent by the user.

        Returns:
            The reply as a string.
        """
        ...
class LocalAdapter(LLMAdapter):
    """Development/testing stand-in that replies without calling any model."""

    async def generate(self, conversation_id: str, message: str) -> str:
        """Build a canned reply that quotes the start of the user's message.

        At most the first 100 characters of ``message`` are quoted, and an
        ellipsis is appended when the message is longer than that.
        ``conversation_id`` is accepted to satisfy the adapter interface and
        is not used.
        """
        preview = message[:100]
        if len(message) > 100:
            preview += "..."
        return (
            "[LOCAL STUB MODE] Acknowledged your message. "
            f'You said: "{preview}". '
            "This is a stub response - local model not yet implemented."
        )
class RemoteAdapter(LLMAdapter):
    """Remote adapter that calls an external LLM service via HTTP."""

    def __init__(self, url: str, token: str | None = None, timeout: float = 30.0):
        """Initialize the remote adapter.

        Args:
            url: The remote LLM service URL.
            token: Optional bearer token for authentication.
            timeout: Request timeout in seconds.
        """
        self.url = url
        self.token = token
        self.timeout = timeout

    async def generate(self, conversation_id: str, message: str) -> str:
        """Call the remote LLM service to generate a response.

        POSTs ``{"conversation_id": ..., "message": ...}`` as JSON to
        ``self.url`` and expects a 200 reply whose JSON body is an object
        with a string ``"response"`` field.

        Request failures, non-200 statuses and malformed bodies are not
        raised; they are reported as a string starting with ``[ERROR]``.

        Args:
            conversation_id: The conversation identifier sent to the service.
            message: The user's message sent to the service.

        Returns:
            The value of the ``"response"`` field on success, otherwise an
            ``[ERROR] ...`` description of what went wrong.
        """
        headers = {"Content-Type": "application/json"}
        if self.token:
            headers["Authorization"] = f"Bearer {self.token}"

        payload = {
            "conversation_id": conversation_id,
            "message": message,
        }

        # Only the HTTP exchange can raise httpx request errors, so keep the
        # try body limited to it. Specific handlers come first because
        # TimeoutException and ConnectError are subclasses of RequestError.
        try:
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                response = await client.post(self.url, json=payload, headers=headers)
        except httpx.TimeoutException:
            return f"[ERROR] Remote LLM request timed out after {self.timeout} seconds"
        except httpx.ConnectError:
            return f"[ERROR] Could not connect to remote LLM at {self.url}"
        except httpx.RequestError as e:
            return f"[ERROR] Remote LLM request failed: {e}"

        if response.status_code != 200:
            return (
                f"[ERROR] Remote LLM returned status {response.status_code}: "
                f"{response.text[:200] if response.text else 'No response body'}"
            )

        try:
            data = response.json()
        except ValueError:
            return "[ERROR] Remote LLM returned invalid JSON response"

        # Valid JSON is not necessarily an object: null, numbers and lists
        # would make the membership test or the key lookup raise TypeError.
        if not isinstance(data, dict) or "response" not in data:
            return "[ERROR] Remote LLM response missing 'response' field"

        reply = data["response"]
        # Callers rely on the declared ``str`` return type.
        if not isinstance(reply, str):
            return "[ERROR] Remote LLM 'response' field is not a string"
        return reply
def get_adapter() -> LLMAdapter:
    """Build the adapter selected by the ``llm_mode`` setting.

    Returns:
        A ``RemoteAdapter`` when the mode is ``"remote"``, otherwise a
        ``LocalAdapter``.

    Raises:
        ValueError: If the mode is ``"remote"`` but no remote URL is set.
    """
    if settings.llm_mode != "remote":
        return LocalAdapter()

    remote_url = settings.llm_remote_url
    if not remote_url:
        raise ValueError("LLM_REMOTE_URL must be set when LLM_MODE is 'remote'")

    # An empty token setting means "no authentication".
    bearer_token = settings.llm_remote_token or None
    return RemoteAdapter(url=remote_url, token=bearer_token)
+79
View File
@@ -0,0 +1,79 @@
import logging
import uuid
from fastapi import FastAPI, HTTPException
from app.config import settings, MAX_MESSAGE_LENGTH
from app.llm.adapter import get_adapter
from app.schemas import ChatRequest, ChatResponse, HealthResponse
# Root logging setup: INFO and above, formatted as
# "timestamp - logger name - level - message".
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# The ASGI application object that the route decorators below attach to.
app = FastAPI(
    version="0.1.0",
    title="Tyndale AI Service",
    description="LLM Chat Service for algorithmic trading support",
)
@app.get("/health", response_model=HealthResponse)
async def health_check() -> HealthResponse:
    """Report that the service is up by returning a status of ``"ok"``."""
    healthy = HealthResponse(status="ok")
    return healthy
@app.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest) -> ChatResponse:
    """Process a chat message through the LLM adapter.

    - Validates message length
    - Generates conversation_id if not provided
    - Routes to appropriate LLM adapter based on LLM_MODE

    Args:
        request: The parsed request body with the user's message and an
            optional conversation ID.

    Returns:
        The adapter's reply together with the conversation ID and the mode
        that produced it.

    Raises:
        HTTPException: 400 when the message is longer than
            ``MAX_MESSAGE_LENGTH``; 500 when the adapter cannot be built
            from the current configuration.
    """
    # Validate message length
    message_length = len(request.message)
    if message_length > MAX_MESSAGE_LENGTH:
        raise HTTPException(
            status_code=400,
            detail=(
                f"Message exceeds maximum length of {MAX_MESSAGE_LENGTH:,} characters. "
                f"Your message has {message_length:,} characters."
            ),
        )

    # Generate or use provided conversation_id
    conversation_id = request.conversation_id or str(uuid.uuid4())
    mode = settings.llm_mode

    # Log request metadata (not content). The values go into the message
    # itself because a plain-text log format does not render ``extra``
    # fields; ``extra`` is kept for handlers that emit structured records.
    logger.info(
        "Chat request received: conversation_id=%s message_length=%d mode=%s",
        conversation_id,
        message_length,
        mode,
        extra={
            "conversation_id": conversation_id,
            "message_length": message_length,
            "mode": mode,
        },
    )

    # Get adapter and generate response. get_adapter() raises ValueError on
    # an incomplete configuration; report that as an explicit 500 without
    # exposing configuration details to the client.
    try:
        adapter = get_adapter()
    except ValueError as exc:
        logger.error("LLM adapter could not be created: %s", exc)
        raise HTTPException(
            status_code=500,
            detail="LLM service is misconfigured.",
        ) from exc

    response_text = await adapter.generate(conversation_id, request.message)

    # Log response metadata
    response_length = len(response_text)
    logger.info(
        "Chat response generated: conversation_id=%s response_length=%d mode=%s",
        conversation_id,
        response_length,
        mode,
        extra={
            "conversation_id": conversation_id,
            "response_length": response_length,
            "mode": mode,
        },
    )

    return ChatResponse(
        conversation_id=conversation_id,
        response=response_text,
        mode=mode,
        sources=[],
    )
+33
View File
@@ -0,0 +1,33 @@
from typing import Literal
from pydantic import BaseModel, Field
class ChatRequest(BaseModel):
    """Body accepted by the /chat endpoint."""

    # Required: no default is supplied.
    message: str = Field(description="The user's message")
    # Optional: defaults to None when the client omits it.
    conversation_id: str | None = Field(
        None,
        description="Optional conversation ID for continuity",
    )
class ChatResponse(BaseModel):
    """Body returned by the /chat endpoint."""

    # The first three fields are required; ``sources`` defaults to a new
    # empty list per instance.
    conversation_id: str = Field(
        description="Conversation ID (generated if not provided)",
    )
    response: str = Field(description="The LLM's response")
    mode: Literal["local", "remote"] = Field(description="Which adapter was used")
    sources: list = Field(
        description="Source references (empty for now)",
        default_factory=list,
    )
class HealthResponse(BaseModel):
    """Body returned by the /health endpoint."""

    # Defaults to "ok" when not given explicitly.
    status: str = "ok"
class ErrorResponse(BaseModel):
    """Shape of a standard error body: a single required ``detail`` string."""

    detail: str = Field(description="Error description")