From 3324b6ac12f3eacf04fcaf765fb647e920af0b02 Mon Sep 17 00:00:00 2001
From: Danny
Date: Tue, 13 Jan 2026 15:17:44 -0600
Subject: [PATCH] feat: add OpenAI integration with dependency injection support

- Add OpenAIAdapter class using official OpenAI SDK with async support
- Create custom exception hierarchy for LLM errors (authentication, rate limit, connection, configuration, response errors)
- Refactor adapter factory to use FastAPI Depends() for dependency injection
- Update configuration to support 'openai' mode with API key and model settings
- Add proper HTTP error mapping for all LLM exception types
- Update Dockerfile with default OPENAI_MODEL environment variable
- Update .env.example with OpenAI configuration options
---
 .claude/agents/git-version-control.md |   3 +-
 .env.example                          |   6 +-
 Dockerfile                            |   3 +
 OPENAI_INTEGRATION_PLAN.md            | 164 ++++++++++++++++++++++++++
 app/config.py                         |   6 +-
 app/llm/__init__.py                   |  41 ++++++-
 app/llm/adapter.py                    | 146 +++++++++++++++++++++--
 app/llm/exceptions.py                 |  59 +++++++++
 app/main.py                           |  25 +++-
 app/schemas.py                        |   2 +-
 requirements.txt                      |   1 +
 11 files changed, 434 insertions(+), 22 deletions(-)
 create mode 100644 OPENAI_INTEGRATION_PLAN.md
 create mode 100644 app/llm/exceptions.py
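
Reviewer note (kept below the `---` marker so it is not part of the commit message): one practical payoff of routing `get_adapter()` through `Depends()` is that tests can swap the adapter without touching settings or the network. The sketch below is illustrative only and is not included in this patch; it assumes pytest and FastAPI's `TestClient` are available in the dev environment, and `DummyAdapter` and the test name are made up for the example.

```python
from fastapi.testclient import TestClient

from app.llm.adapter import LLMAdapter, get_adapter
from app.main import app


class DummyAdapter(LLMAdapter):
    """Hypothetical stand-in adapter that never calls a real LLM."""

    async def generate(self, conversation_id: str, message: str) -> str:
        return "stub response"


def test_chat_uses_injected_adapter():
    # Override the Depends(get_adapter) dependency for the duration of the test.
    app.dependency_overrides[get_adapter] = lambda: DummyAdapter()
    try:
        client = TestClient(app)
        resp = client.post("/chat", json={"message": "hello"})
        assert resp.status_code == 200
        assert resp.json()["response"] == "stub response"
    finally:
        app.dependency_overrides.clear()
```
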
user: "I've finished implementing the payment validation logic" assistant: "I'll use the git-version-control agent to commit these changes with an appropriate message" Since code changes are complete and need to be committed, use the git-version-control agent to handle the version control operations. Context: Multiple files have been modified and need to be committed and pushed. user: "Please commit all the changes I made to the payment providers" assistant: "I'll use the git-version-control agent to review the changes, create a commit with a clear message, and push to the remote" The user explicitly wants to commit changes, so the git-version-control agent should handle staging, committing, and pushing. Context: A feature branch needs to be pushed and a PR created. user: "Can you push this branch and create a PR for the new Stripe integration?" assistant: "I'll use the git-version-control agent to push the branch and create a pull request with a comprehensive description" The user needs both pushing and PR creation, which are core responsibilities of the git-version-control agent. tools: Bash, Glob, Grep, Read, WebFetch, TodoWrite, WebSearch, BashOutput, KillBash model: sonnet +color: green --- You are an expert git version control specialist with deep knowledge of git workflows, commit conventions, and collaborative development practices. Your role is to manage all git operations with precision and clarity. diff --git a/.env.example b/.env.example index 10a6e1c..4430f6b 100644 --- a/.env.example +++ b/.env.example @@ -1,6 +1,10 @@ -# LLM Mode: "local" or "remote" +# LLM Mode: "local", "remote", or "openai" LLM_MODE=local # Remote LLM Configuration (required if LLM_MODE=remote) LLM_REMOTE_URL=https://your-llm-service.com/generate LLM_REMOTE_TOKEN= + +# OpenAI Configuration (required if LLM_MODE=openai) +OPENAI_API_KEY=sk-your-api-key-here +OPENAI_MODEL=gpt-4o-mini diff --git a/Dockerfile b/Dockerfile index 074085e..add4b14 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,6 +9,9 @@ RUN pip install --no-cache-dir -r requirements.txt # Copy application code COPY app/ ./app/ +# Default model (API key must be passed at runtime for security) +ENV OPENAI_MODEL=gpt-4o-mini + # Expose port for Cloud Run EXPOSE 8080 diff --git a/OPENAI_INTEGRATION_PLAN.md b/OPENAI_INTEGRATION_PLAN.md new file mode 100644 index 0000000..142c931 --- /dev/null +++ b/OPENAI_INTEGRATION_PLAN.md @@ -0,0 +1,164 @@ +# OpenAI Integration Plan for Tyndale AI Service + +## Summary +Replace the LocalAdapter stub with an OpenAI API client, using FastAPI's `Depends()` for dependency injection and keeping configuration minimal (API key + model only). + +--- + +## Files to Modify + +| File | Action | +|------|--------| +| `requirements.txt` | Add `openai>=1.0.0` | +| `app/config.py` | Add `openai_api_key`, `openai_model` settings | +| `app/llm/exceptions.py` | **Create**: Custom exception hierarchy | +| `app/llm/adapter.py` | Add `OpenAIAdapter` class, refactor `get_adapter()` for DI | +| `app/llm/__init__.py` | Export new components | +| `app/schemas.py` | Add `"openai"` to mode literal | +| `app/main.py` | Use `Depends()` for adapter injection | +| `.env.example` | Add OpenAI env vars | +| `Dockerfile` | Add `OPENAI_MODEL` default env | + +--- + +## Implementation Steps + +### 1. Add OpenAI dependency +**File**: `requirements.txt` +``` +openai>=1.0.0 +``` + +### 2. 
+
+### 3. Create exception hierarchy
+**File**: `app/llm/exceptions.py` (new)
+- `LLMError` (base)
+- `LLMAuthenticationError` (401)
+- `LLMRateLimitError` (429)
+- `LLMConnectionError` (503)
+- `LLMConfigurationError` (500)
+- `LLMResponseError` (502)
+- `llm_exception_to_http()` helper
+
+### 4. Add OpenAIAdapter class
+**File**: `app/llm/adapter.py`
+
+```python
+class OpenAIAdapter(LLMAdapter):
+    def __init__(self, api_key: str, model: str = "gpt-4o-mini"):
+        self.client = AsyncOpenAI(api_key=api_key)
+        self.model = model
+
+    async def generate(self, conversation_id: str, message: str) -> str:
+        # AsyncOpenAI provides native async support, so no thread offloading is needed
+        response = await self.client.chat.completions.create(
+            model=self.model,
+            messages=[{"role": "user", "content": message}],
+        )
+        return response.choices[0].message.content
+```
+
+### 5. Refactor get_adapter() for dependency injection
+**File**: `app/llm/adapter.py`
+
+```python
+from functools import lru_cache
+from typing import Annotated
+from fastapi import Depends
+
+@lru_cache()
+def get_settings() -> Settings:
+    return settings
+
+def get_adapter(settings: Annotated[Settings, Depends(get_settings)]) -> LLMAdapter:
+    if settings.llm_mode == "openai":
+        return OpenAIAdapter(api_key=settings.openai_api_key, model=settings.openai_model)
+    if settings.llm_mode == "remote":
+        return RemoteAdapter(url=settings.llm_remote_url, token=settings.llm_remote_token)
+    return LocalAdapter()
+
+# Type alias for clean injection
+AdapterDependency = Annotated[LLMAdapter, Depends(get_adapter)]
+```
+
+### 6. Update /chat endpoint
+**File**: `app/main.py`
+
+```python
+from app.llm.adapter import AdapterDependency
+from app.llm.exceptions import LLMError, llm_exception_to_http
+
+@app.post("/chat", response_model=ChatResponse)
+async def chat(request: ChatRequest, adapter: AdapterDependency) -> ChatResponse:
+    # ... validation ...
+    try:
+        response_text = await adapter.generate(conversation_id, request.message)
+    except LLMError as e:
+        raise llm_exception_to_http(e)
+    # ... return response ...
+```
+
+### 7. Update mode type
+**File**: `app/schemas.py`
+- Change `mode: Literal["local", "remote"]` to `mode: Literal["local", "remote", "openai"]`
+
+### 8. Update environment examples
+**File**: `.env.example`
+```
+LLM_MODE=openai
+OPENAI_API_KEY=sk-your-key-here
+OPENAI_MODEL=gpt-4o-mini
+```
+
+### 9. Update Dockerfile
+**File**: `Dockerfile`
+- Add `ENV OPENAI_MODEL=gpt-4o-mini` (API key passed at runtime for security)
+
+---
+
+## Error Handling
+
+| OpenAI Exception | Custom Exception | HTTP Status |
+|------------------|------------------|-------------|
+| `AuthenticationError` | `LLMAuthenticationError` | 401 |
+| `RateLimitError` | `LLMRateLimitError` | 429 |
+| `APIConnectionError` | `LLMConnectionError` | 503 |
+| `APIError` | `LLMError` | varies |
+
+---
+
+## Verification Steps
+
+1. **Install dependencies**:
+   ```bash
+   pip install -r requirements.txt
+   ```
+
+2. **Run locally with OpenAI**:
+   ```bash
+   export LLM_MODE=openai
+   export OPENAI_API_KEY=sk-your-key
+   uvicorn app.main:app --reload
+   ```
+
+3. **Test the /chat endpoint**:
+   ```bash
+   curl -X POST http://localhost:8000/chat \
+     -H "Content-Type: application/json" \
+     -d '{"message": "Hello, what is 2+2?"}'
+   ```
+
+4. **Verify the response body contains the OpenAI-generated text and `mode="openai"`**
+5. **Test Docker build**:
+   ```bash
+   docker build -t tyndale-ai .
+   docker run -p 8080:8080 -e LLM_MODE=openai -e OPENAI_API_KEY=sk-your-key tyndale-ai
+   ```
+
+6. **Test error handling** (optional): Use an invalid API key to verify the 401 response
diff --git a/app/config.py b/app/config.py
index 26a7772..449160f 100644
--- a/app/config.py
+++ b/app/config.py
@@ -6,10 +6,14 @@ from pydantic_settings import BaseSettings
 class Settings(BaseSettings):
     """Application configuration loaded from environment variables."""
 
-    llm_mode: Literal["local", "remote"] = "local"
+    llm_mode: Literal["local", "remote", "openai"] = "local"
     llm_remote_url: str = ""
     llm_remote_token: str = ""
 
+    # OpenAI configuration
+    openai_api_key: str = ""
+    openai_model: str = "gpt-4o-mini"
+
     class Config:
         env_file = ".env"
         env_file_encoding = "utf-8"
diff --git a/app/llm/__init__.py b/app/llm/__init__.py
index 4694f3e..6f07b07 100644
--- a/app/llm/__init__.py
+++ b/app/llm/__init__.py
@@ -1 +1,40 @@
-# LLM adapters
+"""LLM adapters and utilities."""
+
+from app.llm.adapter import (
+    LLMAdapter,
+    LocalAdapter,
+    RemoteAdapter,
+    OpenAIAdapter,
+    get_adapter,
+    get_settings,
+    AdapterDependency,
+)
+from app.llm.exceptions import (
+    LLMError,
+    LLMAuthenticationError,
+    LLMRateLimitError,
+    LLMConnectionError,
+    LLMConfigurationError,
+    LLMResponseError,
+    llm_exception_to_http,
+)
+
+__all__ = [
+    # Adapters
+    "LLMAdapter",
+    "LocalAdapter",
+    "RemoteAdapter",
+    "OpenAIAdapter",
+    # DI support
+    "get_adapter",
+    "get_settings",
+    "AdapterDependency",
+    # Exceptions
+    "LLMError",
+    "LLMAuthenticationError",
+    "LLMRateLimitError",
+    "LLMConnectionError",
+    "LLMConfigurationError",
+    "LLMResponseError",
+    "llm_exception_to_http",
+]
diff --git a/app/llm/adapter.py b/app/llm/adapter.py
index 5ce44e4..dfd538c 100644
--- a/app/llm/adapter.py
+++ b/app/llm/adapter.py
@@ -1,8 +1,28 @@
+"""LLM adapter implementations with FastAPI dependency injection support."""
+
 from abc import ABC, abstractmethod
+from functools import lru_cache
+from typing import Annotated
 
 import httpx
+from fastapi import Depends
+from openai import (
+    AsyncOpenAI,
+    AuthenticationError,
+    RateLimitError,
+    APIConnectionError,
+    APIError,
+)
 
-from app.config import settings
+from app.config import Settings, settings
+from app.llm.exceptions import (
+    LLMError,
+    LLMAuthenticationError,
+    LLMRateLimitError,
+    LLMConnectionError,
+    LLMConfigurationError,
+    LLMResponseError,
+)
 
 
 class LLMAdapter(ABC):
@@ -18,6 +38,9 @@ class LLMAdapter(ABC):
 
         Returns:
             The generated response string
+
+        Raises:
+            LLMError: If generation fails for any reason
         """
         pass
 
@@ -55,7 +78,12 @@ class RemoteAdapter(LLMAdapter):
     async def generate(self, conversation_id: str, message: str) -> str:
         """Call the remote LLM service to generate a response.
 
-        Handles errors gracefully by returning informative error strings.
+        Raises:
+            LLMConnectionError: If connection fails or times out
+            LLMAuthenticationError: If authentication fails
+            LLMRateLimitError: If rate limit is exceeded
+            LLMResponseError: If response is invalid
+            LLMError: For other HTTP errors
         """
         headers = {"Content-Type": "application/json"}
         if self.token:
@@ -70,41 +98,135 @@ async with httpx.AsyncClient(timeout=self.timeout) as client:
                 response = await client.post(self.url, json=payload, headers=headers)
 
+                if response.status_code == 401:
+                    raise LLMAuthenticationError("Remote LLM authentication failed")
+                if response.status_code == 429:
+                    raise LLMRateLimitError("Remote LLM rate limit exceeded")
                 if response.status_code != 200:
-                    return (
-                        f"[ERROR] Remote LLM returned status {response.status_code}: "
-                        f"{response.text[:200] if response.text else 'No response body'}"
+                    raise LLMError(
+                        f"Remote LLM returned status {response.status_code}: "
+                        f"{response.text[:200] if response.text else 'No response body'}",
+                        status_code=response.status_code if 400 <= response.status_code < 600 else 502,
                     )
 
                 try:
                     data = response.json()
                 except ValueError:
-                    return "[ERROR] Remote LLM returned invalid JSON response"
+                    raise LLMResponseError("Remote LLM returned invalid JSON response")
 
                 if "response" not in data:
-                    return "[ERROR] Remote LLM response missing 'response' field"
+                    raise LLMResponseError("Remote LLM response missing 'response' field")
 
                 return data["response"]
 
         except httpx.TimeoutException:
-            return f"[ERROR] Remote LLM request timed out after {self.timeout} seconds"
+            raise LLMConnectionError(
+                f"Remote LLM request timed out after {self.timeout} seconds"
+            )
         except httpx.ConnectError:
-            return f"[ERROR] Could not connect to remote LLM at {self.url}"
+            raise LLMConnectionError(f"Could not connect to remote LLM at {self.url}")
         except httpx.RequestError as e:
-            return f"[ERROR] Remote LLM request failed: {str(e)}"
+            raise LLMConnectionError(f"Remote LLM request failed: {str(e)}")
 
 
-def get_adapter() -> LLMAdapter:
+class OpenAIAdapter(LLMAdapter):
+    """OpenAI API adapter using the official SDK with native async support."""
+
+    def __init__(self, api_key: str, model: str = "gpt-4o-mini"):
+        """Initialize the OpenAI adapter.
+
+        Args:
+            api_key: OpenAI API key
+            model: Model identifier (e.g., "gpt-4o-mini", "gpt-4o")
+        """
+        self.client = AsyncOpenAI(api_key=api_key)
+        self.model = model
+
+    async def generate(self, conversation_id: str, message: str) -> str:
+        """Generate a response using the OpenAI API.
+
+        Args:
+            conversation_id: The conversation identifier (for future use with context)
+            message: The user's message
+
+        Returns:
+            The generated response string
+
+        Raises:
+            LLMAuthenticationError: If API key is invalid
+            LLMRateLimitError: If rate limit is exceeded
+            LLMConnectionError: If connection fails
+            LLMResponseError: If response content is empty
+            LLMError: For other API errors
+        """
+        try:
+            response = await self.client.chat.completions.create(
+                model=self.model,
+                messages=[{"role": "user", "content": message}],
+            )
+
+            content = response.choices[0].message.content
+            if content is None:
+                raise LLMResponseError("OpenAI returned empty response content")
+            return content
+
+        except AuthenticationError as e:
+            raise LLMAuthenticationError(f"OpenAI authentication failed: {e.message}")
+        except RateLimitError as e:
+            raise LLMRateLimitError(f"OpenAI rate limit exceeded: {e.message}")
+        except APIConnectionError as e:
+            raise LLMConnectionError(f"Could not connect to OpenAI: {str(e)}")
+        except APIError as e:
+            raise LLMError(
+                f"OpenAI API error: {e.message}", status_code=e.status_code or 500
+            )
+
+
+# --- Dependency Injection Support ---
+
+
+@lru_cache()
+def get_settings() -> Settings:
+    """Get cached settings instance for dependency injection."""
+    return settings
+
+
+def get_adapter(settings: Annotated[Settings, Depends(get_settings)]) -> LLMAdapter:
     """Factory function to create the appropriate adapter based on configuration.
 
+    This function is designed for use with FastAPI's Depends() system.
+
+    Args:
+        settings: Application settings (injected by FastAPI)
+
     Returns:
         An LLMAdapter instance based on the LLM_MODE setting
+
+    Raises:
+        LLMConfigurationError: If configuration is invalid for the selected mode
     """
+    if settings.llm_mode == "openai":
+        if not settings.openai_api_key:
+            raise LLMConfigurationError(
+                "OPENAI_API_KEY must be set when LLM_MODE is 'openai'"
+            )
+        return OpenAIAdapter(
+            api_key=settings.openai_api_key,
+            model=settings.openai_model,
+        )
+
     if settings.llm_mode == "remote":
         if not settings.llm_remote_url:
-            raise ValueError("LLM_REMOTE_URL must be set when LLM_MODE is 'remote'")
+            raise LLMConfigurationError(
+                "LLM_REMOTE_URL must be set when LLM_MODE is 'remote'"
+            )
         return RemoteAdapter(
             url=settings.llm_remote_url,
             token=settings.llm_remote_token or None,
         )
+
     return LocalAdapter()
+
+
+# Type alias for clean dependency injection in endpoints
+AdapterDependency = Annotated[LLMAdapter, Depends(get_adapter)]
diff --git a/app/llm/exceptions.py b/app/llm/exceptions.py
new file mode 100644
index 0000000..2c37825
--- /dev/null
+++ b/app/llm/exceptions.py
@@ -0,0 +1,59 @@
+"""Custom exceptions for LLM adapters."""
+
+from fastapi import HTTPException
+
+
+class LLMError(Exception):
+    """Base exception for all LLM-related errors."""
+
+    def __init__(self, message: str, status_code: int = 500):
+        self.message = message
+        self.status_code = status_code
+        super().__init__(message)
+
+
+class LLMAuthenticationError(LLMError):
+    """Raised when API authentication fails."""
+
+    def __init__(self, message: str = "LLM authentication failed"):
+        super().__init__(message, status_code=401)
+
+
+class LLMRateLimitError(LLMError):
+    """Raised when rate limit is exceeded."""
+
+    def __init__(self, message: str = "LLM rate limit exceeded"):
+        super().__init__(message, status_code=429)
+
+
+class LLMConnectionError(LLMError):
+    """Raised when connection to LLM service fails."""
+
+    def __init__(self, message: str = "Could not connect to LLM service"):
+        super().__init__(message, status_code=503)
+
+
+class LLMConfigurationError(LLMError):
+    """Raised when LLM configuration is invalid."""
+
+    def __init__(self, message: str = "Invalid LLM configuration"):
+        super().__init__(message, status_code=500)
+
+
+class LLMResponseError(LLMError):
+    """Raised when LLM returns an invalid or unexpected response."""
+
+    def __init__(self, message: str = "Invalid response from LLM"):
+        super().__init__(message, status_code=502)
+
+
+def llm_exception_to_http(exc: LLMError) -> HTTPException:
+    """Convert an LLMError to a FastAPI HTTPException.
+
+    Args:
+        exc: The LLMError to convert
+
+    Returns:
+        An HTTPException with appropriate status code and detail
+    """
+    return HTTPException(status_code=exc.status_code, detail=exc.message)
diff --git a/app/main.py b/app/main.py
index b8ed4ec..3ccdaf8 100644
--- a/app/main.py
+++ b/app/main.py
@@ -4,7 +4,7 @@ import uuid
 from fastapi import FastAPI, HTTPException
 
 from app.config import settings, MAX_MESSAGE_LENGTH
-from app.llm.adapter import get_adapter
+from app.llm import AdapterDependency, LLMError, llm_exception_to_http
 from app.schemas import ChatRequest, ChatResponse, HealthResponse
 
 # Configure logging
@@ -29,7 +29,7 @@ async def health_check() -> HealthResponse:
 
 
 @app.post("/chat", response_model=ChatResponse)
-async def chat(request: ChatRequest) -> ChatResponse:
+async def chat(request: ChatRequest, adapter: AdapterDependency) -> ChatResponse:
     """Process a chat message through the LLM adapter.
 
     - Validates message length
@@ -57,9 +57,19 @@ async def chat(request: ChatRequest) -> ChatResponse:
             },
         )
 
-    # Get adapter and generate response
-    adapter = get_adapter()
-    response_text = await adapter.generate(conversation_id, request.message)
+    # Generate response with exception handling
+    try:
+        response_text = await adapter.generate(conversation_id, request.message)
+    except LLMError as e:
+        logger.error(
+            "LLM generation failed",
+            extra={
+                "conversation_id": conversation_id,
+                "error_type": type(e).__name__,
+                "error_message": e.message,
+            },
+        )
+        raise llm_exception_to_http(e)
 
     # Log response metadata
     logger.info(
@@ -77,3 +87,8 @@ async def chat(request: ChatRequest) -> ChatResponse:
         mode=settings.llm_mode,
         sources=[],
     )
+
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run("app.main:app", host="127.0.0.1", port=8000, reload=True)
diff --git a/app/schemas.py b/app/schemas.py
index 49137ca..8e815a7 100644
--- a/app/schemas.py
+++ b/app/schemas.py
@@ -17,7 +17,7 @@ class ChatResponse(BaseModel):
 
     conversation_id: str = Field(..., description="Conversation ID (generated if not provided)")
     response: str = Field(..., description="The LLM's response")
-    mode: Literal["local", "remote"] = Field(..., description="Which adapter was used")
+    mode: Literal["local", "remote", "openai"] = Field(..., description="Which adapter was used")
     sources: list = Field(default_factory=list, description="Source references (empty for now)")
 
 
diff --git a/requirements.txt b/requirements.txt
index edb54cd..4724e03 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@ uvicorn[standard]>=0.27.0
 pydantic>=2.5.0
 pydantic-settings>=2.1.0
 httpx>=0.26.0
+openai>=1.0.0