feat: add CORS middleware and SSE streaming endpoint

Add CORS support for frontend development, with the allowed origins configurable via the CORS_ORIGINS environment variable. Add a /chat/stream endpoint for Server-Sent Events (SSE) streaming: the OpenAI adapter streams chunks as they arrive, while other adapters fall back to sending the full response as a single chunk.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-16 12:43:21 -06:00
parent f497fde153
commit 6c1cf0655a
5 changed files with 192 additions and 1 deletion
@@ -3,6 +3,7 @@
 import asyncio
 from abc import ABC, abstractmethod
 from functools import lru_cache
+from collections.abc import AsyncIterator
 from typing import Annotated

 import httpx
@@ -46,6 +47,27 @@ class LLMAdapter(ABC):
        """
        pass

+    async def generate_stream(
+        self, conversation_id: str, message: str
+    ) -> AsyncIterator[str]:
+        """Stream a response for the given message.
+
+        Default implementation yields the full response as a single chunk.
+        Subclasses can override this to provide true streaming.
+
+        Args:
+            conversation_id: The conversation identifier
+            message: The user's message
+
+        Yields:
+            Response content chunks
+
+        Raises:
+            LLMError: If generation fails for any reason
+        """
+        response = await self.generate(conversation_id, message)
+        yield response
+

 class LocalAdapter(LLMAdapter):
    """Local stub adapter for development and testing."""
@@ -183,6 +205,46 @@ class OpenAIAdapter(LLMAdapter):
                f"OpenAI API error: {e.message}", status_code=e.status_code or 500
            )

+    async def generate_stream(
+        self, conversation_id: str, message: str
+    ) -> AsyncIterator[str]:
+        """Stream a response using the OpenAI API.
+
+        Args:
+            conversation_id: The conversation identifier (for future use with context)
+            message: The user's message
+
+        Yields:
+            Response content chunks
+
+        Raises:
+            LLMAuthenticationError: If API key is invalid
+            LLMRateLimitError: If rate limit is exceeded
+            LLMConnectionError: If connection fails
+            LLMError: For other API errors
+        """
+        try:
+            stream = await self.client.chat.completions.create(
+                model=self.model,
+                messages=[{"role": "user", "content": message}],
+                stream=True,
+            )
+
+            async for chunk in stream:
+                if chunk.choices and chunk.choices[0].delta.content:
+                    yield chunk.choices[0].delta.content
+
+        except AuthenticationError as e:
+            raise LLMAuthenticationError(f"OpenAI authentication failed: {e.message}")
+        except RateLimitError as e:
+            raise LLMRateLimitError(f"OpenAI rate limit exceeded: {e.message}")
+        except APIConnectionError as e:
+            raise LLMConnectionError(f"Could not connect to OpenAI: {str(e)}")
+        except APIError as e:
+            raise LLMError(
+                f"OpenAI API error: {e.message}", status_code=e.status_code or 500
+            )
+

 class AskSageAdapter(LLMAdapter):
    """AskSage API adapter using the official asksageclient SDK."""