feat: add CORS middleware and SSE streaming endpoint

Add CORS support for frontend development, with allowed origins configurable
via the CORS_ORIGINS environment variable. Add a /chat/stream endpoint for
Server-Sent Events streaming, with true streaming support for the OpenAI
adapter and a single-chunk fallback for other adapters.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Danny
2026-01-16 12:43:21 -06:00
parent f497fde153
commit 6c1cf0655a
5 changed files with 192 additions and 1 deletions
+62
View File
@@ -3,6 +3,7 @@
import asyncio
from abc import ABC, abstractmethod
from functools import lru_cache
from collections.abc import AsyncIterator
from typing import Annotated
import httpx
@@ -46,6 +47,27 @@ class LLMAdapter(ABC):
"""
pass
async def generate_stream(
    self, conversation_id: str, message: str
) -> AsyncIterator[str]:
    """Yield the reply to ``message`` as a stream of text chunks.

    This base implementation does not stream incrementally: it awaits
    ``generate`` and emits the complete reply as one chunk. Adapters whose
    backend supports incremental output can override this method.

    Args:
        conversation_id: The conversation identifier
        message: The user's message

    Yields:
        Response content chunks (here exactly one: the full response)

    Raises:
        LLMError: If generation fails for any reason
    """
    # Nothing is emitted until generate() has completed.
    yield await self.generate(conversation_id, message)
class LocalAdapter(LLMAdapter):
"""Local stub adapter for development and testing."""
@@ -183,6 +205,46 @@ class OpenAIAdapter(LLMAdapter):
f"OpenAI API error: {e.message}", status_code=e.status_code or 500
)
async def generate_stream(
    self, conversation_id: str, message: str
) -> AsyncIterator[str]:
    """Stream a response using the OpenAI API.

    Sends ``message`` as a single user turn with ``stream=True`` and yields
    each non-empty text delta as it arrives. Because iteration happens
    inside the ``try`` block, an API failure can surface after some chunks
    have already been yielded; consumers must be prepared for an exception
    mid-stream.

    Args:
        conversation_id: The conversation identifier (for future use with
            context); not currently sent to the API
        message: The user's message

    Yields:
        Response content chunks

    Raises:
        LLMAuthenticationError: If API key is invalid
        LLMRateLimitError: If rate limit is exceeded
        LLMConnectionError: If connection fails
        LLMError: For other API errors
    """
    try:
        stream = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": message}],
            stream=True,
        )
        async for chunk in stream:
            # Skip chunks that carry no choices or no text content
            # (empty or None delta content).
            if chunk.choices and chunk.choices[0].delta.content:
                yield chunk.choices[0].delta.content
    # Translate SDK exceptions into the adapter's own error types, chaining
    # the original as __cause__ so the underlying failure stays in tracebacks.
    except AuthenticationError as e:
        raise LLMAuthenticationError(
            f"OpenAI authentication failed: {e.message}"
        ) from e
    except RateLimitError as e:
        raise LLMRateLimitError(
            f"OpenAI rate limit exceeded: {e.message}"
        ) from e
    except APIConnectionError as e:
        raise LLMConnectionError(
            f"Could not connect to OpenAI: {str(e)}"
        ) from e
    except APIError as e:
        raise LLMError(
            f"OpenAI API error: {e.message}", status_code=e.status_code or 500
        ) from e
class AskSageAdapter(LLMAdapter):
"""AskSage API adapter using the official asksageclient SDK."""