feat: add CORS middleware and SSE streaming endpoint
Add CORS support for frontend development, with allowed origins configurable via the CORS_ORIGINS environment variable. Add a /chat/stream endpoint for Server-Sent Events (SSE) streaming: the OpenAI adapter streams content chunks as they arrive, while other adapters fall back to emitting the full response as a single chunk. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
import asyncio
|
||||
from abc import ABC, abstractmethod
|
||||
from functools import lru_cache
|
||||
from collections.abc import AsyncIterator
|
||||
from typing import Annotated
|
||||
|
||||
import httpx
|
||||
@@ -46,6 +47,27 @@ class LLMAdapter(ABC):
|
||||
"""
|
||||
pass
|
||||
|
||||
async def generate_stream(
    self, conversation_id: str, message: str
) -> AsyncIterator[str]:
    """Yield the reply to ``message`` as a stream of text chunks.

    This base implementation does not stream incrementally: it awaits
    ``generate`` and emits the complete reply as one chunk. Adapters whose
    backend supports incremental output can override this method.

    Args:
        conversation_id: The conversation identifier
        message: The user's message

    Yields:
        Response content chunks (exactly one in this default implementation)

    Raises:
        LLMError: If generation fails for any reason
    """
    # Delegate to the non-streaming path and hand back its result in one piece.
    yield await self.generate(conversation_id, message)
|
||||
|
||||
|
||||
class LocalAdapter(LLMAdapter):
|
||||
"""Local stub adapter for development and testing."""
|
||||
@@ -183,6 +205,46 @@ class OpenAIAdapter(LLMAdapter):
|
||||
f"OpenAI API error: {e.message}", status_code=e.status_code or 500
|
||||
)
|
||||
|
||||
async def generate_stream(
    self, conversation_id: str, message: str
) -> AsyncIterator[str]:
    """Stream a response using the OpenAI API.

    Sends ``message`` as a single user turn with ``stream=True`` and yields
    each non-empty content delta as it arrives. Because this is an async
    generator, the request is only issued (and errors only surface) once
    the caller starts iterating.

    Args:
        conversation_id: The conversation identifier (for future use with
            context); it is not sent to the API by this method
        message: The user's message

    Yields:
        Response content chunks

    Raises:
        LLMAuthenticationError: If API key is invalid
        LLMRateLimitError: If rate limit is exceeded
        LLMConnectionError: If connection fails
        LLMError: For other API errors
    """
    # The try block spans both the request and the iteration so that SDK
    # errors raised mid-stream are translated the same way as errors raised
    # when the request is first made.
    try:
        stream = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": message}],
            stream=True,
        )

        async for chunk in stream:
            # Skip chunks with no choices or with an empty/None delta.
            if chunk.choices and chunk.choices[0].delta.content:
                yield chunk.choices[0].delta.content

    # Each handler chains with ``from e`` so the SDK exception is kept as the
    # explicit ``__cause__`` of the domain error instead of implicit context.
    except AuthenticationError as e:
        raise LLMAuthenticationError(
            f"OpenAI authentication failed: {e.message}"
        ) from e
    except RateLimitError as e:
        raise LLMRateLimitError(
            f"OpenAI rate limit exceeded: {e.message}"
        ) from e
    except APIConnectionError as e:
        raise LLMConnectionError(
            f"Could not connect to OpenAI: {str(e)}"
        ) from e
    except APIError as e:
        # Fall back to 500 when the SDK error carries no HTTP status.
        raise LLMError(
            f"OpenAI API error: {e.message}", status_code=e.status_code or 500
        ) from e
|
||||
|
||||
|
||||
class AskSageAdapter(LLMAdapter):
|
||||
"""AskSage API adapter using the official asksageclient SDK."""
|
||||
|
||||
Reference in New Issue
Block a user