Search stack replicates the Hamilton site pattern with pg_orrery-specific additions:
- FastAPI REST API (chat SSE streaming, semantic search, health check)
- FastMCP server at /mcp with doc search and live SQL query tools
- pgvector + pgai vectorizer for 1024-dim document embeddings
- Hybrid search (semantic cosine + text ILIKE with pg_trgm GIN)
- Dual LLM backend: self-hosted qwen3 via GPU gateway or Anthropic Claude
- Live read-only pg_orrery SQL execution with safety guardrails (SELECT-only validation, read-only transaction, 5s timeout, 100-row cap)
- Convenience MCP tools: planet_position, sky_survey, satellite_pass
- MDX content ingestion from docs/src/content/docs/ (50 pages)
- Docker Compose: pg_orrery+pgvector DB, pgai, vectorizer-worker, API
- Alembic async migrations, Makefile, .env.example
37 lines
1.0 KiB
Python
37 lines
1.0 KiB
Python
from pydantic_settings import BaseSettings
|
|
|
|
|
|
class Settings(BaseSettings):
    """Typed configuration for the search service.

    Each attribute is a settings field with a built-in default. Per
    ``model_config`` below, values are looked up without any
    environment-variable prefix, a local ``.env`` file is consulted, and
    keys that match no declared field are ignored.
    """

    # --- API server -------------------------------------------------------
    # NOTE(review): presumably consumed by the ASGI server launcher; the
    # call site is not visible in this file.
    api_host: str = "0.0.0.0"
    api_port: int = 8000
    api_log_level: str = "info"

    # --- Database ---------------------------------------------------------
    # URL using the "postgresql+asyncpg" driver scheme.
    database_url: str = "postgresql+asyncpg://orrery:orrery@localhost:5432/orrery_search"

    # Plain asyncpg URL (no SQLAlchemy layer) used to run pg_orrery SQL
    # directly.
    orrery_db_url: str = "postgresql://orrery:orrery@localhost:5432/orrery_search"

    # --- GPU gateway (embeddings) -----------------------------------------
    gpu_api_key: str = ""
    gpu_base_url: str = "https://orrery-search.gpu.supported.systems/v1"
    gpu_embed_model: str = "mxbai-embed-large"
    gpu_embed_dimensions: int = 1024

    # --- Search -----------------------------------------------------------
    # NOTE(review): presumably an upper bound on results returned per
    # search; confirm at the call site.
    search_max_results: int = 50

    # --- Chat / LLM -------------------------------------------------------
    # Which LLM backend to use: "gpu" selects the self-hosted model
    # (qwen3), "anthropic" selects Claude.
    llm_provider: str = "gpu"

    gpu_chat_model: str = "qwen3"
    # NOTE(review): timeout unit is presumably seconds; confirm.
    chat_timeout: float = 30.0
    chat_max_tokens: int = 8192

    anthropic_api_key: str = ""
    anthropic_model: str = "claude-sonnet-4-20250514"

    # --- Live SQL tool ----------------------------------------------------
    # NOTE(review): presumably the per-query timeout in seconds for SQL
    # execution; confirm against the query runner.
    run_query_timeout: float = 5.0

    # pydantic-settings configuration: no env-var prefix, read ".env",
    # ignore unknown keys.
    model_config = {
        "env_prefix": "",
        "env_file": ".env",
        "extra": "ignore",
    }
|
|
|
|
|
|
# Module-level Settings instance, constructed once when this module is
# imported.
settings = Settings()
|