feat: add multi-provider LLM support with thinking configurations

Models added:
- OpenAI: GPT-5.2, GPT-5.1, GPT-5, GPT-5 Mini, GPT-5 Nano, GPT-4.1
- Anthropic: Claude Opus 4.5/4.1, Claude Sonnet 4.5/4, Claude Haiku 4.5
- Google: Gemini 3 Pro/Flash, Gemini 2.5 Flash/Flash Lite
- xAI: Grok 4, Grok 4.1 Fast (Reasoning/Non-Reasoning)

Configs updated:
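- Add a unified thinking_level option for Gemini: passed through as thinking_level
  on Gemini 3 (with "minimal" downgraded to "low" on Gemini 3 Pro, which does not
  support it) and mapped to thinking_budget on Gemini 2.5 ("high" -> -1 dynamic,
  any other value -> 0 disabled)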
- Add OpenAI reasoning_effort configuration
- Add NormalizedChatGoogleGenerativeAI for consistent response handling

Fixes:
- Fix Bull/Bear researcher display truncation
- Replace ChromaDB with BM25 for memory retrieval
This commit is contained in:
Yijia Xiao
2026-01-26 16:48:28 +00:00
parent 79051580b8
commit d4dadb82fc
17 changed files with 639 additions and 958 deletions
@@ -14,18 +14,12 @@ class AnthropicClient(BaseLLMClient):
def get_llm(self) -> Any:
"""Return configured ChatAnthropic instance."""
# NOTE(review): this span is a rendered diff hunk with indentation and +/-
# markers stripped, so pre-change and post-change lines appear back to back.
# First assignment: always sets max_tokens, falling back to 4096.
llm_kwargs = {
"model": self.model,
"max_tokens": self.kwargs.get("max_tokens", 4096),
}
# Second assignment replaces the dict above with one holding only the model,
# so no max_tokens default is applied here.
llm_kwargs = {"model": self.model}
# Two key tuples follow; they differ only in whether "max_tokens" is copied.
# Presumably the shorter tuple is the removed line - TODO confirm against the repo.
for key in ("timeout", "max_retries", "api_key"):
for key in ("timeout", "max_retries", "api_key", "max_tokens"):
# Forward only the options the caller actually supplied.
if key in self.kwargs:
llm_kwargs[key] = self.kwargs[key]
# A caller-supplied "thinking_config" is forwarded under the "thinking" key.
if "thinking_config" in self.kwargs:
llm_kwargs["thinking"] = self.kwargs["thinking_config"]
return ChatAnthropic(**llm_kwargs)
def validate_model(self) -> bool:
+38 -7
View File
@@ -6,6 +6,28 @@ from .base_client import BaseLLMClient
from .validators import validate_model
class NormalizedChatGoogleGenerativeAI(ChatGoogleGenerativeAI):
    """ChatGoogleGenerativeAI whose responses always carry string content.

    Gemini 3 models return content as a list of parts, e.g.
    ``[{'type': 'text', 'text': '...'}]``. This subclass flattens such a list
    into a single string so downstream code can handle every response the
    same way. Both the synchronous (``invoke``) and asynchronous
    (``ainvoke``) entry points are normalized.
    """

    def _normalize_content(self, response):
        """Flatten list-style ``response.content`` into one string, in place.

        Parts that are dicts with ``type == "text"`` contribute their
        ``"text"`` value, bare strings are kept as they are, and every other
        part is dropped. The non-empty pieces are joined with newlines.
        Content that is not a list is left untouched.

        Returns:
            The same ``response`` object that was passed in.
        """
        content = response.content
        if not isinstance(content, list):
            return response

        pieces = []
        for item in content:
            if isinstance(item, str):
                pieces.append(item)
            elif isinstance(item, dict) and item.get("type") == "text":
                pieces.append(item.get("text", ""))

        # Empty pieces are skipped so they do not produce blank lines.
        response.content = "\n".join(piece for piece in pieces if piece)
        return response

    def invoke(self, input, config=None, **kwargs):
        """Run the parent ``invoke`` and normalize the returned content."""
        return self._normalize_content(super().invoke(input, config, **kwargs))

    async def ainvoke(self, input, config=None, **kwargs):
        """Run the parent ``ainvoke`` and normalize the returned content.

        Without this override, async callers would still receive list-style
        content from Gemini 3 models.
        """
        response = await super().ainvoke(input, config, **kwargs)
        return self._normalize_content(response)
class GoogleClient(BaseLLMClient):
"""Client for Google Gemini models."""
@@ -20,14 +42,23 @@ class GoogleClient(BaseLLMClient):
if key in self.kwargs:
llm_kwargs[key] = self.kwargs[key]
if "thinking_budget" in self.kwargs and self._is_preview_model():
llm_kwargs["thinking_budget"] = self.kwargs["thinking_budget"]
# Map thinking_level to appropriate API param based on model
# Gemini 3 Pro: low, high
# Gemini 3 Flash: minimal, low, medium, high
# Gemini 2.5: thinking_budget (0=disable, -1=dynamic)
thinking_level = self.kwargs.get("thinking_level")
if thinking_level:
model_lower = self.model.lower()
if "gemini-3" in model_lower:
# Gemini 3 Pro doesn't support "minimal", use "low" instead
if "pro" in model_lower and thinking_level == "minimal":
thinking_level = "low"
llm_kwargs["thinking_level"] = thinking_level
else:
# Gemini 2.5: map to thinking_budget
llm_kwargs["thinking_budget"] = -1 if thinking_level == "high" else 0
return ChatGoogleGenerativeAI(**llm_kwargs)
def _is_preview_model(self) -> bool:
"""Check if this is a preview model that supports thinking budget."""
return "preview" in self.model.lower()
return NormalizedChatGoogleGenerativeAI(**llm_kwargs)
def validate_model(self) -> bool:
"""Validate model for Google."""
+54 -41
View File
@@ -1,58 +1,75 @@
from typing import Dict, List
"""Model name validators for each provider.
VALID_MODELS: Dict[str, List[str]] = {
Only validates model names - does NOT enforce limits.
Let LLM providers use their own defaults for unspecified params.
"""
# Provider name -> list of accepted model identifiers.
# NOTE(review): this literal comes from a rendered diff with +/- markers
# stripped, so entries from the pre-change and post-change lists are
# interleaved. Several names therefore appear twice within one provider's
# list; duplicates are harmless for membership tests but should be confirmed
# against the real file.
VALID_MODELS = {
"openai": [
# GPT-5 series (2025)
"gpt-5.2",
"gpt-5.1",
"gpt-5",
"gpt-5-mini",
"gpt-5-nano",
# GPT-4.1 series (2025)
"gpt-4.1",
"gpt-4.1-mini",
"gpt-4.1-nano",
# o-series reasoning models
"o4-mini",
"o3",
"o3-mini",
"o1",
"o1-preview",
# GPT-4o series (legacy but still supported)
"gpt-4o",
"gpt-4o-mini",
"gpt-4-turbo",
"gpt-4",
"gpt-3.5-turbo",
# NOTE(review): "o1", "o1-preview", "o3-mini", "gpt-5-nano", "gpt-5-mini"
# and "gpt-5" below repeat entries already listed above.
"o1",
"o1-mini",
"o1-preview",
"o3-mini",
"gpt-5-nano",
"gpt-5-mini",
"gpt-5",
],
"anthropic": [
"claude-3-5-sonnet-20241022",
"claude-3-5-haiku-20241022",
"claude-3-opus-20240229",
"claude-3-sonnet-20240229",
"claude-3-haiku-20240307",
# Claude 4.5 series (2025)
"claude-opus-4-5",
"claude-sonnet-4-5",
"claude-haiku-4-5",
# Claude 4.x series
"claude-opus-4-1-20250805",
"claude-sonnet-4-20250514",
"claude-haiku-4-5-20251001",
"claude-opus-4-5-20251101",
# Claude 3.7 series
"claude-3-7-sonnet-20250219",
# Claude 3.5 series (legacy)
# NOTE(review): both 3.5 entries below also appear at the top of this list.
"claude-3-5-haiku-20241022",
"claude-3-5-sonnet-20241022",
],
"google": [
"gemini-1.5-pro",
"gemini-1.5-flash",
"gemini-2.0-flash",
"gemini-2.0-flash-lite",
"gemini-2.5-pro-preview-05-06",
"gemini-2.5-flash-preview-05-20",
# Gemini 3 series (preview)
"gemini-3-pro-preview",
"gemini-3-flash-preview",
# Gemini 2.5 series
"gemini-2.5-pro",
"gemini-2.5-flash",
"gemini-2.5-flash-lite",
# Gemini 2.0 series
# NOTE(review): both 2.0 entries below also appear earlier in this list.
"gemini-2.0-flash",
"gemini-2.0-flash-lite",
],
"xai": [
"grok-beta",
"grok-2",
"grok-2-mini",
"grok-3",
"grok-3-mini",
# Grok 4.1 series
"grok-4-1-fast",
"grok-4-1-fast-reasoning",
"grok-4-1-fast-non-reasoning",
# Grok 4 series
"grok-4",
"grok-4-0709",
"grok-4-fast-reasoning",
"grok-4-fast-non-reasoning",
],
# Providers with an empty list have no enumerated model names here.
"ollama": [],
"openrouter": [],
"vllm": [],
}
def validate_model(provider: str, model: str) -> bool:
"""Validate that a model is supported by the provider.
"""Check if model name is valid for the given provider.
For ollama, openrouter, and vllm, any model is accepted.
For other providers, checks against VALID_MODELS.
For ollama, openrouter, vllm - any model is accepted.
"""
provider_lower = provider.lower()
@@ -60,10 +77,6 @@ def validate_model(provider: str, model: str) -> bool:
return True
if provider_lower not in VALID_MODELS:
return False
valid = VALID_MODELS[provider_lower]
if not valid:
return True
return model in valid
return model in VALID_MODELS[provider_lower]