feat: add multi-provider LLM support with thinking configurations

Models added:
- OpenAI: GPT-5.2, GPT-5.1, GPT-5, GPT-5 Mini, GPT-5 Nano, GPT-4.1
- Anthropic: Claude Opus 4.5/4.1, Claude Sonnet 4.5/4, Claude Haiku 4.5
- Google: Gemini 3 Pro/Flash, Gemini 2.5 Flash/Flash Lite
- xAI: Grok 4, Grok 4.1 Fast (Reasoning/Non-Reasoning)

Configs updated:
- Add unified thinking_level for Gemini (maps to thinking_level for Gemini 3,
  thinking_budget for Gemini 2.5; handles Pro's lack of "minimal" support)
- Add OpenAI reasoning_effort configuration
- Add NormalizedChatGoogleGenerativeAI for consistent response handling

Fixes:
- Fix Bull/Bear researcher display truncation
- Replace ChromaDB with BM25 for memory retrieval
This commit is contained in:
Yijia Xiao
2026-01-26 16:48:28 +00:00
parent 79051580b8
commit d4dadb82fc
17 changed files with 639 additions and 958 deletions
+38 -7
View File
@@ -6,6 +6,28 @@ from .base_client import BaseLLMClient
from .validators import validate_model
class NormalizedChatGoogleGenerativeAI(ChatGoogleGenerativeAI):
"""ChatGoogleGenerativeAI with normalized content output.
Gemini 3 models return content as list: [{'type': 'text', 'text': '...'}]
This normalizes to string for consistent downstream handling.
"""
def _normalize_content(self, response):
content = response.content
if isinstance(content, list):
texts = [
item.get("text", "") if isinstance(item, dict) and item.get("type") == "text"
else item if isinstance(item, str) else ""
for item in content
]
response.content = "\n".join(t for t in texts if t)
return response
def invoke(self, input, config=None, **kwargs):
return self._normalize_content(super().invoke(input, config, **kwargs))
class GoogleClient(BaseLLMClient):
"""Client for Google Gemini models."""
@@ -20,14 +42,23 @@ class GoogleClient(BaseLLMClient):
if key in self.kwargs:
llm_kwargs[key] = self.kwargs[key]
if "thinking_budget" in self.kwargs and self._is_preview_model():
llm_kwargs["thinking_budget"] = self.kwargs["thinking_budget"]
# Map thinking_level to appropriate API param based on model
# Gemini 3 Pro: low, high
# Gemini 3 Flash: minimal, low, medium, high
# Gemini 2.5: thinking_budget (0=disable, -1=dynamic)
thinking_level = self.kwargs.get("thinking_level")
if thinking_level:
model_lower = self.model.lower()
if "gemini-3" in model_lower:
# Gemini 3 Pro doesn't support "minimal", use "low" instead
if "pro" in model_lower and thinking_level == "minimal":
thinking_level = "low"
llm_kwargs["thinking_level"] = thinking_level
else:
# Gemini 2.5: map to thinking_budget
llm_kwargs["thinking_budget"] = -1 if thinking_level == "high" else 0
return ChatGoogleGenerativeAI(**llm_kwargs)
def _is_preview_model(self) -> bool:
"""Check if this is a preview model that supports thinking budget."""
return "preview" in self.model.lower()
return NormalizedChatGoogleGenerativeAI(**llm_kwargs)
def validate_model(self) -> bool:
"""Validate model for Google."""