feat: add multi-provider LLM support with thinking configurations

Models added:
- OpenAI: GPT-5.2, GPT-5.1, GPT-5, GPT-5 Mini, GPT-5 Nano, GPT-4.1
- Anthropic: Claude Opus 4.5/4.1, Claude Sonnet 4.5/4, Claude Haiku 4.5
- Google: Gemini 3 Pro/Flash, Gemini 2.5 Flash/Flash Lite
- xAI: Grok 4, Grok 4.1 Fast (Reasoning/Non-Reasoning)

Configs updated:
- Add unified thinking_level for Gemini (maps to thinking_level for Gemini 3 and to thinking_budget for Gemini 2.5; falls back from "minimal" to "low" on Gemini 3 Pro, which does not support "minimal")
- Add OpenAI reasoning_effort configuration
- Add NormalizedChatGoogleGenerativeAI for consistent response handling

Fixes:
- Fix Bull/Bear researcher display truncation
- Replace ChromaDB with BM25 for memory retrieval
2026-06-28 21:01:16 +03:00 · 2026-01-26 16:48:28 +00:00
parent 79051580b8
commit d4dadb82fc
17 changed files with 639 additions and 958 deletions
@@ -6,6 +6,28 @@ from .base_client import BaseLLMClient
 from .validators import validate_model


+class NormalizedChatGoogleGenerativeAI(ChatGoogleGenerativeAI):
+    """ChatGoogleGenerativeAI whose responses always carry string content.
+
+    Gemini 3 models return content as a list of blocks, e.g.
+    [{'type': 'text', 'text': '...'}]. Both the sync and the async entry
+    points flatten that list into a single string so downstream code can
+    treat ``response.content`` uniformly.
+    """
+
+    def _normalize_content(self, response):
+        """Flatten list-shaped ``response.content`` into one string, in place.
+
+        Text blocks (dicts with ``type == "text"``) contribute their ``text``
+        value and bare strings are kept as-is; every other item is dropped.
+        Non-empty pieces are joined with newlines. Content that is not a list
+        is left untouched.
+
+        Returns the same ``response`` object that was passed in.
+        """
+        content = response.content
+        if not isinstance(content, list):
+            return response
+
+        pieces = []
+        for item in content:
+            if isinstance(item, dict) and item.get("type") == "text":
+                pieces.append(item.get("text", ""))
+            elif isinstance(item, str):
+                pieces.append(item)
+        # Empty/missing text is skipped so it does not produce blank lines.
+        response.content = "\n".join(piece for piece in pieces if piece)
+        return response
+
+    def invoke(self, input, config=None, **kwargs):
+        """Run the model synchronously and normalize the response content."""
+        return self._normalize_content(super().invoke(input, config, **kwargs))
+
+    async def ainvoke(self, input, config=None, **kwargs):
+        """Run the model asynchronously and normalize the response content.
+
+        The parent's ``ainvoke`` does not route through ``invoke``, so
+        without this override async callers would still receive list-shaped
+        content.
+        """
+        response = await super().ainvoke(input, config, **kwargs)
+        return self._normalize_content(response)
+
 class GoogleClient(BaseLLMClient):
    """Client for Google Gemini models."""

@@ -20,14 +42,23 @@ class GoogleClient(BaseLLMClient):
            if key in self.kwargs:
                llm_kwargs[key] = self.kwargs[key]

-        if "thinking_budget" in self.kwargs and self._is_preview_model():
-            llm_kwargs["thinking_budget"] = self.kwargs["thinking_budget"]
+        # Map thinking_level to appropriate API param based on model
+        # Gemini 3 Pro: low, high
+        # Gemini 3 Flash: minimal, low, medium, high
+        # Gemini 2.5: thinking_budget (0=disable, -1=dynamic)
+        thinking_level = self.kwargs.get("thinking_level")
+        if thinking_level:
+            model_lower = self.model.lower()
+            if "gemini-3" in model_lower:
+                # Gemini 3 Pro doesn't support "minimal", use "low" instead
+                if "pro" in model_lower and thinking_level == "minimal":
+                    thinking_level = "low"
+                llm_kwargs["thinking_level"] = thinking_level
+            else:
+                # Gemini 2.5: map to thinking_budget
+                llm_kwargs["thinking_budget"] = -1 if thinking_level == "high" else 0

-        return ChatGoogleGenerativeAI(**llm_kwargs)
-
-    def _is_preview_model(self) -> bool:
-        """Check if this is a preview model that supports thinking budget."""
-        return "preview" in self.model.lower()
+        return NormalizedChatGoogleGenerativeAI(**llm_kwargs)

    def validate_model(self) -> bool:
        """Validate model for Google."""