feat: replace per-agent BM25 memory with persistent append-only decision log

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-28 17:01:20 +03:00 · 2026-04-19 22:13:53 -04:00
parent 8536ccacdd
commit 6abc768c1d
15 changed files with 1046 additions and 340 deletions
@@ -16,13 +16,14 @@ class Propagator:
        self.max_recur_limit = max_recur_limit

    def create_initial_state(
-        self, company_name: str, trade_date: str
+        self, company_name: str, trade_date: str, past_context: str = ""
    ) -> Dict[str, Any]:
        """Create the initial state for the agent graph."""
        return {
            "messages": [("human", company_name)],
            "company_of_interest": company_name,
            "trade_date": str(trade_date),
+            "past_context": past_context,
            "investment_debate_state": InvestDebateState(
                {
                    "bull_history": "",
@@ -1,120 +1,53 @@
 # TradingAgents/graph/reflection.py

-from typing import Any, Dict
+from typing import Any


 class Reflector:
-    """Handles reflection on decisions and updating memory."""
+    """Handles reflection on trading decisions."""

    def __init__(self, quick_thinking_llm: Any):
        """Initialize the reflector with an LLM."""
        self.quick_thinking_llm = quick_thinking_llm
-        self.reflection_system_prompt = self._get_reflection_prompt()
+        self.log_reflection_prompt = self._get_log_reflection_prompt()

-    def _get_reflection_prompt(self) -> str:
-        """Get the system prompt for reflection."""
-        return """
-You are an expert financial analyst tasked with reviewing trading decisions/analysis and providing a comprehensive, step-by-step analysis. 
-Your goal is to deliver detailed insights into investment decisions and highlight opportunities for improvement, adhering strictly to the following guidelines:
+    def _get_log_reflection_prompt(self) -> str:
+        """Concise prompt for reflect_on_final_decision (Phase B log entries).

-1. Reasoning:
-   - For each trading decision, determine whether it was correct or incorrect. A correct decision results in an increase in returns, while an incorrect decision does the opposite.
-   - Analyze the contributing factors to each success or mistake. Consider:
-     - Market intelligence.
-     - Technical indicators.
-     - Technical signals.
-     - Price movement analysis.
-     - Overall market data analysis 
-     - News analysis.
-     - Social media and sentiment analysis.
-     - Fundamental data analysis.
-     - Weight the importance of each factor in the decision-making process.
+        Produces 2-4 sentences of plain prose — compact enough to be re-injected
+        into future agent prompts without bloating the context window.
+        """
+        return (
+            "You are a trading analyst reviewing your own past decision now that the outcome is known.\n"
+            "Write exactly 2-4 sentences of plain prose (no bullets, no headers, no markdown).\n\n"
+            "Cover in order:\n"
+            "1. Was the directional call correct? (cite the alpha figure)\n"
+            "2. Which part of the investment thesis held or failed?\n"
+            "3. One concrete lesson to apply to the next similar analysis.\n\n"
+            "Be specific and terse. Your output will be stored verbatim in a decision log "
+            "and re-read by future analysts, so every word must earn its place."
+        )

-2. Improvement:
-   - For any incorrect decisions, propose revisions to maximize returns.
-   - Provide a detailed list of corrective actions or improvements, including specific recommendations (e.g., changing a decision from HOLD to BUY on a particular date).
-
-3. Summary:
-   - Summarize the lessons learned from the successes and mistakes.
-   - Highlight how these lessons can be adapted for future trading scenarios and draw connections between similar situations to apply the knowledge gained.
-
-4. Query:
-   - Extract key insights from the summary into a concise sentence of no more than 1000 tokens.
-   - Ensure the condensed sentence captures the essence of the lessons and reasoning for easy reference.
-
-Adhere strictly to these instructions, and ensure your output is detailed, accurate, and actionable. You will also be given objective descriptions of the market from a price movements, technical indicator, news, and sentiment perspective to provide more context for your analysis.
-"""
-
-    def _extract_current_situation(self, current_state: Dict[str, Any]) -> str:
-        """Extract the current market situation from the state."""
-        curr_market_report = current_state["market_report"]
-        curr_sentiment_report = current_state["sentiment_report"]
-        curr_news_report = current_state["news_report"]
-        curr_fundamentals_report = current_state["fundamentals_report"]
-
-        return f"{curr_market_report}\n\n{curr_sentiment_report}\n\n{curr_news_report}\n\n{curr_fundamentals_report}"
-
-    def _reflect_on_component(
-        self, component_type: str, report: str, situation: str, returns_losses
+    def reflect_on_final_decision(
+        self,
+        final_decision: str,
+        raw_return: float,
+        alpha_return: float,
    ) -> str:
-        """Generate reflection for a component."""
+        """Single reflection call on the final trade decision with outcome context.
+
+        Used by Phase B deferred reflection. The final_trade_decision already
+        synthesises all analyst insights, so no separate market context is needed.
+        """
        messages = [
-            ("system", self.reflection_system_prompt),
+            ("system", self.log_reflection_prompt),
            (
                "human",
-                f"Returns: {returns_losses}\n\nAnalysis/Decision: {report}\n\nObjective Market Reports for Reference: {situation}",
+                (
+                    f"Raw return: {raw_return:+.1%}\n"
+                    f"Alpha vs SPY: {alpha_return:+.1%}\n\n"
+                    f"Final Decision:\n{final_decision}"
+                ),
            ),
        ]
-
-        result = self.quick_thinking_llm.invoke(messages).content
-        return result
-
-    def reflect_bull_researcher(self, current_state, returns_losses, bull_memory):
-        """Reflect on bull researcher's analysis and update memory."""
-        situation = self._extract_current_situation(current_state)
-        bull_debate_history = current_state["investment_debate_state"]["bull_history"]
-
-        result = self._reflect_on_component(
-            "BULL", bull_debate_history, situation, returns_losses
-        )
-        bull_memory.add_situations([(situation, result)])
-
-    def reflect_bear_researcher(self, current_state, returns_losses, bear_memory):
-        """Reflect on bear researcher's analysis and update memory."""
-        situation = self._extract_current_situation(current_state)
-        bear_debate_history = current_state["investment_debate_state"]["bear_history"]
-
-        result = self._reflect_on_component(
-            "BEAR", bear_debate_history, situation, returns_losses
-        )
-        bear_memory.add_situations([(situation, result)])
-
-    def reflect_trader(self, current_state, returns_losses, trader_memory):
-        """Reflect on trader's decision and update memory."""
-        situation = self._extract_current_situation(current_state)
-        trader_decision = current_state["trader_investment_plan"]
-
-        result = self._reflect_on_component(
-            "TRADER", trader_decision, situation, returns_losses
-        )
-        trader_memory.add_situations([(situation, result)])
-
-    def reflect_invest_judge(self, current_state, returns_losses, invest_judge_memory):
-        """Reflect on investment judge's decision and update memory."""
-        situation = self._extract_current_situation(current_state)
-        judge_decision = current_state["investment_debate_state"]["judge_decision"]
-
-        result = self._reflect_on_component(
-            "INVEST JUDGE", judge_decision, situation, returns_losses
-        )
-        invest_judge_memory.add_situations([(situation, result)])
-
-    def reflect_portfolio_manager(self, current_state, returns_losses, portfolio_manager_memory):
-        """Reflect on portfolio manager's decision and update memory."""
-        situation = self._extract_current_situation(current_state)
-        judge_decision = current_state["risk_debate_state"]["judge_decision"]
-
-        result = self._reflect_on_component(
-            "PORTFOLIO MANAGER", judge_decision, situation, returns_losses
-        )
-        portfolio_manager_memory.add_situations([(situation, result)])
+        return self.quick_thinking_llm.invoke(messages).content
@@ -18,22 +18,12 @@ class GraphSetup:
        quick_thinking_llm: Any,
        deep_thinking_llm: Any,
        tool_nodes: Dict[str, ToolNode],
-        bull_memory,
-        bear_memory,
-        trader_memory,
-        invest_judge_memory,
-        portfolio_manager_memory,
        conditional_logic: ConditionalLogic,
    ):
        """Initialize with required components."""
        self.quick_thinking_llm = quick_thinking_llm
        self.deep_thinking_llm = deep_thinking_llm
        self.tool_nodes = tool_nodes
-        self.bull_memory = bull_memory
-        self.bear_memory = bear_memory
-        self.trader_memory = trader_memory
-        self.invest_judge_memory = invest_judge_memory
-        self.portfolio_manager_memory = portfolio_manager_memory
        self.conditional_logic = conditional_logic

    def setup_graph(
@@ -85,24 +75,16 @@ class GraphSetup:
            tool_nodes["fundamentals"] = self.tool_nodes["fundamentals"]

        # Create researcher and manager nodes
-        bull_researcher_node = create_bull_researcher(
-            self.quick_thinking_llm, self.bull_memory
-        )
-        bear_researcher_node = create_bear_researcher(
-            self.quick_thinking_llm, self.bear_memory
-        )
-        research_manager_node = create_research_manager(
-            self.deep_thinking_llm, self.invest_judge_memory
-        )
-        trader_node = create_trader(self.quick_thinking_llm, self.trader_memory)
+        bull_researcher_node = create_bull_researcher(self.quick_thinking_llm)
+        bear_researcher_node = create_bear_researcher(self.quick_thinking_llm)
+        research_manager_node = create_research_manager(self.deep_thinking_llm)
+        trader_node = create_trader(self.quick_thinking_llm)

        # Create risk analysis nodes
        aggressive_analyst = create_aggressive_debator(self.quick_thinking_llm)
        neutral_analyst = create_neutral_debator(self.quick_thinking_llm)
        conservative_analyst = create_conservative_debator(self.quick_thinking_llm)
-        portfolio_manager_node = create_portfolio_manager(
-            self.deep_thinking_llm, self.portfolio_manager_memory
-        )
+        portfolio_manager_node = create_portfolio_manager(self.deep_thinking_llm)

        # Create workflow
        workflow = StateGraph(AgentState)
@@ -1,18 +1,23 @@
 # TradingAgents/graph/trading_graph.py

+import logging
 import os
 from pathlib import Path
 import json
-from datetime import date
+from datetime import datetime, timedelta
 from typing import Dict, Any, Tuple, List, Optional

+import yfinance as yf
+
+logger = logging.getLogger(__name__)
+
 from langgraph.prebuilt import ToolNode

 from tradingagents.llm_clients import create_llm_client

 from tradingagents.agents import *
 from tradingagents.default_config import DEFAULT_CONFIG
-from tradingagents.agents.utils.memory import FinancialSituationMemory
+from tradingagents.agents.utils.memory import TradingMemoryLog
 from tradingagents.agents.utils.agent_states import (
    AgentState,
    InvestDebateState,
@@ -92,12 +97,7 @@ class TradingAgentsGraph:
        self.deep_thinking_llm = deep_client.get_llm()
        self.quick_thinking_llm = quick_client.get_llm()
        
-        # Initialize memories
-        self.bull_memory = FinancialSituationMemory("bull_memory", self.config)
-        self.bear_memory = FinancialSituationMemory("bear_memory", self.config)
-        self.trader_memory = FinancialSituationMemory("trader_memory", self.config)
-        self.invest_judge_memory = FinancialSituationMemory("invest_judge_memory", self.config)
-        self.portfolio_manager_memory = FinancialSituationMemory("portfolio_manager_memory", self.config)
+        self.memory_log = TradingMemoryLog(self.config)

        # Create tool nodes
        self.tool_nodes = self._create_tool_nodes()
@@ -111,11 +111,6 @@ class TradingAgentsGraph:
            self.quick_thinking_llm,
            self.deep_thinking_llm,
            self.tool_nodes,
-            self.bull_memory,
-            self.bear_memory,
-            self.trader_memory,
-            self.invest_judge_memory,
-            self.portfolio_manager_memory,
            self.conditional_logic,
        )

@@ -189,14 +184,90 @@ class TradingAgentsGraph:
            ),
        }

+    def _fetch_returns(
+        self, ticker: str, trade_date: str, holding_days: int = 5
+    ) -> Tuple[Optional[float], Optional[float], Optional[int]]:
+        """Fetch the raw and SPY-relative return for ``ticker`` after ``trade_date``.
+
+        Args:
+            ticker: Symbol passed to ``yf.Ticker``.
+            trade_date: Start date as a ``"%Y-%m-%d"`` string; any other format
+                fails parsing and yields the ``(None, None, None)`` result.
+            holding_days: Number of price rows after the first row to hold for.
+                Rows are counted positionally, i.e. they are whatever sessions
+                the price history returns, not calendar days.
+
+        Returns:
+            ``(raw_return, alpha_return, actual_holding_days)`` where both
+            returns are fractions (0.05 == +5%) computed from ``Close`` prices
+            and ``alpha_return`` is ``raw_return`` minus SPY's return over the
+            same number of rows.  ``actual_holding_days`` can be smaller than
+            ``holding_days`` when fewer rows are available.
+            ``(None, None, None)`` is returned when either series has fewer
+            than two rows or when any exception is raised while fetching or
+            computing (the exception is logged at DEBUG level, not re-raised).
+        """
+        try:
+            start = datetime.strptime(trade_date, "%Y-%m-%d")
+            end = start + timedelta(days=holding_days + 7)  # buffer for weekends/holidays
+            end_str = end.strftime("%Y-%m-%d")
+
+            stock = yf.Ticker(ticker).history(start=trade_date, end=end_str)
+            spy = yf.Ticker("SPY").history(start=trade_date, end=end_str)
+
+            # At least two rows (entry and one later close) are needed per series.
+            if len(stock) < 2 or len(spy) < 2:
+                return None, None, None
+
+            # Clamp to the rows actually available in BOTH series, so a partial
+            # window produces a shorter-horizon return instead of an IndexError.
+            actual_days = min(holding_days, len(stock) - 1, len(spy) - 1)
+            # NOTE(review): both series are indexed by position, not by date —
+            # this assumes ticker and SPY share the same trading sessions; confirm
+            # for tickers listed on exchanges with a different calendar.
+            raw = float(
+                (stock["Close"].iloc[actual_days] - stock["Close"].iloc[0])
+                / stock["Close"].iloc[0]
+            )
+            spy_ret = float(
+                (spy["Close"].iloc[actual_days] - spy["Close"].iloc[0])
+                / spy["Close"].iloc[0]
+            )
+            alpha = raw - spy_ret
+            return raw, alpha, actual_days
+        except Exception as e:
+            # Best-effort: every failure is reported to the caller as "no data".
+            logger.debug("_fetch_returns failed for %s@%s: %s", ticker, trade_date, e)
+            return None, None, None
+
+    def _resolve_pending_entries(self, ticker: str) -> None:
+        """Resolve pending log entries for ``ticker`` at the start of a new run.
+
+        For every pending entry whose ``"ticker"`` equals ``ticker``, fetches
+        returns from the entry's ``"date"``, asks the reflector for a reflection
+        on the entry's ``"decision"`` text (empty string if the key is missing),
+        and collects the result.  All collected updates are handed to
+        ``memory_log.batch_update_with_outcomes`` in a single call.
+        Entries for which ``_fetch_returns`` yields no raw return are skipped
+        and stay pending for a later run.
+
+        Trade-off: only same-ticker entries are resolved per run.  Entries for
+        other tickers accumulate until that ticker is run again.
+
+        NOTE(review): the default ``holding_days`` of ``_fetch_returns`` is used,
+        and that helper clamps to the rows available — so an entry may be
+        resolved over a shorter window than intended if run early; confirm this
+        is acceptable.
+        """
+        pending = [e for e in self.memory_log.get_pending_entries() if e["ticker"] == ticker]
+        if not pending:
+            return
+
+        updates = []
+        for entry in pending:
+            raw, alpha, days = self._fetch_returns(ticker, entry["date"])
+            if raw is None:
+                continue  # price not available yet — try again next run
+            # One reflection call per resolvable entry; runs synchronously here.
+            reflection = self.reflector.reflect_on_final_decision(
+                final_decision=entry.get("decision", ""),
+                raw_return=raw,
+                alpha_return=alpha,
+            )
+            updates.append({
+                "ticker": ticker,
+                "trade_date": entry["date"],
+                "raw_return": raw,
+                "alpha_return": alpha,
+                "holding_days": days,
+                "reflection": reflection,
+            })
+
+        # Skip the write entirely when nothing could be resolved this run.
+        if updates:
+            self.memory_log.batch_update_with_outcomes(updates)
+
    def propagate(self, company_name, trade_date):
        """Run the trading agents graph for a company on a specific date."""

        self.ticker = company_name

-        # Initialize state
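+        # Resolve any pending log entries for this ticker before the pipeline runs.
+        # This records the outcome + reflection for earlier runs.  It runs
+        # synchronously (price fetches plus one reflection call per resolvable
+        # entry), so it delays the pipeline start whenever entries are pending.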
+        self._resolve_pending_entries(company_name)
+
+        # Initialize state — inject memory log context for PM
+        past_context = self.memory_log.get_past_context(company_name)
        init_agent_state = self.propagator.create_initial_state(
-            company_name, trade_date
+            company_name, trade_date, past_context=past_context
        )
        args = self.propagator.get_graph_args()

@@ -221,6 +292,13 @@ class TradingAgentsGraph:
        # Log state
        self._log_state(trade_date, final_state)

+        # Store decision for deferred reflection.
+        self.memory_log.store_decision(
+            ticker=company_name,
+            trade_date=trade_date,
+            final_trade_decision=final_state["final_trade_decision"],
+        )
+
        # Return decision and processed signal
        return final_state, self.process_signal(final_state["final_trade_decision"])

@@ -264,24 +342,6 @@ class TradingAgentsGraph:
        with open(log_path, "w", encoding="utf-8") as f:
            json.dump(self.log_states_dict[str(trade_date)], f, indent=4)

-    def reflect_and_remember(self, returns_losses):
-        """Reflect on decisions and update memory based on returns."""
-        self.reflector.reflect_bull_researcher(
-            self.curr_state, returns_losses, self.bull_memory
-        )
-        self.reflector.reflect_bear_researcher(
-            self.curr_state, returns_losses, self.bear_memory
-        )
-        self.reflector.reflect_trader(
-            self.curr_state, returns_losses, self.trader_memory
-        )
-        self.reflector.reflect_invest_judge(
-            self.curr_state, returns_losses, self.invest_judge_memory
-        )
-        self.reflector.reflect_portfolio_manager(
-            self.curr_state, returns_losses, self.portfolio_manager_memory
-        )
-
    def process_signal(self, full_signal):
        """Process a signal to extract the core decision."""
        return self.signal_processor.process_signal(full_signal)