feat: replace per-agent BM25 memory with persistent append-only decision log

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Zhigong Liu
2026-04-19 22:13:53 -04:00
parent 8536ccacdd
commit 6abc768c1d
15 changed files with 1046 additions and 340 deletions
+2 -1
View File
@@ -16,13 +16,14 @@ class Propagator:
self.max_recur_limit = max_recur_limit
def create_initial_state(
self, company_name: str, trade_date: str
self, company_name: str, trade_date: str, past_context: str = ""
) -> Dict[str, Any]:
"""Create the initial state for the agent graph."""
return {
"messages": [("human", company_name)],
"company_of_interest": company_name,
"trade_date": str(trade_date),
"past_context": past_context,
"investment_debate_state": InvestDebateState(
{
"bull_history": "",
+35 -102
View File
@@ -1,120 +1,53 @@
# TradingAgents/graph/reflection.py
from typing import Any, Dict
from typing import Any
class Reflector:
"""Handles reflection on decisions and updating memory."""
"""Handles reflection on trading decisions."""
def __init__(self, quick_thinking_llm: Any):
"""Initialize the reflector with an LLM."""
self.quick_thinking_llm = quick_thinking_llm
self.reflection_system_prompt = self._get_reflection_prompt()
self.log_reflection_prompt = self._get_log_reflection_prompt()
def _get_reflection_prompt(self) -> str:
"""Get the system prompt for reflection."""
return """
You are an expert financial analyst tasked with reviewing trading decisions/analysis and providing a comprehensive, step-by-step analysis.
Your goal is to deliver detailed insights into investment decisions and highlight opportunities for improvement, adhering strictly to the following guidelines:
def _get_log_reflection_prompt(self) -> str:
"""Concise prompt for reflect_on_final_decision (Phase B log entries).
1. Reasoning:
- For each trading decision, determine whether it was correct or incorrect. A correct decision results in an increase in returns, while an incorrect decision does the opposite.
- Analyze the contributing factors to each success or mistake. Consider:
- Market intelligence.
- Technical indicators.
- Technical signals.
- Price movement analysis.
- Overall market data analysis
- News analysis.
- Social media and sentiment analysis.
- Fundamental data analysis.
- Weight the importance of each factor in the decision-making process.
Produces 2-4 sentences of plain prose — compact enough to be re-injected
into future agent prompts without bloating the context window.
"""
return (
"You are a trading analyst reviewing your own past decision now that the outcome is known.\n"
"Write exactly 2-4 sentences of plain prose (no bullets, no headers, no markdown).\n\n"
"Cover in order:\n"
"1. Was the directional call correct? (cite the alpha figure)\n"
"2. Which part of the investment thesis held or failed?\n"
"3. One concrete lesson to apply to the next similar analysis.\n\n"
"Be specific and terse. Your output will be stored verbatim in a decision log "
"and re-read by future analysts, so every word must earn its place."
)
2. Improvement:
- For any incorrect decisions, propose revisions to maximize returns.
- Provide a detailed list of corrective actions or improvements, including specific recommendations (e.g., changing a decision from HOLD to BUY on a particular date).
3. Summary:
- Summarize the lessons learned from the successes and mistakes.
- Highlight how these lessons can be adapted for future trading scenarios and draw connections between similar situations to apply the knowledge gained.
4. Query:
- Extract key insights from the summary into a concise sentence of no more than 1000 tokens.
- Ensure the condensed sentence captures the essence of the lessons and reasoning for easy reference.
Adhere strictly to these instructions, and ensure your output is detailed, accurate, and actionable. You will also be given objective descriptions of the market from a price movements, technical indicator, news, and sentiment perspective to provide more context for your analysis.
"""
def _extract_current_situation(self, current_state: Dict[str, Any]) -> str:
"""Extract the current market situation from the state."""
curr_market_report = current_state["market_report"]
curr_sentiment_report = current_state["sentiment_report"]
curr_news_report = current_state["news_report"]
curr_fundamentals_report = current_state["fundamentals_report"]
return f"{curr_market_report}\n\n{curr_sentiment_report}\n\n{curr_news_report}\n\n{curr_fundamentals_report}"
def _reflect_on_component(
self, component_type: str, report: str, situation: str, returns_losses
def reflect_on_final_decision(
self,
final_decision: str,
raw_return: float,
alpha_return: float,
) -> str:
"""Generate reflection for a component."""
"""Single reflection call on the final trade decision with outcome context.
Used by Phase B deferred reflection. The final_trade_decision already
synthesises all analyst insights, so no separate market context is needed.
"""
messages = [
("system", self.reflection_system_prompt),
("system", self.log_reflection_prompt),
(
"human",
f"Returns: {returns_losses}\n\nAnalysis/Decision: {report}\n\nObjective Market Reports for Reference: {situation}",
(
f"Raw return: {raw_return:+.1%}\n"
f"Alpha vs SPY: {alpha_return:+.1%}\n\n"
f"Final Decision:\n{final_decision}"
),
),
]
result = self.quick_thinking_llm.invoke(messages).content
return result
def reflect_bull_researcher(self, current_state, returns_losses, bull_memory):
"""Reflect on bull researcher's analysis and update memory."""
situation = self._extract_current_situation(current_state)
bull_debate_history = current_state["investment_debate_state"]["bull_history"]
result = self._reflect_on_component(
"BULL", bull_debate_history, situation, returns_losses
)
bull_memory.add_situations([(situation, result)])
def reflect_bear_researcher(self, current_state, returns_losses, bear_memory):
"""Reflect on bear researcher's analysis and update memory."""
situation = self._extract_current_situation(current_state)
bear_debate_history = current_state["investment_debate_state"]["bear_history"]
result = self._reflect_on_component(
"BEAR", bear_debate_history, situation, returns_losses
)
bear_memory.add_situations([(situation, result)])
def reflect_trader(self, current_state, returns_losses, trader_memory):
"""Reflect on trader's decision and update memory."""
situation = self._extract_current_situation(current_state)
trader_decision = current_state["trader_investment_plan"]
result = self._reflect_on_component(
"TRADER", trader_decision, situation, returns_losses
)
trader_memory.add_situations([(situation, result)])
def reflect_invest_judge(self, current_state, returns_losses, invest_judge_memory):
"""Reflect on investment judge's decision and update memory."""
situation = self._extract_current_situation(current_state)
judge_decision = current_state["investment_debate_state"]["judge_decision"]
result = self._reflect_on_component(
"INVEST JUDGE", judge_decision, situation, returns_losses
)
invest_judge_memory.add_situations([(situation, result)])
def reflect_portfolio_manager(self, current_state, returns_losses, portfolio_manager_memory):
"""Reflect on portfolio manager's decision and update memory."""
situation = self._extract_current_situation(current_state)
judge_decision = current_state["risk_debate_state"]["judge_decision"]
result = self._reflect_on_component(
"PORTFOLIO MANAGER", judge_decision, situation, returns_losses
)
portfolio_manager_memory.add_situations([(situation, result)])
return self.quick_thinking_llm.invoke(messages).content
+5 -23
View File
@@ -18,22 +18,12 @@ class GraphSetup:
quick_thinking_llm: Any,
deep_thinking_llm: Any,
tool_nodes: Dict[str, ToolNode],
bull_memory,
bear_memory,
trader_memory,
invest_judge_memory,
portfolio_manager_memory,
conditional_logic: ConditionalLogic,
):
"""Initialize with required components."""
self.quick_thinking_llm = quick_thinking_llm
self.deep_thinking_llm = deep_thinking_llm
self.tool_nodes = tool_nodes
self.bull_memory = bull_memory
self.bear_memory = bear_memory
self.trader_memory = trader_memory
self.invest_judge_memory = invest_judge_memory
self.portfolio_manager_memory = portfolio_manager_memory
self.conditional_logic = conditional_logic
def setup_graph(
@@ -85,24 +75,16 @@ class GraphSetup:
tool_nodes["fundamentals"] = self.tool_nodes["fundamentals"]
# Create researcher and manager nodes
bull_researcher_node = create_bull_researcher(
self.quick_thinking_llm, self.bull_memory
)
bear_researcher_node = create_bear_researcher(
self.quick_thinking_llm, self.bear_memory
)
research_manager_node = create_research_manager(
self.deep_thinking_llm, self.invest_judge_memory
)
trader_node = create_trader(self.quick_thinking_llm, self.trader_memory)
bull_researcher_node = create_bull_researcher(self.quick_thinking_llm)
bear_researcher_node = create_bear_researcher(self.quick_thinking_llm)
research_manager_node = create_research_manager(self.deep_thinking_llm)
trader_node = create_trader(self.quick_thinking_llm)
# Create risk analysis nodes
aggressive_analyst = create_aggressive_debator(self.quick_thinking_llm)
neutral_analyst = create_neutral_debator(self.quick_thinking_llm)
conservative_analyst = create_conservative_debator(self.quick_thinking_llm)
portfolio_manager_node = create_portfolio_manager(
self.deep_thinking_llm, self.portfolio_manager_memory
)
portfolio_manager_node = create_portfolio_manager(self.deep_thinking_llm)
# Create workflow
workflow = StateGraph(AgentState)
+93 -33
View File
@@ -1,18 +1,23 @@
# TradingAgents/graph/trading_graph.py
import logging
import os
from pathlib import Path
import json
from datetime import date
from datetime import datetime, timedelta
from typing import Dict, Any, Tuple, List, Optional
import yfinance as yf
logger = logging.getLogger(__name__)
from langgraph.prebuilt import ToolNode
from tradingagents.llm_clients import create_llm_client
from tradingagents.agents import *
from tradingagents.default_config import DEFAULT_CONFIG
from tradingagents.agents.utils.memory import FinancialSituationMemory
from tradingagents.agents.utils.memory import TradingMemoryLog
from tradingagents.agents.utils.agent_states import (
AgentState,
InvestDebateState,
@@ -92,12 +97,7 @@ class TradingAgentsGraph:
self.deep_thinking_llm = deep_client.get_llm()
self.quick_thinking_llm = quick_client.get_llm()
# Initialize memories
self.bull_memory = FinancialSituationMemory("bull_memory", self.config)
self.bear_memory = FinancialSituationMemory("bear_memory", self.config)
self.trader_memory = FinancialSituationMemory("trader_memory", self.config)
self.invest_judge_memory = FinancialSituationMemory("invest_judge_memory", self.config)
self.portfolio_manager_memory = FinancialSituationMemory("portfolio_manager_memory", self.config)
self.memory_log = TradingMemoryLog(self.config)
# Create tool nodes
self.tool_nodes = self._create_tool_nodes()
@@ -111,11 +111,6 @@ class TradingAgentsGraph:
self.quick_thinking_llm,
self.deep_thinking_llm,
self.tool_nodes,
self.bull_memory,
self.bear_memory,
self.trader_memory,
self.invest_judge_memory,
self.portfolio_manager_memory,
self.conditional_logic,
)
@@ -189,14 +184,90 @@ class TradingAgentsGraph:
),
}
def _fetch_returns(
    self, ticker: str, trade_date: str, holding_days: int = 5
) -> Tuple[Optional[float], Optional[float], Optional[int]]:
    """Fetch the raw and SPY-relative return of ``ticker`` after ``trade_date``.

    Daily price history for ``ticker`` and for SPY is requested from yfinance
    starting at ``trade_date``. Each return is measured from the first
    returned close to the close ``holding_days`` rows later. When fewer rows
    are available, the last row index present in both series is used instead
    and reported as ``actual_holding_days``.

    Args:
        ticker: Symbol passed to ``yf.Ticker``.
        trade_date: Start date in ``YYYY-MM-DD`` format.
        holding_days: Number of price rows (trading sessions) to hold for.
            Values below 1 are rejected.

    Returns:
        ``(raw_return, alpha_return, actual_holding_days)``, where
        ``alpha_return`` is ``raw_return`` minus SPY's return over the same
        number of rows. ``(None, None, None)`` is returned when
        ``holding_days`` is below 1, when either series has fewer than two
        rows, when a computed return is not finite (NaN close or zero entry
        price), or when any exception is raised while parsing or fetching.
    """
    import math  # function-scope import; the module header does not import math

    try:
        # A zero-length hold has no return, and a negative value would index
        # from the end of the series via iloc[-k].
        if holding_days < 1:
            return None, None, None

        start = datetime.strptime(trade_date, "%Y-%m-%d")
        # Convert trading sessions to calendar days (about 5 sessions per 7
        # days, rounded up), then add a week of slack for weekends/holidays.
        calendar_days = (holding_days * 7 + 4) // 5 + 7
        end_str = (start + timedelta(days=calendar_days)).strftime("%Y-%m-%d")

        stock = yf.Ticker(ticker).history(start=trade_date, end=end_str)
        spy = yf.Ticker("SPY").history(start=trade_date, end=end_str)

        if len(stock) < 2 or len(spy) < 2:
            return None, None, None

        # NOTE(review): rows are matched by position, not by date. This
        # presumes both histories share one trading calendar - confirm for
        # tickers listed outside the US.
        actual_days = min(holding_days, len(stock) - 1, len(spy) - 1)
        stock_close = stock["Close"]
        spy_close = spy["Close"]

        raw = float(
            (stock_close.iloc[actual_days] - stock_close.iloc[0])
            / stock_close.iloc[0]
        )
        spy_ret = float(
            (spy_close.iloc[actual_days] - spy_close.iloc[0])
            / spy_close.iloc[0]
        )
        alpha = raw - spy_ret

        # NaN/inf would slip past the caller's ``raw is None`` check and be
        # stored as a real outcome, so report them as unavailable instead.
        if not (math.isfinite(raw) and math.isfinite(alpha)):
            logger.debug(
                "_fetch_returns got non-finite result for %s@%s", ticker, trade_date
            )
            return None, None, None

        return raw, alpha, actual_days
    except Exception as e:
        # Deliberately best-effort: the caller retries on a later run.
        logger.debug("_fetch_returns failed for %s@%s: %s", ticker, trade_date, e)
        return None, None, None
def _resolve_pending_entries(self, ticker: str) -> None:
    """Resolve pending decision-log entries for ``ticker``.

    For every pending entry whose ``"ticker"`` equals ``ticker``, fetch the
    realised returns via ``self._fetch_returns``, ask ``self.reflector`` for a
    reflection on the stored decision, and collect the result. All collected
    results are handed to ``self.memory_log.batch_update_with_outcomes`` in
    one call.

    Entries for which ``_fetch_returns`` reports no data are left pending and
    are picked up again on a later run. Entries for other tickers are not
    touched here.

    If an exception is raised while processing an entry, the results already
    collected for earlier entries are still written before the exception
    propagates, so completed reflections are not thrown away.

    Args:
        ticker: Ticker whose pending entries should be resolved.
    """
    pending = [
        e for e in self.memory_log.get_pending_entries() if e["ticker"] == ticker
    ]
    if not pending:
        return

    updates = []
    try:
        for entry in pending:
            raw, alpha, days = self._fetch_returns(ticker, entry["date"])
            if raw is None:
                continue  # price not available yet - try again next run

            reflection = self.reflector.reflect_on_final_decision(
                final_decision=entry.get("decision", ""),
                raw_return=raw,
                alpha_return=alpha,
            )
            updates.append(
                {
                    "ticker": ticker,
                    "trade_date": entry["date"],
                    "raw_return": raw,
                    "alpha_return": alpha,
                    "holding_days": days,
                    "reflection": reflection,
                }
            )
    finally:
        # Runs on success and on failure: persist whatever was resolved so a
        # late error does not discard earlier reflections.
        if updates:
            self.memory_log.batch_update_with_outcomes(updates)
def propagate(self, company_name, trade_date):
"""Run the trading agents graph for a company on a specific date."""
self.ticker = company_name
# Initialize state
# Resolve any pending log entries for this ticker before the pipeline runs.
# This records the outcome + reflection for earlier runs. It executes synchronously
# (price fetches plus one LLM reflection call per resolved entry) before the graph starts.
self._resolve_pending_entries(company_name)
# Initialize state — inject memory log context for PM
past_context = self.memory_log.get_past_context(company_name)
init_agent_state = self.propagator.create_initial_state(
company_name, trade_date
company_name, trade_date, past_context=past_context
)
args = self.propagator.get_graph_args()
@@ -221,6 +292,13 @@ class TradingAgentsGraph:
# Log state
self._log_state(trade_date, final_state)
# Store decision for deferred reflection.
self.memory_log.store_decision(
ticker=company_name,
trade_date=trade_date,
final_trade_decision=final_state["final_trade_decision"],
)
# Return decision and processed signal
return final_state, self.process_signal(final_state["final_trade_decision"])
@@ -264,24 +342,6 @@ class TradingAgentsGraph:
with open(log_path, "w", encoding="utf-8") as f:
json.dump(self.log_states_dict[str(trade_date)], f, indent=4)
def reflect_and_remember(self, returns_losses):
"""Reflect on decisions and update memory based on returns."""
self.reflector.reflect_bull_researcher(
self.curr_state, returns_losses, self.bull_memory
)
self.reflector.reflect_bear_researcher(
self.curr_state, returns_losses, self.bear_memory
)
self.reflector.reflect_trader(
self.curr_state, returns_losses, self.trader_memory
)
self.reflector.reflect_invest_judge(
self.curr_state, returns_losses, self.invest_judge_memory
)
self.reflector.reflect_portfolio_manager(
self.curr_state, returns_losses, self.portfolio_manager_memory
)
def process_signal(self, full_signal):
    """Extract the core decision from ``full_signal``.

    Delegates to ``self.signal_processor.process_signal`` and returns its
    result unchanged.
    """
    processor = self.signal_processor
    core_decision = processor.process_signal(full_signal)
    return core_decision