!pip install -q langchain-openai langgraph

Deep Research Agents: From RAG to Autonomous Investigation
Iterative retrieval, self-reflection, source triangulation, and report generation
Table of Contents
- Setup
- Single-Shot RAG Baseline
- Iterative Research Loop
- Self-Reflection
- Source Triangulation
- Report Generation Pipeline
import os
# os.environ["OPENAI_API_KEY"] = "your-key"

2. Single-Shot RAG Baseline
Standard retrieve-then-generate — works for factoid questions but fails for complex research.
from langchain_openai import ChatOpenAI

# Shared LLM client used by every stage of the pipeline.
# temperature=0 keeps research output as deterministic as the API allows.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
def single_shot_rag(question: str) -> str:
    """Baseline: one retrieval pass, one generation pass."""
    # Simulated retrieval — stands in for a real vector-store query.
    docs = [
        "RAG systems combine retrieval with generation for knowledge-grounded answers.",
        "Vector databases like FAISS and Pinecone store document embeddings.",
        "Chunking strategy significantly impacts retrieval quality.",
    ]
    bullet_lines = [f"- {doc}" for doc in docs]
    context = "\n".join(bullet_lines)
    messages = [
        {"role": "system", "content": f"Answer based on context only.\n\nContext:\n{context}"},
        {"role": "user", "content": question},
    ]
    # Ground the answer strictly in the retrieved context.
    return llm.invoke(messages).content
# Demo: a complex comparative question where a single retrieval pass falls short.
answer = single_shot_rag("What are the tradeoffs between different RAG architectures?")
print(f"Single-shot answer:\n{answer[:300]}...")

3. Iterative Research Loop
Multiple rounds of retrieval, each guided by gaps identified in previous rounds.
from dataclasses import dataclass, field


@dataclass
class ResearchState:
    """Accumulated state of one multi-round research session."""

    question: str  # the original research question driving the loop
    findings: list[str] = field(default_factory=list)  # retrieved evidence, one entry per round
    gaps: list[str] = field(default_factory=list)  # open information gaps from the last analysis
    queries_used: list[str] = field(default_factory=list)  # search queries issued so far
    iteration: int = 0  # 1-based index of the most recent round
def iterative_research(question: str, max_rounds: int = 3) -> ResearchState:
    """Multi-round research: retrieve -> analyze gaps -> retrieve more.

    Each round generates a focused search query (steered by the gaps found in
    the previous round), runs a simulated retrieval, then asks the model which
    information gaps remain. Stops early when the model answers DONE.

    Args:
        question: The research question to investigate.
        max_rounds: Upper bound on retrieval rounds.

    Returns:
        The final ResearchState (findings, remaining gaps, queries issued,
        and the number of the last round run).
    """
    state = ResearchState(question=question)
    for i in range(max_rounds):
        state.iteration = i + 1
        # Generate search query — steer by previously identified gaps, if any.
        if state.gaps:
            query_prompt = f"Original: {question}\nGaps: {', '.join(state.gaps)}\nGenerate a focused search query."
        else:
            query_prompt = f"Generate a search query for: {question}"
        query_resp = llm.invoke([{"role": "user", "content": query_prompt}])
        query = query_resp.content.strip()
        state.queries_used.append(query)
        # Simulated retrieval
        retrieval_result = f"[Round {i+1}] Retrieved info about: {query[:50]}"
        state.findings.append(retrieval_result)
        print(f"🔍 Round {i+1}: Query='{query[:60]}...'")
        # Gap analysis cannot steer another query on the final round (the loop
        # ends regardless), so skip the wasted LLM call.
        if i == max_rounds - 1:
            break
        gap_resp = llm.invoke([{
            "role": "system",
            "content": "List 1-2 information gaps remaining. If sufficient, say DONE.",
        }, {
            "role": "user",
            "content": f"Question: {question}\nFindings: {'; '.join(state.findings)}",
        }])
        if "DONE" in gap_resp.content.upper():
            print(f"✅ Research complete after {i+1} rounds")
            break
        state.gaps = [g.strip() for g in gap_resp.content.split("\n") if g.strip()]
        print(f" Gaps: {state.gaps[:2]}")
    return state
# Demo: run the iterative loop on a sample literature-review question.
research = iterative_research("Compare retrieval-augmented generation approaches for scientific literature")
print(f"\nTotal findings: {len(research.findings)}")
print(f"Queries used: {research.queries_used}")

4. Self-Reflection
The agent critiques its own findings to identify hallucinations, weak evidence, and bias.
def reflect_on_findings(question: str, findings: list[str]) -> dict:
    """Self-critique: check for completeness, accuracy, and gaps."""
    # Number the findings so the critic can reference them individually.
    numbered = [f"{i+1}. {f}" for i, f in enumerate(findings)]
    findings_text = "\n".join(numbered)
    critic_prompt = """You are a research critic. Evaluate findings for:
1. Completeness - do they fully answer the question?
2. Evidence quality - are claims well-supported?
3. Potential hallucinations - any unsupported assertions?
4. Missing perspectives - any important viewpoints missing?
Rate overall confidence 0-100 and list specific issues."""
    messages = [
        {"role": "system", "content": critic_prompt},
        {"role": "user", "content": f"Question: {question}\n\nFindings:\n{findings_text}"},
    ]
    response = llm.invoke(messages)
    print("🪞 Self-reflection:")
    # Truncate to keep notebook output readable.
    print(response.content[:500])
    return {"reflection": response.content}
# Demo: deliberately confident-sounding findings (with unsourced statistics)
# to exercise the critic's hallucination and evidence checks.
fake_findings = [
    "RAG improves factual accuracy by grounding generation in retrieved documents.",
    "Iterative RAG with 3 rounds achieves 15% higher recall than single-shot.",
    "Self-reflection reduces hallucination rate by 40% in research tasks.",
]
reflect_on_findings("How can we improve RAG for research tasks?", fake_findings)

5. Source Triangulation
Cross-validate claims across multiple independent sources.
def triangulate_claims(claims: list[str]) -> list[dict]:
    """Check each claim against multiple simulated sources."""
    # One shared rubric for every claim evaluation.
    rubric = """Evaluate this claim. For each, provide:
- support_level: strong / moderate / weak / contradicted
- reasoning: brief explanation
- confidence: 0.0-1.0
Respond in a structured way."""
    evaluations = []
    for claim in claims:
        reply = llm.invoke([
            {"role": "system", "content": rubric},
            {"role": "user", "content": f"Claim: {claim}"},
        ])
        # Keep only the first 200 chars so demo output stays compact.
        evaluations.append({"claim": claim, "evaluation": reply.content[:200]})
    return evaluations
# Demo: claims of varying truthfulness — the absolutes ("always") should
# come back weakly supported or contradicted.
claims = [
    "Transformer models always outperform RNNs on sequence tasks.",
    "RAG reduces hallucination compared to closed-book generation.",
    "Fine-tuning is always better than in-context learning.",
]
results = triangulate_claims(claims)
for r in results:
    print(f"\n📌 {r['claim']}")
    print(f" {r['evaluation']}")

6. Report Generation Pipeline
Combine all research steps into a structured report.
def generate_report(question: str, findings: list[str], reflection: str) -> str:
    """Generate a structured research report from findings."""
    bullets = "\n".join(f"- {item}" for item in findings)
    # Report template the model is asked to follow.
    report_spec = """Generate a structured research report with:
1. Executive Summary (2-3 sentences)
2. Key Findings (bullet points)
3. Analysis (2-3 paragraphs)
4. Limitations & Gaps
5. Recommendations
Be concise and cite findings where possible."""
    user_msg = f"Question: {question}\n\nFindings:\n{bullets}\n\nSelf-reflection:\n{reflection}"
    reply = llm.invoke([
        {"role": "system", "content": report_spec},
        {"role": "user", "content": user_msg},
    ])
    return reply.content
# Full pipeline demo: research -> reflect -> report.
question = "What are the best practices for building production RAG systems?"
# 1. Research: two rounds of query generation, retrieval, and gap analysis.
state = iterative_research(question, max_rounds=2)
# 2. Reflect: self-critique the collected findings.
reflection = reflect_on_findings(question, state.findings)
# 3. Generate report: synthesize findings plus the critique into a report.
report = generate_report(question, state.findings, reflection["reflection"])
print("\n" + "="*60)
print("📄 RESEARCH REPORT")
print("="*60)
print(report)