Design Patterns for AI Agents

A practical catalogue of architectural patterns — reflection, tool use, planning, multi-agent collaboration, routing, and more

Open In Colab

📖 Read the full article


Table of Contents

  1. Setup & Installation
  2. Pattern 1: Prompt Chaining
  3. Pattern 2: Routing
  4. Pattern 3: Reflection
  5. Pattern 4: Tool Use
  6. Pattern 5: Planning
  7. Cross-Reflection with LangGraph

1. Setup & Installation

!pip install -q openai langchain-openai langgraph langchain-core
import os
# os.environ["OPENAI_API_KEY"] = "your-api-key"

2. Pattern 1: Prompt Chaining

Decompose a task into a fixed sequence of LLM calls, with gates between steps.

from openai import OpenAI

# Single shared API client, reused by every pattern below.
# Presumably authenticates via the OPENAI_API_KEY env var set in Setup — confirm.
client = OpenAI()


def chain_step(prompt: str, model: str = "gpt-4o-mini") -> str:
    """Run one link of the chain: a single deterministic LLM call.

    Args:
        prompt: The user prompt for this step.
        model: Chat model name to use.

    Returns:
        The assistant's reply text.
    """
    completion = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,  # deterministic: keeps chained steps reproducible
    )
    return completion.choices[0].message.content


def prompt_chain_example(topic: str) -> str:
    """Produce a blog post via a fixed 3-step chain: outline -> write -> polish.

    Args:
        topic: Subject of the blog post.

    Returns:
        The final polished post text.
    """
    # Step 1: draft an outline for the topic.
    skeleton = chain_step(
        f"Create a detailed outline for a blog post about: {topic}. "
        f"Return only the outline with numbered sections."
    )
    print(f"📋 Step 1 - Outline:\n{skeleton[:300]}\n")

    # Quality gate between steps: an outline with fewer than 3 line breaks
    # is treated as too short and sent back for expansion.
    if skeleton.count("\n") < 3:
        skeleton = chain_step(
            f"The following outline is too short. Expand it to at least 5 sections:\n{skeleton}"
        )

    # Step 2: expand the outline into full prose.
    body = chain_step(
        f"Write a blog post based on this outline. Each section 2-3 paragraphs:\n\n{skeleton}"
    )
    print(f"📝 Step 2 - Content written ({len(body)} chars)\n")

    # Step 3: final editing pass (summary, conclusion, grammar).
    polished = chain_step(
        f"Add an executive summary at the top and a conclusion at the bottom. "
        f"Fix any grammar issues:\n\n{body}"
    )
    print(f"✅ Step 3 - Final polished ({len(polished)} chars)")
    return polished


# Demo: run the three-step chain end to end and preview the result.
result = prompt_chain_example("ReAct agents for retrieval")
print(result[:500])

3. Pattern 2: Routing

Classify each input and direct it to a specialized handler, so the prompt and model can be optimized per category.

import json

# JSON-schema tool definition the model is forced to call; the arguments it
# fills in become the routing decision (category + complexity).
ROUTE_SCHEMA = {
    "type": "function",
    "function": {
        "name": "route_query",
        "description": "Classify the user query into a category.",
        "parameters": {
            "type": "object",
            "properties": {
                # Which specialist system prompt (HANDLERS key) should take the query.
                "category": {
                    "type": "string",
                    "enum": ["technical_support", "billing", "general_inquiry", "complaint"],
                },
                # Drives model selection downstream: "complex" upgrades the model.
                "complexity": {
                    "type": "string",
                    "enum": ["simple", "complex"],
                },
            },
            "required": ["category", "complexity"],
        },
    },
}


def route_query(query: str) -> dict:
    """Classify a query, returning {'category': ..., 'complexity': ...}.

    Args:
        query: Raw user query text.

    Returns:
        Parsed arguments of the forced `route_query` tool call.
    """
    result = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "Classify the user's query."},
            {"role": "user", "content": query},
        ],
        tools=[ROUTE_SCHEMA],
        # Force a tool call so we always get structured output, never free text.
        tool_choice={"type": "function", "function": {"name": "route_query"}},
    )
    call = result.choices[0].message.tool_calls[0]
    return json.loads(call.function.arguments)


# Per-category system prompts; route_query's "category" output selects one.
HANDLERS = {
    "technical_support": "You are a technical support specialist. Provide step-by-step solutions.",
    "billing": "You are a billing specialist. Help with payment issues.",
    "general_inquiry": "You are a helpful assistant. Provide concise answers.",
    "complaint": "You are a customer relations specialist. Be empathetic.",
}


def handle_query(query: str) -> str:
    """Classify *query*, then answer it with the matching specialist prompt.

    Args:
        query: Raw user query text.

    Returns:
        The specialist model's reply.
    """
    decision = route_query(query)
    print(f"🔀 Routed to: {decision['category']} ({decision['complexity']})")
    # Complexity picks the model: complex queries get the larger one.
    chosen_model = "gpt-4o" if decision["complexity"] == "complex" else "gpt-4o-mini"
    reply = client.chat.completions.create(
        model=chosen_model,
        messages=[
            {"role": "system", "content": HANDLERS[decision["category"]]},
            {"role": "user", "content": query},
        ],
    )
    return reply.choices[0].message.content


print(handle_query("My payment failed and I can't access my account"))

4. Pattern 3: Reflection

Ask the LLM to critique and improve its own output — the simplest genuine feedback loop.

def generate_with_reflection(
    task: str,
    max_rounds: int = 3,
    model: str = "gpt-4o-mini",
) -> str:
    """Draft, self-critique, and refine until approved or out of rounds.

    The same model produces the draft, critiques it, and rewrites it; the
    loop exits early when the critique contains 'APPROVED'.

    Args:
        task: The writing task to perform.
        max_rounds: Maximum critique/refine iterations.
        model: Chat model used for every call.

    Returns:
        The latest (final) draft.
    """
    # Initial generation — creative temperature for a varied first draft.
    first = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are an expert assistant."},
            {"role": "user", "content": task},
        ],
        temperature=0.7,
    )
    draft = first.choices[0].message.content
    print(f"📝 Initial draft ({len(draft)} chars)")

    for round_num in range(max_rounds):
        # Critique pass — temperature 0 so the feedback is stable.
        critic_reply = client.chat.completions.create(
            model=model,
            messages=[{
                "role": "user",
                "content": f"Task: {task}\n\nDraft:\n{draft}\n\n"
                           f"Critique this draft. List specific issues. "
                           f"If excellent, respond with 'APPROVED'.",
            }],
            temperature=0,
        )
        critique = critic_reply.choices[0].message.content
        print(f"🔍 Round {round_num + 1} critique: {critique[:150]}...")

        if "APPROVED" in critique.upper():
            print("✅ Approved!")
            break

        # Refine pass — rewrite the draft against the critique.
        rewrite = client.chat.completions.create(
            model=model,
            messages=[{
                "role": "user",
                "content": f"Task: {task}\n\nDraft:\n{draft}\n\n"
                           f"Critique:\n{critique}\n\nRewrite addressing all issues.",
            }],
            temperature=0.7,
        )
        draft = rewrite.choices[0].message.content
        print(f"📝 Refined draft ({len(draft)} chars)")

    return draft


# Demo: reflect on a short explanation task and show the final draft.
result = generate_with_reflection("Write a concise explanation of the ReAct pattern for AI agents.")
print(f"\n--- Final Output ---\n{result[:500]}")

5. Pattern 4: Tool Use

Equip the LLM with external functions — search, code execution, APIs.

from langchain_core.tools import tool


# Good tool design: clear name, detailed docstring, constrained inputs.
# NOTE: for @tool, the docstring below IS runtime behavior — it becomes the
# tool description the LLM reads when deciding whether to call it.
@tool
def search_knowledge_base(
    query: str,
    category: str = "all",
    max_results: int = 5,
) -> str:
    """Search the internal knowledge base for technical documentation.

    Use this tool when the user asks about:
    - API references, endpoints, and parameters
    - Configuration guides and how-to instructions
    - Known issues and workarounds

    DO NOT use this for general knowledge questions — use web_search instead.

    Args:
        query: Natural language search query. Be specific.
        category: Filter by category. Options: 'api', 'config', 'troubleshooting', 'all'.
        max_results: Number of results to return (1-10).
    """
    # Stub implementation — a real tool would query an actual search index here.
    return f"Found {max_results} results for '{query}' in {category} category."


# Bad tool design comparison (intentionally bad — do not fix):
@tool
def search(q: str) -> str:
    """Search for stuff."""  # Too vague! Gives the LLM no guidance on when to call it or what `q` should contain.
    return f"Results for {q}"


# Show the metadata the LLM would actually see for each tool.
print("Good tool:", search_knowledge_base.name, "-", search_knowledge_base.description[:100])
print("Bad tool:", search.name, "-", search.description)

6. Pattern 5: Planning

Let the LLM autonomously decide what sequence of steps to execute.

def plan_and_execute(query: str) -> str:
    """Simple plan-and-execute pattern.

    1. Ask the model for a numbered step-by-step plan.
    2. Execute each plan line as its own LLM call, feeding earlier results forward.
    3. Compose the accumulated results into a final answer.

    Args:
        query: The user question to plan for and answer.

    Returns:
        The composed final answer text.
    """
    # Step 1: Generate plan
    plan_response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{
            "role": "user",
            "content": f"Create a step-by-step plan to answer: {query}\n"
                       "Return as a numbered list of 3-5 concrete steps."
        }],
        temperature=0,
    )
    plan = plan_response.choices[0].message.content
    print(f"📋 Plan:\n{plan}\n")

    # Step 2: Execute each non-empty plan line in order.
    results = []
    for line in plan.strip().split("\n"):
        if line.strip():
            step_response = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[{
                    "role": "user",
                    "content": f"Execute this step: {line}\n\nPrevious results: {results}"
                }],
                temperature=0,
            )
            result = step_response.choices[0].message.content
            results.append(result[:200])  # truncate so later prompts stay small
            print(f"▶️ {line[:80]}... → Done")

    # Step 3: Compose final answer.
    # BUGFIX: the join is computed outside the f-string — a backslash escape
    # inside an f-string expression ({'\n'.join(...)}) is a SyntaxError on
    # Python < 3.12 (lifted only by PEP 701).
    combined = "\n".join(results)
    final = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{
            "role": "user",
            "content": f"Answer: {query}\nUsing these results:\n{combined}"
        }],
    )
    return final.choices[0].message.content


# Demo: plan, execute, and compose an answer to a comparison question.
result = plan_and_execute("Compare the ReAct and Plan-and-Execute agent patterns")
print(f"\n--- Final Answer ---\n{result[:500]}")

7. Cross-Reflection with LangGraph

Use a more capable critic model to evaluate and improve output iteratively.

from typing import TypedDict
from langgraph.graph import StateGraph, END
from langchain_openai import ChatOpenAI


class ReflectionState(TypedDict):
    """State threaded through the LangGraph generate/critique loop."""
    task: str        # the user-facing writing task
    draft: str       # most recent generated draft
    critique: str    # most recent critic feedback; 'APPROVED' ends the loop
    round: int       # number of generate passes completed so far
    max_rounds: int  # iteration budget before the loop stops regardless


# Generator is creative (temperature 0.7); critic is deterministic (temperature 0).
generator = ChatOpenAI(model="gpt-4o-mini", temperature=0.7)
critic = ChatOpenAI(model="gpt-4o-mini", temperature=0)  # Use gpt-4o for better critique


def generate(state: ReflectionState) -> dict:
    """Draft (or redraft, when a critique exists) and bump the round counter.

    Args:
        state: Current reflection loop state.

    Returns:
        Partial state update with the new draft and incremented round.
    """
    feedback = state.get("critique")
    prompt = (
        f"Task: {state['task']}\nDraft: {state['draft']}\nCritique: {state['critique']}\nRewrite."
        if feedback
        else state["task"]
    )
    reply = generator.invoke([{"role": "user", "content": prompt}])
    return {"draft": reply.content, "round": state.get("round", 0) + 1}


def critique_node(state: ReflectionState) -> dict:
    """Ask the critic model for feedback on the current draft.

    Args:
        state: Current reflection loop state.

    Returns:
        Partial state update carrying the critic's feedback text.
    """
    feedback = critic.invoke([{
        "role": "user",
        "content": f"Task: {state['task']}\nDraft:\n{state['draft']}\n\n"
                   f"Provide detailed critique. Say 'APPROVED' if excellent.",
    }])
    return {"critique": feedback.content}


def should_continue(state: ReflectionState) -> str:
    """Route after critique: END on approval or exhausted budget, else loop.

    Returns:
        END to stop the graph, or "generate" to run another refinement round.
    """
    approved = "APPROVED" in state.get("critique", "").upper()
    out_of_rounds = state.get("round", 0) >= state.get("max_rounds", 3)
    return END if approved or out_of_rounds else "generate"


# Wire the loop: generate -> critique -> (generate again | END).
graph = StateGraph(ReflectionState)
graph.add_node("generate", generate)
graph.add_node("critique", critique_node)
graph.set_entry_point("generate")
graph.add_edge("generate", "critique")
graph.add_conditional_edges("critique", should_continue, {"generate": "generate", END: END})

reflection_app = graph.compile()

# Demo: run the reflection graph on a small writing task.
result = reflection_app.invoke({
    "task": "Write a 3-sentence explanation of prompt chaining for AI agents.",
    "draft": "", "critique": "", "round": 0, "max_rounds": 3,
})
print(f"Final draft (round {result['round']}):\n{result['draft']}")

Pattern Summary

Pattern Complexity When to Use
Prompt Chaining Low Fixed subtasks, quality gates
Routing Low Distinct input categories
Reflection Medium Self-improvement loops
Tool Use Medium External data access
Planning High Multi-step reasoning
Multi-Agent High Parallel specialization