Tool Use and Function Calling for Retrieval Agents

From OpenAI function calling to MCP — building dynamic tool selection for SQL, API, and vector search retrieval

Open In Colab

📖 Read the full article


Table of Contents

  1. Setup & Installation
  2. OpenAI Function Calling
  3. Anthropic Tool Use
  4. Vector Search Tool
  5. SQL Database Tool
  6. REST API Tool
  7. LangChain Tool Abstractions
!pip install -q openai anthropic httpx langchain-core langchain-openai numpy
import os, json
# os.environ["OPENAI_API_KEY"] = "your-key"
# os.environ["ANTHROPIC_API_KEY"] = "your-key"

2. OpenAI Function Calling

Define each tool as a JSON Schema object. The model decides when to call a tool and, when it does, emits structured tool_calls containing the tool name and JSON-encoded arguments.

from openai import OpenAI

# Module-level OpenAI client shared by the chat-completion and embedding calls in this file.
# NOTE(review): presumably picks up OPENAI_API_KEY from the environment (see the
# commented-out setup lines near the top) — confirm before running.
client = OpenAI()

def _function_tool(name, description, properties, required):
    """Wrap a parameter spec in the function-tool envelope used by TOOL_SCHEMAS.

    Args:
        name: Tool name the model will emit in its tool calls.
        description: Text telling the model when the tool applies.
        properties: JSON Schema ``properties`` mapping for the arguments.
        required: Names of the arguments the model must always supply.
    """
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": properties,
                "required": required,
            },
        },
    }


# Tool declarations passed to the chat-completions call as `tools=`.
TOOL_SCHEMAS = [
    _function_tool(
        "search_knowledge_base",
        "Search the internal knowledge base for product documentation.",
        {
            "query": {"type": "string", "description": "Natural language search query"},
            "top_k": {"type": "integer", "description": "Number of results (default: 5)"},
        },
        ["query"],
    ),
    _function_tool(
        "query_database",
        "Execute a read-only SQL query against the analytics database.",
        {
            "sql": {"type": "string", "description": "SQL SELECT query"},
        },
        ["sql"],
    ),
]


def _stub_search_knowledge_base(query, top_k=5):
    """Stub: report how many results would be returned for the query."""
    return f"Found {top_k} results for: {query}"


def _stub_query_database(sql):
    """Stub: echo the SQL followed by a canned one-row result table."""
    return f"Results for: {sql}\n| metric | value |\n| users | 12450 |"


# Stub tool implementations, keyed by the names declared in TOOL_SCHEMAS.
TOOLS = {
    "search_knowledge_base": _stub_search_knowledge_base,
    "query_database": _stub_query_database,
}

print("Tool schemas defined:", [entry["function"]["name"] for entry in TOOL_SCHEMAS])
def _dispatch_tool_call(tools: dict, name: str, raw_args: str) -> str:
    """Execute one model-requested tool call and return its result as text.

    Every failure mode (malformed arguments, unknown tool, tool exception) is
    reported as an ``"Error: ..."`` string instead of being raised, so the
    caller can pass it back to the model as the tool result.
    """
    try:
        # An empty argument string is treated as "no arguments".
        args = json.loads(raw_args or "{}")
    except json.JSONDecodeError as exc:
        return f"Error: Invalid JSON arguments for '{name}': {exc}"
    if not isinstance(args, dict):
        # `**args` below needs a mapping; a JSON list or scalar cannot be splatted.
        return f"Error: Arguments for '{name}' must be a JSON object."

    print(f"  🔧 {name}({args})")

    if name not in tools:
        return f"Error: Unknown tool '{name}'"
    try:
        return str(tools[name](**args))
    except Exception as exc:
        # Tools are arbitrary callables: wrong or missing keyword arguments and
        # tool-internal failures are both surfaced as text rather than aborting the loop.
        return f"Error: Tool '{name}' failed: {exc}"


def run_tool_calling_agent(query: str, tools: dict, schemas: list, max_steps: int = 8) -> str:
    """Agent loop using OpenAI function calling.

    Sends the conversation to the model, executes any tool calls it requests,
    appends each result as a ``role: tool`` message, and repeats until the
    model replies without tool calls.

    Args:
        query: The user's question.
        tools: Mapping of tool name to a callable accepting keyword arguments.
        schemas: Tool declarations passed to the API as ``tools=``.
        max_steps: Maximum number of model round-trips before giving up.

    Returns:
        The model's final text answer (empty string if the final message has
        no text content), or ``"Max steps reached."`` when the step budget is
        exhausted.
    """
    messages = [
        {"role": "system", "content": "You are a helpful retrieval agent. Use tools to answer accurately."},
        {"role": "user", "content": query},
    ]

    for _ in range(max_steps):
        response = client.chat.completions.create(
            model="gpt-4o-mini", messages=messages,
            tools=schemas, tool_choice="auto", temperature=0,
        )
        msg = response.choices[0].message
        messages.append(msg)

        if not msg.tool_calls:
            # Honour the declared `str` return type even when content is None.
            return msg.content or ""

        for tool_call in msg.tool_calls:
            result = _dispatch_tool_call(
                tools, tool_call.function.name, tool_call.function.arguments
            )
            messages.append({"role": "tool", "tool_call_id": tool_call.id, "content": result})

    return "Max steps reached."


# Demo run: calls the model with the stub tools in TOOLS and their declarations in TOOL_SCHEMAS.
answer = run_tool_calling_agent("How many users do we have?", TOOLS, TOOL_SCHEMAS)
print(f"\n🎯 Answer: {answer}")

tool_choice options

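| Value | Behavior |
|---|---|
| "auto" | The model decides whether to call a tool |
| "none" | The model never calls tools |
| "required" | The model must call at least one tool |
| {"type": "function", "function": {"name": "..."}} | The model must call the named tool |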

3. Anthropic Tool Use

The pattern is the same, but the message structure differs: tools declare their arguments under input_schema, and the model requests a call through a tool_use content block.

# Anthropic tool definition format: the argument schema sits under
# "input_schema", and there is no outer {"type": "function"} envelope.
_kb_input_schema = {
    "type": "object",
    "properties": {
        "query": {"type": "string", "description": "Natural language search query"},
    },
    "required": ["query"],
}

anthro_tools = [
    {
        "name": "search_knowledge_base",
        "description": "Search the internal knowledge base for documentation.",
        "input_schema": _kb_input_schema,
    },
]

# Schema comparison: one line per provider, then the shared takeaway.
print(
    "OpenAI:    parameters → message.tool_calls → role: tool",
    "Anthropic: input_schema → content[tool_use] → type: tool_result",
    sep="\n",
)
print("\nCore pattern is identical: define schemas → model chooses → execute → return results")

4. Vector Search Tool

For unstructured knowledge — documents, articles, support tickets.

import numpy as np


class VectorSearchTool:
    """Search an in-memory set of documents by embedding similarity.

    Documents are embedded once (explicitly via ``build_index`` or lazily on
    the first ``search``) and ranked against the query embedding by cosine
    similarity.
    """

    def __init__(self, documents: list[str], embedding_model="text-embedding-3-small"):
        """Store the corpus; no embedding request is made until indexing.

        Args:
            documents: Texts to index.
            embedding_model: Model name passed to the embeddings endpoint.
        """
        self.documents = documents
        self.embedding_model = embedding_model
        self._embeddings = None  # set by build_index(): one row per document

    def _embed(self, texts: list[str]) -> np.ndarray:
        """Embed ``texts`` with the module-level client; returns one float32 row per text."""
        response = client.embeddings.create(model=self.embedding_model, input=texts)
        return np.array([d.embedding for d in response.data], dtype=np.float32)

    def build_index(self):
        """Embed every document and cache the resulting matrix."""
        if self.documents:
            self._embeddings = self._embed(self.documents)
        else:
            # Nothing to embed: skip the request and keep an empty index.
            self._embeddings = np.empty((0, 0), dtype=np.float32)
        print(f"Indexed {len(self.documents)} documents")

    def search(self, query: str, top_k: int = 5) -> str:
        """Return the ``top_k`` most similar documents as a formatted string.

        Args:
            query: Text to search for.
            top_k: Maximum number of hits; values <= 0 yield no hits.

        Returns:
            Numbered hits (score plus the first 300 characters of each
            document) separated by blank lines, or
            ``"No relevant documents found."`` when there is nothing to return.
        """
        if self._embeddings is None:
            # Build lazily so a search issued before build_index() still works.
            self.build_index()
        if top_k <= 0 or len(self._embeddings) == 0:
            return "No relevant documents found."

        query_emb = self._embed([query])[0]
        # Cosine similarity; a zero-norm vector scores 0 instead of producing NaN.
        dots = np.dot(self._embeddings, query_emb)
        denom = np.linalg.norm(self._embeddings, axis=1) * np.linalg.norm(query_emb)
        scores = np.divide(dots, denom, out=np.zeros_like(dots), where=denom > 0)
        top_indices = np.argsort(scores)[::-1][:top_k]

        results = [
            f"[{rank}] (score: {scores[idx]:.3f}) {self.documents[idx][:300]}"
            for rank, idx in enumerate(top_indices, start=1)
        ]
        return "\n\n".join(results) if results else "No relevant documents found."


# Example usage: a five-sentence corpus indexed once, then queried.
docs = [
    "Rate limiting is configured via the API gateway at 60 requests per minute.",
    "Authentication uses Bearer tokens. Generate tokens in the dashboard.",
    "Pagination uses cursor-based approach. Pass cursor parameter for next page.",
    "WebSocket connections support real-time event streaming.",
    "Database backups run daily at 2 AM UTC with 30-day retention.",
]

# Module-level instance; the LangChain `search_docs` tool later in this file calls it.
vector_tool = VectorSearchTool(docs)
vector_tool.build_index()  # issues an embeddings request for all documents
print(vector_tool.search("how to authenticate API requests"))

5. SQL Database Tool

For structured data — metrics, records, transaction history.

import sqlite3
from contextlib import contextmanager


class SQLTool:
    """Execute read-only SQL queries against a SQLite database.

    Each call opens a short-lived connection. For the default ``":memory:"``
    path a plain ``sqlite3.connect(":memory:")`` would create a new, empty
    database on every call, so the tool maps it to a named shared-cache
    in-memory database and holds one connection open to keep it alive.
    """

    # Demo rows seeded by _setup_demo_db(): (id, name, plan, created_at).
    _DEMO_USERS = (
        (1, "Alice", "pro", "2024-01-15"),
        (2, "Bob", "free", "2024-02-20"),
        (3, "Charlie", "pro", "2024-03-10"),
    )

    def __init__(self, db_path: str = ":memory:"):
        """Open (or create) the database and seed the demo ``users`` table.

        Args:
            db_path: SQLite file path, or ``":memory:"`` for a private
                in-memory database scoped to this instance.
        """
        self.db_path = db_path
        if db_path == ":memory:":
            # Named shared-cache memory DB: every connection opened with this
            # URI sees the same data; the name is unique per live instance.
            self._target = f"file:sqltool_{id(self):x}?mode=memory&cache=shared"
            self._uri = True
            # The in-memory database is discarded once its last connection
            # closes, so keep one open for the lifetime of the tool.
            self._keepalive = sqlite3.connect(self._target, uri=True)
        else:
            self._target = db_path
            self._uri = False
            self._keepalive = None
        self._setup_demo_db()

    @contextmanager
    def _connect(self):
        """Yield a fresh connection and always close it, even on error."""
        conn = sqlite3.connect(self._target, uri=self._uri)
        try:
            yield conn
        finally:
            conn.close()

    def _setup_demo_db(self):
        """Create the ``users`` table if missing and insert the demo rows once."""
        with self._connect() as conn:
            # PRIMARY KEY gives INSERT OR IGNORE a conflict to ignore, making
            # repeated setup against a newly created table idempotent.
            conn.execute(
                "CREATE TABLE IF NOT EXISTS users "
                "(id INTEGER PRIMARY KEY, name TEXT, plan TEXT, created_at TEXT)"
            )
            conn.executemany(
                "INSERT OR IGNORE INTO users VALUES (?, ?, ?, ?)", self._DEMO_USERS
            )
            conn.commit()

    def query(self, sql: str) -> str:
        """Run a read-only query and return up to 50 rows as a text table.

        Args:
            sql: A single ``SELECT`` statement (optionally introduced by a
                ``WITH`` clause).

        Returns:
            A header line, a dashed separator and one ``" | "``-joined line per
            row; ``"Query returned no results."`` for an empty result; or an
            ``"Error: ..."`` / ``"SQL Error: ..."`` message on rejection or
            failure.
        """
        normalized = sql.strip().upper()
        if not normalized.startswith(("SELECT", "WITH")):
            return "Error: Only SELECT queries are allowed."
        try:
            with self._connect() as conn:
                # Enforce read-only at the engine level as well: the prefix
                # check alone cannot tell a WITH ... SELECT from a WITH ... DELETE.
                conn.execute("PRAGMA query_only = ON")
                cursor = conn.execute(sql)
                rows = cursor.fetchmany(50)
                if not rows:
                    return "Query returned no results."
                header = " | ".join(desc[0] for desc in cursor.description)
                lines = [header, "-" * len(header)]
                lines.extend(" | ".join(str(v) for v in row) for row in rows)
                return "\n".join(lines)
        except (sqlite3.Error, sqlite3.Warning) as e:
            # sqlite3.Warning is not a subclass of sqlite3.Error; older Python
            # versions raise it for multi-statement input.
            return f"SQL Error: {e}"


# Module-level instance on the default in-memory database; the LangChain
# `run_sql` tool later in this file calls it.
sql_tool = SQLTool()
print(sql_tool.query("SELECT * FROM users WHERE plan = 'pro'"))

6. REST API Tool

For real-time data — external services, live metrics.

import httpx
from urllib.parse import urljoin


class RESTAPITool:
    """Call a REST API endpoint to fetch data."""

    def __init__(self, base_url: str, allowed_paths: list | None = None):
        self.base_url = base_url
        self.allowed_paths = allowed_paths

    def call(self, endpoint: str, method: str = "GET") -> str:
        if self.allowed_paths:
            if not any(endpoint.startswith(p) for p in self.allowed_paths):
                return f"Error: Endpoint '{endpoint}' not in allowed paths."

        url = urljoin(self.base_url, endpoint)
        try:
            with httpx.Client(timeout=15) as http_client:
                if method == "GET":
                    resp = http_client.get(url)
                else:
                    return "Only GET is supported."
                resp.raise_for_status()
                return resp.text[:1000]
        except Exception as e:
            return f"API Error: {e}"


# Example: Wikipedia API
# base_url ends with "/" so urljoin appends the endpoint instead of replacing the "w" segment.
api_tool = RESTAPITool(
    base_url="https://en.wikipedia.org/w/",
    allowed_paths=["api.php"]
)

# Live network call; call() already truncates to 1000 characters, the slice trims the printout further.
result = api_tool.call("api.php?action=query&list=search&srsearch=Python&format=json&srlimit=2")
print(result[:300])

7. LangChain Tool Abstractions

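Unify tool definitions across providers with the @tool decorator, which derives the tool's name, description, and argument schema from the function's signature and docstring.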

from langchain_core.tools import tool


@tool
def search_docs(query: str, top_k: int = 5) -> str:
    """Search documentation for relevant passages.

    Use for questions about APIs, configuration, and procedures.
    Do NOT use for general knowledge — use web_search instead.

    Args:
        query: Specific natural language query.
        top_k: Number of results (1-10).
    """
    # The docstring above is left unchanged because the print of
    # `search_docs.description` below shows it is exposed as the tool description.
    # Enforce the advertised 1-10 range: the value is model-supplied and may fall outside it.
    top_k = max(1, min(10, top_k))
    return vector_tool.search(query, top_k)


# LangChain wrapper around the module-level `sql_tool`.
@tool
def run_sql(sql: str) -> str:
    """Execute a read-only SQL SELECT query against the analytics database.

    Args:
        sql: SQL SELECT query to execute.
    """
    # Delegates unchanged; SELECT-only validation and error formatting happen in SQLTool.query.
    return sql_tool.query(sql)


# These tools work with any LangChain/LangGraph agent
print(f"Tool: {search_docs.name}")
print(f"Description: {search_docs.description}")
# Pydantic v2 models expose model_json_schema(); .schema() is the v1 spelling
# and is deprecated under v2, so prefer the new method when it exists.
_args_model = search_docs.args_schema
_args_schema = (
    _args_model.model_json_schema()
    if hasattr(_args_model, "model_json_schema")
    else _args_model.schema()
)
print(f"Schema: {json.dumps(_args_schema, indent=2)}")