!pip install -q openai anthropic httpx langchain-core langchain-openai numpy

Tool Use and Function Calling for Retrieval Agents
From OpenAI function calling to MCP — building dynamic tool selection for SQL, API, and vector search retrieval
Table of Contents
- Setup & Installation
- OpenAI Function Calling
- Anthropic Tool Use
- Vector Search Tool
- SQL Database Tool
- REST API Tool
- LangChain Tool Abstractions
import os, json
# os.environ["OPENAI_API_KEY"] = "your-key"
# os.environ["ANTHROPIC_API_KEY"] = "your-key"

2. OpenAI Function Calling
Define tools as JSON Schema objects. The model decides when to call and emits structured tool_calls.
from openai import OpenAI

# The SDK reads OPENAI_API_KEY from the environment (set in the cell above).
client = OpenAI()
def _function_schema(name: str, description: str, properties: dict, required: list) -> dict:
    """Wrap a parameter spec in the OpenAI `tools=[...]` function-schema format."""
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": properties,
                "required": required,
            },
        },
    }


# JSON Schema definitions for the two retrieval tools the agent may call.
TOOL_SCHEMAS = [
    _function_schema(
        "search_knowledge_base",
        "Search the internal knowledge base for product documentation.",
        {
            "query": {"type": "string", "description": "Natural language search query"},
            "top_k": {"type": "integer", "description": "Number of results (default: 5)"},
        },
        ["query"],
    ),
    _function_schema(
        "query_database",
        "Execute a read-only SQL query against the analytics database.",
        {
            "sql": {"type": "string", "description": "SQL SELECT query"},
        },
        ["sql"],
    ),
]


def _search_knowledge_base(query, top_k=5):
    """Stub implementation — stands in for a real vector-store lookup."""
    return f"Found {top_k} results for: {query}"


def _query_database(sql):
    """Stub implementation — stands in for a real database query."""
    return f"Results for: {sql}\n| metric | value |\n| users | 12450 |"


# Dispatch table: tool name (as emitted by the model) -> callable.
TOOLS = {
    "search_knowledge_base": _search_knowledge_base,
    "query_database": _query_database,
}
print("Tool schemas defined:", [s["function"]["name"] for s in TOOL_SCHEMAS])


def run_tool_calling_agent(query: str, tools: dict, schemas: list, max_steps: int = 8) -> str:
    """Agent loop using OpenAI function calling.

    Repeatedly asks the model for a completion. Whenever the reply carries
    tool calls, each one is executed via the `tools` dispatch table and its
    result is appended as a `role: "tool"` message. The loop stops as soon
    as the model answers in plain text, or after `max_steps` rounds.

    Args:
        query: The user's question.
        tools: Mapping of tool name -> callable (see TOOLS).
        schemas: JSON Schema tool definitions (see TOOL_SCHEMAS).
        max_steps: Upper bound on model round-trips.

    Returns:
        The model's final text answer, or "Max steps reached.".
    """
    conversation = [
        {"role": "system", "content": "You are a helpful retrieval agent. Use tools to answer accurately."},
        {"role": "user", "content": query},
    ]
    for _ in range(max_steps):
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=conversation,
            tools=schemas,
            tool_choice="auto",
            temperature=0,
        )
        reply = completion.choices[0].message
        conversation.append(reply)
        # No tool calls means the model produced its final text answer.
        if not reply.tool_calls:
            return reply.content
        for call in reply.tool_calls:
            tool_name = call.function.name
            tool_args = json.loads(call.function.arguments)
            print(f" 🔧 {tool_name}({tool_args})")
            if tool_name in tools:
                outcome = tools[tool_name](**tool_args)
            else:
                outcome = f"Error: Unknown tool '{tool_name}'"
            conversation.append({"role": "tool", "tool_call_id": call.id, "content": str(outcome)})
    return "Max steps reached."
# Demo: a question the model should route to the query_database tool.
answer = run_tool_calling_agent("How many users do we have?", TOOLS, TOOL_SCHEMAS)
print(f"\n🎯 Answer: {answer}")

tool_choice options
| Value | Behavior |
|---|---|
| `"auto"` | Model decides |
| `"none"` | Never calls tools |
| `"required"` | Must call at least one |
| `{"type": "function", "function": {"name": "..."}}` | Must call specific tool |
3. Anthropic Tool Use
Same pattern, different message structure — input_schema + tool_use content blocks.
# Anthropic tool definition format: same JSON Schema idea as OpenAI's, but
# the parameter spec lives under "input_schema" and there is no outer
# {"type": "function"} wrapper.
anthro_tools = [
    {
        "name": "search_knowledge_base",
        "description": "Search the internal knowledge base for documentation.",
        "input_schema": {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "Natural language search query"},
            },
            "required": ["query"],
        },
    },
]
# Schema comparison: where each provider puts the schema, the tool call,
# and the tool result within the message structure.
print("OpenAI: parameters → message.tool_calls → role: tool")
print("Anthropic: input_schema → content[tool_use] → type: tool_result")
print("\nCore pattern is identical: define schemas → model chooses → execute → return results")

4. Vector Search Tool
For unstructured knowledge — documents, articles, support tickets.
import numpy as np
class VectorSearchTool:
    """Search a small in-memory vector store for semantically similar documents.

    Documents are embedded with the OpenAI embeddings API (via the
    module-level `client`) and ranked by cosine similarity to the query.
    """

    def __init__(self, documents: list[str], embedding_model: str = "text-embedding-3-small"):
        self.documents = documents
        self.embedding_model = embedding_model
        self._embeddings: np.ndarray | None = None  # filled in by build_index()

    def _embed(self, texts: list[str]) -> np.ndarray:
        """Return a (len(texts), dim) float32 matrix of embeddings."""
        response = client.embeddings.create(model=self.embedding_model, input=texts)
        return np.array([d.embedding for d in response.data], dtype=np.float32)

    def build_index(self):
        """Embed every document. Must be called once before search()."""
        self._embeddings = self._embed(self.documents)
        print(f"Indexed {len(self.documents)} documents")

    def search(self, query: str, top_k: int = 5) -> str:
        """Return up to `top_k` documents ranked by cosine similarity.

        Args:
            query: Natural language search query.
            top_k: Maximum number of results to return.

        Returns:
            A formatted string with one "[rank] (score) snippet" entry per hit.

        Raises:
            RuntimeError: if build_index() has not been called yet.
        """
        if self._embeddings is None:
            # Fail loudly instead of the opaque TypeError np.dot(None, ...) raises.
            raise RuntimeError("Index not built — call build_index() first.")
        query_emb = self._embed([query])[0]
        # Cosine similarity; the epsilon guards against a zero-norm vector.
        norms = np.linalg.norm(self._embeddings, axis=1) * np.linalg.norm(query_emb)
        scores = np.dot(self._embeddings, query_emb) / np.maximum(norms, 1e-12)
        top_indices = np.argsort(scores)[::-1][:top_k]
        results = [
            f"[{rank + 1}] (score: {scores[idx]:.3f}) {self.documents[idx][:300]}"
            for rank, idx in enumerate(top_indices)
        ]
        return "\n\n".join(results) if results else "No relevant documents found."
# Example usage: index a tiny demo corpus, then run a semantic query.
docs = [
    "Rate limiting is configured via the API gateway at 60 requests per minute.",
    "Authentication uses Bearer tokens. Generate tokens in the dashboard.",
    "Pagination uses cursor-based approach. Pass cursor parameter for next page.",
    "WebSocket connections support real-time event streaming.",
    "Database backups run daily at 2 AM UTC with 30-day retention.",
]
vector_tool = VectorSearchTool(docs)
vector_tool.build_index()  # embeds all docs via the OpenAI API (network call)
print(vector_tool.search("how to authenticate API requests"))

5. SQL Database Tool
For structured data — metrics, records, transaction history.
import sqlite3
from contextlib import contextmanager
class SQLTool:
    """Execute read-only SQL queries against a SQLite database.

    Holds ONE persistent connection for the lifetime of the object. This is
    essential for the default ":memory:" path: every sqlite3.connect(":memory:")
    call creates a brand-new empty database, so the original open-per-query
    approach could never see the demo table created during setup (every query
    failed with "no such table: users").
    """

    def __init__(self, db_path: str = ":memory:"):
        self.db_path = db_path
        self._conn = sqlite3.connect(self.db_path)
        self._conn.row_factory = sqlite3.Row
        self._setup_demo_db()

    def _setup_demo_db(self):
        """Create and seed the demo `users` table (idempotent)."""
        # INTEGER PRIMARY KEY makes `id` unique, so INSERT OR IGNORE really
        # does skip duplicates if setup runs twice against a file database.
        self._conn.execute(
            "CREATE TABLE IF NOT EXISTS users (id INTEGER PRIMARY KEY, name TEXT, plan TEXT, created_at TEXT)"
        )
        self._conn.execute("INSERT OR IGNORE INTO users VALUES (1, 'Alice', 'pro', '2024-01-15')")
        self._conn.execute("INSERT OR IGNORE INTO users VALUES (2, 'Bob', 'free', '2024-02-20')")
        self._conn.execute("INSERT OR IGNORE INTO users VALUES (3, 'Charlie', 'pro', '2024-03-10')")
        self._conn.commit()

    def query(self, sql: str) -> str:
        """Run a SELECT and return up to 50 rows as a pipe-separated table.

        Returns error text instead of raising, so an agent can read the
        failure as an ordinary tool result.

        Args:
            sql: SQL SELECT statement to execute.
        """
        if not sql.strip().upper().startswith("SELECT"):
            return "Error: Only SELECT queries are allowed."
        try:
            cursor = self._conn.execute(sql)
            rows = cursor.fetchmany(50)  # cap output so tool results stay small
            if not rows:
                return "Query returned no results."
            columns = [desc[0] for desc in cursor.description]
            lines = [" | ".join(columns)]
            lines.append("-" * len(lines[0]))
            lines.extend(" | ".join(str(v) for v in row) for row in rows)
            return "\n".join(lines)
        except sqlite3.Error as e:
            return f"SQL Error: {e}"
# Demo: query the in-memory database seeded by SQLTool.__init__.
sql_tool = SQLTool()
print(sql_tool.query("SELECT * FROM users WHERE plan = 'pro'"))

6. REST API Tool
For real-time data — external services, live metrics.
import httpx
from urllib.parse import urljoin
class RESTAPITool:
"""Call a REST API endpoint to fetch data."""
def __init__(self, base_url: str, allowed_paths: list | None = None):
self.base_url = base_url
self.allowed_paths = allowed_paths
def call(self, endpoint: str, method: str = "GET") -> str:
if self.allowed_paths:
if not any(endpoint.startswith(p) for p in self.allowed_paths):
return f"Error: Endpoint '{endpoint}' not in allowed paths."
url = urljoin(self.base_url, endpoint)
try:
with httpx.Client(timeout=15) as http_client:
if method == "GET":
resp = http_client.get(url)
else:
return "Only GET is supported."
resp.raise_for_status()
return resp.text[:1000]
except Exception as e:
return f"API Error: {e}"
# Example: Wikipedia search API (public, no auth; requires network access).
api_tool = RESTAPITool(
    base_url="https://en.wikipedia.org/w/",
    allowed_paths=["api.php"]
)
result = api_tool.call("api.php?action=query&list=search&srsearch=Python&format=json&srlimit=2")
print(result[:300])

7. LangChain Tool Abstractions
Unify tool definitions across providers with @tool decorator.
from langchain_core.tools import tool
# NOTE: for @tool functions the docstring is functional — LangChain sends it
# to the model as the tool description, so wording changes alter routing.
@tool
def search_docs(query: str, top_k: int = 5) -> str:
    """Search documentation for relevant passages.
    Use for questions about APIs, configuration, and procedures.
    Do NOT use for general knowledge — use web_search instead.
    Args:
        query: Specific natural language query.
        top_k: Number of results (1-10).
    """
    # Delegates to the module-level VectorSearchTool demo instance.
    return vector_tool.search(query, top_k)
# Same pattern: the docstring doubles as the tool description for the model.
@tool
def run_sql(sql: str) -> str:
    """Execute a read-only SQL SELECT query against the analytics database.
    Args:
        sql: SQL SELECT query to execute.
    """
    # Delegates to the module-level SQLTool demo instance (SELECT-only).
    return sql_tool.query(sql)
# These tools work with any LangChain/LangGraph agent; inspect the metadata
# the @tool decorator derived from the signature and docstring.
print(f"Tool: {search_docs.name}")
print(f"Description: {search_docs.description}")
# NOTE(review): `.args_schema.schema()` is the Pydantic v1-style API; on
# Pydantic v2 it emits a deprecation warning (`model_json_schema()` is the
# successor) but still works — confirm against the installed version.
print(f"Schema: {json.dumps(search_docs.args_schema.schema(), indent=2)}")