第02章：工具调用工程——Function Calling与工具链设计

工具是Agent触手可及的世界。工具链设计的好坏，决定了Agent能做什么，以及做得有多可靠。

2.1 Function Calling的底层机制

Function Calling（现在也叫Tool Use）是LLM与外部世界交互的标准接口。

理解底层机制比记住API文档更重要：

# Function Calling的完整请求/响应周期

import openai
import json

client = openai.OpenAI()

# The user turn is needed again in the follow-up request, so it is named once.
user_message = {"role": "user", "content": "今天北京的天气怎么样？"}

# Step 1: send the tool definitions together with the user message.
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[user_message],
    tools=[
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "获取指定城市的当前天气",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "city": {"type": "string", "description": "城市名"},
                        "unit": {
                            "type": "string",
                            "enum": ["celsius", "fahrenheit"],
                            "description": "温度单位"
                        }
                    },
                    "required": ["city"]
                }
            }
        }
    ],
    tool_choice="auto"
)

# Step 2: inspect the model's decision.
message = response.choices[0].message
print(f"finish_reason: {response.choices[0].finish_reason}")
# -> "tool_calls": the model asks for one or more tools to be executed
# -> "stop": the model answered directly

if message.tool_calls:
    # The follow-up conversation: original question, the assistant's tool-call
    # request, then one "tool" message for EVERY requested call. A single
    # response may contain several tool calls, and each tool_call_id must be
    # answered before the model can continue.
    follow_up_messages = [user_message, message]

    for tool_call in message.tool_calls:
        print(f"工具名称: {tool_call.function.name}")
        print(f"工具参数: {tool_call.function.arguments}")
        # e.g. {"city": "北京", "unit": "celsius"}

        # Step 3: run the tool (your code, not the model).
        # `get_weather` is the application's own weather lookup; it is not
        # defined in this snippet.
        args = json.loads(tool_call.function.arguments)
        weather_result = get_weather(**args)

        # Step 4: hand the tool result back to the model.
        follow_up_messages.append({
            "role": "tool",
            # ensure_ascii=False keeps non-ASCII text readable instead of
            # turning it into \uXXXX escapes.
            "content": json.dumps(weather_result, ensure_ascii=False),
            "tool_call_id": tool_call.id  # must match the request it answers
        })

    final_response = client.chat.completions.create(
        model="gpt-4o",
        messages=follow_up_messages
    )
    print(final_response.choices[0].message.content)

关键理解：LLM本身不执行工具。LLM只是告诉你"我想调用X工具，参数是Y"，然后你去执行，把结果再喂回给LLM。LLM是指挥官，你的代码是执行者。

2.2 工具设计原则

工具的描述质量直接影响LLM的使用准确性：

# ======= Bad tool definition (deliberate counter-example) =======
# What is wrong with it:
#   - the name "search" is too vague
#   - the description is too vague for the model to know when to use the tool
#   - the parameter name "q" does not explain itself
BAD_TOOL = {
    "type": "function",
    "function": dict(
        name="search",
        description="搜索东西",
        parameters={
            "type": "object",
            "properties": {"q": {"type": "string"}},
            "required": ["q"],
        },
    ),
}

# ======= Good tool definition =======
# The description is assembled from adjacent string literals rather than an
# indented triple-quoted block, so the text sent to the model carries no
# source-code indentation or whitespace-only lines.
GOOD_TOOL = {
    "type": "function",
    "function": {
        "name": "search_web",
        "description": (
            "搜索互联网获取最新信息。\n"
            "\n"
            "适合使用的场景：\n"
            "- 需要当前/最新数据（新闻、价格、时间敏感信息）\n"
            "- 需要事实核查\n"
            "- 需要了解特定网站的信息\n"
            "\n"
            "不适合使用的场景：\n"
            "- 问题可以从已知知识回答\n"
            "- 数学计算\n"
            "- 代码生成"
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "搜索查询词。建议使用英文以获得更好结果。例如：'Bitcoin price today' 而不是 '比特币今天价格'"
                },
                "time_range": {
                    "type": "string",
                    "enum": ["day", "week", "month", "year"],
                    "description": "时间范围限制。用于获取特定时间段的信息"
                },
                "max_results": {
                    "type": "integer",
                    "description": "返回结果数量，1-10之间，默认3",
                    "minimum": 1,
                    "maximum": 10,
                    "default": 3
                }
            },
            # Only the query is mandatory; the other parameters are optional.
            "required": ["query"]
        }
    }
}

工具设计的7个原则：

# The seven tool-design principles. The "N. " prefix is generated from the
# position of each entry, so the numbering always matches the order.
TOOL_DESIGN_PRINCIPLES = [
    f"{number}. {principle}"
    for number, principle in enumerate(
        (
            "名称要自解释：search_web比search好，send_email比email好",
            "描述要说明用途和边界：什么时候用，什么时候不用",
            "参数名称要清晰：query比q好，recipient_email比to好",
            "枚举类型用enum字段：让LLM知道有哪些合法值",
            "可选参数提供默认值：减少LLM的决策负担",
            "工具职责单一：一个工具做一件事，不做两件事",
            "错误返回要有意义：让LLM知道出了什么问题，而不只是'error'",
        ),
        start=1,
    )
]

2.3 构建生产可用的工具框架

import functools
import inspect
import json
import logging
import time
from dataclasses import dataclass, field
from typing import Any, Callable, Optional

logger = logging.getLogger(__name__)


@dataclass
class ToolResult:
    """Standard envelope for the outcome of one tool execution.

    Attributes:
        success: True when the tool ran without raising.
        data: The tool's return value; meaningful only when ``success``.
        error: Failure reason; meaningful only when not ``success``.
        metadata: Execution details (e.g. elapsed seconds, attempt count).
    """

    success: bool
    data: Any = None
    error: Optional[str] = None
    metadata: dict = field(default_factory=dict)

    def to_string(self) -> str:
        """Render the result as text to send back to the model.

        Returns:
            On failure, the error prefixed with a fixed failure marker.
            On success, strings are returned as-is, dicts and lists are
            serialized as JSON (non-ASCII kept readable), and anything else,
            including containers that are not JSON-serializable, falls back
            to ``str(data)``.
        """
        if not self.success:
            return f"工具执行失败：{self.error}"
        if isinstance(self.data, (dict, list)):
            # str() on a container yields a Python repr (single quotes,
            # True/None), which is not JSON; prefer real JSON when possible.
            try:
                return json.dumps(self.data, ensure_ascii=False)
            except (TypeError, ValueError):
                pass
        return str(self.data)


class ToolRegistry:
    """Registry that owns tool registration, lookup and execution.

    Features:
    - decorator-based registration
    - retry with exponential backoff
    - execution logging
    - a user-confirmation gate for sensitive tools
    """

    def __init__(self):
        self._tools: dict[str, Callable] = {}
        self._tool_definitions: list[dict] = []
        self._requires_confirmation: set[str] = set()

    def register(
        self,
        name: str,
        description: str,
        parameters: dict,
        requires_confirmation: bool = False,
        max_retries: int = 3
    ):
        """Return a decorator that registers the decorated function as a tool.

        The decorated name is rebound to a wrapper that returns a
        ``ToolResult`` instead of raising.

        Args:
            name: Tool name exposed to the model; registering the same name
                again replaces the earlier tool and its definition.
            description: Tool description exposed to the model.
            parameters: JSON-schema dict describing the tool's arguments.
            requires_confirmation: When True, ``execute`` only runs the tool
                after the confirmation callback approves the call.
            max_retries: Maximum number of attempts; values below 1 are
                treated as 1 so the tool always runs at least once.
        """
        # range(0) would skip the loop entirely and make the wrapper return
        # None, so guarantee at least one attempt.
        attempts_allowed = max(1, max_retries)

        def decorator(func: Callable):
            try:
                signature = inspect.signature(func)
            except (TypeError, ValueError):
                # Some callables expose no introspectable signature; skip the
                # argument pre-check for them.
                signature = None

            @functools.wraps(func)
            def wrapper(*args, **kwargs) -> ToolResult:
                if signature is not None:
                    try:
                        signature.bind(*args, **kwargs)
                    except TypeError as exc:
                        # Wrong or missing arguments cannot be cured by
                        # retrying; report immediately without backoff sleeps.
                        logger.warning("工具 %s 参数错误：%s", name, exc)
                        return ToolResult(
                            success=False,
                            error=f"参数错误：{exc}",
                            metadata={"attempts": 0}
                        )

                for attempt in range(attempts_allowed):
                    # perf_counter is monotonic, unlike wall-clock time.time().
                    start_time = time.perf_counter()
                    try:
                        result = func(*args, **kwargs)
                    except Exception as e:
                        logger.warning("工具 %s 第%d次失败：%s", name, attempt + 1, e)
                        if attempt == attempts_allowed - 1:
                            return ToolResult(
                                success=False,
                                # Exceptions raised without a message would
                                # otherwise produce an empty error text.
                                error=str(e) or type(e).__name__,
                                metadata={"attempts": attempt + 1}
                            )
                        time.sleep(2 ** attempt)  # exponential backoff: 1s, 2s, 4s, ...
                    else:
                        elapsed = time.perf_counter() - start_time
                        logger.info("工具 %s 执行成功，耗时 %.2fs", name, elapsed)
                        return ToolResult(
                            success=True,
                            data=result,
                            metadata={"elapsed_seconds": elapsed, "attempts": attempt + 1}
                        )

            if name in self._tools:
                # Re-registration: drop the stale definition so the list never
                # contains two entries with the same tool name.
                self._tool_definitions = [
                    definition
                    for definition in self._tool_definitions
                    if definition["function"]["name"] != name
                ]

            self._tools[name] = wrapper
            self._tool_definitions.append({
                "type": "function",
                "function": {
                    "name": name,
                    "description": description,
                    "parameters": parameters
                }
            })

            if requires_confirmation:
                self._requires_confirmation.add(name)
            else:
                # Clears a flag left behind by an earlier registration.
                self._requires_confirmation.discard(name)

            return wrapper
        return decorator

    def execute(self, tool_name: str, tool_args: dict,
                confirmation_callback=None) -> ToolResult:
        """Run a registered tool and return its ``ToolResult``.

        Args:
            tool_name: Name the tool was registered under.
            tool_args: Keyword arguments for the tool; ``None`` is treated
                as no arguments.
            confirmation_callback: Callable ``(tool_name, tool_args) -> bool``
                consulted for tools registered with
                ``requires_confirmation=True``.

        Returns:
            The tool's result, or a failed ``ToolResult`` when the tool is
            unknown, confirmation is required but no callback was supplied,
            or the callback declined the call.
        """
        if tool_name not in self._tools:
            return ToolResult(success=False, error=f"工具 {tool_name} 不存在")

        if tool_name in self._requires_confirmation:
            if confirmation_callback is None:
                # Fail closed: a gated tool must never run unconfirmed just
                # because the caller forgot to pass a callback.
                return ToolResult(
                    success=False,
                    error=f"工具 {tool_name} 需要用户确认，但未提供确认回调"
                )
            if not confirmation_callback(tool_name, tool_args):
                return ToolResult(
                    success=False,
                    error="用户拒绝了操作"
                )

        return self._tools[tool_name](**(tool_args or {}))

    @property
    def definitions(self) -> list[dict]:
        """Return the OpenAI-format definitions of all registered tools.

        A new list is returned so callers cannot reorder or truncate the
        registry's internal list by accident.
        """
        return list(self._tool_definitions)


# ====== 使用示例：注册工具 ======

registry = ToolRegistry()

@registry.register(
    name="web_search",
    description="搜索互联网获取最新信息",
    parameters={
        "type": "object",
        "properties": {
            "query": {"type": "string", "description": "搜索查询词"},
            "max_results": {
                "type": "integer",
                "description": "返回结果数量，默认3",
                "default": 3
            }
        },
        "required": ["query"]
    }
)
def web_search(query: str, max_results: int = 3) -> dict:
    """Query the Tavily search endpoint and return the decoded JSON response.

    Args:
        query: Search query text.
        max_results: Number of results to request.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.RequestException: On timeouts, connection failures, or a
            non-2xx HTTP status. Raising (rather than returning the error
            body) lets the registry's retry wrapper see the failure.
    """
    import os

    import requests

    response = requests.post(
        "https://api.tavily.com/search",
        json={
            # Read the key from the environment rather than the source file;
            # "..." remains the placeholder when the variable is unset.
            "api_key": os.environ.get("TAVILY_API_KEY", "..."),
            "query": query,
            "max_results": max_results
        },
        # Without a timeout a stalled connection would block indefinitely.
        timeout=15
    )
    response.raise_for_status()
    return response.json()


@registry.register(
    name="send_email",
    description="发送电子邮件给指定收件人",
    parameters={
        "type": "object",
        # All three fields are plain strings, so the schema entries are
        # generated from (field name, description) pairs.
        "properties": {
            field_name: {"type": "string", "description": label}
            for field_name, label in (
                ("to", "收件人邮箱"),
                ("subject", "邮件主题"),
                ("body", "邮件正文"),
            )
        },
        "required": ["to", "subject", "body"]
    },
    requires_confirmation=True  # sending mail is gated behind user confirmation
)
def send_email(to: str, subject: str, body: str) -> str:
    """Placeholder e-mail tool: returns a confirmation text naming the recipient.

    Nothing is actually sent here; ``subject`` and ``body`` are accepted but
    unused.
    """
    # A real implementation would hand the message to a provider such as
    # Resend or SendGrid.
    confirmation = f"邮件已发送给 {to}"
    return confirmation

2.4 并行工具调用

GPT-4o支持在一次响应中返回多个工具调用请求。模型只负责“声明”这些调用，真正的并行执行由你的代码完成：

import asyncio
from openai import AsyncOpenAI

async def execute_parallel_tools(tool_calls: list, registry: "ToolRegistry"):
    """Execute several tool calls concurrently.

    Args:
        tool_calls: Tool-call objects exposing ``id``, ``function.name`` and
            ``function.arguments`` (a JSON string).
        registry: Object whose ``execute(tool_name, tool_args)`` runs a tool.

    Returns:
        A list of ``{"tool_call_id": ..., "result": ...}`` dicts in the same
        order as ``tool_calls``. A call whose arguments are not a valid JSON
        object yields a failed ``ToolResult`` instead of raising, so one bad
        call cannot abort the whole batch.
    """

    async def execute_single(tool_call):
        tool_name = tool_call.function.name
        try:
            # An empty argument string is treated as "no arguments".
            tool_args = json.loads(tool_call.function.arguments or "{}")
            if not isinstance(tool_args, dict):
                raise ValueError("arguments must decode to a JSON object")
        except ValueError as exc:  # json.JSONDecodeError subclasses ValueError
            result = ToolResult(
                success=False,
                error=f"工具参数不是合法的JSON对象：{exc}"
            )
        else:
            # Run in a worker thread so a blocking tool does not stall the
            # event loop.
            result = await asyncio.to_thread(
                registry.execute, tool_name, tool_args
            )

        return {
            "tool_call_id": tool_call.id,
            "result": result
        }

    # gather preserves input order, so results line up with tool_calls.
    results = await asyncio.gather(*(
        execute_single(tc) for tc in tool_calls
    ))

    return results


class _ConfirmingRegistry:
    """Adapter exposing ``execute(tool_name, tool_args)`` with a bound confirmation callback."""

    def __init__(self, registry, confirmation_callback):
        self._registry = registry
        self._confirmation_callback = confirmation_callback

    def execute(self, tool_name, tool_args):
        return self._registry.execute(
            tool_name, tool_args, self._confirmation_callback
        )


class AsyncAgent:
    """Agent loop that supports multiple tool calls per model response.

    Args:
        registry: Tool registry providing ``definitions`` and ``execute``.
        model: Chat model name.
        confirmation_callback: Optional ``(tool_name, tool_args) -> bool``
            forwarded to ``registry.execute`` for tools that require user
            confirmation. Tool execution is dispatched through
            ``execute_parallel_tools``, which runs ``registry.execute`` via
            ``asyncio.to_thread``, so the callback is invoked off the event
            loop thread and may be called for several tools concurrently.
        max_iterations: Upper bound on model round-trips in one ``run``.
    """

    def __init__(
        self,
        registry: ToolRegistry,
        model: str = "gpt-4o",
        *,
        confirmation_callback=None,
        max_iterations: int = 10
    ):
        self.client = AsyncOpenAI()
        self.registry = registry
        self.model = model
        self.confirmation_callback = confirmation_callback
        self.max_iterations = max_iterations

    async def run(self, user_message: str) -> str:
        """Answer ``user_message``, executing requested tools along the way.

        Returns:
            The model's final text answer (an empty string if the model
            returned no content), or a fixed notice when ``max_iterations``
            round-trips pass without a final answer.
        """
        messages = [{"role": "user", "content": user_message}]

        # Without a callback the registry is used directly; with one, the
        # adapter forwards it on every execute() call.
        if self.confirmation_callback is None:
            tool_executor = self.registry
        else:
            tool_executor = _ConfirmingRegistry(
                self.registry, self.confirmation_callback
            )

        for _ in range(self.max_iterations):
            response = await self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                tools=self.registry.definitions,
                tool_choice="auto"
            )

            message = response.choices[0].message

            if not message.tool_calls:
                # content may be None; keep the declared str return type.
                return message.content or ""

            # The assistant's tool-call request must precede the tool results.
            messages.append(message)

            # Execute every requested tool call concurrently.
            tool_results = await execute_parallel_tools(
                message.tool_calls, tool_executor
            )

            # Append one "tool" message per call, matched by tool_call_id.
            for tr in tool_results:
                messages.append({
                    "role": "tool",
                    "content": tr["result"].to_string(),
                    "tool_call_id": tr["tool_call_id"]
                })

        return "达到最大迭代次数"

2.5 常用工具库：开箱即用的工具集

# High-value tool combinations, grouped by category.
# Maps category -> {tool name -> one-line description}.
ESSENTIAL_TOOLS = dict(
    web=dict(
        tavily_search="最适合Agent的搜索API（专为LLM优化的结果格式）",
        firecrawl="网页全文抓取（Markdown格式，适合RAG）",
        playwright="浏览器自动化（填表、点击、截图）",
    ),
    code=dict(
        e2b="云端代码执行沙箱（安全的代码运行环境）",
        local_python="本地Python执行（需要严格输入验证）",
    ),
    data=dict(
        pandas="数据分析和处理",
        sql_executor="执行SQL查询（连接到你的数据库）",
    ),
    communication=dict(
        resend="发送邮件",
        slack_api="发送Slack消息",
        telegram_bot="Telegram消息",
    ),
    files=dict(
        read_file="读取文件内容",
        write_file="写入文件",
        pdf_parser="解析PDF（PyMuPDF/pdfplumber）",
    ),
)

# 使用E2B安全执行代码的示例
from e2b_code_interpreter import Sandbox

def safe_code_execution(code: str, timeout: int = 30) -> str:
    """Run Python code in an E2B sandbox and return its output as text.

    Args:
        code: Python source to execute.
        timeout: Value forwarded to ``sandbox.run_code`` as ``timeout``.

    Returns:
        An error description when the execution reports an error; otherwise
        the textual results, chart markers and stdout lines joined by
        newlines, or a fixed notice when there was no output at all.
    """
    with Sandbox() as sandbox:
        execution = sandbox.run_code(code, timeout=timeout)

        if execution.error:
            return f"代码执行错误：{execution.error.name}\n{execution.error.value}"

        output_parts = []
        for output in execution.results:
            # Check attribute VALUES rather than attribute presence.
            # NOTE(review): E2B result objects appear to define both `text`
            # and `png` on every result, with None for representations that
            # are absent - confirm against the SDK. Under that model a
            # hasattr() test never reaches the chart branch and lets a None
            # text into the join below.
            text = getattr(output, "text", None)
            if text:
                output_parts.append(str(text))
            if getattr(output, "png", None):
                # Chart output: report that an image was produced.
                output_parts.append("[图表已生成，保存为PNG]")

        stdout_lines = execution.logs.stdout
        if stdout_lines:
            output_parts.extend(stdout_lines)

        if not output_parts:
            return "代码执行完成（无输出）"
        return "\n".join(output_parts)

本章小结

Function Calling的底层：LLM只是"声明"要调用什么，你的代码才是执行者
工具描述质量直接影响LLM的使用准确性：描述用途、边界、参数含义
生产工具框架需要：注册表管理、重试机制、执行日志、权限控制
并行工具调用可以显著减少延迟（GPT-4o支持一次响应包含多个工具调用）
危险操作（发邮件/删除数据）必须有用户确认步骤

行动项：用ToolRegistry注册3个工具（搜索、计算器、文件读取），构建一个可以回答"分析这个CSV文件的销售数据趋势"的Agent。