第三章:查询优化实战
第三章:查询优化实战
写 SQL 很容易,写"不会在生产环境爆炸"的 SQL 很难。本章讲的是那些在开发环境没问题、在生产环境变成定时炸弹的查询模式。
一、N+1 查询——最常见的性能杀手
N+1 是最普遍的数据库性能问题,通常来自 ORM 的懒加载。
典型场景
# Python / SQLAlchemy 示例(懒加载导致 N+1)
users = session.query(User).filter(User.active == True).all()
for user in users:
# 每次循环都触发一个 SQL 查询!
# 如果 users 有 100 个,就会有 101 个查询
print(user.orders) # 懒加载触发:SELECT * FROM orders WHERE user_id = ?
-- 实际发生的 SQL:
-- 查询 1:
SELECT * FROM users WHERE active = true;
-- 查询 2(循环100次):
SELECT * FROM orders WHERE user_id = 1;
SELECT * FROM orders WHERE user_id = 2;
-- ... 99 次
解决方案 1:JOIN 查询
-- 一个查询解决所有问题
SELECT
u.id,
u.name,
u.email,
o.id AS order_id,
o.amount,
o.created_at AS order_date
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE u.active = true
ORDER BY u.id, o.created_at;
解决方案 2:预加载(Eager Loading)
# SQLAlchemy:使用 joinedload 或 selectinload
from sqlalchemy.orm import joinedload, selectinload
# joinedload:生成 JOIN SQL(适合一对一、多对一)
users = session.query(User)\
.options(joinedload(User.profile))\
.filter(User.active == True)\
.all()
# selectinload:生成 IN 子句(适合一对多,避免结果集膨胀)
users = session.query(User)\
.options(selectinload(User.orders))\
.filter(User.active == True)\
.all()
# 生成的 SQL:
# SELECT * FROM users WHERE active = true;
# SELECT * FROM orders WHERE user_id IN (1, 2, 3, ..., 100);
# 只有 2 个查询!
如何检测 N+1
# 使用 sqlalchemy 的 echo 或专门的工具
import logging
logging.basicConfig()
logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
# 观察日志中是否有大量重复的 SELECT
# 或使用 pytest-sqlalchemy-mock 或 nplusone 库
# pip install nplusone
from nplusone.ext.sqlalchemy import NPlusOne
# 检测到 N+1 时自动报错
二、Window Function 的性能陷阱
Window Function 是 SQL 中极其强大的功能,但用错了会比简单查询慢很多。
基础用法
-- 每个用户的订单金额排名
SELECT
user_id,
order_id,
amount,
RANK() OVER (PARTITION BY user_id ORDER BY amount DESC) AS rank,
SUM(amount) OVER (PARTITION BY user_id) AS user_total,
amount / SUM(amount) OVER (PARTITION BY user_id) AS proportion
FROM orders;
陷阱 1:在子查询中重复使用 Window Function
-- 糟糕的写法(Window Function 计算了两次)
SELECT *
FROM (
SELECT *,
RANK() OVER (PARTITION BY user_id ORDER BY amount DESC) AS rank
FROM orders
) t
WHERE rank = 1;
-- 等价但更清晰(CTEfilter 先过滤)
WITH ranked AS (
SELECT *,
RANK() OVER (PARTITION BY user_id ORDER BY amount DESC) AS rank
FROM orders
)
SELECT * FROM ranked WHERE rank = 1;
-- 最优写法:DISTINCT ON(PostgreSQL 专有,比 Window Function 快)
SELECT DISTINCT ON (user_id)
user_id, order_id, amount
FROM orders
ORDER BY user_id, amount DESC;
-- 直接取每个 user_id 的第一行(按 amount 降序),无需计算 RANK
陷阱 2:Frame 子句的误用
-- ROWS vs RANGE 的区别(重要!)
SELECT
order_date,
amount,
-- RANGE:对所有相同日期的行,都用同一个窗口范围
SUM(amount) OVER (ORDER BY order_date RANGE UNBOUNDED PRECEDING) AS range_sum,
-- ROWS:严格按照物理行数
SUM(amount) OVER (ORDER BY order_date ROWS UNBOUNDED PRECEDING) AS rows_sum
FROM orders;
-- 当 order_date 有大量重复值时,RANGE 性能很差
-- 因为 PG 需要找出所有相同日期的行
-- 如果可以,总是使用 ROWS,或确保排序键唯一
陷阱 3:大窗口的移动平均
-- 计算 30 天移动平均(数据量大时很慢)
SELECT
order_date,
AVG(daily_amount) OVER (
ORDER BY order_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW -- 30 天窗口
) AS moving_avg_30d
FROM daily_sales;
-- 优化:预先用 CTE 聚合,减少参与 Window Function 的行数
WITH daily AS (
SELECT DATE(created_at) AS order_date, SUM(amount) AS daily_amount
FROM orders
GROUP BY DATE(created_at)
)
SELECT
order_date,
AVG(daily_amount) OVER (
ORDER BY order_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
) AS moving_avg_30d
FROM daily;
三、CTE 的使用与误区
CTE(Common Table Expression)让复杂查询更可读,但也有性能陷阱。
PostgreSQL 12 之前:CTE 是优化栅栏
-- PostgreSQL 11 及以下:CTE 总是物化(创建临时结果集)
-- 即使外部查询有过滤条件,也不会下推到 CTE 内部
-- 慢!(PG 11):先计算所有用户,再过滤
WITH all_users AS (
SELECT * FROM users -- 先把 100 万用户全部查出来
)
SELECT * FROM all_users WHERE id = 123;
-- PostgreSQL 12+ 默认内联 CTE(可以下推过滤)
-- 但如果想强制物化(如需要缓存复用多次使用的结果集)
WITH all_users AS MATERIALIZED (
SELECT * FROM users
)
SELECT * FROM all_users WHERE id = 123; -- 12+ 中明确要求物化
CTE 的正确使用场景
-- ✅ 好用法:分解复杂查询,每个 CTE 只执行一次
WITH
active_users AS (
SELECT id, name FROM users WHERE status = 'active'
),
recent_orders AS (
SELECT user_id, COUNT(*) AS order_count, SUM(amount) AS total
FROM orders
WHERE created_at > NOW() - INTERVAL '30 days'
GROUP BY user_id
),
user_segments AS (
SELECT
au.id,
au.name,
COALESCE(ro.order_count, 0) AS order_count,
COALESCE(ro.total, 0) AS total_spent
FROM active_users au
LEFT JOIN recent_orders ro ON au.id = ro.user_id
)
SELECT *,
CASE
WHEN total_spent > 1000 THEN 'VIP'
WHEN total_spent > 100 THEN 'Regular'
ELSE 'Inactive'
END AS segment
FROM user_segments
ORDER BY total_spent DESC;
递归 CTE 的陷阱
-- 递归 CTE 必须有终止条件,否则无限循环
WITH RECURSIVE tree AS (
-- 基础情况
SELECT id, parent_id, name, 1 AS depth
FROM categories WHERE parent_id IS NULL
UNION ALL
-- 递归情况(必须最终导致空结果)
SELECT c.id, c.parent_id, c.name, t.depth + 1
FROM categories c
JOIN tree t ON c.parent_id = t.id
WHERE t.depth < 10 -- ⚠️ 安全限制:防止循环引用导致无限递归
)
SELECT * FROM tree;
四、JOIN 策略——选择哪种连接
PostgreSQL 查询优化器会自动选择 JOIN 策略,但理解它们有助于分析慢查询。
Hash Join
适用:大表连接大表(两个表都没有合适的索引)
原理:把较小的表哈希到内存,扫描大表时探测哈希表
成本:需要内存(work_mem 不足时溢出到磁盘,性能急剧下降)
-- 查看 Hash Join 溢出情况
EXPLAIN (ANALYZE, BUFFERS)
SELECT u.name, COUNT(o.id)
FROM users u JOIN orders o ON u.id = o.user_id
GROUP BY u.name;
-- 如果看到:Batches: 4(不是 1),说明溢出了
-- 临时增大 work_mem
SET work_mem = '256MB'; -- 只影响当前会话
EXPLAIN (ANALYZE, BUFFERS) SELECT ...;
Nested Loop
适用:外表小,内表有索引
原理:对外表每行,用索引在内表查找
成本:外表行数 × 内表索引查找成本
陷阱:外表行数多时,成本爆炸
-- 强制禁用 Nested Loop(调试用)
SET enable_nestloop = off;
EXPLAIN SELECT ...;
SET enable_nestloop = on;
Merge Join
适用:两个表在连接键上都已经排序(或有索引)
原理:双指针扫描两个有序列表
成本:需要两个表都排序,但排序后扫描很高效
JOIN 优化技巧
-- 1. 小表放左边(Hash Join 时,左表被 Hash)
-- 如果 users 比 orders 小,这样写更好:
SELECT u.name, o.amount
FROM users u -- 小表
JOIN orders o ON o.user_id = u.id; -- 大表
-- 2. 过滤条件尽量在 JOIN 前执行
-- 差(先 JOIN 再过滤)
SELECT u.name, o.amount
FROM users u
JOIN orders o ON u.id = o.user_id
WHERE o.created_at > '2024-01-01' AND u.status = 'active';
-- 好(让优化器先过滤)— PG 通常会自动做,但明确的子查询更安全
SELECT u.name, o.amount
FROM (SELECT id, name FROM users WHERE status = 'active') u
JOIN (SELECT user_id, amount FROM orders WHERE created_at > '2024-01-01') o
ON u.id = o.user_id;
五、批量操作 vs 逐行操作
批量 INSERT
# 差:逐行插入(N 个 SQL)
for row in data:
session.execute("INSERT INTO events VALUES (:id, :type, :data)", row)
# 好:批量插入(1 个 SQL)
# SQLAlchemy
session.execute(
insert(Event),
[{"id": r["id"], "type": r["type"], "data": r["data"]} for r in data]
)
-- SQL 层面的批量插入
INSERT INTO events (id, type, data)
VALUES
(1, 'click', '{"page": "/home"}'),
(2, 'view', '{"page": "/product"}'),
(3, 'purchase', '{"amount": 99.99}');
-- 使用 COPY 命令(最快,适合大批量数据导入)
COPY events (id, type, data) FROM '/tmp/events.csv' WITH (FORMAT CSV, HEADER);
-- Python 使用 COPY
import psycopg2
with conn.cursor() as cur:
cur.copy_expert(
"COPY events (id, type, data) FROM STDIN WITH (FORMAT CSV)",
csv_file
)
性能对比(插入 10 万行):
- 逐行 INSERT:~120 秒
- 批量 INSERT(1000 行/批):~4 秒
- COPY:~0.8 秒
批量 UPDATE
-- 差:循环 UPDATE
UPDATE orders SET status = 'shipped' WHERE id = 1;
UPDATE orders SET status = 'shipped' WHERE id = 2;
-- ...
-- 好:单次 UPDATE(如果 IDs 已知)
UPDATE orders SET status = 'shipped'
WHERE id IN (1, 2, 3, 4, 5);
-- 更好:使用 UPDATE ... FROM(批量更新不同值)
UPDATE orders SET status = new_status
FROM (VALUES
(1, 'shipped'),
(2, 'delivered'),
(3, 'returned')
) AS updates(order_id, new_status)
WHERE orders.id = updates.order_id;
六、子查询优化
关联子查询(Correlated Subquery)的陷阱
-- 糟糕的写法(关联子查询:对每行都执行一次子查询)
SELECT
u.id,
u.name,
(SELECT COUNT(*) FROM orders WHERE user_id = u.id) AS order_count,
(SELECT SUM(amount) FROM orders WHERE user_id = u.id) AS total_spent
FROM users u
WHERE u.active = true;
-- 对 10000 个用户,这会执行 20001 个查询!
-- 正确写法:用 JOIN + GROUP BY
SELECT
u.id,
u.name,
COALESCE(o.order_count, 0) AS order_count,
COALESCE(o.total_spent, 0) AS total_spent
FROM users u
LEFT JOIN (
SELECT user_id, COUNT(*) AS order_count, SUM(amount) AS total_spent
FROM orders
GROUP BY user_id
) o ON u.id = o.user_id
WHERE u.active = true;
EXISTS vs IN vs JOIN
-- EXISTS:检查是否存在(通常最快,找到第一个就停止)
SELECT * FROM users u
WHERE EXISTS (SELECT 1 FROM orders WHERE user_id = u.id);
-- IN + 子查询(结果集小时 OK,大时慢)
SELECT * FROM users
WHERE id IN (SELECT DISTINCT user_id FROM orders);
-- JOIN(通常和 EXISTS 性能相近,更灵活)
SELECT DISTINCT u.*
FROM users u
JOIN orders o ON u.id = o.user_id;
-- NOT EXISTS 通常比 NOT IN 好
-- (NOT IN 对 NULL 值有特殊行为,可能导致意外结果)
SELECT * FROM users u
WHERE NOT EXISTS (SELECT 1 FROM orders WHERE user_id = u.id);
-- vs
SELECT * FROM users
WHERE id NOT IN (SELECT user_id FROM orders WHERE user_id IS NOT NULL);
七、统计信息——优化器决策的基础
PostgreSQL 查询优化器基于统计信息(Statistics)估算每一步的代价,从而选择最优执行计划。
-- 查看列的统计信息
SELECT
attname,
n_distinct,
correlation -- 列值与物理存储顺序的相关性(-1 到 1)
FROM pg_stats
WHERE tablename = 'orders' AND attname IN ('user_id', 'status', 'amount');
-- n_distinct:
-- 正数 = 唯一值的绝对数量
-- 负数 = 唯一值比例的相反数(-0.5 表示约 50% 唯一)
-- correlation(对索引选择有重要影响):
-- 接近 1 = 数据按此列物理排序存储(索引扫描效率高)
-- 接近 0 = 数据随机分布(索引扫描可能不如全表扫描)
-- 强制更新统计信息(当表数据变化大时)
ANALYZE orders;
-- 提高统计信息的精度(对高基数列)
ALTER TABLE orders ALTER COLUMN amount SET STATISTICS 500; -- 默认是 100
ANALYZE orders;
八、实战案例:一个典型的慢查询优化过程
问题描述
用户投诉:后台管理页面的"客户列表"加载需要 15 秒。
-- 原始查询(已简化)
SELECT
u.id,
u.email,
u.created_at,
(SELECT COUNT(*) FROM orders WHERE user_id = u.id) AS order_count,
(SELECT MAX(created_at) FROM orders WHERE user_id = u.id) AS last_order_at,
(SELECT SUM(amount) FROM orders WHERE user_id = u.id AND status = 'paid') AS total_paid
FROM users u
WHERE u.role = 'customer'
ORDER BY u.created_at DESC
LIMIT 50 OFFSET 500;
分析过程
EXPLAIN (ANALYZE, BUFFERS)
-- 上面的查询...
-- 发现:
-- 1. 3 个关联子查询,每个都是 Index Scan,但执行了 50 × 3 = 150 次
-- 2. OFFSET 500 需要扫描前 550 行然后丢弃 500 行
-- 3. users 表没有 role 列的索引
优化方案
-- 优化 1:消除关联子查询
SELECT
u.id,
u.email,
u.created_at,
COALESCE(o.order_count, 0) AS order_count,
o.last_order_at,
COALESCE(o.total_paid, 0) AS total_paid
FROM users u
LEFT JOIN (
SELECT
user_id,
COUNT(*) AS order_count,
MAX(created_at) AS last_order_at,
SUM(CASE WHEN status = 'paid' THEN amount ELSE 0 END) AS total_paid
FROM orders
GROUP BY user_id
) o ON u.id = o.user_id
WHERE u.role = 'customer'
ORDER BY u.created_at DESC
LIMIT 50 OFFSET 500;
-- 优化 2:加索引
CREATE INDEX idx_users_role_created ON users(role, created_at DESC);
-- 优化 3:解决深度分页问题(Keyset Pagination)
-- 用"游标"代替 OFFSET(OFFSET 越大越慢)
SELECT
u.id,
u.email,
u.created_at,
...
FROM users u
LEFT JOIN ...
WHERE u.role = 'customer'
AND u.created_at < '2024-01-15 10:30:00' -- 上一页最后一行的 created_at
ORDER BY u.created_at DESC
LIMIT 50;
-- 这个查询无论第几页,性能都一样快
效果
| 优化前 | 优化后 | |
|---|---|---|
| 查询时间 | 15 秒 | 35 毫秒 |
| 执行计划 | 3 × N 关联子查询 | 2 次 Hash Join |
| 页面感受 | 超时 | 流畅 |
关键认知
大多数查询问题都有规律:
- N+1:ORM 懒加载,用 eager load 或 JOIN 解决
- 全表扫描:缺索引,或索引失效(函数、类型转换)
- 深度分页:OFFSET 大,改用 Keyset Pagination
- 关联子查询:每行触发一次,改为 JOIN + GROUP BY
- 统计信息过期:估算偏差大,ANALYZE 解决
“慢查询不是数据库的问题,是 SQL 的问题。数据库只是按你的指令行事。”