第02章:SEO自动化——关键词挖掘、竞品监控与内容缺口分析
第02章:SEO自动化——关键词挖掘、竞品监控与内容缺口分析
“手动做SEO就像手工挖矿。你挖一天,找到几个关键词。竞争对手用脚本,一天分析10万个关键词,找到你永远发现不了的机会。SEO自动化不是优化,是降维打击。”
一、SEO自动化的核心思路
传统SEO工作流(手动):
1. 打开Ahrefs,输入竞品域名
2. 手动浏览关键词列表
3. 复制感兴趣的到Excel
4. 手动判断哪些值得写内容
→ 1天能分析50–100个关键词
自动化SEO工作流:
1. 脚本批量获取竞品的所有排名关键词
2. 自动筛选(按搜索量、难度、商业意图)
3. 自动识别"他们有,你没有"的内容缺口
4. 自动生成内容优先级列表
→ 1天能分析10万+ 关键词
核心工具:
- SerpAPI:SERP数据($50/月,10,000次查询)
- Ahrefs API / Semrush API:关键词数据库($99+/月)
- Google Search Console API:自己网站的真实搜索数据(免费!)
- DataForSEO API:价格最低的SEO数据API($0.001/条数据)
二、Google Search Console自动化
import json
from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build
from datetime import datetime, timedelta
import pandas as pd
class GSCAutomation:
    """Helper around the Google Search Console Search Analytics API.

    Fetches search-performance rows for one property and offers two
    DataFrame filters for spotting optimisation opportunities.
    """

    # Metric fields read from every Search Analytics row.
    _METRIC_COLUMNS = ("clicks", "impressions", "ctr", "position")

    def __init__(self, site_url: str, credentials_file: str):
        """Build the API client.

        Args:
            site_url: Property identifier passed as ``siteUrl`` on each query.
            credentials_file: Path handed to
                ``Credentials.from_authorized_user_file``.
        """
        self.site_url = site_url
        creds = Credentials.from_authorized_user_file(credentials_file)
        self.service = build("searchconsole", "v1", credentials=creds)

    def get_search_performance(
        self,
        days: int = 90,
        dimensions: "list | None" = None,
        row_limit: int = 25000,
    ) -> pd.DataFrame:
        """Fetch search-performance rows for the last ``days`` days.

        Args:
            days: Size of the reporting window, counted back from today.
            dimensions: Dimensions to group by; defaults to
                ``["query", "page"]``.
            row_limit: Value sent as ``rowLimit``.

        Returns:
            A DataFrame with one column per requested dimension (``query``
            and ``page`` are always present, ``None`` when not requested)
            followed by ``clicks``, ``impressions``, ``ctr`` and
            ``position``. The columns exist even when the API returns no rows.
        """
        # A fresh list per call: a list literal in the signature would be
        # shared between calls.
        dims = ["query", "page"] if dimensions is None else list(dimensions)

        now = datetime.now()
        request = {
            "startDate": (now - timedelta(days=days)).strftime("%Y-%m-%d"),
            "endDate": now.strftime("%Y-%m-%d"),
            "dimensions": dims,
            "rowLimit": row_limit,
        }
        response = self.service.searchanalytics().query(
            siteUrl=self.site_url, body=request
        ).execute()
        return self._rows_to_frame(response.get("rows", []), dims)

    @classmethod
    def _rows_to_frame(cls, rows: list, dimensions: list) -> pd.DataFrame:
        """Convert raw API rows into a DataFrame keyed by dimension name."""
        columns = ["query", "page"]
        columns += [dim for dim in dimensions if dim not in columns]
        columns += list(cls._METRIC_COLUMNS)

        records = []
        for row in rows:
            record = {"query": None, "page": None}
            # Row keys arrive in the order of the requested dimensions, so
            # pair them by name instead of assuming keys[0] is the query.
            record.update(zip(dimensions, row.get("keys", [])))
            for metric in cls._METRIC_COLUMNS:
                record[metric] = row[metric]
            records.append(record)

        # Explicit columns keep the schema stable for an empty result, so
        # later column lookups do not raise KeyError.
        return pd.DataFrame(records, columns=columns)

    def find_quick_wins(
        self,
        df: pd.DataFrame,
        min_position: float = 11,
        max_position: float = 20,
        min_impressions: int = 100,
        target_ctr: float = 0.10,
    ) -> pd.DataFrame:
        """Return "low-hanging fruit": page-two rankings with real demand.

        Args:
            df: Frame with ``position``, ``impressions`` and ``clicks``.
            min_position: Lowest position (inclusive) to keep.
            max_position: Highest position (inclusive) to keep.
            min_impressions: Minimum impressions over the window ``df``
                covers (the whole fetched period, not per month).
            target_ctr: CTR assumed once the keyword reaches page one.

        Returns:
            A copy of the matching rows with ``estimated_ctr_p1``,
            ``potential_clicks`` and ``click_uplift`` added, sorted by
            ``click_uplift`` descending.
        """
        quick_wins = df[
            (df["position"] >= min_position)
            & (df["position"] <= max_position)
            & (df["impressions"] >= min_impressions)
        ].copy()

        # Estimated gain if the keyword earned the target page-one CTR.
        quick_wins["estimated_ctr_p1"] = target_ctr
        quick_wins["potential_clicks"] = (
            quick_wins["impressions"] * quick_wins["estimated_ctr_p1"]
        ).astype(int)
        quick_wins["click_uplift"] = (
            quick_wins["potential_clicks"] - quick_wins["clicks"]
        )
        return quick_wins.sort_values("click_uplift", ascending=False)

    def find_high_impressions_low_ctr(
        self,
        df: pd.DataFrame,
        min_impressions: int = 1000,
        max_ctr: float = 0.02,
        max_position: float = 10,
        baseline_ctr: float = 0.05,
    ) -> pd.DataFrame:
        """Return page-one rows that are seen often but rarely clicked.

        These are candidates for title/description rewrites.

        Args:
            df: Frame with ``impressions``, ``ctr``, ``position``, ``clicks``.
            min_impressions: Minimum impressions to keep.
            max_ctr: Rows with a CTR strictly below this are kept.
            max_position: Highest position (inclusive) to keep.
            baseline_ctr: CTR used to estimate the clicks being missed.

        Returns:
            A copy of the matching rows with ``missed_clicks`` added,
            sorted by ``missed_clicks`` descending.
        """
        target = df[
            (df["impressions"] >= min_impressions)
            & (df["ctr"] < max_ctr)
            & (df["position"] <= max_position)
        ].copy()
        target["missed_clicks"] = (
            (target["impressions"] * baseline_ctr) - target["clicks"]
        ).astype(int)
        return target.sort_values("missed_clicks", ascending=False)
# 使用示例
# gsc = GSCAutomation("sc-domain:yoursite.com", "credentials.json")
# df = gsc.get_search_performance(days=90)
# quick_wins = gsc.find_quick_wins(df)
# print(quick_wins.head(20))
三、关键词缺口分析(竞品分析)
import requests
import time
class KeywordGapAnalyzer:
    """Keyword gap analysis: keywords a competitor ranks for and you do not.

    Ranking data is fetched from the DataForSEO Labs API.
    """

    # Supported country codes -> DataForSEO location_code values.
    _LOCATION_CODES = {
        "us": 2840,
        "uk": 2826,
        "gb": 2826,
        "ca": 2124,
        "au": 2036,
        "de": 2276,
        "fr": 2250,
    }

    def __init__(self, api_login: str, api_password: str, timeout: float = 30.0):
        """Store API credentials.

        Args:
            api_login: DataForSEO login used for HTTP basic auth.
            api_password: DataForSEO password used for HTTP basic auth.
            timeout: Seconds to wait for each HTTP request.
        """
        self.base_url = "https://api.dataforseo.com/v3"
        self.auth = (api_login, api_password)
        self.timeout = timeout

    def get_competitor_keywords(
        self,
        competitor_domain: str,
        country: str = "us",
        language: str = "en",
        limit: int = 1000,
    ) -> list:
        """Fetch the keywords a competitor domain ranks for.

        Only keywords with search volume above 100 and a rank within the
        top 20 are requested, ordered by search volume descending.

        Args:
            competitor_domain: Domain to analyse.
            country: Country code; see ``_LOCATION_CODES`` for valid values.
            language: Language code sent as ``language_code``.
            limit: Maximum number of keywords to request.

        Returns:
            A list of dicts with ``keyword``, ``search_volume``,
            ``keyword_difficulty`` and ``competitor_rank``.

        Raises:
            ValueError: If ``country`` is not a supported code. Previously
                every non-"us" value was silently queried as the UK.
            requests.HTTPError: If the API responds with an error status.
        """
        location_code = self._LOCATION_CODES.get(country.lower())
        if location_code is None:
            supported = ", ".join(sorted(self._LOCATION_CODES))
            raise ValueError(
                f"Unsupported country {country!r}; expected one of: {supported}"
            )

        endpoint = f"{self.base_url}/dataforseo_labs/google/ranked_keywords/live"
        payload = [{
            "target": competitor_domain,
            "location_code": location_code,
            "language_code": language,
            # Multiple filter conditions must be joined by a logical operator.
            "filters": [
                ["keyword_data.keyword_info.search_volume", ">", 100],
                "and",
                ["ranked_serp_element.serp_item.rank_group", "<=", 20],
            ],
            "order_by": ["keyword_data.keyword_info.search_volume,desc"],
            "limit": limit,
        }]
        response = requests.post(
            endpoint,
            json=payload,
            auth=self.auth,
            timeout=self.timeout,
        )
        response.raise_for_status()
        return self._parse_ranked_keywords(response.json())

    @staticmethod
    def _parse_ranked_keywords(result: dict) -> list:
        """Flatten a ranked_keywords response into simple keyword dicts."""
        tasks = result.get("tasks") or []
        if not tasks:
            return []
        task_result = tasks[0].get("result") or []
        if not task_result:
            return []

        keywords = []
        # Any nested object may be missing or null; fall back to empty
        # containers so one sparse item does not abort the whole batch.
        for item in task_result[0].get("items") or []:
            keyword_data = item.get("keyword_data") or {}
            keyword = keyword_data.get("keyword")
            if not keyword:
                continue
            keyword_info = keyword_data.get("keyword_info") or {}
            keyword_props = keyword_data.get("keyword_properties") or {}
            serp_item = (item.get("ranked_serp_element") or {}).get("serp_item") or {}
            keywords.append({
                "keyword": keyword,
                "search_volume": keyword_info.get("search_volume"),
                "keyword_difficulty": keyword_props.get("keyword_difficulty"),
                "competitor_rank": serp_item.get("rank_group"),
            })
        return keywords

    def find_keyword_gaps(
        self,
        my_keywords: set,
        competitor_keywords: list,
        max_difficulty: int = 50,
        min_volume: int = 500,
    ) -> list:
        """Return competitor keywords you do not rank for, best first.

        Args:
            my_keywords: Keywords your own site already ranks for.
            competitor_keywords: Dicts as returned by
                ``get_competitor_keywords``.
            max_difficulty: Highest keyword difficulty to keep (inclusive).
            min_volume: Lowest search volume to keep (inclusive).

        Returns:
            New dicts (the inputs are not modified), each with an added
            ``opportunity_score`` of search volume divided by difficulty,
            sorted by that score descending. Entries whose difficulty or
            volume is ``None`` are skipped because they cannot be scored.
        """
        gaps = []
        for kw in competitor_keywords:
            if kw["keyword"] in my_keywords:
                continue
            difficulty = kw.get("keyword_difficulty")
            volume = kw.get("search_volume")
            if difficulty is None or volume is None:
                continue
            if difficulty > max_difficulty or volume < min_volume:
                continue
            # Copy so the caller's data is left untouched; clamp the
            # divisor to avoid dividing by a zero difficulty.
            gap = dict(kw)
            gap["opportunity_score"] = volume / max(difficulty, 1)
            gaps.append(gap)

        return sorted(gaps, key=lambda g: g["opportunity_score"], reverse=True)
四、SERP分析自动化
class SERPAnalyzer:
    """SERP analysis: inspect the competitive landscape for a keyword.

    Search results are fetched from SerpApi (serpapi.com).
    """

    # Registrable domains treated as hard-to-outrank competitors.
    _HIGH_AUTHORITY_DOMAINS = (
        "wikipedia.org", "reddit.com", "forbes.com", "nytimes.com",
        "amazon.com", "youtube.com", "linkedin.com",
    )

    def __init__(self, api_key: str, timeout: float = 30.0):
        """Store the SerpApi key and the per-request timeout in seconds."""
        self.api_key = api_key
        self.base_url = "https://serpapi.com/search.json"
        self.timeout = timeout

    def analyze_serp(
        self, keyword: str, country: str = "us", language: str = "en"
    ) -> dict:
        """Fetch and summarise the results page for ``keyword``.

        Args:
            keyword: Search query.
            country: Value sent as the ``gl`` parameter.
            language: Value sent as the ``hl`` parameter.

        Returns:
            A dict with ``keyword``, ``total_results``, the three
            ``has_*`` feature flags and an ``organic_results`` list of
            ``position`` / ``title`` / ``url`` / ``domain`` / ``snippet``.

        Raises:
            requests.HTTPError: If the API responds with an error status.
        """
        params = {
            "q": keyword,
            "api_key": self.api_key,
            "gl": country,
            "hl": language,
            "num": 10,
        }
        response = requests.get(self.base_url, params=params, timeout=self.timeout)
        response.raise_for_status()
        return self._summarize_serp(keyword, response.json())

    @staticmethod
    def _extract_domain(url) -> str:
        """Return the lower-cased host of ``url`` without a leading "www."."""
        from urllib.parse import urlparse

        if not url:
            return ""
        host = urlparse(url).hostname or ""
        return host[4:] if host.startswith("www.") else host

    @classmethod
    def _summarize_serp(cls, keyword: str, data: dict) -> dict:
        """Reduce a raw search response to the fields this class reports."""
        organic = [
            {
                "position": result.get("position"),
                "title": result.get("title"),
                "url": result.get("link"),
                # Derive the host from the real link: displayed_link is a
                # display string ("https://site › path"), not a bare host.
                "domain": cls._extract_domain(result.get("link")),
                "snippet": result.get("snippet"),
            }
            for result in data.get("organic_results", [])
        ]
        return {
            "keyword": keyword,
            "total_results": data.get("search_information", {}).get("total_results"),
            "has_featured_snippet": "answer_box" in data,
            "has_knowledge_panel": "knowledge_graph" in data,
            # A non-empty inline_images list is itself the image pack; its
            # entries are not checked for a "type" field.
            "has_image_pack": bool(data.get("inline_images")),
            "organic_results": organic,
        }

    @classmethod
    def _is_high_authority(cls, domain: str) -> bool:
        """True if ``domain`` is a listed authority site or a subdomain of one."""
        return any(
            domain == known or domain.endswith("." + known)
            for known in cls._HIGH_AUTHORITY_DOMAINS
        )

    def calculate_serp_difficulty(self, serp_data: dict) -> dict:
        """Estimate keyword difficulty from SERP features.

        Starts from a base of 30, adds 15 per high-authority domain among
        the top 5 organic results, then applies feature adjustments. The
        result is clamped to the range 0-100.

        Args:
            serp_data: Dict in the shape returned by ``analyze_serp``.

        Returns:
            A dict with ``estimated_difficulty``,
            ``high_authority_competitors`` and
            ``has_featured_snippet_opportunity``.
        """
        signals = {
            "has_featured_snippet": -10,  # snippet can be captured: easier
            "has_knowledge_panel": +15,   # knowledge panel present: harder
        }

        # Simplified authority check over the top 5 organic results.
        top_results = (serp_data.get("organic_results") or [])[:5]
        authority_count = sum(
            1 for result in top_results
            if self._is_high_authority(result.get("domain") or "")
        )

        total_difficulty = 30 + authority_count * 15
        for signal, adjustment in signals.items():
            if serp_data.get(signal):
                total_difficulty += adjustment

        return {
            "estimated_difficulty": max(0, min(total_difficulty, 100)),
            "high_authority_competitors": authority_count,
            "has_featured_snippet_opportunity": serp_data.get(
                "has_featured_snippet", False
            ),
        }
五、关键词批量监控系统
import schedule
import sqlite3
class KeywordMonitor:
    """Keyword rank tracker backed by a SQLite database.

    Stores one position per keyword per day and reports trends and drops.
    Can be used as a context manager to close the connection on exit.
    """

    _DATE_FORMAT = "%Y-%m-%d"

    def __init__(self, db_path: str = "keywords.db"):
        """Open (or create) the database at ``db_path`` and ensure the schema."""
        self.conn = sqlite3.connect(db_path)
        self._init_db()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Close the underlying database connection."""
        self.conn.close()

    def _init_db(self):
        """Create the ``rankings`` table if it does not exist yet."""
        self.conn.execute("""
            CREATE TABLE IF NOT EXISTS rankings (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                keyword TEXT NOT NULL,
                check_date TEXT NOT NULL,
                position INTEGER,
                url TEXT,
                UNIQUE(keyword, check_date)
            )
        """)
        self.conn.commit()

    @classmethod
    def _date_str(cls, days_ago: int = 0) -> str:
        """Return the local date ``days_ago`` days back as YYYY-MM-DD."""
        return (datetime.now() - timedelta(days=days_ago)).strftime(cls._DATE_FORMAT)

    def record_ranking(self, keyword: str, position: int, url: str):
        """Store today's position for ``keyword``.

        A second call for the same keyword on the same day replaces the
        earlier row (the table is unique on keyword + date).
        """
        self.conn.execute(
            "INSERT OR REPLACE INTO rankings (keyword, check_date, position, url) VALUES (?, ?, ?, ?)",
            (keyword, self._date_str(), position, url)
        )
        self.conn.commit()

    def get_ranking_trend(self, keyword: str, days: int = 30) -> pd.DataFrame:
        """Return the keyword's rankings from the last ``days`` days.

        Rows are filtered by date (today and the ``days - 1`` days before
        it), so gaps in the history do not pull in older records.

        Returns:
            A DataFrame with ``check_date``, ``position`` and ``url``,
            newest first.
        """
        query = """
            SELECT check_date, position, url
            FROM rankings
            WHERE keyword = ?
              AND check_date >= ?
            ORDER BY check_date DESC
        """
        cutoff = self._date_str(max(days - 1, 0))
        return pd.read_sql_query(query, self.conn, params=(keyword, cutoff))

    def find_dropped_rankings(self, threshold: int = 5) -> list:
        """Find keywords whose position worsened by at least ``threshold``.

        Compares today's row with yesterday's. Both dates are computed the
        same way ``record_ranking`` stamps rows (local time); SQLite's
        ``date('now')`` is UTC and could select different days.

        Returns:
            A list of ``(keyword, today_pos, yesterday_pos, drop)`` tuples,
            largest drop first. Keywords with a NULL position on either day
            are not reported.
        """
        # "drop" is an SQL keyword and cannot be used as a bare alias.
        query = """
            SELECT
                r1.keyword,
                r1.position AS today_pos,
                r2.position AS yesterday_pos,
                (r1.position - r2.position) AS position_drop
            FROM rankings r1
            JOIN rankings r2 ON r1.keyword = r2.keyword
            WHERE r1.check_date = ?
              AND r2.check_date = ?
              AND (r1.position - r2.position) >= ?
            ORDER BY position_drop DESC
        """
        cursor = self.conn.execute(
            query, (self._date_str(0), self._date_str(1), threshold)
        )
        return cursor.fetchall()
本章小结
- SEO自动化 = 10万关键词/天 vs 100个/天(手动)——这是数量级的竞争优势
- Google Search Console API免费且数据最真实:找"低垂果实"(第2页排名)和"高展示低CTR"页面
- 关键词缺口分析:系统性找到"竞品有排名但你没有"的内容机会——这是内容策略的核心输入
- SERP分析:在写任何内容之前,先分析Top10竞争格局,评估进入难度
- 排名监控系统:每日自动追踪关键词排名,发现异常立即响应
核心行动建议:今天接入你网站的 Google Search Console API(完全免费),导出最近90天的数据,运行 find_quick_wins 函数。找到平均排名在11–20位、最近90天展示量不少于100次的关键词——这些是你最快能提升排名的页面。选择其中前3个,计划本周内更新这些页面的内容。
→ 继续阅读:第03章 程序化SEO:用代码批量生成高排名内容页面