筆記如何使用 Python 從 Google Trends 熱門關鍵字的 RSS Service 取得關鍵字。
說明
Google 提供的 RSS URL 可以加上地區代碼(`geo` 參數)來查詢,例如 US、JP、TW 等。
import html
import requests
from datetime import datetime
from collections import namedtuple
from xml.etree import ElementTree as ET
# Namespace URI under which the feed's custom ``ht:`` elements are looked up.
# This is the same value the original code obtained by cutting the feed URL
# at "/rss"; it is expected to match the feed's xmlns:ht declaration.
_HT_NAMESPACE = "https://trends.google.com/trends/trendingsearches/daily"
_TRENDS_RSS_URL = f"{_HT_NAMESPACE}/rss"

# One trending keyword from the feed; ``news`` is a list of (title, url) tuples.
# Defined once at module level so every call returns items of the same type.
TrendingItem = namedtuple('TrendingItem', ['title', 'news', 'pubDate', 'description'])


def get_google_trends(region, timeout=10):
    """Fetch the Google Trends daily trending-searches RSS feed for a region.

    Args:
        region: Region code sent as the ``geo`` query parameter
            (e.g. "US", "JP", "TW").
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests.get`` can block indefinitely.

    Returns:
        A list of ``TrendingItem`` namedtuples, one per ``<item>`` in the
        feed. Missing or empty ``title`` / ``pubDate`` / ``description``
        elements become ``''``. ``news`` is a list of
        ``(news_item_title, news_item_url)`` tuples, with ``''`` standing in
        for a missing sub-element.

    Raises:
        Exception: If the server answers with a status code other than 200.
        xml.etree.ElementTree.ParseError: If the response body is not
            well-formed XML.

    ``requests`` may also raise its own exceptions (for example on timeout or
    connection failure).
    """
    # Pass the region through ``params`` so it is URL-encoded properly.
    response = requests.get(_TRENDS_RSS_URL, params={"geo": region}, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch data: HTTP status code {response.status_code}")

    root = ET.fromstring(response.content)
    # The custom ht:* elements are only found when the prefix is mapped to
    # its namespace URI in every find/findall/findtext call.
    ns = {"ht": _HT_NAMESPACE}

    items = []
    for item in root.findall('.//item'):
        # findtext() returns the default for a missing element and '' for an
        # empty one, so a malformed entry cannot raise AttributeError here.
        news = [
            (
                news_item.findtext("ht:news_item_title", default='', namespaces=ns),
                news_item.findtext("ht:news_item_url", default='', namespaces=ns),
            )
            for news_item in item.findall(".//ht:news_item", namespaces=ns)
        ]
        items.append(
            TrendingItem(
                title=item.findtext('title', default=''),
                news=news,
                pubDate=item.findtext('pubDate', default=''),
                description=item.findtext('description', default=''),
            )
        )
    return items
實際在處理 RSS 內容時需要注意的是,因為其中有自定義的 Element `ht:news_item`,所以在使用 `findall` 或者是 `find` 的時候,都必須加上 `namespaces` 參數,才能夠正確處理。
def display_keyword(trends):
    """Print each trending keyword with its date and numbered news headlines.

    Args:
        trends: Iterable of items exposing ``title``, ``pubDate`` and
            ``news`` attributes, where ``news`` is a sequence of
            ``(headline, url)`` pairs.

    For every item one header line ``【title】, YYYY/MM/DD`` is printed,
    followed by one line per headline with HTML entities unescaped.
    """
    for item in trends:
        try:
            date = datetime.strptime(
                item.pubDate, "%a, %d %b %Y %H:%M:%S %z"
            ).strftime("%Y/%m/%d")
        except (TypeError, ValueError):
            # pubDate may be '' (the fetcher's default for a missing element)
            # or in an unexpected format; show it as-is instead of aborting
            # the whole listing.
            date = item.pubDate or ''
        print(f'【{item.title}】, {date}')
        for idx, (headline, _url) in enumerate(item.news, start=1):
            print(f'{idx}. {html.unescape(headline)}')
# Demo: fetch the trending keywords for Taiwan and print them.
tw_trends = get_google_trends("TW")
display_keyword(tw_trends)