Python Get Google Trends (RSS)
2023-12-22
筆記如何使用 Python 從 Google Trends 熱門關鍵字的 RSS Service 取得關鍵字。
data:image/s3,"s3://crabby-images/59c2d/59c2d5f13e52ae06aac55f795870106f9df8d0d9" alt="logo"
說明
Google 提供的 RSS URL 可以加上地區代碼(`geo` 參數)來查詢,例如 US、JP、TW 等。
import html
import requests
from datetime import datetime
from collections import namedtuple
from xml.etree import ElementTree as ET
# Base URL of the Google Trends daily feed. The parsing code also uses it as
# the XML namespace URI for the ``ht:`` prefixed elements (the original code
# derived the same value by splitting the request URL at '/rss').
_TRENDS_FEED_BASE = "https://trends.google.com/trends/trendingsearches/daily"
_TRENDS_NS = {"ht": _TRENDS_FEED_BASE}

# Record type for one RSS <item>; ``news`` is a list of (title, url) tuples.
TrendingItem = namedtuple('TrendingItem', ['title', 'news', 'pubDate', 'description'])


def _parse_trending_items(xml_content):
    """Parse RSS XML (bytes or str) into a list of ``TrendingItem`` records."""
    root = ET.fromstring(xml_content)
    items = []
    for item in root.findall('.//item'):
        # findtext() returns the default for a missing element and '' for an
        # empty one, so no field is ever None and no lookup can raise
        # AttributeError on a malformed entry.
        news = [
            (
                news_item.findtext("ht:news_item_title", default="", namespaces=_TRENDS_NS),
                news_item.findtext("ht:news_item_url", default="", namespaces=_TRENDS_NS),
            )
            for news_item in item.findall(".//ht:news_item", namespaces=_TRENDS_NS)
        ]
        items.append(
            TrendingItem(
                title=item.findtext('title', default=''),
                news=news,
                pubDate=item.findtext('pubDate', default=''),
                description=item.findtext('description', default=''),
            )
        )
    return items


def get_google_trends(region, timeout=10):
    """Fetch the Google Trends daily RSS feed for *region* and parse it.

    Args:
        region: Region code sent as the ``geo`` query parameter
            (e.g. "US", "JP", "TW").
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of ``TrendingItem`` namedtuples with the fields ``title``,
        ``news`` (list of ``(news_title, news_url)`` tuples), ``pubDate``
        and ``description``. Missing or empty elements yield ``''``.

    Raises:
        Exception: If the server answers with a non-200 status code.
        requests.RequestException: On connection errors or timeout.
        xml.etree.ElementTree.ParseError: If the response is not valid XML.
    """
    # Passing the region via ``params`` lets requests URL-encode it.
    # An explicit timeout keeps a stalled connection from blocking forever.
    response = requests.get(
        f"{_TRENDS_FEED_BASE}/rss",
        params={"geo": region},
        timeout=timeout,
    )
    if response.status_code != 200:
        raise Exception(f"Failed to fetch data: HTTP status code {response.status_code}")
    return _parse_trending_items(response.content)
實際在處理 RSS 內容時需要注意的是,因為有自定義的 Element `ht:news_item`,所以在使用 `findall` 或者是 `find` 的時候,都必須加上 `namespace` 才能夠正確處理。
def display_keyword(trends):
    """Print each trending keyword with its date and numbered news titles.

    Args:
        trends: Iterable of records exposing ``title``, ``pubDate`` and
            ``news`` attributes, where ``news`` is a sequence of entries
            whose first element is the (HTML-escaped) news title.

    ``pubDate`` is expected in the RSS date format, e.g.
    "Fri, 22 Dec 2023 10:00:00 +0800". If it is empty, missing, or in another
    format, the raw value is printed instead of raising.
    """
    for item in trends:
        try:
            date = datetime.strptime(
                item.pubDate, "%a, %d %b %Y %H:%M:%S %z"
            ).strftime("%Y/%m/%d")
        except (TypeError, ValueError):
            # An item without a parseable date should not abort the listing.
            date = item.pubDate or ""
        print(f'【{item.title}】, {date}')
        for idx, news in enumerate(item.news, start=1):
            # News titles arrive HTML-escaped (e.g. "&amp;"); decode for display.
            print(f'{idx}. {html.unescape(news[0])}')
# Script entry: fetch the trending items for region "TW" and print them.
display_keyword(get_google_trends("TW"))