Python Get Google Trends (RSS)

  1. 說明

筆記如何使用 Python 從 Google Trends 熱門關鍵字的 RSS Service 取得關鍵字。



Google 提供的 RSS URL 可以加上地區代碼來查詢,例如 US、JP、TW 等。

Google Trends RSS

import html
import requests
from datetime import datetime
from collections import namedtuple
from xml.etree import ElementTree as ET

# Endpoint of the Google Trends "trending now" RSS feed; the region is sent as
# the ``geo`` query parameter.
# NOTE(review): the URL literal was missing from the original snippet (only the
# ``{region}`` placeholder survived) — confirm this endpoint before relying on it.
GOOGLE_TRENDS_RSS_URL = "https://trends.google.com/trending/rss"

# One trending keyword from the feed. ``news`` is a list of
# ``(news_item_title, news_item_url)`` tuples; the other fields are the text of
# the RSS ``<title>``, ``<pubDate>`` and ``<description>`` elements.
TrendingItem = namedtuple('TrendingItem', ['title', 'news', 'pubDate', 'description'])


def get_google_trends(region, timeout=10):
    """Fetch the Google Trends RSS feed for *region* and return its items.

    Args:
        region: Region code sent as the ``geo`` query parameter
            (for example ``"US"``, ``"JP"`` or ``"TW"``).
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of ``TrendingItem`` namedtuples, one per ``<item>`` element.
        Missing or empty ``title``/``pubDate``/``description`` elements and
        missing news sub-elements are reported as empty strings.

    Raises:
        RuntimeError: If the server answers with a status code other than 200.
        xml.etree.ElementTree.ParseError: If the response body is not
            well-formed XML.
    """
    # Let requests build the query string so the region is URL-encoded, and
    # always set a timeout so a stalled connection cannot hang the caller.
    response = requests.get(
        GOOGLE_TRENDS_RSS_URL, params={"geo": region}, timeout=timeout
    )
    if response.status_code != 200:
        raise RuntimeError(f"Failed to fetch data: HTTP status code {response.status_code}")

    root = ET.fromstring(response.content)

    items = []
    for item in root.iterfind('.//item'):
        # The news elements live in the feed's custom ``ht`` namespace. The
        # ``{*}`` wildcard (Python 3.8+) matches them whatever URI the feed
        # declares, so the lookup does not depend on the request URL.
        news = [
            (
                news_item.findtext("{*}news_item_title", default=''),
                news_item.findtext("{*}news_item_url", default=''),
            )
            for news_item in item.iterfind(".//{*}news_item")
        ]
        items.append(
            TrendingItem(
                title=item.findtext('title', default=''),
                news=news,
                pubDate=item.findtext('pubDate', default=''),
                description=item.findtext('description', default=''),
            )
        )
    return items

實際處理 RSS 內容時需要注意:因為內容中包含自訂命名空間的元素(例如 ht:news_item),所以使用 findall 或 find 時都必須帶上 namespace,才能正確找到這些元素。

def display_keyword(trends):
    """Print every trending keyword with its date and numbered news titles.

    Args:
        trends: Iterable of items exposing ``title``, ``pubDate`` and ``news``
            attributes, where ``news`` is a sequence whose entries hold the
            news title at index 0.

    Each keyword is printed as ``【title】, YYYY/MM/DD``, followed by one line
    per news entry numbered from 1, with HTML entities in the title unescaped.
    """
    for item in trends:
        try:
            date = datetime.strptime(item.pubDate, "%a, %d %b %Y %H:%M:%S %z").strftime("%Y/%m/%d")
        except (TypeError, ValueError):
            # Missing or unexpectedly formatted pubDate: show the raw value
            # rather than aborting the whole listing.
            date = item.pubDate or ''
        print(f'【{item.title}】, {date}')
        for idx, news in enumerate(item.news, start=1):
            # ``or ''`` guards against a news entry whose title is None.
            print(f'{idx}. {html.unescape(news[0] or "")}')