Python Auto Get Image For Keywords (Website Screenshot)
2023-10-23
筆記:使用 Python 搭配 Selenium,批次擷取各關鍵字在 Google Images 搜尋結果頁的截圖,供單字學習使用。
說明
import os
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from PIL import Image
import vocalist
def compress_and_resize_png(input_path, output_path, resize_percentage=75, compression_quality=30, top_margin=150):
    """
    Crop a strip off the top of an image and save the remainder as a PNG.

    Despite the name, no resizing is performed: ``resize_percentage`` is
    accepted only so that existing callers keep working. Any failure is
    reported with ``print`` and swallowed, so a batch run can carry on with
    the next image.

    :param input_path: path of the image to read
    :param output_path: path the cropped PNG is written to
    :param resize_percentage: currently unused (default 75)
    :param compression_quality: forwarded to ``Image.save`` as ``quality`` (default 30)
    :param top_margin: number of pixel rows removed from the top (default 150)
    :return: None
    """
    try:
        # The context manager releases the source file even when cropping or
        # saving raises.
        with Image.open(input_path) as original_image:
            width, height = original_image.size
            # Keep everything below the top strip.
            # NOTE(review): presumably the strip is the search page header;
            # confirm the 150 px default against real screenshots.
            box = (0, top_margin, width, height)
            cropped_image = original_image.crop(box)
            # NOTE(review): `quality` is not among Pillow's documented PNG
            # save options (`optimize` / `compress_level` are) - confirm
            # whether this argument has any effect on the written file.
            cropped_image.save(output_path, 'PNG', quality=compression_quality)
        print(f'已縮小和壓縮圖像,並保存為:{output_path}')
    except Exception as e:
        # Deliberate best effort: report the problem and let the caller's
        # loop continue.
        print(f'處理圖像 {input_path} 時出現錯誤: {str(e)}')
# Keywords to search for. Each entry is typed into the search box and used
# as the screenshot file name, so they are presumably plain words - verify
# against the `vocalist` module.
keywords = vocalist.vocalist

# Configure the Chrome WebDriver.
chrome_options = webdriver.ChromeOptions()
# To point Selenium at a specific Chrome binary, uncomment and adjust:
# chrome_options.binary_location = r'C:\Program Files\Google\Chrome\Application\chrome.exe'
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--headless")
chrome_options.add_argument("--window-size=1920,1080")
chrome_options.add_argument("--log-level=3")
custom_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.1234.56 Safari/537.36"
chrome_options.add_argument(f"user-agent={custom_user_agent}")
driver = webdriver.Chrome(options=chrome_options)

# Everything after the driver is created runs under try/finally so the
# browser process is shut down even on Ctrl-C or an unexpected error.
try:
    # Directory the screenshots are written to.
    screenshot_dir = r'C:\Users\webber\desktop\app'
    os.makedirs(screenshot_dir, exist_ok=True)

    # Search each keyword and capture the results page.
    for keyword in keywords:
        try:
            # Run a Google Images search for the keyword.
            driver.get('https://www.google.com/imghp')
            search_box = driver.find_element(By.NAME, 'q')
            search_box.send_keys(keyword)
            search_box.send_keys(Keys.RETURN)
            # Fixed pause to let the search results load.
            time.sleep(2)
            # Capture the page, then crop the saved file in place.
            screenshot_filename = os.path.join(screenshot_dir, f'{keyword}.png')
            # save_screenshot signals a failed write through its return
            # value rather than an exception, so check it before claiming
            # success.
            if not driver.save_screenshot(screenshot_filename):
                raise OSError(f'could not write {screenshot_filename}')
            compress_and_resize_png(screenshot_filename, screenshot_filename)
            print(f'已經成功為關鍵字 "{keyword}" 截圖並儲存為 {screenshot_filename}')
        except Exception as e:
            # One failing keyword must not abort the whole batch.
            print(f'處理關鍵字 "{keyword}" 時出現錯誤: {str(e)}')
finally:
    # Close the browser.
    driver.quit()
Website Screenshot
app.py
import time
from driver import driver
# Display name -> URL of each site to capture. The name doubles as the
# screenshot file name.
websites = {
    'Google': 'https://www.google.com',
    'Microsoft': 'https://www.microsoft.com',
}

# try/finally guarantees the browser is closed even on Ctrl-C or an
# unexpected error.
try:
    # Visit every site and save one screenshot per entry.
    for name, url in websites.items():
        print(name)
        try:
            driver.get(url)
            time.sleep(10)  # fixed pause so the page can finish rendering
            screenshot_filename = name + ".png"
            # save_screenshot signals a failed write through its return
            # value rather than an exception, so check it before claiming
            # success.
            if not driver.save_screenshot(screenshot_filename):
                raise OSError(f'could not write {screenshot_filename}')
            print(f' Saved in {screenshot_filename}')
        except Exception as e:
            # Catch Exception (not a bare except) so Ctrl-C still stops the
            # run, and report why this site failed before moving on.
            print(f' Download failed: {e}')
finally:
    # Close the web driver.
    driver.quit()
driver.py
from selenium import webdriver
# Build the Chrome driver that other modules import from here
# (a compatible Chrome / chromedriver must be installed).
chrome_options = webdriver.ChromeOptions()
# To use a specific Chrome binary, uncomment and adjust:
# chrome_options.binary_location = r'C:\Program Files\Google\Chrome\Application\chrome.exe'

# User-agent string sent in place of the driver's default one.
custom_user_agent = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/100.0.1234.56 Safari/537.36"
)

# Command-line switches, applied in this order.
for _switch in (
    "--start-maximized",
    "--headless",
    "--window-size=1920,1080",
    "--log-level=3",
    "user-agent=" + custom_user_agent,
):
    chrome_options.add_argument(_switch)

driver = webdriver.Chrome(options=chrome_options)