Python Auto Get Image For Keywords (Website Screenshot)
2023-10-23
筆記:使用 Python 搭配 Selenium,批次擷取關鍵字在 Google Images 搜尋結果頁的截圖,供單字學習使用。
![logo](s3://crabby-images/59c2d/59c2d5f13e52ae06aac55f795870106f9df8d0d9)
說明
import os
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from PIL import Image
import vocalist
def compress_and_resize_png(input_path, output_path, resize_percentage=75, compression_quality=30, top_margin=150):
    """Crop the top strip off an image and save the remainder as an optimized PNG.

    The first ``top_margin`` pixel rows are removed; the full width and the
    remaining height are kept. The result is written to ``output_path``.
    Any error is caught and reported on stdout, so the function never raises
    and always returns ``None``.

    :param input_path: path of the source image
    :param output_path: path the processed image is written to; may be the
        same as ``input_path``
    :param resize_percentage: accepted for backward compatibility (default
        75); the resize step is disabled, so this value is not applied
    :param compression_quality: accepted for backward compatibility (default
        30); ``quality`` is not a PNG save option in Pillow, so the lossless
        ``optimize`` option is used instead
    :param top_margin: number of pixel rows dropped from the top (default
        150) -- presumably the page header of the screenshot; confirm
        against the captured pages
    """
    try:
        # The context manager guarantees the source file handle is released
        # even when cropping fails, which matters when the output path is
        # the same file as the input.
        with Image.open(input_path) as original_image:
            width, height = original_image.size
            box = (0, top_margin, width, height)
            cropped_image = original_image.crop(box)

        # PNG is lossless: ask the encoder for its best compression instead
        # of passing a ``quality`` value it would silently ignore.
        cropped_image.save(output_path, 'PNG', optimize=True)
        print(f'已縮小和壓縮圖像,並保存為:{output_path}')
    except Exception as e:
        print(f'處理圖像 {input_path} 時出現錯誤: {str(e)}')
# Keywords to search for, taken from the local ``vocalist`` module.
keywords = vocalist.vocalist

# Configure Chrome WebDriver.
chrome_options = webdriver.ChromeOptions()
# Optional: set chrome_options.binary_location to the path of a specific
# Chrome executable if the default installation should not be used.
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--headless")
chrome_options.add_argument("--window-size=1920,1080")
chrome_options.add_argument("--log-level=3")
custom_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.1234.56 Safari/537.36"
chrome_options.add_argument(f"user-agent={custom_user_agent}")
driver = webdriver.Chrome(options=chrome_options)

# Everything after the browser is started runs under try/finally so the
# browser process is always shut down, even if setup fails or the run is
# interrupted.
try:
    # Directory the screenshots are stored in; created on demand.
    screenshot_dir = r'C:\Users\webber\desktop\app'
    os.makedirs(screenshot_dir, exist_ok=True)

    # Search each keyword and capture the results page.
    for keyword in keywords:
        try:
            # Run the query through Google Images.
            driver.get('https://www.google.com/imghp')
            search_box = driver.find_element(By.NAME, 'q')
            search_box.send_keys(keyword)
            search_box.send_keys(Keys.RETURN)
            # Fixed pause to let the search results load.
            time.sleep(2)
            # Capture the page, then post-process the file in place.
            screenshot_filename = os.path.join(screenshot_dir, f'{keyword}.png')
            driver.save_screenshot(screenshot_filename)
            compress_and_resize_png(screenshot_filename, screenshot_filename)
            print(f'已經成功為關鍵字 "{keyword}" 截圖並儲存為 {screenshot_filename}')
        except Exception as e:
            # A failure for one keyword must not stop the remaining ones.
            print(f'處理關鍵字 "{keyword}" 時出現錯誤: {str(e)}')
finally:
    # Close the browser.
    driver.quit()
Website Screenshot
app.py
import time
from driver import driver
# Sites to capture, as display name -> URL. The name is also used as the
# screenshot file name (with a ".png" suffix).
websites = {
    'Google' : 'https://www.google.com',
    'Microsoft' : 'https://www.microsoft.com',
}

# try/finally guarantees the shared web driver is closed even if the run is
# interrupted part-way through.
try:
    # Loop through the dictionary
    for name, url in websites.items():
        print(name)
        try:
            driver.get(url)
            time.sleep(10)  # fixed pause to give the page time to finish rendering
            screenshot_filename = name + ".png"
            driver.save_screenshot(screenshot_filename)
            print(f' Saved in {screenshot_filename}')
        except Exception as exc:
            # Catch Exception rather than everything so Ctrl-C still stops
            # the run, and report why this site failed before moving on.
            print(f' Download failed: {exc}')
finally:
    # Close the web driver
    driver.quit()
driver.py
from selenium import webdriver
# Build the Chrome configuration and start the shared web driver session
# (a compatible web driver must be installed).
custom_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.1234.56 Safari/537.36"

chrome_options = webdriver.ChromeOptions()
# Optional: set chrome_options.binary_location to the path of a specific
# Chrome executable if the default installation should not be used.

# Command-line switches, applied in this exact order.
for _argument in (
    "--start-maximized",
    "--headless",
    "--window-size=1920,1080",
    "--log-level=3",
    f"user-agent={custom_user_agent}",
):
    chrome_options.add_argument(_argument)

# Module-level driver instance imported by the scripts that take screenshots.
driver = webdriver.Chrome(options=chrome_options)