# Python Baidu search tool: supports manual and automatic modes
import requests
from bs4 import BeautifulSoup
import time
import tkinter as tk
import webbrowser
import random
import os
import re
def get_random_user_agent():
    """Return one of a fixed pool of desktop-Chrome User-Agent strings.

    Picking a UA at random per request makes the crawler look slightly
    less like a single automated client.
    """
    ua_pool = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36',
    )
    return random.choice(ua_pool)
def crawl_baidu(keyword, page_limit):
    """Scrape Baidu web-search results for *keyword*.

    Args:
        keyword: Search term; passed via ``params`` so requests URL-encodes
            it (the original f-string interpolation broke on spaces/CJK
            punctuation that need percent-encoding).
        page_limit: Number of result pages to fetch (Baidu pages by 10,
            via the ``pn`` offset parameter).

    Returns:
        A list of ``(title, url)`` tuples. Result blocks missing a title
        or link are skipped instead of crashing (the original raised
        AttributeError/KeyError on malformed blocks). Pages that fail to
        download are skipped as well (best-effort crawl).
    """
    headers = {
        'User-Agent': get_random_user_agent()
    }
    results = []
    for page in range(1, page_limit + 1):
        # Random delay between requests to reduce the chance of being
        # rate-limited or served a captcha.
        time.sleep(random.uniform(0.5, 1.0))
        params = {'wd': keyword, 'pn': (page - 1) * 10}
        try:
            response = requests.get(
                'https://www.baidu.com/s',
                params=params,
                headers=headers,
                timeout=10,  # original had no timeout and could hang forever
            )
            response.raise_for_status()
        except requests.RequestException:
            # Best-effort: skip a failed page rather than aborting the crawl.
            continue
        soup = BeautifulSoup(response.text, 'html.parser')
        for result in soup.find_all('div', class_='result'):
            title_tag = result.find('h3')
            link_tag = result.find('a')
            # Guard: .find() returns None for malformed blocks, and some
            # anchors carry no href — the original crashed on both.
            if title_tag is None or link_tag is None or not link_tag.has_attr('href'):
                continue
            results.append((title_tag.get_text(), link_tag['href']))
    return results
def open_url(url):
    # Open *url* in the system default web browser (side effect only;
    # the return value of webbrowser.open is discarded).
    webbrowser.open(url)
def crawl_and_index_manual():
    """Manual-mode crawl-and-index entry point.

    NOTE(review): the body was elided in this source ("content same as
    before, omitted") — restore it from the original article before
    running; as published this def had no statements.
    """
    # ... (content identical to the earlier version, omitted)
def search_local_manual():
    """Manual-mode local-index search entry point.

    NOTE(review): the body was elided in this source ("content same as
    before, omitted") — restore it from the original article before
    running; as published this def had no statements.
    """
    # ... (content identical to the earlier version, omitted)
def crawl_and_index_auto_recursive(keywords, page_limit, result_limit, total_results, depth=1):
    """Recursive worker for automatic crawl-and-index mode.

    NOTE(review): the body was elided in this source ("content same as
    before, omitted") — restore it from the original article before
    running; as published this def had no statements. Parameter
    semantics (keywords/page_limit/result_limit/total_results/depth)
    cannot be confirmed from the visible code.
    """
    # ... (content identical to the earlier version, omitted)
def crawl_and_index_auto():
    """Automatic-mode crawl-and-index entry point.

    NOTE(review): the body was elided in this source ("content same as
    before, omitted") — restore it from the original article before
    running; as published this def had no statements.
    """
    # ... (content identical to the earlier version, omitted)
def search_local_auto_recursive(keyword, total_results, result_limit, depth=1):
    """Recursive worker for automatic local-index search mode.

    NOTE(review): the body was elided in this source ("content same as
    before, omitted") — restore it from the original article before
    running; as published this def had no statements.
    """
    # ... (content identical to the earlier version, omitted)
def search_local_auto():
    """Automatic-mode local-index search entry point.

    NOTE(review): the body was elided in this source ("content same as
    before, omitted") — restore it from the original article before
    running; as published this def had no statements.
    """
    # ... (content identical to the earlier version, omitted)
# --- UI construction code (same as the earlier version, omitted) ---
# NOTE(review): `window` (the tk.Tk root) is created in the elided UI
# section above; restore it before running, otherwise this line raises
# NameError.
# Run the main UI event loop (blocks until the window is closed).
window.mainloop()
# Original source: https://www.cveoy.top/t/topic/vYQ — copyright belongs to the author; do not reproduce or scrape.