Python 百度搜索爬虫工具:完整代码及使用教程
以下是经过修改的完整代码:
import requests
from bs4 import BeautifulSoup
import time
import tkinter as tk
import webbrowser
def crawl_baidu(keyword):
    """Fetch a Baidu search results page and return the result links.

    Args:
        keyword: Search term, sent as the ``wd`` query parameter.

    Returns:
        A list of ``href`` strings, in page order, one for every ``div``
        with class ``result`` that contains an anchor carrying an ``href``.
        Empty when the page has no such elements.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    time.sleep(0.5)  # short delay before each request
    # Let requests build the query string so characters such as '&', '#'
    # or '+' in the keyword are percent-encoded instead of corrupting the
    # URL. The timeout keeps a stalled connection from hanging the caller.
    response = requests.get(
        'https://www.baidu.com/s',
        params={'wd': keyword},
        headers=headers,
        timeout=10,
    )
    soup = BeautifulSoup(response.text, 'html.parser')
    results = []
    for result in soup.find_all('div', class_='result'):
        # A result container may hold no anchor, or only anchors without
        # an href; skip those instead of raising.
        anchor = result.find('a', href=True)
        if anchor is not None:
            results.append(anchor['href'])
    return results
def open_url(event):
    """Open the result link under the mouse click in the web browser.

    Does nothing unless the clicked character of ``result_text`` carries
    the ``link`` tag.

    Args:
        event: Tk mouse event; its ``x``/``y`` widget coordinates locate
            the clicked character.
    """
    click_index = result_text.index(f'@{event.x},{event.y}')
    # tag_ranges() yields a flat (start, end, start, end, ...) sequence of
    # indices, so its items cannot be subscripted as pairs. Asking which
    # tags apply at the clicked index avoids pairing ranges by hand.
    if 'link' not in result_text.tag_names(click_index):
        return
    line = result_text.get(
        f'{click_index} linestart', f'{click_index} lineend'
    ).strip()
    # Result lines are rendered as "<n>. <url>"; drop the numbering so only
    # the URL itself is handed to the browser.
    number, separator, remainder = line.partition('. ')
    url = remainder if separator and number.isdigit() else line
    if url:
        webbrowser.open(url)
def search():
    """Search Baidu for the keyword in ``entry`` and list the result links.

    Replaces the contents of ``result_text`` with one numbered line per
    result (the URL part tagged ``link``), each followed by a blank line,
    or with a "no results" message when nothing was found or the keyword
    is blank.
    """
    keyword = entry.get().strip()
    # Start from an empty widget so earlier searches do not pile up.
    result_text.delete('1.0', tk.END)
    if not keyword:
        result_text.insert(tk.END, '没有搜索结果\n')
        return

    search_results = crawl_baidu(keyword)
    if not search_results:
        result_text.insert(tk.END, '没有搜索结果\n')
        return

    # The tag's appearance is the same for every link; configure it once.
    result_text.tag_configure('link', foreground='blue', underline=True)
    for index, url in enumerate(search_results, start=1):
        result_text.insert(tk.END, f'{index}. ')
        # Tag only the URL text at insert time. Tagging by line number
        # afterwards would hit the wrong lines, because every result is
        # followed by a blank separator line.
        result_text.insert(tk.END, url, 'link')
        result_text.insert(tk.END, '\n\n')
# Build the user interface: main window first, then the widgets.
window = tk.Tk()
window.geometry('800x600')
window.title('百度搜索')

# Create every widget up front ...
label = tk.Label(master=window, text='请输入关键词:')
entry = tk.Entry(master=window)
search_button = tk.Button(master=window, text='搜索', command=search)
result_text = tk.Text(master=window)
# A left click inside the results area is routed to open_url.
result_text.bind('<Button-1>', open_url)

# ... then lay them out top to bottom: prompt, input, button, results.
label.pack()
entry.pack()
search_button.pack()
result_text.pack()

# Hand control to Tk's event loop.
window.mainloop()
在这个修改后的代码中,我使用了 str.split 函数将索引转换为整数进行比较,从而避免了之前的 TypeError 错误。希望这次能够成功运行。如果还有其他问题,请随时提问。
原文地址: https://www.cveoy.top/t/topic/rvz 著作权归作者所有。请勿转载和采集!