import requests
from bs4 import BeautifulSoup
import time
import tkinter as tk
import webbrowser
import random
import os
import re


def get_random_user_agent():
    """Return one desktop-Chrome User-Agent string chosen at random.

    The pool holds three Windows 10 / Chrome header values; each call
    makes a single ``random.choice`` draw from it.
    """
    pool = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36',
    )
    picked = random.choice(pool)
    return picked


def crawl_baidu(keyword, page_limit):
    """Scrape Baidu web-search result titles and links for a keyword.

    Args:
        keyword: Raw (un-encoded) search phrase, sent as the ``wd`` query
            parameter. URL-encoding is handled by ``requests``.
        page_limit: Number of result pages to fetch. Pages are requested
            with ``pn`` offsets 0, 10, 20, ...; a value below 1 fetches
            nothing.

    Returns:
        A list of ``(title, url)`` tuples in the order they appear on the
        fetched pages. Result containers that lack an ``<h3>`` heading or
        a link with an ``href`` are skipped.

    Raises:
        requests.RequestException: If a request fails or exceeds the
            timeout.
    """
    # One User-Agent is picked per call and reused for every page.
    headers = {
        'User-Agent': get_random_user_agent()
    }

    results = []
    for page in range(1, page_limit + 1):
        # Short random pause before every request.
        time.sleep(random.uniform(0.5, 1.0))

        # Passing the query through ``params`` lets requests encode it, so
        # keywords containing '&', '#', '+' or '=' no longer corrupt the
        # query string. The timeout keeps a stalled connection from
        # blocking the caller indefinitely.
        response = requests.get(
            'https://www.baidu.com/s',
            params={'wd': keyword, 'pn': (page - 1) * 10},
            headers=headers,
            timeout=10,
        )
        soup = BeautifulSoup(response.text, 'html.parser')

        for result in soup.find_all('div', class_='result'):
            heading = result.find('h3')
            link = result.find('a', href=True)
            # Some result containers carry no heading or no usable link;
            # skip them rather than crash on None.
            if heading is None or link is None:
                continue
            results.append((heading.get_text(), link['href']))

    return results


def open_url(url):
    """Hand ``url`` to the standard-library ``webbrowser`` module.

    The boolean returned by ``webbrowser.open`` is discarded, so this
    function always returns ``None``.
    """
    # new=0 and autoraise=True are the documented defaults, spelled out.
    webbrowser.open(url, new=0, autoraise=True)


def crawl_and_index_manual():
    # ... (code identical to the previous version; omitted in this listing)
    # NOTE(review): the body is elided here, so this definition is not valid Python as shown.


def search_local_manual():
    # ... (code identical to the previous version; omitted in this listing)
    # NOTE(review): the body is elided here, so this definition is not valid Python as shown.


def crawl_and_index_auto_recursive(keywords, page_limit, result_limit, total_results, depth=1):
    # ... (code identical to the previous version; omitted in this listing)
    # NOTE(review): the body is elided here, so this definition is not valid Python as shown.


def crawl_and_index_auto():
    # ... (code identical to the previous version; omitted in this listing)
    # NOTE(review): the body is elided here, so this definition is not valid Python as shown.


def search_local_auto_recursive(keyword, total_results, result_limit, depth=1):
    # ... (code identical to the previous version; omitted in this listing)
    # NOTE(review): the body is elided here, so this definition is not valid Python as shown.


def search_local_auto():
    # ... (code identical to the previous version; omitted in this listing)
    # NOTE(review): the body is elided here, so this definition is not valid Python as shown.


# --- UI code (identical to the previous version; omitted) ---


# Run the main UI window.
# NOTE(review): `window` is not defined in the visible code; presumably it is
# created by the omitted UI section — confirm before running.
window.mainloop()
# Python Baidu search tool: supports manual and automatic modes

# Original article: https://www.cveoy.top/t/topic/vYQ — copyright belongs to the author. Do not repost or scrape.

# (Site banner) Free AI — click here; no registration or login required.