哪里有错误?程序不输出文件,并且卡在输出界面。请写出修改后的代码:import requests; import time; import os; import urllib3; import sys; import random; from bs4 import BeautifulSoup; from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor; from …
import os
import random
import sys
import time
# as_completed is used to wait on the submitted search futures.
from concurrent.futures import (
    ProcessPoolExecutor,
    ThreadPoolExecutor,
    as_completed,
)
from threading import Lock

import requests
import urllib3
from bs4 import BeautifulSoup
from colorama import Fore, init
# Timestamp taken once when the module is loaded; it prefixes every console
# message printed by this script and is not refreshed afterwards.
now_time = time.strftime('%Y-%m-%d %H-%M', time.localtime())
# Read the dorks file
def work(dorks):
    """Load the dork queries from a text file.

    Args:
        dorks: Path of a UTF-8 text file holding one search query per line.

    Returns:
        A ``(count, queries)`` tuple: the number of usable queries and the
        list of those queries with surrounding whitespace removed.

    Raises:
        FileNotFoundError: If ``dorks`` does not exist.
    """
    # 'utf-8-sig' reads plain UTF-8 unchanged but also strips the BOM that
    # some Windows editors prepend, which would otherwise corrupt the first
    # query.
    with open(dorks, mode='r', encoding='utf-8-sig') as file:
        stripped = (line.strip() for line in file)
        # Blank lines are dropped: an empty query wastes a request and makes
        # the reported total larger than the number of real searches.
        content = [query for query in stripped if query]
    # The count is used by the caller as the total for the progress display.
    return len(content), content
# Google search
def google_serach(query, locks, filename):
    """Run one Google search for ``query`` and append the result URLs to a file.

    Args:
        query: The dork / search expression to look up.
        locks: Lock shared by all workers; it guards the global counters, the
            console output and the output file.
        filename: Output file name without extension; ``.txt`` is appended and
            the file is placed in the current working directory.

    Side effects:
        Updates the module-level ``url_num`` and ``dork_finish_num`` counters,
        prints a progress line (reading the module-level ``dork_total_num``
        and ``now_time``) and appends the URLs found via ``write_info``.
        A failed request is reported on the console and still counted as a
        finished dork, so the progress display can reach the total.
    """
    global url_num, dork_finish_num

    # Silence the warning that verify=False would otherwise print per request.
    urllib3.disable_warnings()
    filename = os.path.join(os.getcwd(), f'{filename}.txt')

    # Google country domains. The original list contained the typo 'co,jp'
    # and bare TLDs ('uk', 'br', 'kr', 'au', 'za', 'tw', 'mx') that are not
    # in Google's supported-domain list, so those picks could only fail.
    domains = (
        'fr', 'it', 'ca', 'co.uk', 'ru', 'co.jp', 'co.kr', 'com.au', 'co.in',
        'com.br', 'com.ar', 'co.za', 'co.nz', 'es', 'se', 'nl', 'ch', 'at',
        'dk', 'be', 'pl', 'fi', 'ie', 'pt', 'gr', 'com.tw', 'com', 'de',
        'com.mx',
    )
    url = f'https://www.google.{random.choice(domains)}/search'
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
        'accept-language': 'zh-CN,zh;q=0.9',
        # '*/*' had been mangled to '/' in the original header value.
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'referer': 'https://www.google.com/',
        'origin': 'https://www.google.com',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-User': '?1',
        'Sec-Fetch-Dest': 'document',
    }
    # Local proxy; every request fails if nothing listens on this port.
    proxies = {'http': 'http://127.0.0.1:7890', 'https': 'http://127.0.0.1:7890'}

    get_url = []
    error = None
    try:
        # Passing the query through ``params`` lets requests URL-encode it;
        # dorks routinely contain '&', '#', '"' and spaces.
        response = requests.get(
            url=url,
            params={'q': query, 'num': 100},
            headers=headers,
            proxies=proxies,
            verify=False,
            timeout=5,
        )
        # Turn HTTP errors (for example 429 rate limiting) into a reported
        # failure instead of silently parsing an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        for div in soup.find_all('div', {'class': 'yuRUbf'}):
            anchor = div.find_next('a')
            href = anchor.get('href') if anchor is not None else None
            if href and 'google.com.tw' not in href:
                get_url.append(href + '\n')
    except (requests.RequestException, TimeoutError) as exc:
        # requests raises its own exception hierarchy (timeouts, proxy and
        # connection errors); the builtin TimeoutError alone never matched.
        error = exc

    # ``with`` guarantees the lock is released even if printing or writing
    # raises; a leaked lock would block every other worker forever.
    with locks:
        url_num += len(get_url)
        dork_finish_num += 1
        if error is not None:
            print(Fore.RED + f'\n{now_time}[Error]{"-" * 10}>请求失败: {query} ({error})' + Fore.RESET)
        # flush=True: the line has no trailing newline, so without a flush the
        # progress can sit in the buffer and the console looks frozen.
        print(Fore.GREEN + f'\r{now_time}[INFO]{ "-" * 10}>已获取Url数量:{url_num} Dorsk数量:{dork_finish_num} / {dork_total_num}', end=Fore.RESET, flush=True)
        # Called even with an empty list so the output file always exists.
        write_info(filename, get_url)
# Helper that writes the results to the output file
def write_info(filename, get_url):
    """Append the collected URL lines to the output file.

    Args:
        filename: Path of the output file; it is opened in append mode, so
            earlier content is kept and a missing file is created.
        get_url: Iterable of strings written as-is (no separator is added).
    """
    sink = open(filename, mode='a+', encoding='utf-8')
    with sink:
        sink.writelines(get_url)
if __name__ == '__main__':
    # Script entry point: ask for the dork file and the output name, fan the
    # searches out over a thread pool, then wait until every search is done.
    init()  # initialise the colorama colour module
    while True:
        try:
            dorks_file = input(Fore.YELLOW + f'\n{now_time}[INFO]{ "-" * 10}>input file:' + Fore.RESET)
            print('')
            filename = input(Fore.YELLOW + f'\n{now_time}[INFO]{ "-" * 10}>output file:' + Fore.RESET)
            # work() returns (number of dorks, list of dorks).
            dork_total_num, query_list = work(dorks_file)
            # Module-level counters updated by the workers: finished dorks
            # and collected URLs.
            dork_finish_num = url_num = 0
            # One lock shared by every worker thread.
            threads_lock = Lock()

            # The searches are network-bound, so a thread pool is enough; the
            # process pool the original created was never given any work.
            # The ``with`` block shuts the pool down even when an error occurs.
            with ThreadPoolExecutor(max_workers=20) as thread_pool:
                futures = [
                    thread_pool.submit(google_serach, dork, threads_lock, filename)
                    for dork in query_list
                ]
                try:
                    # No overall timeout: each request has its own timeout,
                    # and a global 60 s limit made as_completed raise an
                    # unhandled timeout error on longer dork lists.
                    for future in as_completed(futures):
                        try:
                            # result() re-raises whatever the worker raised,
                            # so failures are shown instead of vanishing.
                            future.result()
                        except Exception as exc:
                            print(Fore.RED + f'\n{now_time}[Error]{"-" * 10}>{exc!r}' + Fore.RESET)
                except KeyboardInterrupt:
                    # Drop the queued searches so Ctrl-C does not have to
                    # wait for the whole backlog to run.
                    for future in futures:
                        future.cancel()
                    raise

            input(Fore.YELLOW + f'\n\n{now_time}[INFO]{"-" * 10}>程序运行完毕,按回车退出' + Fore.RESET)
            break
        # The input file does not exist: report it and ask again.
        except FileNotFoundError:
            print(Fore.RED + f'{now_time}[Error]{"-" * 10}>文件不存在' + Fore.RESET)
        # Interrupted by the user.
        except KeyboardInterrupt:
            sys.exit(1)
原文地址: http://www.cveoy.top/t/topic/hw2D 著作权归作者所有。请勿转载和采集!