下面的代码没有正确爬取目标网站的数据,请修改代码使其能够正确爬取数据,然后给出修改后的可运行代码并显示运行结果: import requests; from bs4 import BeautifulSoup; import csv; url = 'https://search.51job.com/list/010000,020000,030200,040000,180200,000000,0000,00,9,99,Python,2,{page}.html'; headers = …
import csv
import time

import requests
from bs4 import BeautifulSoup
# Search-result URL template. The ``{page}`` placeholder is filled in with the
# page number through ``url.format(page=...)``.
url = 'https://search.51job.com/list/000000,000000,0000,00,9,99,Python,2,{page}.html'

# HTTP headers sent with every request; only a browser-like User-Agent is set.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.48',
}
def _node_text(node, selector):
    """Return the stripped text of the first match of *selector*, or ''."""
    found = node.select_one(selector)
    return found.get_text().strip() if found is not None else ''


def _node_attr(node, selector, attr):
    """Return attribute *attr* of the first match of *selector*, or ''."""
    found = node.select_one(selector)
    return found.get(attr, '') if found is not None else ''


def get_data(page):
    """Fetch one search-result page and yield a dict for each job row.

    Args:
        page: Page number substituted into the module-level ``url`` template.

    Yields:
        dict: One record per ``.dw_table .el`` element, keyed by the Chinese
        column names used for the CSV output. A field whose element is
        missing from the row is yielded as an empty string.

    Raises:
        requests.RequestException: If the request times out, fails, or the
            server answers with an HTTP error status.
    """
    # A timeout keeps an unresponsive server from hanging the crawl forever.
    response = requests.get(url=url.format(page=page), headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    for job in soup.select('.dw_table .el'):
        # select_one() returns None when nothing matches; a row without a
        # title link carries no job posting, so skip it rather than crash.
        if job.select_one('.t a') is None:
            continue

        # The experience/education cell is '|'-separated; tolerate a cell
        # that holds only one of the two parts.
        exp_edu = _node_text(job, '.info .t5').split('|')
        exp = exp_edu[0].strip()
        edu = exp_edu[1].strip() if len(exp_edu) > 1 else ''

        yield {
            '职位名字': _node_attr(job, '.t a', 'title'),
            '公司名字': _node_attr(job, '.t span', 'title'),
            '薪资': _node_text(job, '.sal'),
            '公司福利': _node_text(job, '.info .t3'),
            '地区': _node_text(job, '.info .t4'),
            '经验': exp,
            '学历': edu,
            '公司类型': _node_text(job, '.info .t2'),
            '发布日期': _node_text(job, '.info .t6'),
            '详情页': _node_attr(job, '.t a', 'href'),
        }
# Crawl pages 1-10 and append every job record to 数据.csv, echoing each
# record to stdout as it is written.
with open('数据.csv', mode='a', encoding='utf-8', newline='') as f:
    csv_writer = csv.DictWriter(f, fieldnames=[
        '职位名字',
        '公司名字',
        '薪资',
        '公司福利',
        '地区',
        '经验',
        '学历',
        '公司类型',
        '发布日期',
        '详情页',
    ])
    # The file is opened in append mode, so the stream starts at end-of-file.
    # Position 0 means the file is new or empty; only then write the header,
    # otherwise every re-run would insert a duplicate header row mid-file.
    if f.tell() == 0:
        csv_writer.writeheader()
    for page in range(1, 11):
        for item in get_data(page):
            csv_writer.writerow(item)
            print(item)
        # Pause between pages to avoid hammering the server.
        # NOTE(review): the original indentation was lost; a per-page pause
        # is assumed here rather than a per-row one — confirm.
        time.sleep(1)
原文地址: https://www.cveoy.top/t/topic/cFjV 著作权归作者所有。请勿转载和采集!