Python Web Scraping in Practice: Collecting Job Postings and Job-Seeker Data from the 5iai Website
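The script below pulls data from two public JSON endpoints on www.5iai.com: the job-posting search API (/api/enterprise/job/public/es) and the resume search API (/api/resume/baseInfo/public/es). It pages through both result sets ten records at a time and appends the fields of interest to two CSV files, result1-1.csv for postings and result1-2.csv for job seekers.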
import csv
import requests
# Request headers; the Cookie/Host/Referer values are the ones captured in the
# original post and may need refreshing if the site rejects the requests.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'Accept-Encoding': 'gzip, deflate',
    # 'Accept-Language': 'zh-CN,zh;q=0.9',
    # 'Connection': 'close',
    'Cookie': 'DEFAULT_ENTERPRISE_IMG=company.jpg; APP_HEADER_NAME=%E6%B3%B0%E8%BF%AA%E5%86%85%E6%8E%A8; APP_TITLE=%E6%B3%B0%E8%BF%AA%E5%86%85%E6%8E%A8; APP_RESOURCE_SCOPE_NAME=%E6%95%B0%E6%8D%AE%E4%B8%AD%E5%BF%83; APP_HELP_DOC_URL=http://45.116.35.168:8083/eb; REGISTER_URL=http://www.5iai.com:444/oauth/register',
    'Host': 'www.5iai.com',
    'Referer': 'https://www.5iai.com/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
    'X-Requested-With': 'AJAX',
}
def t1():
    # Job postings: write one row per posting to result1-1.csv.
    f = open('result1-1.csv', 'a', encoding='utf-8', newline='')
    w = csv.writer(f)
    w.writerow(['序号', '招聘信息ID', '企业名称', '招聘岗位', '最大薪资', '最小薪资',
                '学历要求', '经验要求', '关键词', '招收人数', '公司名', '公司人数',
                '公司类型', '公司地址'])
    n = 1
    # Job-posting search endpoint (10 records per page)
    url1 = 'https://www.5iai.com/api/enterprise/job/public/es?pageSize=10&pageNumber={}&willNature=&function=&wageList=%5B%5D&workplace=&keyword='
    # Maximum page number (upper bound from the original post)
    for i in range(1, 160):
        res = requests.get(url1.format(i), headers=headers).json()
        data = res['data']['content']
        for j in data:
            id = j['id']
            name = j['positionName']
            # Salary range
            max_price = j['maximumWage']
            min_price = j['minimumWage']
            # Education requirement
            edu = j['educationalRequirements']
            # Experience requirement
            exp = j['exp']
            # Keywords
            key_word = '-'.join([k['labelName'] for k in j['keywordList']])
            # Number of openings
            count = j['count']
            # Company name
            company = j['enterpriseExtInfo']['shortName']
            # Company size
            com_ps = j['enterpriseExtInfo']['personScope']
            # Industry
            com_code = j['enterpriseExtInfo']['industry']
            # Company address
            address = j['enterpriseAddress']['detailedAddress']
            l = [n, id, company, name, max_price, min_price, edu, exp, key_word,
                 count, company, com_ps, com_code, address]
            w.writerow(l)
            print(l)
            n += 1
    f.close()
def t2():
    # Job seekers: write one row per resume to result1-2.csv.
    f = open('result1-2.csv', 'a', encoding='utf-8', newline='')
    w = csv.writer(f)
    w.writerow(['序号', '求职者ID', '姓名', '预期岗位', '最大薪资', '最小薪资',
                '目前工作状态', '个人经验', '城市', '简历关键词'])
    n = 1
    # Resume search endpoint (10 records per page)
    url2 = 'https://www.5iai.com/api/resume/baseInfo/public/es?pageSize=10&pageNumber={}&function=&skills=&workplace=&keyword='
    for i in range(1, 1095):
        res = requests.get(url2.format(i), headers=headers).json()
        data = res['data']['content']
        for j in data:
            id = j['id']
            name = j['username']
            # Expected position
            func = j['expectPosition']
            # Expected salary range
            max_sale = j['willSalaryEnd']
            min_sale = j['willSalaryStart']
            # Current job-seeking status
            status = j['jobStatus']
            # Personal experience
            p_exp = j['exp']
            # City
            add = j['city']
            # Resume keywords
            keys = '-'.join([k['labelName'] for k in j['keywordList']])
            l = [n, id, name, func, max_sale, min_sale, status, p_exp, add, keys]
            w.writerow(l)
            print(l)
            n += 1
    f.close()
t1()
t2()
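The loops above assume that every request succeeds and that every record carries every nested key (for example enterpriseAddress.detailedAddress), so a single missing field or network hiccup aborts the whole crawl. Below is a minimal hardening sketch; fetch_page and dig are helper names introduced here for illustration, not part of the original script or the 5iai API.

import time
import requests

def fetch_page(url, headers, retries=3, delay=2):
    # Retry a request a few times before giving up; returns the parsed
    # JSON body, or None if every attempt fails.
    for _ in range(retries):
        try:
            resp = requests.get(url, headers=headers, timeout=10)
            resp.raise_for_status()
            return resp.json()
        except (requests.RequestException, ValueError):
            time.sleep(delay)
    return None

def dig(record, *keys, default=''):
    # Walk nested dict keys safely, e.g. dig(j, 'enterpriseAddress', 'detailedAddress').
    for key in keys:
        if not isinstance(record, dict):
            return default
        record = record.get(key, default)
    return record

With these helpers, a missing field degrades to an empty string instead of raising a KeyError, a None return from fetch_page can simply be skipped, and the delay between retries keeps the crawl a little gentler on the server.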