以下是 Python 代码,使用了 requests、beautifulsoup4、openpyxl 等库(代码中 BeautifulSoup 指定了 lxml 解析器,因此还需要安装 lxml):

import requests
from bs4 import BeautifulSoup
from openpyxl import Workbook

# Request headers: present a desktop-browser User-Agent string
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/58.0.3029.110 Safari/537.36'
    ),
}

# Fetch the page source
def get_html(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, requests waits indefinitely on a stalled connection.

    Returns:
        The response text when the server answers with HTTP 200; None for
        any other status code or when the request itself fails.
    """
    try:
        # Only the network call is guarded; the module-level ``headers``
        # dict supplies the User-Agent.
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as e:
        # Best-effort fetch: report the failure and let the caller decide.
        print(e)
        return None

    if response.status_code == 200:
        return response.text
    return None

# Parse the listing page and collect the job information
def _node_text(node):
    """Return the stripped text of a parsed element, or '' when it is None."""
    return node.get_text().strip() if node is not None else ''


def _fetch_job_description(detail_url):
    """Return the text of the 'job-detail' div on the detail page, or ''."""
    if not detail_url:
        return ''
    detail_html = get_html(detail_url)
    if not detail_html:
        # get_html returns None when the detail page could not be fetched.
        return ''
    detail_soup = BeautifulSoup(detail_html, 'lxml')
    return _node_text(detail_soup.find('div', class_='job-detail'))


def parse_html(html):
    """Extract one row per job from the listing page markup.

    Args:
        html: Source of the listing page, or None/'' if it could not be
            fetched.

    Returns:
        A list of rows, each
        ``[job_name, company_name, location, salary, publish_time,
        job_description]``. Fields whose element is missing are stored as
        ''. An empty list is returned when *html* is empty or the
        ``ul.item_con_list`` container is not present.
    """
    if not html:
        return []

    soup = BeautifulSoup(html, 'lxml')
    listing = soup.find('ul', class_='item_con_list')
    if listing is None:
        # Page layout differs from what this parser expects; nothing to read.
        return []

    result = []
    for job in listing.find_all('li'):
        title = job.find('h3')
        if title is None:
            # Without a title heading there is no job name or detail link,
            # so the item is skipped instead of crashing the whole run.
            continue

        company = job.find('div', class_='company')
        company_link = company.find('a') if company is not None else None
        title_link = title.find('a')
        detail_url = title_link.get('href') if title_link is not None else None

        result.append([
            _node_text(title),
            _node_text(company_link),
            _node_text(job.find('div', class_='location')),
            _node_text(job.find('span', class_='money')),
            _node_text(job.find('span', class_='format-time')),
            # One extra request per job: the description lives on the
            # detail page, not on the listing page.
            _fetch_job_description(detail_url),
        ])

    return result

# Save the collected rows to an Excel workbook
def save_to_excel(result):
    """Write *result* to 'job_info.xlsx' beneath a fixed header row.

    Args:
        result: Iterable of rows; each row is appended to the sheet as-is
            after the header row.
    """
    workbook = Workbook()
    sheet = workbook.active
    sheet.title = 'job_info'

    # Header row first, then the data rows in the order received.
    column_titles = ['职位名称', '公司名称', '工作地点', '薪资', '发布时间', '技能要求']
    for line in [column_titles, *result]:
        sheet.append(line)

    workbook.save('job_info.xlsx')
    print('保存成功!')

if __name__ == '__main__':
    # Script entry point: fetch the listing page, parse it, write the workbook.
    url = 'https://www.lagou.com/zhaopin/Python/'
    html = get_html(url)
    if html is None:
        # get_html returns None on a non-200 status or a request error;
        # stop here with a clear message rather than passing None to the parser.
        raise SystemExit('获取页面失败: ' + url)
    result = parse_html(html)
    save_to_excel(result)

运行后,程序会爬取拉勾网上 Python 岗位的信息,并保存到当前工作目录下名为“job_info.xlsx”的 Excel 表格中。Excel 表格的第一行为列名,依次为“职位名称”、“公司名称”、“工作地点”、“薪资”、“发布时间”、“技能要求”;其中“技能要求”一列保存的是从职位详情页抓取的职位描述全文。

Python 爬取拉勾网职位信息并保存到 Excel 表格

原文地址: https://www.cveoy.top/t/topic/mUMA 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录