Python 爬取航班信息并解析 - 使用 Requests 库获取数据
import requests
def get_content1(id, timeout=10):
    """Fetch the flight result set for *id* and return the decoded JSON body.

    Args:
        id: Result-set identifier interpolated into the request URL. The name
            shadows the ``id`` builtin; it is kept so keyword callers still work.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        Whatever ``Response.json()`` decodes from the response body.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: Connection failure or timeout.
        ValueError: The body is not valid JSON.
    """
    url = f'https://flights.sda.cn/tRtApi/flight/resultSets/{id}'
    headers = {
        'Accept': 'application/json',
        'Accept-Language': 'zh-CN',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Market-Country-Code': 'CN',
        'Pragma': 'no-cache',
        # NOTE(review): the Referer is pinned to one specific search
        # (TNA-CSX-230402) — confirm the API accepts it for other result sets.
        'Referer': 'https://flights.sda.cn/flight/search/TNA-CSX-230402',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
        # Client-hint values carry embedded double quotes, so the literals are
        # single-quoted on the outside.
        'sec-ch-ua': '"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }
    # Without a timeout a stalled server would block this call forever.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Surface HTTP errors here instead of as a confusing KeyError downstream.
    response.raise_for_status()
    return response.json()
def _airport_label(endpoint):
    """Return the airport name followed by its terminal, if one is given."""
    # A null or absent terminal must not break the concatenation.
    terminal = endpoint.get('terminal') or ''
    return endpoint['name'] + terminal


def get_content3(response):
    """Yield one flat tuple of itinerary fields per flight segment.

    Args:
        response: Mapping with a ``'flightSegments'`` list. Each segment is read
            for ``departure``/``arrival`` (``city``, ``iataCode``, ``name``,
            optional ``terminal``, ``dateTime``), ``duration``,
            ``equipmentType``, ``operatingAirlineInfo['airlineCode']`` and
            ``marketingAirlineInfo['flightNumber']``.

    Yields:
        An 11-tuple: departure city, departure IATA code, departure airport
        label, departure date-time, arrival city, arrival IATA code, arrival
        airport label, arrival date-time, duration text, flight number,
        aircraft name.

    Raises:
        KeyError: A required key is missing from the response.
    """
    for segment in response['flightSegments']:
        departure = segment['departure']
        arrival = segment['arrival']

        # Every ':' becomes '时' and '分' is appended, e.g. '2:15' -> '2时15分'.
        plan_user_time = str(segment['duration']).replace(':', '时') + '分'
        # NOTE(review): the '波音' prefix is applied to every equipment type —
        # confirm that is intended for non-Boeing aircraft.
        plan_name = '波音' + str(segment['equipmentType'])
        # NOTE(review): carrier code comes from the operating airline while the
        # number comes from the marketing airline — verify for code-shares.
        plan_flight_number = (
            segment['operatingAirlineInfo']['airlineCode']
            + segment['marketingAirlineInfo']['flightNumber']
        )

        yield (
            departure['city'],
            departure['iataCode'],
            _airport_label(departure),
            departure['dateTime'],
            arrival['city'],
            arrival['iataCode'],
            _airport_label(arrival),
            arrival['dateTime'],
            plan_user_time,
            plan_flight_number,
            plan_name,
        )
def get_content2(response):
    """Yield ``(booking class, price)`` for each entry in ``response['flightOptions']``.

    The price is the cash base amount of the first price entry. Only when that
    amount is the empty string does the function fall back to the base amount
    of the first fare breakdown of the second price entry.

    Args:
        response: Mapping with a ``'flightOptions'`` list; each option carries
            a ``'prices'`` list.

    Yields:
        ``(plan_seat_type, plan_price)`` where the seat type is
        ``prices[0]['fareInfos'][0]['rbd']``.

    Raises:
        KeyError, IndexError: The option lacks the fields described above.
    """
    for option in response['flightOptions']:
        prices = option['prices']
        cash_amount = prices[0]['details']['cash']['base']['amount']
        # Prefer the cash amount when present. Testing for the empty string the
        # other way round would keep the blank value and discard the real one.
        if cash_amount != '':
            plan_price = cash_amount
        else:
            plan_price = prices[1]['fareBreakdowns'][0]['base']['amount']
        plan_seat_type = prices[0]['fareInfos'][0]['rbd']
        yield plan_seat_type, plan_price
def download(id):
    """Fetch result set *id* and print one flat record per segment/fare pair.

    Segments from ``get_content3`` and fares from ``get_content2`` are paired
    positionally with ``zip``, so output stops at the shorter of the two.

    Args:
        id: Result-set identifier passed through to ``get_content1``.

    Returns:
        None. Each record is written to stdout with ``print``.
    """
    response = get_content1(id)
    # zip() yields (segment_tuple, fare_tuple) pairs, so each half has to be
    # unpacked separately rather than as one flat 13-name target.
    for segment, fare in zip(get_content3(response), get_content2(response)):
        (
            start_city,
            start_city_code,
            start_airport_name,
            start_datetime,
            reach_city,
            reach_city_code,
            reach_airport_name,
            reach_datetime,
            plan_user_time,
            plan_flight_number,
            plan_name,
        ) = segment
        plan_seat_type, plan_price = fare
        wb2 = {
            'q': start_city,
            'w': start_city_code,
            'e': start_airport_name,
            'r': start_datetime,
            't': reach_city,
            'y': reach_city_code,
            'u': reach_airport_name,
            'i': reach_datetime,
            'o': plan_user_time,
            'p': plan_flight_number,
            's': plan_name,
            'a': plan_price,
            'b': plan_seat_type,
        }
        print(wb2)
if __name__ == '__main__':
    # Script entry point: replace 12345 with the real search-result ID.
    download(12345)
使用方法:
- 将代码保存为 .py 文件。
- 替换代码中的 `12345` 为实际的航班搜索结果 ID。
- 运行代码,将会输出航班信息字典。
注意:
- 本代码需要 `requests` 库。如果未安装,请使用 `pip install requests` 命令安装。
- 代码中的 URL 和 headers 信息来自 https://flights.sda.cn 网站,请根据实际情况修改。
- 代码仅供参考,实际使用时可能需要根据网站的更新情况进行调整。
- 请勿将此代码用于非法目的。
原文地址: https://www.cveoy.top/t/topic/ml8s 著作权归作者所有。请勿转载和采集!