# Python 爬虫实战:从百度好看视频网站下载视频
import json
import os
import re

import requests
from bs4 import BeautifulSoup
from moviepy.editor import VideoFileClip, AudioFileClip
# 数据的抓取
# Search endpoint of haokan.baidu.com, with the query string captured from a
# browser session.  The `query` value is the URL-encoded search keyword
# "你的名字".  `sign` and `timestamp` were captured together with the URL.
# NOTE(review): presumably the server validates sign/timestamp and they will
# expire - confirm and regenerate them if the request starts failing.
base_url = (
    'https://haokan.baidu.com/haokan/ui-search/pc/search/video'
    '?pn=2&rn=10&type=video'
    '&query=%E4%BD%A0%E7%9A%84%E5%90%8D%E5%AD%97'
    '&sign=2753e5d9aa663a17bda4aefe7d70c195'
    # The published snippet had "&timestamp" mangled into "×tamp" (the HTML
    # entity "&times" was rendered); the real parameter name is restored here.
    '&version=1&timestamp=1689934533378'
)

# Request headers sent with both the search request and the video downloads.
# SECURITY NOTE: the cookie below embeds a logged-in session token (BDUSS).
# Hard-coding credentials in source is unsafe; prefer loading the cookie from
# an environment variable or a local, untracked config file.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/99.0.4844.74 Safari/537.36 Edg/99.0.1150.55'
    ),
    'referer': 'https://haokan.baidu.com/v?vid=4662423747805012530',
    # One cookie per literal; adjacent literals are concatenated into a single
    # header value with no embedded newlines.
    'cookie': (
        'BAIDUID_BFESS=41E64B4A4FE7E58286910E4CD6451D99:FG=1; '
        '__bid_n=188ae9b0787832d3fa4207; '
        'FPTOKEN=yN+gTGvaRHGRBrB2gcFYNfqb8znvTKQWVkm3NVzVNa8JauDDh4YgYldv5FXR1QbQf2Z6sD6YZfDi9ramXHt2XWIu7JcEe6JRMKShF7z0hOK9/EjpQOMYK/KrIIDzn1a8WG2KJuANDoF9uXhFiRh7s0lCgFsL8sEgc/GYlDOe4/6LIXnmDAFHIrPFPf7rDkWJxnccNFENixJJa0urzlcTiRzZnCQz4WpNaJhB5l1x3nSjAg+A6k3h3sk8YRJq+TObVt80JTwOjrchFAr5lO+pfkAV9nUJ/BdbhBcAK1bo7tWBkEm8Ox/4SpLFnwUjJgZ+2PX8JVpP872WibfP8/IYLfsmgPkVelgRfGtXOzcYQFecZRvho1WYjw/NPsqs2j1AZ4L3MsvfhvT0mAMmx0Ejpg==|6GtBgaCkbbg14uotso6iGjBE0BOmRdmpA3RM2xU+iPM=|10|151c74c42cd5e5842f643f6f329cc41d; '
        'jsdk-uuid=372c0a47-11fc-4013-b583-113dd66bed45; '
        'Hm_lvt_4aadd610dfd2f5972f1efee2653a2bc5=1689932808; '
        'BDUSS=Q0eVU1em9RY3dzc3lZd0tFTnRaSVlSZ01zcVRta2l4aWtvcnJ0fkJiQXI0ZUZrSVFBQUFBJCQAAAAAAQAAAAEAAACGrpNCAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACtUumQrVLpkOU; '
        'BDUSS_BFESS=Q0eVU1em9RY3dzc3lZd0tFTnRaSVlSZ01zcVRta2l4aWtvcnJ0fkJiQXI0ZUZrSVFBQUFBJCQAAAAAAQAAAAEAAACGrpNCAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACtUumQrVLpkOU; '
        'hkpcSearch=%u4F60%u7684%u540D%u5B57; '
        'PC_TAB_LOG=video_details_page; '
        'COMMON_LID=7bd2af4edb27287b3ac8b2cfcd2e7c7a; '
        'Hm_lpvt_4aadd610dfd2f5972f1efee2653a2bc5=1689933044; '
        'ariaDefaultTheme=undefined; '
        'ab_sr=1.0.1_NjFhZjE2ZmNiMjUwMzI1ZGIzN2VlZWVkZmQ3NDExM2M5MzMxOGMzYjJlNWNkNTVmMDg5ZmI2NGYxMDU1Yjk4YjEwNGM3OTlkNTY1MjU1Mjk3YTMyNWQ1ZjEwN2ZhOGZjNTNjYzU4OThkYjBiZTg3NzczMmMxZGE0ZjcxYWNlMmE3NDgxNzIwM2I5MDU5ODFiYzNhZmQ0ODMzNGQ1MjVhYQ==; '
        'reptileData=%7B%22data%22%3A%22889395e71af63ebf453cab9a1aaa0d0095f41f8d61f3714d6ad1a0cb5581229bad74f17646eb7578fba710523aeff1c1917424ebd7ca2fc18dc7c6d8aef1319a838942f1ae317b2fd22cf8e1aa9542ea54d1daeacce7307a89cfda3ef3e2bfbb%22%2C%22key_id%22%3A%2230%22%2C%22sign%22%3A%22891a9956%22%7D; '
        # The RT value is double-quoted inside the cookie; the published
        # snippet used single quotes here, which terminated the Python string.
        'RT="z=1&dm=baidu.com&si=46e6622b-7276-41cc-a209-ce37f0bd2058&ss=lkcee0u7&sl=4&tt=8be&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf&ld=4802&ul=4gn5&hd=4gt2"'
    ),
}
# First request: fetch the search results from the search endpoint.
# The original one-liner placed an inline comment before the remaining
# statements, so `data` was never assigned; each statement now has its own line.
# A timeout keeps the script from hanging forever if the server stops answering.
response = requests.get(base_url, headers=headers, timeout=30)
print(response.status_code)
# Raw response body; it is parsed as JSON in the next step.
data = response.text
print(data)
# 数据解析:用 json.loads 将响应文本转换为 Python 对象
# Parse the response text into Python objects.  The four statements were
# fused onto a single line (a syntax error); they are separated here.
json_data = json.loads(data)
print(json_data)
# The list of video entries sits under data -> response -> videos.
json_list = json_data['data']['response']['videos']
print(json_list)
# Download every video in `json_list` into the local "./视频" directory.
save_dir = './视频'
# open() does not create missing directories, so make sure the target exists.
os.makedirs(save_dir, exist_ok=True)

# The loop variable is named `video` rather than `data` so that it no longer
# overwrites the response text held in `data`.
for video in json_list:
    # Titles come from the remote API and may contain characters that are not
    # allowed in file names (path separators, ':', '?', newlines, ...).
    safe_title = re.sub(r'[\\/:*?"<>|\r\n]', '_', video['title'])
    video_title = safe_title + '.mp4'
    video_url = video['play_url']
    print(video_title, video_url)
    print('正在下载:', video_title)
    # Second request: fetch the raw bytes of the video file.
    video_data = requests.get(video_url, headers=headers, timeout=30).content
    with open(os.path.join(save_dir, video_title), 'wb') as f:
        f.write(video_data)
    print('下载完成\n')
# 原文地址: https://www.cveoy.top/t/topic/fRle 著作权归作者所有。请勿转载和采集!