爬取部分弹幕数据的程序:

import requests
from bs4 import BeautifulSoup

def get_danmu(video_id, timeout=10):
    """Return the text of every danmaku (bullet comment) in one danmaku list.

    Requests the ``dm/list.so`` endpoint with ``video_id`` as the ``oid``
    query parameter, parses the response body with BeautifulSoup's ``lxml``
    parser and collects the text of each ``<d>`` element.

    Args:
        video_id: Value substituted into the ``oid`` query parameter.
            NOTE(review): this endpoint presumably expects the numeric cid
            of a video part rather than a BV/av id -- confirm.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of strings, one per ``<d>`` element, in document order.

    Raises:
        requests.RequestException: On timeout, connection failure or a
            non-success HTTP status.
    """
    url = f"https://api.bilibili.com/x/v1/dm/list.so?oid={video_id}"
    # Bound the wait so a stalled connection cannot hang the script forever.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an HTTP error instead of parsing an error page into an
    # empty result that looks like "this video has no comments".
    response.raise_for_status()
    # Hand over the raw bytes; BeautifulSoup does its own encoding detection.
    soup = BeautifulSoup(response.content, 'lxml')
    return [danmu.text for danmu in soup.find_all("d")]

# Placeholder: replace with the ID of the video whose danmaku you want to scrape.
video_id = "视频的ID"
danmu_list = get_danmu(video_id)
if danmu_list:
    # Emit one comment per line in a single write; nothing is printed when
    # the list is empty.
    print("\n".join(danmu_list))

爬取所有历史弹幕数据的程序:

import requests
import time
import json

def get_danmu(video_id):
    """Fetch the raw body of the ``dm/list.so`` danmaku list as text.

    ``video_id`` is substituted into the ``oid`` query parameter. The
    response is decoded as UTF-8 and returned unparsed.
    """
    reply = requests.get(
        f"https://api.bilibili.com/x/v1/dm/list.so?oid={video_id}"
    )
    # Force UTF-8 rather than relying on the encoding requests would guess.
    reply.encoding = 'utf-8'
    return reply.text

def get_all_danmu(video_id, timeout=10, delay=0.5):
    """Download the danmaku list of every part of a video and concatenate them.

    Looks up the video's parts through the ``pagelist`` endpoint (using
    ``video_id`` as the ``bvid`` query parameter), then requests
    ``dm/list.so`` once per part with that part's ``cid``.

    Args:
        video_id: Value substituted into the ``bvid`` query parameter.
        timeout: Seconds to wait for each HTTP request before giving up.
        delay: Seconds to sleep between consecutive danmaku requests.

    Returns:
        One string containing the UTF-8 decoded response bodies of all
        parts, in the order the parts are listed. Empty string when the
        pagelist response carries no parts.

    Raises:
        requests.RequestException: On timeout, connection failure or a
            non-success HTTP status.

    NOTE(review): the pagelist response appears to be a list with one entry
    per part (keys such as ``cid`` and ``page``) and no ``pages`` count, so
    indexing ``['pages']`` on the first entry would raise ``KeyError`` --
    confirm against the API documentation.
    NOTE(review): this collects the current danmaku list of each part;
    date-based history is presumably served by a separate, authenticated
    endpoint -- confirm.
    """
    pagelist_url = f"https://api.bilibili.com/x/player/pagelist?bvid={video_id}&jsonp=jsonp"
    response = requests.get(pagelist_url, timeout=timeout)
    response.raise_for_status()
    # ``data`` may be missing or null on an API-level error; treat as no parts.
    parts = response.json().get('data') or []

    chunks = []
    for index, part in enumerate(parts):
        if index:
            # Throttle follow-up requests to avoid hammering the server.
            time.sleep(delay)
        dm_url = f"https://api.bilibili.com/x/v1/dm/list.so?oid={part['cid']}"
        dm_response = requests.get(dm_url, timeout=timeout)
        dm_response.raise_for_status()
        # Force UTF-8 rather than relying on the encoding requests would guess.
        dm_response.encoding = 'utf-8'
        chunks.append(dm_response.text)

    return "".join(chunks)

# Placeholder: replace with the ID of the video whose danmaku you want to scrape.
video_id = "视频的ID"
danmu_list = get_all_danmu(video_id)
# Persist the downloaded text as UTF-8, overwriting any previous danmu.txt.
with open('danmu.txt', mode='w', encoding='utf-8') as output_file:
    output_file.write(danmu_list)

上面的第二段程序会把获取到的弹幕数据保存到名为 danmu.txt 的文件中。

请写出 Python 程序来爬取 B 站视频弹幕的所有数据。这段程序只能爬取部分弹幕数据,还有很多历史数据没有爬取下来,请再写出程序爬取所有历史弹幕数据。

原文地址: https://www.cveoy.top/t/topic/ixV1 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录