# 写一个爬取哔哩哔哩首页内容包括标题类型封面图片并将爬取的内容存在csv文件中的python代码并运行出来显示csv文件中的内容继续运行直到爬取100条信息
import csv

import requests
from bs4 import BeautifulSoup
# Target page plus a desktop-browser User-Agent so the server returns the
# normal HTML page instead of rejecting a bare scripted request.
url = "https://www.bilibili.com"
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/51.0.2704.63 Safari/537.36'
    ),
}
def get_html(url):
    """Fetch *url* and return its decoded HTML text, or "" on any request failure.

    Sends the module-level browser ``headers`` and re-decodes with the
    encoding requests sniffs from the body (``apparent_encoding``) so the
    Chinese titles come out correctly.
    """
    try:
        # timeout keeps the crawl loop from hanging forever on a stalled server
        r = requests.get(url, headers=headers, timeout=10)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Narrowed from a bare ``except:`` — only network/HTTP errors mean
        # "no page"; programming errors still propagate and stay visible.
        return ""
def parse_html(html):
    """Extract ``[title, category, cover_url]`` rows from homepage HTML.

    Each ``<a class="title">`` anchor is one entry: its ``title`` attribute
    is the video title, the nearest preceding ``<span class="type">`` is the
    category, and the nearest following ``<img>`` supplies the cover URL.
    Entries missing any of the three pieces are skipped — the original code
    raised AttributeError on the first incomplete card.
    """
    soup = BeautifulSoup(html, 'html.parser')
    results = []
    for item in soup.find_all('a', class_="title"):
        title = item.get('title')
        # ``type_tag`` instead of ``type`` — don't shadow the builtin.
        type_tag = item.find_previous('span', class_="type")
        image_tag = item.find_next('img')
        if not (title and type_tag and image_tag):
            continue  # incomplete card: skip rather than crash mid-page
        results.append([title, type_tag.text.strip(), image_tag.get('src')])
    return results
def write_csv(results, filename='bilibili.csv'):
    """Append *results* (rows of [title, type, image]) to *filename* as CSV.

    :param results: iterable of row lists to write.
    :param filename: output path; defaults to ``bilibili.csv`` so existing
        callers are unchanged (parameterized so the destination is testable).

    Opened in append mode so successive crawl batches accumulate; the
    'utf-8-sig' BOM lets Excel open the Chinese text correctly.
    """
    with open(filename, 'a', newline='', encoding='utf-8-sig') as f:
        csv.writer(f).writerows(results)
# Crawl loop: keep re-fetching the homepage (its recommendation cards rotate
# between requests) until at least 100 rows are written.  The original code
# tried to paginate via ``soup``, a variable that only exists inside
# parse_html, so it raised NameError at the end of the first iteration;
# re-requesting the same homepage URL is the working equivalent.
count = 0
attempts = 0
while count < 100:
    attempts += 1
    if attempts > 50:
        # Safety valve: if the page repeatedly yields nothing (markup changed,
        # network down), stop instead of looping forever.
        break
    html = get_html(url)
    results = parse_html(html)
    if not results:
        continue
    write_csv(results)
    count += len(results)
    print(f"已爬取{count}条信息")
print('爬取完成!')
# 原文地址: https://www.cveoy.top/t/topic/bDzt 著作权归作者所有。请勿转载和采集!