# Bilibili 文章图片爬取 - 使用 Python 和 Selenium 获取所有文章的图片
import re
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By

# Bilibili article search results page that is scanned for article links.
SEARCH_URL = 'https://search.bilibili.com/article?keyword=%E9%A5%AD%E6%8B%8D%E5%9B%BE&page=2'

# Only links containing this prefix are treated as article pages.
ARTICLE_URL_PREFIX = 'https://www.bilibili.com/read/'

# Seconds to wait after each navigation before reading the page.
PAGE_LOAD_DELAY = 1

# Matches a CSS ``url(...)`` value with optional single or double quotes.
# NOTE(review): the original split on a literal ``url(`` plus a quote character
# that was lost in the paste; accepting either quote (or none) covers both.
_CSS_URL_RE = re.compile(r"""url\(\s*['"]?(.*?)['"]?\s*\)""")


def extract_style_url(style):
    """Return the target of the first ``url(...)`` in an inline CSS string.

    Args:
        style: The value of an element's ``style`` attribute, or ``None``.

    Returns:
        The URL text without surrounding quotes, or ``None`` when ``style``
        is empty or contains no non-empty ``url(...)`` value.
    """
    match = _CSS_URL_RE.search(style or '')
    if match is None:
        return None
    return match.group(1) or None


# Set up the WebDriver used for the search page.
driver = webdriver.Chrome()
driver2 = None
try:
    # Open the Bilibili article search page and give it time to render.
    driver.get(SEARCH_URL)
    time.sleep(PAGE_LOAD_DELAY)

    # Collect article links: anchors that open in a new tab and point at
    # an article page.  Missing attributes come back as None, so they are
    # normalised to '' before the substring checks.
    article_urls = []
    for link in driver.find_elements(By.CSS_SELECTOR, 'a'):
        target = link.get_attribute('target') or ''
        href = link.get_attribute('href') or ''
        if '_blank' in target and ARTICLE_URL_PREFIX in href:
            article_urls.append(href)

    # Report how many articles were found.
    print(f'本次共扫描到 {len(article_urls)} 篇文章!')

    # Set up the second WebDriver, used to visit each article.
    driver2 = webdriver.Chrome()

    # Visit every article link.
    for article_url in article_urls:
        # Load the article page.
        driver2.get(article_url)
        time.sleep(PAGE_LOAD_DELAY)

        # Parse the rendered page content.
        soup = BeautifulSoup(driver2.page_source, 'html.parser')

        # Find and print the image links; elements without a usable
        # inline url(...) are skipped instead of raising.
        image_elements = soup.select('.card-image__image')
        image_urls = [
            url
            for url in (extract_style_url(img.get('style')) for img in image_elements)
            if url
        ]
        for image_url in image_urls:
            print(image_url)
finally:
    # Always close the browsers, even if scraping failed part-way.
    try:
        driver.quit()
    finally:
        if driver2 is not None:
            driver2.quit()
# 原文地址: https://www.cveoy.top/t/topic/hNeo 著作权归作者所有。请勿转载和采集!