import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By

# Bilibili article search results page to scan (URL-encoded keyword "饭拍图", page 2).
SEARCH_URL = 'https://search.bilibili.com/article?keyword=%E9%A5%AD%E6%8B%8D%E5%9B%BE&page=2'

# Only links containing this prefix are treated as article pages.
ARTICLE_URL_PREFIX = 'https://www.bilibili.com/read/'

# Fixed pause after each navigation so the page has time to render.
PAGE_LOAD_DELAY_SECONDS = 1


def is_article_link(href, target):
    """Return True if an anchor opens in a new tab and points at an article.

    Args:
        href: Value of the anchor's ``href`` attribute, or None if absent.
        target: Value of the anchor's ``target`` attribute, or None if absent.

    Returns:
        True when ``target`` contains ``_blank`` and ``href`` contains
        ``ARTICLE_URL_PREFIX``; False otherwise (including missing attributes).
    """
    # Either attribute may be None; normalise to '' so the substring tests
    # cannot raise TypeError.
    return '_blank' in (target or '') and ARTICLE_URL_PREFIX in (href or '')


def extract_background_url(style):
    """Extract the URL from a CSS ``url(...)`` value in an inline style string.

    Args:
        style: Inline style text such as
            ``background-image: url("//host/pic.jpg");``, or None.

    Returns:
        The URL with surrounding whitespace and quotes removed, or None when
        the style is empty or contains no complete ``url(...)`` value.
    """
    if not style:
        return None
    _, opener, remainder = style.partition('url(')
    if not opener:
        return None
    raw_url, closer, _ = remainder.partition(')')
    if not closer:
        return None
    # The value may be unquoted, single-quoted or double-quoted.
    return raw_url.strip().strip('\'"').strip() or None


def collect_article_urls(driver):
    """Load the search page and return the article links found on it.

    Args:
        driver: A Selenium WebDriver instance used to load ``SEARCH_URL``.

    Returns:
        A list of article URLs in page order with duplicates removed.
    """
    driver.get(SEARCH_URL)
    time.sleep(PAGE_LOAD_DELAY_SECONDS)

    hrefs = []
    for anchor in driver.find_elements(By.CSS_SELECTOR, 'a'):
        href = anchor.get_attribute('href')
        if is_article_link(href, anchor.get_attribute('target')):
            hrefs.append(href)
    # The same article can be linked more than once on the page;
    # dict.fromkeys de-duplicates while keeping first-seen order.
    return list(dict.fromkeys(hrefs))


def print_article_images(driver, article_url):
    """Open one article page and print the URL of every card image on it.

    Args:
        driver: A Selenium WebDriver instance used to load the article.
        article_url: URL of the article page to visit.
    """
    driver.get(article_url)
    time.sleep(PAGE_LOAD_DELAY_SECONDS)

    # Parse the rendered page source.
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    # The image address is read from each matching element's inline style.
    for element in soup.select('.card-image__image'):
        image_url = extract_background_url(element.get('style'))
        if image_url:
            print(image_url)


def main():
    """Scan the search page for articles, then print each article's image URLs."""
    # First WebDriver: collect the article links from the search page.
    driver = webdriver.Chrome()
    try:
        article_urls = collect_article_urls(driver)
    finally:
        # Always release the browser, even if scraping failed.
        driver.quit()

    # Report how many articles were found.
    print(f'本次共扫描到 {len(article_urls)} 篇文章!')

    if not article_urls:
        return

    # Second WebDriver: visit every article link.
    driver2 = webdriver.Chrome()
    try:
        for article_url in article_urls:
            print_article_images(driver2, article_url)
    finally:
        driver2.quit()


if __name__ == '__main__':
    main()

# Bilibili 文章图片爬取 - 使用 Python 和 Selenium 获取所有文章的图片
#
# 原文地址: https://www.cveoy.top/t/topic/hNeo 著作权归作者所有。请勿转载和采集!
#
# 免费AI点我,无需注册和登录