To modify the crawler code to scrape all the images from the provided webpage, you can use the following code:

if RUNNING_MODE == 1:
    # Scrape one Bilibili article-search result page, visit every article it
    # links to, and save each lazily-loaded image (<img data-src=...>) into the
    # current working directory as image_<article#>_<image#>.jpg.
    #
    # Relies on names provided elsewhere in the script: RUNNING_MODE, webdriver,
    # BeautifulSoup, requests and time.

    # Local import keeps this snippet self-contained. urljoin turns relative and
    # protocol-relative links ("//host/path") into absolute URLs and leaves
    # already-absolute URLs unchanged.
    from urllib.parse import urljoin

    search_url = 'https://search.bilibili.com/article?keyword=%E9%A5%AD%E6%8B%8D%E5%9B%BE&page=2'
    article_prefix = 'https://www.bilibili.com/read/'

    driver = webdriver.Chrome()
    try:
        print('开始抓取页面所有文章,请稍候..')
        driver.get(search_url)
        time.sleep(1)  # fixed pause before reading the rendered page source
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Select only anchors that carry an href, and default a missing target
        # to '' so anchors without that attribute are skipped, not a TypeError.
        candidate_urls = [
            urljoin(search_url, link['href'])
            for link in soup.select('a[href]')
            if '_blank' in link.get('target', '')
        ]
        article_urls = [url for url in candidate_urls if article_prefix in url]
        print(f'本次共扫描到 {len(article_urls)} 篇文章!')

        total_url_count = len(article_urls)
        for current_url_count, article_url in enumerate(article_urls, start=1):
            print(f'正在加载第 {current_url_count}/{total_url_count} 篇文章内容...')

            # Open the article page and collect its image URLs.
            driver.get(article_url)
            time.sleep(1)
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            image_elements = [
                urljoin(article_url, img['data-src'])
                for img in soup.select('img[data-src]')
                if img['data-src']  # an empty data-src has nothing to download
            ]

            # Download images. A failed download is reported and skipped so a
            # single bad URL does not abort the whole crawl.
            for i, image_url in enumerate(image_elements, start=1):
                try:
                    response = requests.get(image_url, timeout=30)
                    response.raise_for_status()
                except requests.RequestException as exc:
                    print(f'下载图片 {i}/{len(image_elements)} 失败: {exc}')
                    continue
                with open(f'image_{current_url_count}_{i}.jpg', 'wb') as f:
                    f.write(response.content)
                print(f'下载图片 {i}/{len(image_elements)} 完成')
    finally:
        # Always shut the browser down, even if scraping raised midway.
        driver.quit()

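Note: This code assumes that the beautifulsoup4, selenium, and requests libraries are installed (plus webdriver_manager if you use it to provision ChromeDriver), that `time`, `requests`, `BeautifulSoup` (from `bs4`), and `webdriver` (from `selenium`) are imported, and that `RUNNING_MODE` is defined earlier in the script.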

Python 爬虫:抓取Bilibili 文章图片

原文地址: http://www.cveoy.top/t/topic/hNx0 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录