用Python编写一段可运行的代码，爬取七猫免费小说网的数据。主要任务：设计一个窗体应用系统，具有以下功能：1. 加载需要用到的各种库，如 requests、BeautifulSoup4、lxml、sqlite3、jieba、WordCloud、openpyxl 等；2. 爬取一个网站的信息；3. 将信息保存到 Excel 表中；4. 生成图表或显示处理后的信息。
import sqlite3
from collections import Counter
from contextlib import closing

import jieba
import matplotlib.pyplot as plt
import requests
from bs4 import BeautifulSoup
from openpyxl import Workbook
from wordcloud import WordCloud
# Crawl data from the Qimao free-novel site (七猫免费小说网).
def _node_text(node, link=False):
    """Return the stripped text of a BeautifulSoup node, or '' if it is missing.

    When ``link`` is true the text of the node's first ``<a>`` child is
    preferred, falling back to the node's own text if there is no link.
    """
    if node is None:
        return ''
    target = node.a if link and node.a is not None else node
    return target.get_text(strip=True)


def _fetch_books(url, timeout):
    """Download the listing page and return (title, author, intro, category) tuples."""
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as if it were data.
    response.raise_for_status()
    # Without an explicit charset requests falls back to ISO-8859-1 for text
    # responses, which garbles Chinese; use the detected encoding instead.
    if 'charset' not in response.headers.get('Content-Type', '').lower():
        response.encoding = response.apparent_encoding

    soup = BeautifulSoup(response.text, 'lxml')
    records = []
    # NOTE(review): the 'book' / 'book-title' / 'author' / 'intro' / 'category'
    # class names are taken as-is from the original code; confirm they match
    # the site's current markup.
    for book in soup.find_all('div', class_='book'):
        title = _node_text(book.find('h4', class_='book-title'), link=True)
        if not title:
            # An entry without a title carries no usable record; skip it
            # rather than crash on a missing node.
            continue
        records.append((
            title,
            _node_text(book.find('p', class_='author'), link=True),
            _node_text(book.find('p', class_='intro')),
            _node_text(book.find('span', class_='category')),
        ))
    return records


def _store_books(conn, records):
    """Create the ``books`` table if needed and insert ``records`` in one transaction."""
    # ``with conn`` commits on success and rolls back if an insert fails.
    with conn:
        conn.execute('CREATE TABLE IF NOT EXISTS books (id INTEGER PRIMARY KEY AUTOINCREMENT, title TEXT, author TEXT, intro TEXT, category TEXT)')
        conn.executemany(
            'INSERT INTO books (title, author, intro, category) VALUES (?, ?, ?, ?)',
            records,
        )


def _export_to_excel(rows, xlsx_path):
    """Write a header row followed by ``rows`` to a new workbook at ``xlsx_path``."""
    wb = Workbook()
    ws = wb.active
    ws.append(['书名', '作者', '简介', '分类'])
    for row in rows:
        ws.append(row)
    wb.save(xlsx_path)


def _show_word_cloud(intros, font_path):
    """Segment the intros with jieba and display a word cloud of the words found."""
    # Join with a newline so jieba cannot form a token that spans the end of
    # one intro and the start of the next.
    text = '\n'.join(intro for intro in intros if intro)
    # Keep only tokens of two or more visible characters, as the original did,
    # while ignoring pure-whitespace tokens.
    word_counts = Counter(
        word for word in jieba.cut(text) if len(word.strip()) >= 2
    )
    if not word_counts:
        # WordCloud raises ValueError when given no words at all.
        print('No words available to build a word cloud; skipping the chart.')
        return
    wc = WordCloud(font_path=font_path, background_color='white', width=800, height=600, max_words=200)
    wc.generate_from_frequencies(word_counts)
    plt.imshow(wc)
    plt.axis('off')
    plt.show()


def crawl_qimao(*, url='https://www.qimao.com/free/all', db_path='qimao.db',
                xlsx_path='qimao.xlsx', font_path='msyh.ttc', timeout=10):
    """Scrape the Qimao free-novel listing, persist it, and visualise it.

    The function performs four steps in order:

    1. Download ``url`` and extract title, author, intro and category for
       every ``div.book`` element.
    2. Append those records to the ``books`` table of the SQLite database at
       ``db_path`` (the table is created on first use; rows from earlier runs
       are kept).
    3. Export every row of the table to the Excel workbook ``xlsx_path``.
    4. Build a word cloud from all stored intros and show it with matplotlib.

    Args:
        url: Listing page to scrape.
        db_path: SQLite database file.
        xlsx_path: Destination Excel file (overwritten if present).
        font_path: Font file used by WordCloud; it must contain CJK glyphs.
            The default ``msyh.ttc`` is a Windows font.
        timeout: Seconds to wait for the HTTP request.

    Raises:
        requests.RequestException: If the page cannot be downloaded or the
            server answers with an error status.
        sqlite3.Error: If the database cannot be written or read.
    """
    books = _fetch_books(url, timeout)

    # One connection serves both the insert and the read-back; ``closing``
    # guarantees it is released even if a step raises.
    with closing(sqlite3.connect(db_path)) as conn:
        _store_books(conn, books)
        rows = conn.execute(
            'SELECT title, author, intro, category FROM books ORDER BY id'
        ).fetchall()

    _export_to_excel(rows, xlsx_path)
    _show_word_cloud((row[2] for row in rows), font_path)
# Run the crawler only when this file is executed as a script, not on import.
if __name__ == '__main__':
    crawl_qimao()
原文地址: https://www.cveoy.top/t/topic/hnGv 著作权归作者所有。请勿转载和采集!