import jieba
import matplotlib.pyplot as plt
import openpyxl
import requests
from bs4 import BeautifulSoup
from wordcloud import WordCloud

# 创建excel文件 (Create the Excel workbook)

# Create a new in-memory workbook and remove the sheet that is active right
# after construction, leaving the workbook with no worksheets at this point.
wb = openpyxl.Workbook()
wb.remove(wb.active)

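# 爬取热榜、娱乐榜、社会榜、挑战榜的数据并存储到excel文件中
# (Scrape the hot, entertainment, society and challenge billboards and store
# the data in the Excel workbook)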

# Billboard display name -> Douyin hot-search billboard URL.
# All four endpoints share one base URL and differ only in the value of the
# ``type`` query parameter, so the mapping is derived from (name, type) pairs.
_BILLBOARD_BASE = 'https://www.douyin.com/aweme/v1/hotsearch/billboard/?type='
_BILLBOARD_TYPES = (
    ('热榜', 'hot'),
    ('娱乐榜', 'entertainment'),
    ('社会榜', 'society'),
    ('挑战榜', 'challenge'),
)
urls = {label: _BILLBOARD_BASE + kind for label, kind in _BILLBOARD_TYPES}

# For every billboard: download the page, extract its entries and write them
# to a worksheet named after the billboard (one header row, then one row per
# entry with title, hot value and link).
#
# NOTE(review): the URL paths look like a JSON API endpoint, while the code
# parses the response as HTML and selects CSS classes. Confirm the response
# really contains `.list-item` elements; otherwise every sheet ends up with
# only the header row.
for name, url in urls.items():
    # Fetch the data. The timeout stops the script from hanging forever on
    # an unresponsive server, and raise_for_status() turns an HTTP error
    # response into an exception instead of silently parsing an error page.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    items = soup.select('.list-item')

    # Write to the Excel workbook.
    sheet = wb.create_sheet(name)
    sheet.append(['标题', '热度', '链接'])
    for item in items:
        title_tag = item.select_one('.title')
        if title_tag is None:
            # An entry without a title is useless for the later word cloud
            # and search steps, so it is skipped rather than crashing.
            continue
        hot_tag = item.select_one('.hot-value')
        link_tag = item.select_one('.link')
        # select_one() returns None when nothing matches; write an empty
        # cell in that case instead of raising AttributeError.
        hot = hot_tag.text if hot_tag is not None else None
        link = link_tag.get('href') if link_tag is not None else None
        sheet.append([title_tag.text, hot, link])

# 保存excel文件 (Save the Excel workbook)

# Write the workbook to disk. The same file name is used again below to
# reload the workbook.
# NOTE(review): the name reads like a "student ID + name" placeholder;
# confirm whether it is meant to be replaced before submission.
filename = '学号+姓名.xlsx'
wb.save(filename)

# 生成词云图 (Generate the word cloud)

# Reload the saved workbook and collect the title column (first column) of
# every data row; row 1 is the header and is skipped via min_row=2.
wb = openpyxl.load_workbook(filename)
words = []
for sheet in wb:
    if sheet.title == 'Sheet1':  # exclude a default-looking worksheet
        continue
    for row in sheet.iter_rows(min_row=2):
        title = row[0].value  # only the title is used
        # Empty cells come back as None and would make str.join() raise
        # TypeError, so they are left out.
        if title is not None:
            words.append(str(title))

# Segment the titles with jieba and re-join the tokens with spaces so that
# WordCloud can split the text into words.
text = ' '.join(jieba.cut(' '.join(words)))

if text.strip():
    # font_path points at a font file name that must be resolvable on the
    # machine running the script; it is kept exactly as in the original.
    wordcloud = WordCloud(width=800, height=600, font_path='msyh.ttc')
    wordcloud.generate(text)
    plt.imshow(wordcloud)
    plt.axis('off')
    plt.show()
else:
    # WordCloud.generate() raises ValueError when there is no word to plot,
    # so the plot is skipped when nothing was scraped.
    print('没有可用于生成词云的标题')

# 模拟搜索 (Simulated search)

# Ask for a keyword and collect every worksheet (category) that has at least
# one title containing it. Each category is listed at most once.
keyword = input('请输入要搜索的视频名称：')
categories = []
for sheet in wb:
    if sheet.title == 'Sheet1':  # exclude a default-looking worksheet
        continue
    titles = (row[0].value for row in sheet.iter_rows(min_row=2))
    # Only string cells are tested: an empty cell is None, and
    # `keyword in None` would raise TypeError.
    if any(isinstance(title, str) and keyword in title for title in titles):
        categories.append(sheet.title)

if categories:
    print('该视频对应的分类有：')
    for i, category in enumerate(categories, start=1):
        print(f'{i}. {category}')

    # Non-numeric input is mapped to 0, which the range check below rejects.
    try:
        choice = int(input('请选择分类：'))
    except ValueError:
        choice = 0

    # The explicit range check matters: without it 0 or a negative number
    # would silently index from the end of the list, and a number that is
    # too large would raise IndexError.
    if 1 <= choice <= len(categories):
        sheet = wb[categories[choice - 1]]
        print('该分类前10条视频的标题和链接为：')
        # Rows 2..11 are the first ten data rows below the header.
        for row in sheet.iter_rows(min_row=2, max_row=11):
            title = row[0].value
            link = row[2].value
            print(f'标题：{title} 链接：{link} ')
    else:
        print('无效的分类选择')
else:
    print('没有找到相关视频')

# 抖音热榜数据爬取与分析：词云生成和视频搜索
# (Douyin billboard scraping and analysis: word cloud generation and video search)

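# 创建excel文件 (Create the Excel workbook)

# 爬取热榜、娱乐榜、社会榜、挑战榜的数据并存储到excel文件中 (Scrape the four billboards into the workbook)

# 保存excel文件 (Save the Excel workbook)

# 生成词云图 (Generate the word cloud)

# 模拟搜索 (Simulated search)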