# -*- coding: utf-8 -*-
import math
import numpy as np
import jieba
import jieba.posseg as psg
from gensim import corpora, models
from jieba import analyse
import functools
# 步骤2:定义停用词表加载函数。停用词表加载方法:def get_stopword…(原文在此处被截断)
根据你提供的代码,我注意到在tfidf_extract和textrank_extract函数中,关键词的提取结果是直接打印在控制台上的。如果你希望将结果保存到文件中,你可以将关键词保存到一个字符串中,然后再将该字符串写入文件中。下面是修改后的代码示例:
def tfidf_extract(word_list, pos=False, keyword_num=10,
                  output_path="d:/Users/Administrator/Desktop/data/result.txt"):
    """Extract keywords with the TF-IDF model and write them to a file.

    The corpus is loaded with ``load_data(pos)``, IDF statistics are trained
    on it with ``train_idf``, and a ``TfIdf`` model is built for
    ``word_list``. The keywords returned by ``get_tfidf()`` are written to
    ``output_path`` as a single "/"-terminated sequence under a header line.

    The file is opened in "w" mode, so any previous content is discarded.

    Args:
        word_list: Filtered word list of the document to analyse.
        pos: Forwarded to ``load_data``.
        keyword_num: Forwarded to ``TfIdf`` as the keyword count.
        output_path: File the result is written to. Defaults to the path
            that was previously hard-coded in the function body.
    """
    doc_list = load_data(pos)
    idf_dic, default_idf = train_idf(doc_list)
    tfidf_model = TfIdf(idf_dic, default_idf, word_list, keyword_num)
    keywords = tfidf_model.get_tfidf()

    # Collect the keywords into one string, each followed by "/".
    # NOTE(review): assumes get_tfidf() returns an iterable of str — confirm
    # against the TfIdf class, which is not visible here.
    result = "".join(keyword + "/" for keyword in keywords)

    # Write the header and the keywords, replacing any existing file.
    with open(output_path, "w", encoding='utf-8') as f:
        f.write('TF-IDF模型结果:\n')
        f.write(result)
def textrank_extract(text, pos=False, keyword_num=10,
                     output_path="d:/Users/Administrator/Desktop/data/result.txt"):
    """Extract keywords with jieba's TextRank and append them to a file.

    The keywords are appended to ``output_path`` as a single "/"-terminated
    sequence under a header line. The file is opened in "a" mode, so
    existing content (for example the TF-IDF section written earlier) is
    kept.

    Args:
        text: Raw text to analyse.
        pos: Not used by this function; kept so the signature stays parallel
            to ``tfidf_extract``.
        keyword_num: Number of keywords requested from TextRank.
        output_path: File the result is appended to. Defaults to the path
            that was previously hard-coded in the function body.
    """
    textrank = analyse.textrank
    # Pass the count by keyword so it cannot be confused with another
    # positional option of textrank().
    keywords = textrank(text, topK=keyword_num)

    # Collect the keywords into one string, each followed by "/".
    result = "".join(keyword + "/" for keyword in keywords)

    # Append the header and the keywords to the result file.
    with open(output_path, "a", encoding='utf-8') as f:
        f.write('\nTextRank模型结果:\n')
        f.write(result)
# Run both keyword extractors on the corpus and save the results to a file.
# `pos` was referenced below without ever being defined (NameError); it is
# set to True to match the value passed to both extractors.
pos = True

with open("d:/Users/Administrator/Desktop/data/corpus.txt", "r", encoding='utf-8') as f:
    text = f.read()  # read the whole corpus text

# Segment and filter the text before handing it to the TF-IDF extractor.
seg_list = seg_to_list(text, pos)
filter_list = word_filter(seg_list, pos)

# TF-IDF runs first because it truncates the result file; TextRank appends.
tfidf_extract(filter_list, pos, 10)
textrank_extract(text, pos, 10)
这样,关键词提取结果就会保存到d:/Users/Administrator/Desktop/data/result.txt文件中了。
原文地址: https://www.cveoy.top/t/topic/i3zn 著作权归作者所有。请勿转载和采集!