Python实战：利用正则表达式处理汽车之家评论数据

日期: 2026-01-27 07:24:35

标签: 常规

import pandas as pd
import re

# Read the Excel file into a DataFrame. The code further down reads its
# '最满意', '最不满意' and '智能化' columns row by row.
# NOTE(review): the path is relative, so it presumably resolves against the
# current working directory — confirm where the script is run from.
df = pd.read_excel('汽车之家_秦plus_评论.xls')

# Split a long piece of text into sentences
def split_sentences(text):
    """Split *text* into sentences on Chinese end-of-sentence punctuation.

    The text is cut at every full-width full stop, exclamation mark or
    question mark (``。``, ``！``, ``？``). The delimiters themselves are
    dropped, each piece is stripped of surrounding whitespace, and pieces
    that are empty after stripping are discarded.

    Args:
        text: The text to split. ``None`` and float NaN (what pandas yields
            for an empty spreadsheet cell) are treated as "no text". Any
            other non-string value is converted with ``str()`` first.

    Returns:
        A list of non-empty, stripped sentence strings; an empty list when
        there is nothing to split.
    """
    # Missing cells come back from pandas as float NaN, and re.split would
    # raise TypeError on them. NaN is the only float that differs from itself.
    if text is None or (isinstance(text, float) and text != text):
        return []
    if not isinstance(text, str):
        # Numeric or other scalar cell values: split their textual form.
        text = str(text)

    # Cut at the sentence-ending punctuation marks.
    pieces = re.split(r'[。！？]', text)
    # Strip each piece once and drop the ones that end up empty.
    stripped = (piece.strip() for piece in pieces)
    return [sentence for sentence in stripped if sentence]

# Build one dictionary per DataFrame row, mapping each column label to the
# list of sentences produced by split_sentences for that row's cell.
# Add further column labels to this tuple to split more columns.
columns = ('最满意', '最不满意', '智能化')
output = [
    {column: split_sentences(review[column]) for column in columns}
    for _, review in df.iterrows()
]

# Print the result
print(output)

原文地址: https://www.cveoy.top/t/topic/Sez 著作权归作者所有。请勿转载和采集!