import pandas as pd
import re

# Location of the exported review spreadsheet. A raw string is required:
# in a normal literal the "\U" in "C:\Users" starts a \UXXXXXXXX escape
# and the file fails to compile.
EXCEL_PATH = r'C:\Users\86186\Desktop\汽车之家_秦plus_评论.xls'

# Split on both the ASCII comma and the full-width comma (U+FF0C).
# The original character class listed the ASCII comma twice; presumably the
# second entry was meant to be the full-width comma used in Chinese text.
COMMA_PATTERN = re.compile("[,\uff0c]")


def split_clauses(value):
    """Split a cell value into stripped, comma-separated clauses.

    Args:
        value: A cell value read from the DataFrame; may be null (None/NaN).

    Returns:
        A list of stripped substrings, or an empty list when ``value`` is
        null. Empty fragments (e.g. from a trailing comma) are kept.
    """
    if pd.isnull(value):
        return []
    return [clause.strip() for clause in COMMA_PATTERN.split(str(value))]


def extract_sentences(df):
    """Build one dict of split review clauses per usable row of ``df``.

    A row is used only when both its '用户昵称' and '最满意' cells are
    non-null. The '智能化' text is read from the row that follows the
    current one; when there is no next row, or its cell is null, an empty
    list is stored.

    Args:
        df: DataFrame with the columns '用户昵称', '最满意', '最不满意'
            and '智能化'.

    Returns:
        A list of dicts with the keys '最满意', '最不满意' and '智能化',
        each mapping to a list of clauses.
    """
    sentences = []
    row_count = len(df)

    # Positional indexing is kept on purpose: each row needs a look-ahead
    # to the row at index + 1.
    for index in range(row_count):
        row = df.iloc[index]
        if pd.isnull(row['用户昵称']) or pd.isnull(row['最满意']):
            continue

        if index + 1 < row_count:
            next_intelligence = df.iloc[index + 1]['智能化']
        else:
            next_intelligence = None

        sentences.append({
            '最满意': split_clauses(row['最满意']),
            '最不满意': split_clauses(row['最不满意']),
            '智能化': split_clauses(next_intelligence),
        })

    return sentences


if __name__ == "__main__":
    # Read the Excel file.
    df = pd.read_excel(EXCEL_PATH)

    # Build the list of per-review dicts.
    sentences = extract_sentences(df)

    # Print the result, one dict per line.
    for sentence_dict in sentences:
        print(sentence_dict)

这段代码首先使用 Pandas 库读取汽车之家秦 PLUS 的评论数据，然后遍历每一行数据；对于“用户昵称”和“最满意”两列均不为空的行，提取最满意、最不满意和智能化评价的内容，并按逗号拆分成短句。

在提取智能化评价时，代码读取的是下一行（index+1）的“智能化”列：先确认下一行存在且该列不为空，否则存入空列表，以避免越界或空值导致的错误。最后将每条评论的结果以字典形式加入列表中，方便后续分析使用。

Python Pandas 处理汽车之家评论数据:提取最满意、最不满意和智能化评价

原文地址: https://www.cveoy.top/t/topic/S1p 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录