# Python数据预处理:去除冗余与缺失值
import tkinter as tk
from tkinter import ttk, messagebox
import pandas as pd
# Sample data; replace it with your own dataset in real use.
data = pd.DataFrame(
    {
        'A': [1, 2, 2, 4],
        'B': [5, 6, 6, 8],
        'C': [9, 10, 10, None],
    }
)
def preprocess_data():
    """Build the preprocessing window for the module-level ``data`` frame.

    The window exposes two menu actions, each of which rebinds the global
    ``data``: one drops duplicate rows, the other drops rows containing
    missing values. When ``data`` is ``None`` an error dialog is shown and
    no window is created.

    This function only creates widgets; it does not start the Tk event loop.
    """
    global data

    # Refuse to build the UI when no dataset has been loaded.
    if data is None:
        messagebox.showerror('错误', '请先读取数据!')
        return

    window_title = '针对模糊人脸判断性别软件--数据预处理'
    window_size = '600x400'

    # Main window offering the choice of preprocessing operation.
    preprocess_desk = tk.Tk()
    preprocess_desk.title(window_title)
    preprocess_desk.geometry(window_size)

    def show_table(frame):
        """Show ``frame`` in a child window as an index column plus data columns."""
        # A Toplevel shares the main window's interpreter; creating another
        # tk.Tk() per result would spawn an independent Tcl interpreter.
        table_desk = tk.Toplevel(preprocess_desk)
        table_desk.title(window_title)
        table_desk.geometry(window_size)

        column_names = list(frame.columns)
        tree = ttk.Treeview(table_desk, columns=column_names)
        tree.heading('#0', text='Index')
        for col in column_names:
            tree.heading(col, text=col)
            tree.column(col, width=160)
        # itertuples keeps each column's own dtype; iterrows would upcast a
        # whole row to one dtype and render integer cells as floats.
        for index, *values in frame.itertuples(name=None):
            tree.insert('', 'end', text=index, values=tuple(values))
        tree.pack()

    def deduplicate():
        """Drop duplicate rows from ``data`` once the user confirms."""
        global data
        if data.duplicated().any():
            proceed = messagebox.askyesno(
                '提示', '数据集中存在冗余,是否继续处理?', parent=preprocess_desk
            )
            if not proceed:
                return
            data = data.drop_duplicates()
            show_table(data)
        # Reached both when nothing needed removing and after a removal.
        messagebox.showinfo('提示', '数据集中不存在冗余!', parent=preprocess_desk)

    def data_dropna():
        """Drop rows with missing values from ``data`` once the user confirms."""
        global data
        if data.isnull().values.any():
            proceed = messagebox.askyesno(
                '提示', '数据集中存在缺失值,是否继续处理?', parent=preprocess_desk
            )
            if not proceed:
                return
            data = data.dropna()
            show_table(data)
        # Reached both when nothing needed removing and after a removal.
        messagebox.showinfo('提示', '数据集中不存在缺失值!', parent=preprocess_desk)

    # Menu bar: the drop-down must be attached to the bar with add_cascade,
    # and the bar itself (not the drop-down) installed on the window.
    preprocess_menubar = tk.Menu(preprocess_desk)
    preprocess_menu = tk.Menu(preprocess_menubar, tearoff=0)
    preprocess_menu.add_command(label='去除重复数据', command=deduplicate)
    preprocess_menu.add_command(label='缺失值处理', command=data_dropna)
    preprocess_menubar.add_cascade(label='数据预处理', menu=preprocess_menu)
    preprocess_desk.config(menu=preprocess_menubar)

    tk.Label(
        preprocess_desk,
        text='开始对您的数据集进行初步预处理操作',
        font=('Arial', 20),
    ).pack(pady=50)
    tk.Label(
        preprocess_desk,
        text='请从上方菜单栏选择您所希望的操作',
        font=('Arial', 15),
    ).pack(pady=50)
# Launch the preprocessing UI only when run as a script, not on import.
if __name__ == '__main__':
    preprocess_data()
    # preprocess_data() opens no window when data is None, so only enter the
    # event loop when there is a window to serve; without the loop the
    # window would never be drawn and the script would exit immediately.
    if data is not None:
        tk.mainloop()
# 原文地址: https://www.cveoy.top/t/topic/fYMe 著作权归作者所有。请勿转载和采集!