import tkinter as tk
from tkinter import ttk, messagebox
import pandas as pd

# Sample data; replace it with your own dataset in real use.
# Rows 1 and 2 are identical and the last row has a missing 'C' value, so the
# sample contains both a duplicate row and a missing value.
data = pd.DataFrame(
    {
        'A': [1, 2, 2, 4],
        'B': [5, 6, 6, 8],
        'C': [9, 10, 10, None],
    }
)

def _show_dataframe_window(parent, frame, title):
    """Open a child window of *parent* that lists *frame* in a Treeview.

    The tree column ('#0') shows the frame's index label and one further
    column is created per DataFrame column.

    Returns the newly created ``tk.Toplevel``.
    """
    # A Toplevel shares the parent's Tcl interpreter; creating another
    # ``tk.Tk()`` here would start a second, independent interpreter.
    table_desk = tk.Toplevel(parent)
    table_desk.title(title)
    table_desk.geometry('600x400')

    tree = ttk.Treeview(table_desk)
    tree['columns'] = list(frame.columns)
    tree.heading('#0', text='Index')
    column_width = 160
    for col in frame.columns:
        tree.heading(col, text=col)
        tree.column(col, width=column_width)
    for index_label, row in frame.iterrows():
        tree.insert('', 'end', text=index_label, values=tuple(row))
    tree.pack()
    return table_desk


def preprocess_data():
    """Open the preprocessing window for the module-level ``data`` frame.

    The window offers two menu commands: one drops duplicated rows, the other
    drops rows containing missing values. Each command asks for confirmation
    when there is something to remove, rebinds the global ``data`` to the
    cleaned frame, and then shows the resulting table in a child window.

    If ``data`` is ``None`` an error dialog is shown and nothing else happens.
    The function does not start the Tk event loop itself.
    """
    global data

    window_title = '针对模糊人脸判断性别软件--数据预处理'

    def deduplicate():
        """Drop duplicated rows from ``data`` (after confirmation) and show it."""
        global data

        if data.duplicated().any():
            confirmed = messagebox.askyesno(
                '提示', '数据集中存在冗余,是否继续处理?', parent=preprocess_desk
            )
            if not confirmed:
                return
            data = data.drop_duplicates()

        table_desk = _show_dataframe_window(preprocess_desk, data, window_title)
        messagebox.showinfo('提示', '数据集中不存在冗余!', parent=table_desk)

    def data_dropna():
        """Drop rows with missing values from ``data`` (after confirmation) and show it."""
        global data

        if data.isnull().values.any():
            confirmed = messagebox.askyesno(
                '提示', '数据集中存在缺失值,是否继续处理?', parent=preprocess_desk
            )
            if not confirmed:
                return
            data = data.dropna()

        table_desk = _show_dataframe_window(preprocess_desk, data, window_title)
        messagebox.showinfo('提示', '数据集中不存在缺失值!', parent=table_desk)

    # Nothing to preprocess until a dataset has been loaded.
    if data is None:
        messagebox.showerror('错误', '请先读取数据!')
        return

    # Window in which the user chooses a preprocessing operation.
    preprocess_desk = tk.Tk()
    preprocess_desk.title(window_title)
    preprocess_desk.geometry('600x400')

    # Build the menubar. The commands live in a drop-down that is attached to
    # the bar with add_cascade; the bar itself is what gets installed on the
    # window (installing the drop-down directly left the bar unused).
    preprocess_menubar = tk.Menu(preprocess_desk)
    preprocess_menu = tk.Menu(preprocess_menubar, tearoff=0)
    preprocess_menu.add_command(label='去除重复数据', command=deduplicate)
    preprocess_menu.add_command(label='缺失值处理', command=data_dropna)
    preprocess_menubar.add_cascade(label='数据预处理', menu=preprocess_menu)

    preprocess_desk.config(menu=preprocess_menubar)

    tk.Label(preprocess_desk, text='开始对您的数据集进行初步预处理操作', font=('Arial', 20)).pack(pady=50)
    tk.Label(preprocess_desk, text='请从上方菜单栏选择您所希望的操作', font=('Arial', 15)).pack(pady=50)

# Call the function to build the preprocessing window.
preprocess_data()
if __name__ == '__main__':
    # No Tk event loop is started anywhere else, so without this the script
    # would exit immediately and the window would never become interactive.
    # Only run the loop when executed as a script so that importing this
    # module does not block.
    tk.mainloop()
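# Python data preprocessing: removing redundant rows and missing values
#
# Original source: https://www.cveoy.top/t/topic/fYMe  Copyright belongs to the author. Please do not repost or scrape.
#
# Free AI - click here, no registration or login required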