import tkinter as tk
from tkinter import filedialog, messagebox
import pandas as pd
import numpy as np

class MainWindow:
    """Tkinter window offering basic CSV preprocessing steps backed by pandas.

    The window lets the user open a CSV file, keep a subset of columns,
    forward-fill missing values, drop duplicate rows, cap upper outliers in
    a fixed set of columns and save the result back to CSV.

    Attributes:
        master: The root ``tk.Tk`` window.
        df: The currently loaded ``pandas.DataFrame`` or ``None``.
        columns: Column labels of ``df`` in listbox order, or ``None``.
        tree: ``tk.StringVar`` backing the listbox contents.
        treeview: The ``tk.Listbox`` that displays the column names.
    """

    # Columns capped by ``handle_outliers`` when present and numeric.
    OUTLIER_COLUMNS = ('Mileage', 'Engine', 'Power')
    # Upper cap is mean + OUTLIER_STD_FACTOR * standard deviation.
    OUTLIER_STD_FACTOR = 3

    def __init__(self):
        """Build the window, its buttons and the column listbox."""
        self.master = tk.Tk()
        self.master.title('数据预处理')
        self.master.geometry('900x700')

        # Title label; buttons occupy grid columns 0-4, so span all five.
        label_title = tk.Label(self.master, text='数据预处理', font=('Arial', 16), pady=30)
        label_title.grid(row=0, column=0, columnspan=5, sticky='nsew')

        # The "open" button is always enabled; everything else needs data.
        button_open = tk.Button(self.master, text='打开文件', font=('Arial', 12), command=self.open_file)
        button_open.grid(row=1, column=0, padx=50, pady=30)

        # (label, callback, grid row, grid column) for data-dependent actions.
        action_specs = (
            ('选择列', self.select_columns, 1, 2),
            ('填充空值', self.fill_na, 1, 4),
            ('去重', self.drop_duplicates, 3, 0),
            ('处理异常值', self.handle_outliers, 3, 2),
            ('保存', self.save_file, 3, 4),
        )
        # Keep direct references so enabling does not depend on Tk's
        # auto-generated widget names.
        self._action_buttons = []
        for text, command, row, column in action_specs:
            button = tk.Button(self.master, text=text, font=('Arial', 12), state='disabled', command=command)
            button.grid(row=row, column=column, padx=50, pady=30)
            self._action_buttons.append(button)

        # Column list display.
        self.columns = None
        self.df = None
        self.tree = tk.StringVar(value='')
        # Extended selection so that several columns can be chosen at once.
        treeview = tk.Listbox(self.master, listvariable=self.tree, height=10, selectmode=tk.EXTENDED)
        treeview.grid(row=4, column=0, columnspan=5, padx=10, pady=10)

        self.treeview = treeview

    # Event handlers
    def open_file(self):
        """Ask for a CSV file, load it into ``self.df`` and enable the actions.

        Any error raised while reading is reported in an error dialog and
        leaves the previously loaded data untouched.
        """
        file_path = filedialog.askopenfilename(defaultextension='.csv', filetypes=(('CSV files', '*.csv'), ('All Files', '*.*')))
        if not file_path:
            return
        try:
            loaded = pd.read_csv(file_path)
        except Exception as e:
            # GUI boundary: surface any read/parse failure to the user.
            messagebox.showerror('错误', '打开文件失败!{}'.format(e), parent=self.master)
            return
        self.df = loaded
        self._refresh_column_list()
        messagebox.showinfo('提示', '数据已导入成功!', parent=self.master)
        self.enable_buttons()

    def select_columns(self):
        """Keep only the columns currently selected in the listbox.

        Does nothing when no listbox entry is selected.
        """
        if not self._has_data():
            return
        indices = self.treeview.curselection()
        if not indices:
            return
        chosen = [self.columns[i] for i in indices]
        # Copy so later in-place operations act on an independent frame.
        self.df = self.df[chosen].copy()
        self._refresh_column_list()
        messagebox.showinfo('提示', '列已选择成功!', parent=self.master)

    def fill_na(self):
        """Forward-fill missing values in the loaded data."""
        if not self._has_data():
            return
        # ``fillna(method='ffill')`` is deprecated in recent pandas releases.
        self.df.ffill(inplace=True)
        self._refresh_column_list()
        messagebox.showinfo('提示', '空值已填充成功!', parent=self.master)

    def drop_duplicates(self):
        """Remove duplicate rows from the loaded data."""
        if not self._has_data():
            return
        self.df.drop_duplicates(inplace=True)
        self._refresh_column_list()
        messagebox.showinfo('提示', '重复行已去除成功!', parent=self.master)

    def handle_outliers(self):
        """Cap upper outliers in ``OUTLIER_COLUMNS``.

        Columns that are missing or not numeric are skipped; if none of the
        configured columns can be processed a warning dialog is shown.
        """
        if not self._has_data():
            return
        processed = self._cap_upper_outliers(self.df, self.OUTLIER_COLUMNS, self.OUTLIER_STD_FACTOR)
        self._refresh_column_list()
        if not processed:
            messagebox.showwarning(
                '提示',
                '未找到可处理的数值列: {}'.format(', '.join(self.OUTLIER_COLUMNS)),
                parent=self.master,
            )
            return
        messagebox.showinfo('提示', '异常值已处理成功!', parent=self.master)

    def save_file(self):
        """Ask for a destination path and write the data as CSV without index."""
        if not self._has_data():
            return
        file_path = filedialog.asksaveasfilename(defaultextension='.csv', filetypes=(('CSV files', '*.csv'), ('All Files', '*.*')))
        if not file_path:
            return
        try:
            self.df.to_csv(file_path, index=False)
        except Exception as e:
            # GUI boundary: surface any write failure to the user.
            messagebox.showerror('错误', '保存文件失败!{}'.format(e), parent=self.master)
        else:
            messagebox.showinfo('提示', '数据已保存成功!', parent=self.master)

    # Helpers
    def enable_buttons(self):
        """Enable every button that requires loaded data."""
        for button in self._action_buttons:
            button.config(state='normal')

    def _has_data(self):
        """Return True if data is loaded; otherwise warn the user and return False."""
        if self.df is None:
            messagebox.showwarning('提示', '请先打开文件!', parent=self.master)
            return False
        return True

    def _refresh_column_list(self):
        """Sync ``self.columns`` and the listbox with the columns of ``self.df``."""
        self.columns = list(self.df.columns)
        # Pass the labels as a list so names containing ', ' stay intact and
        # listbox indices keep matching ``self.columns``.
        self.tree.set(self._column_labels(self.df))

    @staticmethod
    def _column_labels(df):
        """Return the column labels of ``df`` as a list of strings."""
        return [str(label) for label in df.columns]

    @staticmethod
    def _cap_upper_outliers(df, columns, n_std=3):
        """Cap values above ``mean + n_std * std`` in the given columns, in place.

        Args:
            df: DataFrame to modify.
            columns: Iterable of column names to consider.
            n_std: Number of standard deviations above the mean used as cap.

        Returns:
            List of the column names that were present and numeric, and were
            therefore processed (whether or not any value was capped).
        """
        processed = []
        for name in columns:
            if name not in df.columns or not pd.api.types.is_numeric_dtype(df[name]):
                continue
            values = df[name]
            threshold = values.mean() + n_std * values.std()
            above = values > threshold
            if above.any():
                # ``where`` upcasts integer columns when the cap is fractional.
                df[name] = values.where(~above, threshold)
            processed.append(name)
        return processed

# Start the GUI only when this file is executed as a script; Python sets
# __name__ to '__main__' (with double underscores) in that case.
if __name__ == '__main__':
    app = MainWindow()
    app.master.mainloop()
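# Data preprocessing tool - Python Tkinter GUI
#
# Original source: https://www.cveoy.top/t/topic/ou14
# Copyright belongs to the author. Do not repost or scrape.
#
# (Site footer: "Free AI, click here - no registration or login required.")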