# 数据预处理工具 (data preprocessing tool) - Python Tkinter GUI
import tkinter as tk
from tkinter import filedialog, messagebox
import pandas as pd
import numpy as np
class MainWindow:
    """Main window of the CSV data-preprocessing tool.

    The window offers buttons to open a CSV file, keep a subset of its
    columns, forward-fill missing values, drop duplicate rows, cap upper
    outliers and save the result.  A list box shows the column labels of the
    currently loaded data.

    Attributes:
        master: The Tk root window; the caller runs ``master.mainloop()``.
        df: The loaded ``pandas.DataFrame``, or ``None`` before a file is
            opened.
        columns: Column labels of ``df`` in list-box order, or ``None``.
        tree: ``StringVar`` that backs the list-box contents.
        treeview: The ``Listbox`` widget that displays the column labels.
    """

    # Columns inspected by ``handle_outliers``; columns that are missing or
    # not numeric are skipped instead of raising inside the Tk callback.
    OUTLIER_COLUMNS = ('Mileage', 'Engine', 'Power')
    # Values above ``mean + OUTLIER_STD_FACTOR * std`` are capped.
    OUTLIER_STD_FACTOR = 3
    BUTTON_FONT = ('Arial', 12)

    def __init__(self):
        """Build the root window, the action buttons and the column list."""
        self.master = tk.Tk()
        self.master.title('数据预处理')
        self.master.geometry('900x700')

        # Title label; it spans all five grid columns (0-4) used below so it
        # stays centred over the buttons and the list box.
        label_title = tk.Label(self.master, text='数据预处理', font=('Arial', 16), pady=30)
        label_title.grid(row=0, column=0, columnspan=5, sticky='nsew')

        # Buttons and their handlers.  Everything except "open file" starts
        # disabled and is remembered so ``enable_buttons`` can switch it on
        # without relying on Tk's auto-generated widget names.
        self._action_buttons = []
        self._add_button('打开文件', self.open_file, row=1, column=0, enabled=True)
        self._add_button('选择列', self.select_columns, row=1, column=2)
        self._add_button('填充空值', self.fill_na, row=1, column=4)
        self._add_button('去重', self.drop_duplicates, row=3, column=0)
        self._add_button('处理异常值', self.handle_outliers, row=3, column=2)
        self._add_button('保存', self.save_file, row=3, column=4)

        # Column list.  EXTENDED selection lets the user pick several
        # columns; the default mode only allows a single one.
        self.columns = None
        self.df = None
        self.tree = tk.StringVar(master=self.master, value='')
        self.treeview = tk.Listbox(
            self.master,
            listvariable=self.tree,
            height=10,
            selectmode=tk.EXTENDED,
        )
        self.treeview.grid(row=4, column=0, columnspan=5, padx=10, pady=10)

    # ------------------------------------------------------------------
    # Event handlers
    # ------------------------------------------------------------------
    def open_file(self):
        """Ask for a CSV file, load it into ``self.df`` and enable the tools."""
        file_path = filedialog.askopenfilename(
            defaultextension='.csv',
            filetypes=(('CSV files', '*.csv'), ('All Files', '*.*')),
        )
        if not file_path:
            return
        try:
            loaded = pd.read_csv(file_path)
        except Exception as e:
            # UI boundary: any load failure is reported to the user and the
            # previously loaded data (if any) is left untouched.
            messagebox.showerror('错误', '打开文件失败!{}'.format(e), parent=self.master)
            return
        self.df = loaded
        self._refresh_column_list()
        messagebox.showinfo('提示', '数据已导入成功!', parent=self.master)
        self.enable_buttons()

    def select_columns(self):
        """Keep only the columns currently selected in the list box."""
        if not self._has_data():
            return
        picked = self.treeview.curselection()
        if not picked:
            messagebox.showwarning('提示', '请先在列表中选择要保留的列!', parent=self.master)
            return
        # Select by position so duplicate labels cannot pull in extra
        # columns, and copy so later edits never act on a view of the old frame.
        self.df = self.df.iloc[:, list(picked)].copy()
        self._refresh_column_list()
        messagebox.showinfo('提示', '列已选择成功!', parent=self.master)

    def fill_na(self):
        """Forward-fill missing values in every column."""
        if not self._has_data():
            return
        # ``ffill()`` replaces the deprecated ``fillna(method='ffill')`` call.
        self.df = self.df.ffill()
        self._refresh_column_list()
        messagebox.showinfo('提示', '空值已填充成功!', parent=self.master)

    def drop_duplicates(self):
        """Remove rows that duplicate an earlier row."""
        if not self._has_data():
            return
        self.df = self.df.drop_duplicates()
        self._refresh_column_list()
        messagebox.showinfo('提示', '重复行已去除成功!', parent=self.master)

    def handle_outliers(self):
        """Cap upper outliers in the columns named by ``OUTLIER_COLUMNS``.

        A value counts as an outlier when it exceeds the column mean plus
        ``OUTLIER_STD_FACTOR`` standard deviations; it is replaced by that
        threshold.  Columns that are absent or not numeric are skipped, and
        a warning is shown when none of them could be processed.
        """
        if not self._has_data():
            return
        capped, processed = self._cap_upper_outliers(
            self.df, self.OUTLIER_COLUMNS, self.OUTLIER_STD_FACTOR
        )
        if not processed:
            messagebox.showwarning(
                '提示',
                '未找到可处理的数值列:{}'.format(', '.join(self.OUTLIER_COLUMNS)),
                parent=self.master,
            )
            return
        self.df = capped
        self._refresh_column_list()
        messagebox.showinfo('提示', '异常值已处理成功!', parent=self.master)

    def save_file(self):
        """Ask for a destination path and write ``self.df`` there as CSV."""
        if not self._has_data():
            return
        file_path = filedialog.asksaveasfilename(
            defaultextension='.csv',
            filetypes=(('CSV files', '*.csv'), ('All Files', '*.*')),
        )
        if not file_path:
            return
        try:
            self.df.to_csv(file_path, index=False)
        except Exception as e:
            # UI boundary: report the failure instead of letting the Tk
            # callback die with a traceback on stderr.
            messagebox.showerror('错误', '保存文件失败!{}'.format(e), parent=self.master)
            return
        messagebox.showinfo('提示', '数据已保存成功!', parent=self.master)

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    def enable_buttons(self):
        """Enable every button that was created in the disabled state."""
        for button in self._action_buttons:
            button.config(state='normal')

    def _add_button(self, text, command, row, column, enabled=False):
        """Create one grid-placed button and track it if it starts disabled."""
        button = tk.Button(
            self.master,
            text=text,
            font=self.BUTTON_FONT,
            state='normal' if enabled else 'disabled',
            command=command,
        )
        button.grid(row=row, column=column, padx=50, pady=30)
        if not enabled:
            self._action_buttons.append(button)
        return button

    def _has_data(self):
        """Return True if a file is loaded; otherwise warn the user."""
        if self.df is not None:
            return True
        messagebox.showwarning('提示', '请先打开数据文件!', parent=self.master)
        return False

    def _refresh_column_list(self):
        """Sync ``self.columns`` and the list box with ``self.df``."""
        self.columns = list(self.df.columns)
        self.tree.set(self._column_labels(self.df))

    @staticmethod
    def _column_labels(df):
        """Return the column labels of *df* as a tuple of strings.

        Each label is converted on its own, so labels that are not strings
        or that contain ``', '`` still map to exactly one list-box entry.
        """
        return tuple(str(label) for label in df.columns)

    @staticmethod
    def _cap_upper_outliers(df, columns, n_std=3):
        """Cap values above ``mean + n_std * std`` in the given columns.

        Args:
            df: Source frame; it is not modified.
            columns: Labels of the columns to inspect.
            n_std: Number of standard deviations that defines the threshold.

        Returns:
            A ``(frame, processed)`` tuple: a copy of *df* with the caps
            applied and the list of labels that were actually inspected
            (present in *df*, unique and numeric).
        """
        result = df.copy()
        processed = []
        for name in columns:
            if name not in result.columns:
                continue
            series = result[name]
            # A duplicated label yields a DataFrame; booleans have no
            # meaningful outliers.  Skip both along with non-numeric data.
            if not isinstance(series, pd.Series):
                continue
            if pd.api.types.is_bool_dtype(series) or not pd.api.types.is_numeric_dtype(series):
                continue
            processed.append(name)
            threshold = series.mean() + n_std * series.std()
            if pd.isna(threshold):
                # Too few values to compute a spread: nothing to cap.
                continue
            # ``clip`` returns a new series (upcast to float when needed),
            # avoiding a dtype-incompatible in-place assignment.
            result[name] = series.clip(upper=threshold)
        return result, processed
# Start the GUI only when this file is run as a script.  The interpreter sets
# ``__name__`` to '__main__' in that case; comparing against 'main' is never
# true, so the window would never open.
if __name__ == '__main__':
    app = MainWindow()
    app.master.mainloop()
# 原文地址: https://www.cveoy.top/t/topic/ou14 著作权归作者所有。请勿转载和采集!