class Ok_Window def __init__selfmaster selfcolumns = None selfdf = None # 创建主窗口 selfmaster = tkTk selfmastertitle数据预处理 selfmastergeometry500x400 # 设
- 导入PIL库:在代码文件开头处添加以下代码:
from PIL import Image, ImageTk
- 加载背景图片:在Ok_Window类的__init__方法中添加以下代码:
# Load the background image and stretch it to the 500x400 window size.
img = Image.open("abc.jpg")
# Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter.
img = img.resize((500, 400), Image.LANCZOS)
# Store the PhotoImage on self: Tkinter does not keep its own reference, so
# a purely local image can be garbage-collected and the label shows blank.
self.photo = ImageTk.PhotoImage(img)
# place() with relwidth/relheight=1 makes the label cover the whole window.
bg_label = tk.Label(self.master, image=self.photo)
bg_label.place(x=0, y=0, relwidth=1, relheight=1)
- 完整代码:
import tkinter as tk
from tkinter import filedialog, messagebox, ttk

import pandas as pd
from PIL import Image, ImageTk
class Ok_Window:
    """Tkinter window for loading, cleaning and saving a CSV data set.

    The window shows a background image, a row of action buttons and a
    ``ttk.Treeview`` that previews the first rows of the loaded file. The
    loaded data is kept in ``self.df`` (a pandas DataFrame, ``None`` until a
    file has been opened) and its column names in ``self.columns``.
    """

    # Background picture and the fixed window size it is stretched to.
    BACKGROUND_IMAGE = "abc.jpg"
    WINDOW_SIZE = (500, 400)
    # Number of rows shown in the preview table after a file is opened.
    PREVIEW_ROWS = 10

    def __init__(self, master=None):
        """Build the window and all of its widgets.

        Args:
            master: Existing Tk root to build the interface in. When omitted
                a new ``tk.Tk()`` root is created.
        """
        self.columns = None
        self.df = None

        # Reuse the caller's root instead of always creating a second Tk():
        # two roots produce two windows, and an image created for one root
        # cannot be displayed by widgets that belong to the other.
        self.master = tk.Tk() if master is None else master
        self.master.title("数据预处理")
        self.master.geometry("{}x{}".format(*self.WINDOW_SIZE))

        self._build_background()
        self._build_buttons()
        self._build_preview_table()

    def _build_background(self):
        """Load the background image and stretch it over the whole window."""
        with Image.open(self.BACKGROUND_IMAGE) as source:
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same
            # filter under its current name.
            resized = source.resize(self.WINDOW_SIZE, Image.LANCZOS)
        # Keep the PhotoImage on self so it is not garbage-collected while
        # the label is still displaying it.
        self.photo = ImageTk.PhotoImage(resized)
        bg_label = tk.Label(self.master, image=self.photo)
        bg_label.place(x=0, y=0, relwidth=1, relheight=1)

    def _build_buttons(self):
        """Create the title label and every action button."""
        label_title = tk.Label(
            self.master, text="数据预处理", font=("Arial", 16), pady=30
        )
        label_title.grid(row=0, column=1, columnspan=3)

        button_open = tk.Button(
            self.master, text="打开文件", command=self.open_file
        )
        button_open.grid(row=1, column=0)

        # Buttons that need loaded data: (caption, callback, row, column).
        data_actions = (
            ("处理空值", self.fill_na, 1, 1),
            ("去重", self.drop_duplicates, 1, 2),
            ("处理异常值", self.handle_outliers, 1, 3),
            ("缺失值数量", self.missing_value_count, 1, 4),
            ("保存文件", self.save_file, 5, 0),
        )
        # Keep direct references so enable_buttons() does not depend on
        # Tk's auto-generated widget names.
        self._data_buttons = []
        for caption, callback, row, column in data_actions:
            button = tk.Button(
                self.master, text=caption, state="disabled", command=callback
            )
            button.grid(row=row, column=column)
            self._data_buttons.append(button)

        button_exit = tk.Button(
            self.master, text="退出", command=self.master.destroy
        )
        button_exit.grid(row=5, column=4)

    def _build_preview_table(self):
        """Create the Treeview used to preview the loaded data."""
        self.tree = ttk.Treeview(self.master, columns=("index", "data"))
        self.tree.heading("#0", text="序号")
        self.tree.heading("index", text="索引")
        self.tree.heading("data", text="数据")
        self.tree.column("#0", width=50, anchor="center")
        self.tree.column("index", width=100, anchor="center")
        self.tree.column("data", width=300, anchor="w")
        self.tree.grid(row=2, column=0, columnspan=5, padx=1, pady=1)

    def _show_preview(self):
        """Replace the table contents with the first rows of ``self.df``."""
        # Clear rows left over from a previously opened file; otherwise each
        # new file would be appended below the old preview.
        self.tree.delete(*self.tree.get_children())
        preview = self.df.head(self.PREVIEW_ROWS)
        for position, (row_label, row) in enumerate(preview.iterrows(), start=1):
            self.tree.insert(
                "",
                "end",
                text=str(position),
                values=(row_label, ", ".join(str(x) for x in row.values)),
            )

    def open_file(self):
        """Ask for a CSV file, load it into ``self.df`` and preview it.

        Nothing happens when the dialog is cancelled. A read failure is
        reported in an error dialog and leaves the current data untouched.
        """
        file_path = filedialog.askopenfilename(
            defaultextension=".csv",
            filetypes=(("CSV files", "*.csv"), ("All Files", "*.*")),
        )
        if not file_path:
            return
        try:
            self.df = pd.read_csv(file_path)
        except Exception as e:
            # UI boundary: any read/parse failure is shown to the user.
            messagebox.showerror(
                "错误", "打开文件失败!{}".format(e), parent=self.master
            )
            return
        self.columns = list(self.df.columns)
        messagebox.showinfo("提示", "数据已导入成功!", parent=self.master)
        self.enable_buttons()
        self._show_preview()

    def fill_na(self):
        """Forward-fill missing values in ``self.df`` in place."""
        # DataFrame.ffill() replaces fillna(method="ffill"), whose ``method``
        # argument is deprecated in recent pandas releases.
        self.df.ffill(inplace=True)
        messagebox.showinfo("提示", "空值已填充成功!", parent=self.master)

    def drop_duplicates(self):
        """Remove duplicated rows from ``self.df`` in place."""
        self.df.drop_duplicates(inplace=True)
        messagebox.showinfo("提示", "重复行已去除成功!", parent=self.master)

    def handle_outliers(self, mileage_unit="km/l", engine_unit="CC", power_unit="bhp"):
        """Rescale unit-dependent columns and drop high-mileage outliers.

        With the default units no column is rescaled; only rows whose
        ``Kilometers_Driven`` exceeds ``Q3 + 1.5 * IQR`` are removed. Columns
        missing from ``self.df`` are skipped.

        Args:
            mileage_unit: ``"mile/l"`` replaces ``Mileage`` by its reciprocal;
                ``"km/kg"`` multiplies it by 0.425; other values leave it
                unchanged.
            engine_unit: ``"L"`` divides ``Engine`` by 1000.
            power_unit: ``"kW"`` multiplies ``Power`` by 0.7457.
        """
        # NOTE(review): the conversion factors below are carried over as
        # written; confirm they match the intended source/target units.
        if "Mileage" in self.df.columns:
            if mileage_unit == "mile/l":
                self.df["Mileage"] = 1 / self.df["Mileage"]
            elif mileage_unit == "km/kg":
                self.df["Mileage"] = self.df["Mileage"] * 0.425

        if "Engine" in self.df.columns and engine_unit == "L":
            self.df["Engine"] = self.df["Engine"] / 1000

        if "Power" in self.df.columns and power_unit == "kW":
            self.df["Power"] = self.df["Power"] * 0.7457

        if "Kilometers_Driven" in self.df.columns:
            driven = self.df["Kilometers_Driven"]
            q1 = driven.quantile(0.25)
            q3 = driven.quantile(0.75)
            # Only the upper fence is applied: Q3 + 1.5 * IQR.
            threshold = q3 + 1.5 * (q3 - q1)
            outlier_rows = self.df.index[driven > threshold]
            self.df.drop(outlier_rows, inplace=True)

        messagebox.showinfo("提示", "异常值已处理成功!", parent=self.master)

    def missing_value_count(self):
        """Show the total number of missing cells in ``self.df``."""
        missing_values = self.df.isnull().sum().sum()
        messagebox.showinfo(
            "提示", "缺失值数量为{}".format(missing_values), parent=self.master
        )

    def save_file(self):
        """Ask for a destination and write ``self.df`` to it as CSV.

        The index is not written. Nothing happens when the dialog is
        cancelled; a write failure is reported in an error dialog.
        """
        file_path = filedialog.asksaveasfilename(
            defaultextension=".csv",
            filetypes=(("CSV files", "*.csv"), ("All Files", "*.*")),
        )
        if not file_path:
            return
        try:
            self.df.to_csv(file_path, index=False)
        except Exception as e:
            # UI boundary: any write failure is shown to the user.
            messagebox.showerror(
                "错误", "保存文件失败!{}".format(e), parent=self.master
            )
        else:
            messagebox.showinfo("提示", "数据已保存成功!", parent=self.master)

    def enable_buttons(self):
        """Enable every button that operates on loaded data."""
        for button in self._data_buttons:
            button.config(state="normal")
# Create the application window and run the Tk event loop.
if __name__ == "__main__":
    root = tk.Tk()
    app = Ok_Window(root)
    root.mainloop()
原文地址: http://www.cveoy.top/t/topic/gI2O 著作权归作者所有。请勿转载和采集!