# Python 数据集处理与机器学习模型训练可视化工具
import tkinter as tk
from tkinter import *  # noqa: F401,F403 -- Tk, Label, Button, Text, END come from here
from tkinter import filedialog
from tkinter import messagebox  # pop-up dialogs

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier

root = Tk()  # main application window
root.title('数据集处理窗口')
root.geometry('2000x1000')

lb1 = Label(root, text='选择数据集')
lb1.place(relx=0.1, rely=0.1, relwidth=0.8, relheight=0.1)

# Single output pane, created the first time something is displayed and
# reused afterwards so repeated clicks do not stack widgets on each other.
_output_box = None


def openfile():
    """Ask the user to pick a single CSV file and return the chosen path.

    The dialog is restricted to ``*.csv`` files. The value returned by
    ``filedialog.askopenfilename`` is passed through unchanged, so it is
    falsy when the user cancels the dialog.
    """
    return filedialog.askopenfilename(filetypes=[('CSV', '*.csv')])


def _load_csv(path):
    """Read *path* into a DataFrame, or return None if nothing can be loaded.

    A falsy *path* (cancelled dialog) is ignored silently; a file that cannot
    be read or parsed is reported to the user in an error dialog.
    """
    if not path:
        return None
    try:
        return pd.read_csv(path, engine='python')
    except (OSError, ValueError) as exc:
        # pandas parser/empty-data errors derive from ValueError.
        messagebox.showerror('读取失败', str(exc))
        return None


def _show(content):
    """Replace the text of the output pane with ``str(content)``."""
    global _output_box
    if _output_box is None:
        _output_box = Text(root)
        _output_box.place(rely=0.6, relheight=0.4)
    _output_box.delete('1.0', END)
    _output_box.insert(END, str(content))


def duru(txt):
    """Load the CSV at path *txt* and display its ``describe()`` summary."""
    frame = _load_csv(txt)
    if frame is not None:
        _show(frame.describe())


def queshizhi(txt):
    """Load the CSV at path *txt* and display it with missing values filled.

    Each missing value is replaced by the previous non-missing value in the
    same column (forward fill).
    """
    frame = _load_csv(txt)
    if frame is not None:
        # DataFrame.ffill() replaces the deprecated fillna(method='ffill').
        _show(frame.ffill())


def qsjc(txt):
    """Load the CSV at path *txt* and display the missing-value count per column."""
    frame = _load_csv(txt)
    if frame is not None:
        _show(frame.isnull().sum())


# Toolbar: choose data set, missing-value detection, missing-value filling,
# close window.
btn1 = Button(root, text='选择csv数据集', command=lambda: duru(openfile()))
btn1.place(relx=0.3, rely=0.2, relwidth=0.1, relheight=0.1)
btn2 = Button(root, text='关闭窗口', command=root.destroy)
btn2.place(relx=0.6, rely=0.2, relwidth=0.1, relheight=0.1)
btn3 = Button(root, text='缺失值检测', command=lambda: qsjc(openfile()))
btn3.place(relx=0.4, rely=0.2, relwidth=0.1, relheight=0.1)
btn4 = Button(root, text='缺失值填充', command=lambda: queshizhi(openfile()))
btn4.place(relx=0.5, rely=0.2, relwidth=0.1, relheight=0.1)

# Training data for the model buttons: four feature columns and the target.
# NOTE: this rebinds X, which `from tkinter import *` had bound to tkinter's
# fill constant; nothing above uses that constant.
iris_data = pd.read_csv('train.csv')
X = iris_data[['android_id', 'media_id', 'cus_type', 'package']]
y = iris_data[['label']]
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=10
)

TRAIN_CSV = 'train.csv'  # data file re-read by every chart button

# One result label per model, created on first use and updated afterwards so
# repeated clicks do not pile new widgets on top of each other.
_accuracy_labels = {}


def _report_accuracy(model, caption, rely):
    """Fit *model*, score it on the held-out split and display the accuracy.

    Args:
        model: an unfitted scikit-learn classifier.
        caption: text shown in front of the accuracy value.
        rely: relative vertical position of the result label.
    """
    # y is a single-column DataFrame; flatten the target to 1-D, which is the
    # shape the estimators expect (avoids sklearn's DataConversionWarning).
    model.fit(X_train, np.ravel(y_train))
    score = metrics.accuracy_score(y_test, model.predict(X_test))

    label = _accuracy_labels.get(caption)
    if label is None:
        label = Label(root, font=('微软雅黑 -20'))
        # Placed to the right of the button rows: the former position
        # (relx=0.2, relwidth=0.5) covered the chart buttons at rely=0.5.
        label.place(relx=0.65, rely=rely, relwidth=0.35, relheight=0.1)
        _accuracy_labels[caption] = label
    # A plain string instead of a tuple, so Tk never renders it as a Tcl list.
    label.config(text=f'{caption} {score}')


def func1():
    """Train a logistic-regression classifier and show its test accuracy."""
    _report_accuracy(LogisticRegression(), '逻辑回归模型的准确率为:', 0.5)


def func2():
    """Train a k-nearest-neighbours classifier and show its test accuracy."""
    _report_accuracy(KNeighborsClassifier(), 'K近邻模型的准确率为:', 0.6)


def func3():
    """Train a Gaussian naive-Bayes classifier and show its test accuracy."""
    _report_accuracy(GaussianNB(), '高斯模型的准确率为:', 0.7)


btn5 = Button(root, text='逻辑回归模型', command=func1)
btn5.place(relx=0.35, rely=0.4, relwidth=0.1, relheight=0.1)
btn6 = Button(root, text='K近邻模型', command=func2)
btn6.place(relx=0.45, rely=0.4, relwidth=0.1, relheight=0.1)
btn7 = Button(root, text='高斯模型', command=func3)
btn7.place(relx=0.55, rely=0.4, relwidth=0.1, relheight=0.1)


# NOTE(review): two chart titles below contain Chinese text; they presumably
# need a CJK-capable matplotlib font to render -- confirm on the target machine.

def zxt():
    """Draw a line chart of ``media_id`` against ``android_id``."""
    data = pd.read_csv(TRAIN_CSV)
    # Start a fresh figure so the chart never lands on one that is still open
    # from another button.
    plt.figure()
    plt.plot(data['android_id'], data['media_id'])
    plt.xlabel('Android_id')
    plt.ylabel('Media_id')
    plt.title('Android_id和Media_id关系图')
    plt.show()


def zft():
    """Draw a histogram of the ``label`` column."""
    data = pd.read_csv(TRAIN_CSV)
    plt.figure()  # fresh figure, see zxt()
    plt.hist(data['label'], color='blue', edgecolor='black', alpha=0.5)
    plt.xlabel('Label')
    plt.ylabel('Count')
    plt.title('Distribution of Labels in the Dataset')
    plt.show()


btn8 = Button(root, text='制作直线图', command=zxt)
btn8.place(relx=0.3, rely=0.5, relwidth=0.1, relheight=0.1)
btn9 = Button(root, text='制作直方图', command=zft)
btn9.place(relx=0.4, rely=0.5, relwidth=0.1, relheight=0.1)


# Pie-chart visualisation.
def zkt():
    """Draw a pie chart of how many rows have ``label`` 0 and ``label`` 1."""
    data = pd.read_csv(TRAIN_CSV)
    labels = ['0', '1']
    # Vectorised count instead of the builtin sum() over a pandas Series.
    sizes = [int((data['label'] == value).sum()) for value in (0, 1)]
    fig1, ax1 = plt.subplots()
    ax1.pie(sizes, labels=labels, autopct='%1.1f%%', shadow=True, startangle=90)
    ax1.axis('equal')  # equal aspect ratio keeps the pie circular
    ax1.set_title('Label分布饼图')
    plt.show()


btn10 = Button(root, text='制作饼图', command=zkt)
btn10.place(relx=0.5, rely=0.5, relwidth=0.1, relheight=0.1)

root.mainloop()  # start the Tk event loop; the window is not shown without it
# 原文地址: https://www.cveoy.top/t/topic/okQe 著作权归作者所有。请勿转载和采集!