from tkinter import import tkinter as tkfrom tkinter import messageboxfrom tkinter import filedialogfrom tkinter import ttkimport pandas as pdimport numpy as npimport matplotlibpyplot as pltfrom matpl
可以通过绘制学习曲线来判断模型是否过拟合。学习曲线是指训练集和测试集(交叉验证集)的准确率/误差随训练样本数量增加而变化的曲线。如果训练集和测试集的准确率/误差差距较大,就可能存在过拟合的问题。
以下是绘制学习曲线的示例代码:
# Plot a learning curve: training score and cross-validation score as a
# function of the number of training examples, each with a +/- 1 std band.
#
# NOTE(review): `learning_curve` is presumably
# sklearn.model_selection.learning_curve, and `clf`, `X_train`, `Y_train`,
# `np` and `plt` must already be defined/imported before this snippet runs
# -- confirm against the rest of the file.

# Evaluate the estimator on growing subsets of the training data (5-fold CV).
train_sizes, train_scores, test_scores = learning_curve(clf, X_train, Y_train, cv=5)

# Collapse axis 1 of the score arrays into a mean and a spread per row.
# (With scikit-learn's learning_curve, each row is one training-set size and
# each column one CV fold.)
train_scores_mean = np.mean(train_scores, axis=1)
train_scores_std = np.std(train_scores, axis=1)
test_scores_mean = np.mean(test_scores, axis=1)
test_scores_std = np.std(test_scores, axis=1)

# Set up the figure.
plt.figure()
plt.title("Learning Curve")
plt.xlabel("Training examples")
plt.ylabel("Score")
plt.grid()

# Shaded bands: mean +/- one standard deviation for each curve.
plt.fill_between(
    train_sizes,
    train_scores_mean - train_scores_std,
    train_scores_mean + train_scores_std,
    alpha=0.1,
    color="r",
)
plt.fill_between(
    train_sizes,
    test_scores_mean - test_scores_std,
    test_scores_mean + test_scores_std,
    alpha=0.1,
    color="g",
)

# Mean curves: red = training score, green = cross-validation score.
plt.plot(train_sizes, train_scores_mean, 'o-', color="r", label="Training score")
plt.plot(train_sizes, test_scores_mean, 'o-', color="g", label="Cross-validation score")

plt.legend(loc="best")
plt.show()
如果学习曲线中训练集和测试集的准确率/误差趋势相似,且均趋近于一个稳定值,并且两者差距较小,则模型可能没有过拟合问题。如果测试集的准确率明显低于训练集(或测试集的误差明显高于训练集),则可能存在过拟合问题。
原文地址: http://www.cveoy.top/t/topic/hnQC 著作权归作者所有。请勿转载和采集!