# Effect of L1 and L2 regularization on a logistic regression model.

# Import the required libraries.
import matplotlib.pyplot as plt  # plotting
import numpy as np
from sklearn.datasets import load_iris  # iris dataset loader
from sklearn.linear_model import LogisticRegression  # logistic regression model
from sklearn.metrics import accuracy_score  # accuracy metric
from sklearn.model_selection import train_test_split  # train/test splitting
# Load the iris dataset as a (features, target) pair.
X, y = load_iris(return_X_y=True)

# Split into training and test sets. test_size is the fraction held out for
# testing; random_state fixes the shuffle seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=206
)

# Accumulators for the accuracy curves: L1 train, L2 train, L1 test, L2 test.
l1, l2, l1test, l2test = [], [], [], []
# Sweep C over 19 evenly spaced values from 0.05 to 1 (both ends included).
for c_value in np.linspace(0.05, 1, 19):
    # Build one L1-penalized and one L2-penalized logistic regression model.
    # C is the INVERSE of the regularization strength (smaller C means
    # stronger regularization); max_iter caps the number of solver iterations.
    # NOTE(review): the target here is multiclass; newer scikit-learn releases
    # appear to be phasing out liblinear's built-in one-vs-rest handling --
    # confirm against the installed version.
    lrl1 = LogisticRegression(
        penalty='l1', solver='liblinear', C=c_value, max_iter=1000
    )
    lrl2 = LogisticRegression(
        penalty='l2', solver='liblinear', C=c_value, max_iter=1000
    )

    # Fit the L1 model and record its training / test accuracy.
    # accuracy_score takes (y_true, y_pred) in that order.
    lrl1.fit(X_train, y_train)
    l1.append(accuracy_score(y_train, lrl1.predict(X_train)))
    l1test.append(accuracy_score(y_test, lrl1.predict(X_test)))

    # Fit the L2 model and record its training / test accuracy.
    lrl2.fit(X_train, y_train)
    l2.append(accuracy_score(y_train, lrl2.predict(X_train)))
    l2test.append(accuracy_score(y_test, lrl2.predict(X_test)))

# Print the coefficients of the last fitted pair of models (C = 1).
print('L1正则化结果:\n',lrl1.coef_,'\nL2正则化结果:\n',lrl2.coef_)
# Bundle the four accuracy series so they can be plotted in one loop.
result = [l1, l2, l1test, l2test]

# Line color and legend label for each series, in the same order as `result`.
colors = ['green', 'red', 'yellow', 'black']
labels = ['L1', 'L2', 'L1test', 'L2test']

# Create an 8 x 4 inch figure.
plt.figure(figsize=(8, 4))

# x-axis: the same C grid that was used to train the models.
c_values = np.linspace(0.05, 1, 19)

# Draw one line per series: L1 train, L2 train, L1 test, L2 test accuracy.
for series, line_color, line_label in zip(result, colors, labels):
    plt.plot(c_values, series, color=line_color, label=line_label)

# Add the legend and display the figure.
plt.legend()
plt.show()
# Source: https://www.cveoy.top/t/topic/nlfT -- copyright belongs to the author. Do not repost or scrape.