A Simple Linear Regression Model: Least Squares and Gradient Descent Implementations
import numpy as np
import sklearn.datasets as datasets
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
# Fix the random seed so the random operations below are reproducible
# A fixed seed of 206 is used here because no student ID is available;
# replace it with the last three digits of your own student ID
np.random.seed(206)
class SimpleLinearRegression():
    # Initialize the model parameters
    def __init__(self):
        self.a_ = None
        self.b_ = None

    # Train the model with ordinary least squares
    def fit(self, x_train, y_train):
        assert (x_train.ndim == 1 and y_train.ndim == 1), \
            'Simple Linear Regression model can only solve single feature training data'
        assert len(x_train) == len(y_train), \
            'the size of x_train must be equal to y_train'
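        # Closed-form ordinary least squares estimates, obtained by minimizing
        # sum((y_i - (a * x_i + b))^2) with respect to a and b:
        #   a = sum((x_i - x_mean) * (y_i - y_mean)) / sum((x_i - x_mean)^2)
        #   b = y_mean - a * x_mean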
        x_mean = np.mean(x_train)
        y_mean = np.mean(y_train)
        self.a_ = np.vdot((x_train - x_mean), (y_train - y_mean)) / np.vdot((x_train - x_mean), (x_train - x_mean))
        self.b_ = y_mean - self.a_ * x_mean

    # Predict with the trained model
    def predict(self, input_x):
        assert input_x.ndim == 1, \
            'Simple Linear Regression model can only solve single feature data'
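        # NumPy broadcasting (self.a_ * input_x + self.b_) would be an equivalent
        # vectorized alternative to the per-element list comprehension below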
        return np.array([self.pred_(x) for x in input_x])

    # Predict a single sample
    def pred_(self, x):
        return self.a_ * x + self.b_

    # String representation of the model
    def __repr__(self):
        return 'SimpleLinearRegressionModel'

if __name__ == '__main__':
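    # Note: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
    # so running this block as-is assumes scikit-learn <= 1.1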
    boston_data = datasets.load_boston()
    # print(boston_data['DESCR'])
    # Use the feature at column index 5 (RM, the average number of rooms
    # per dwelling) as the independent variable
    x = boston_data['data'][:, 5]
    # Use the dataset's target (median house price) as the dependent variable
    y = boston_data['target']
    # Drop samples whose price is capped at the dataset's ceiling of 50
    x = x[y < 50]  # remaining x data, shape (490,)
    y = y[y < 50]  # remaining y data, shape (490,)
    # Visualize the data
    plt.scatter(x, y)
    plt.show()
    # Split the data into training and test sets
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
    # Create a simple linear regression model and fit it with least squares
    regs = SimpleLinearRegression()
    regs.fit(x_train, y_train)
    # Predict on the test set with the trained model
    y_hat = regs.predict(x_test)
    # Compute the evaluation metrics
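    # Metric definitions (m = number of test samples):
    #   MSE  = (1/m) * sum((y_hat_i - y_i)^2)
    #   RMSE = sqrt(MSE)
    #   MAE  = (1/m) * sum(|y_hat_i - y_i|)
    #   R^2  = 1 - sum((y_i - y_hat_i)^2) / sum((y_i - mean(y))^2)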
    mse = np.mean((y_hat - y_test) ** 2)
    rmse = np.sqrt(mse)
    mae = np.mean(np.abs(y_hat - y_test))
    R_squared = r2_score(y_test, y_hat)
    # Print the evaluation metrics
    print('mean squared error: %.2f' % mse)
    print('root mean squared error: %.2f' % rmse)
    print('mean absolute error: %.2f' % mae)
    print('R squared: %.2f' % R_squared)
    # Visualize the fitted line
    a = regs.a_
    b = regs.b_
    x_plot = np.linspace(4, 8, 50)
    y_plot = x_plot * a + b
    plt.scatter(x, y)
    plt.plot(x_plot, y_plot, color='red')
    plt.show()
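    # Sanity check (an addition, not in the original write-up): NumPy's built-in
    # degree-1 least-squares fit should agree with the closed-form solution above;
    # np.polyfit returns the coefficients [slope, intercept]
    coef = np.polyfit(x_train, y_train, 1)
    print('np.polyfit: a=%.4f, b=%.4f (ours: a=%.4f, b=%.4f)' % (coef[0], coef[1], a, b))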

    # Simple linear regression via gradient descent
    alpha = 0.01  # learning rate
    epoch = 1000  # number of iterations
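    # Note (added observation, not in the original): because x is unscaled, the
    # loss surface is ill-conditioned and plain gradient descent converges slowly,
    # especially for the intercept; more epochs or standardizing x would bring the
    # result closer to the closed-form solution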
    # Gradient descent training routine
    def gradient_descent(x_train, y_train, alpha, epoch):
        a = 0
        b = 0
        n = len(x_train)
        # Iteratively update the parameters
        for i in range(epoch):
            y_hat = a * x_train + b
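            # Gradients of the MSE loss L(a, b) = (1/n) * sum((y_i - (a*x_i + b))^2):
            #   dL/da = (-2/n) * sum(x_i * (y_i - y_hat_i))
            #   dL/db = (-2/n) * sum(y_i - y_hat_i)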
            grad_a = (-2 / n) * np.sum(x_train * (y_train - y_hat))
            grad_b = (-2 / n) * np.sum(y_train - y_hat)
            a = a - alpha * grad_a
            b = b - alpha * grad_b
        # Return the learned parameters
        return a, b
    # Train the model with gradient descent
    a_gd, b_gd = gradient_descent(x_train, y_train, alpha, epoch)
    # Predict with the trained parameters
    y_hat_gd = a_gd * x_test + b_gd
    # Compute the evaluation metrics
    mse_gd = np.mean((y_hat_gd - y_test) ** 2)
    rmse_gd = np.sqrt(mse_gd)
    mae_gd = np.mean(np.abs(y_hat_gd - y_test))
    R_squared_gd = r2_score(y_test, y_hat_gd)
    # Print the evaluation metrics
    print('mean squared error with gradient descent: %.2f' % mse_gd)
    print('root mean squared error with gradient descent: %.2f' % rmse_gd)
    print('mean absolute error with gradient descent: %.2f' % mae_gd)
    print('R squared with gradient descent: %.2f' % R_squared_gd)
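    # Added for comparison (not in the original write-up): plot the
    # gradient-descent line against the least-squares line fitted earlier
    y_plot_gd = x_plot * a_gd + b_gd
    plt.scatter(x, y)
    plt.plot(x_plot, y_plot, color='red', label='least squares')
    plt.plot(x_plot, y_plot_gd, color='green', label='gradient descent')
    plt.legend()
    plt.show()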