# (garbled page-title residue of the import lines below, kept for reference) import numpy as npimport sklearndatasets as datasetsfrom sklearnmodel_selection import train_test_splitimport matplotlibpyplot as pltfrom sklearnmetrics import mean_squared_errormean_absolute_errornpr
# Import the required libraries
import matplotlib.pyplot as plt
import numpy as np
import sklearn.datasets as datasets
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
# Set the random seed
np.random.seed(206)  # seeds NumPy's global RNG; presumably what keeps the unseeded train_test_split call reproducible - confirm
# Define the simple linear regression class
class SimpleLinearRegression:
    """Least-squares linear regression for data with a single feature.

    The model is the straight line ``y = a_ * x + b_``. ``fit`` estimates the
    slope ``a_`` and the intercept ``b_`` from training data and ``predict``
    evaluates the line on new inputs.
    """

    def __init__(self):
        # Slope and intercept of the fitted line; both stay None until
        # fit() has been called.
        self.a_ = None
        self.b_ = None

    def fit(self, x_train, y_train):
        """Estimate the slope ``a_`` and intercept ``b_`` from training data.

        Args:
            x_train: 1-D array-like of feature values.
            y_train: 1-D array-like of target values, the same length as
                ``x_train``.

        Raises:
            AssertionError: If either input is not 1-D, or if the two inputs
                differ in length.
        """
        # Accept plain sequences as well as ndarrays.
        x_train = np.asarray(x_train)
        y_train = np.asarray(y_train)

        # The training data must have exactly one feature.
        assert (x_train.ndim == 1 and y_train.ndim == 1), \
            "Simple Linear Regression model can only solve single feature training data"
        # Features and targets must be the same size.
        assert len(x_train) == len(y_train), \
            "the size of x_train must be equal to y_train"

        # Slope = sum((x - x_mean) * (y - y_mean)) / sum((x - x_mean) ** 2);
        # the intercept makes the line pass through (x_mean, y_mean).
        x_mean = np.mean(x_train)
        y_mean = np.mean(y_train)
        x_centered = x_train - x_mean
        self.a_ = np.vdot(x_centered, y_train - y_mean) / np.vdot(x_centered, x_centered)
        self.b_ = y_mean - self.a_ * x_mean

    def predict(self, input_x):
        """Return the predicted target for every value in ``input_x``.

        Args:
            input_x: 1-D array-like of feature values.

        Returns:
            A NumPy array with one prediction per input value.

        Raises:
            AssertionError: If ``input_x`` is not 1-D.
        """
        input_x = np.asarray(input_x)
        # The data to predict on must have exactly one feature.
        assert input_x.ndim == 1, \
            "Simple Linear Regression model can only solve single feature data"
        # pred_ is plain arithmetic, so it can be applied to the whole array
        # at once instead of element by element.
        return self.pred_(input_x)

    def pred_(self, x):
        """Evaluate the fitted line ``a_ * x + b_`` at ``x``."""
        return self.a_ * x + self.b_

    def __repr__(self):
        return "SimpleLinearRegressionModel"
if __name__ == '__main__':
    # Load the dataset.
    # NOTE(review): datasets.load_boston was deprecated in scikit-learn 1.0
    # and removed in 1.2, so this call needs scikit-learn < 1.2. It is left
    # in place because switching datasets would change the script's results.
    boston_data = datasets.load_boston()
    print(boston_data['DESCR'])

    # Use feature column 5 only and keep just the samples whose target is
    # below 50. x must be filtered before y, because the mask is built from
    # the unfiltered y.
    x = boston_data['data'][:, 5]
    y = boston_data['target']
    x = x[y < 50]
    y = y[y < 50]

    # Scatter plot of the filtered data.
    plt.scatter(x, y)
    plt.show()

    # Split into training and test sets (30% held out for testing).
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

    # Instantiate the model, train it, and predict on the test set.
    regs = SimpleLinearRegression()
    regs.fit(x_train, y_train)
    y_hat = regs.predict(x_test)

    # Compute the evaluation metrics.
    rmse = np.sqrt(np.sum((y_hat - y_test) ** 2) / len(x_test))
    mse = mean_squared_error(y_test, y_hat)
    mae = mean_absolute_error(y_test, y_hat)
    # R^2 expressed as 1 - MSE / Var(y_test).
    R_squared_Error = 1 - mse / np.var(y_test)

    # Print the evaluation metrics.
    print('mean squared error:%.2f' % (mse))
    print('root mean squared error:%.2f' % (rmse))
    print('mean absolute error:%.2f' % (mae))
    print('R squared Error:%.2f' % (R_squared_Error))

    # Draw the fitted regression line over the scatter plot.
    a = regs.a_
    b = regs.b_
    x_plot = np.linspace(4, 8, 50)
    y_plot = x_plot * a + b
    plt.scatter(x, y)
    plt.plot(x_plot, y_plot, color='red')
    plt.show()
# Original source: https://www.cveoy.top/t/topic/b0cf (copyright belongs to the author; do not repost or scrape).