# Import the required libraries.

import matplotlib.pyplot as plt
import numpy as np
import sklearn.datasets as datasets
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split

# Set the random seed.

# Fix the seed of NumPy's global random number generator; presumably this
# is what makes the random train/test split later in the script repeatable
# between runs -- TODO confirm.
np.random.seed(206)

# Define the simple linear regression class.

class SimpleLinearRegression:
    """Ordinary least-squares regression on a single feature.

    After ``fit`` the model is the straight line ``y = a_ * x + b_``.

    Attributes:
        a_: Fitted slope, or ``None`` before ``fit`` has been called.
        b_: Fitted intercept, or ``None`` before ``fit`` has been called.
    """

    def __init__(self):
        # Both parameters stay None until fit() computes them.
        self.a_ = None
        self.b_ = None

    def fit(self, x_train, y_train):
        """Estimate the slope and intercept from the training data.

        Args:
            x_train: 1-D array-like of feature values.
            y_train: 1-D array-like of target values, same length as
                ``x_train``.

        Raises:
            AssertionError: If either input is not one-dimensional or the
                two inputs differ in length.
        """
        # Accept plain sequences as well as ndarrays.
        x_train = np.asarray(x_train)
        y_train = np.asarray(y_train)

        # The model only supports a single feature.
        assert (x_train.ndim == 1 and y_train.ndim == 1), \
            """Simple Linear Regression model can only solve single feature training data"""
        # Every sample needs exactly one label.
        assert len(x_train) == len(y_train), \
            """the size of x_train must be equal to y_train"""

        x_mean = np.mean(x_train)
        y_mean = np.mean(y_train)
        x_centered = x_train - x_mean

        # Closed-form least-squares solution:
        #   a = sum((x - x_mean) * (y - y_mean)) / sum((x - x_mean) ** 2)
        #   b = y_mean - a * x_mean
        # NOTE: if every x value is identical the denominator is zero and
        # the slope is undefined.
        self.a_ = np.vdot(x_centered, y_train - y_mean) / np.vdot(x_centered, x_centered)
        self.b_ = y_mean - self.a_ * x_mean

    def predict(self, input_x):
        """Return the predicted targets for a 1-D array-like of feature values.

        Args:
            input_x: 1-D array-like of feature values.

        Returns:
            A NumPy array with one prediction per element of ``input_x``.

        Raises:
            AssertionError: If ``input_x`` is not one-dimensional or the
                model has not been fitted yet.
        """
        input_x = np.asarray(input_x)
        # The model only supports a single feature.
        assert input_x.ndim == 1, \
            """Simple Linear Regression model can only solve single feature data"""
        assert self.a_ is not None and self.b_ is not None, \
            """must fit before predict"""
        # One vectorised expression instead of a Python-level loop.
        return self.a_ * input_x + self.b_

    def pred_(self, x):
        """Return the prediction ``a_ * x + b_`` for a single value ``x``."""
        return self.a_ * x + self.b_

    def __repr__(self):
        return "SimpleLinearRegressionModel"

if __name__ == '__main__':
    # Demo: fit SimpleLinearRegression on one column of the Boston housing
    # data, report error metrics on a held-out split and plot the fitted line.

    # Load the dataset.
    # NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed
    # in 1.2, so this call needs an older scikit-learn or a replacement data
    # source -- confirm against the installed version.
    boston_data = datasets.load_boston()
    print(boston_data['DESCR'])

    # Use feature column 5 as the single predictor (presumably the average
    # number of rooms -- verify against the printed DESCR).
    x = boston_data['data'][:, 5]
    y = boston_data['target']

    # Keep only the samples whose label is below 50. Build the mask once so
    # x and y are guaranteed to be filtered identically.
    keep = y < 50
    x = x[keep]
    y = y[keep]

    # Scatter plot of the raw data.
    plt.scatter(x, y)
    plt.show()

    # Split into training and test sets (30% held out for testing).
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

    # Instantiate the model, train it and predict on the test set.
    regs = SimpleLinearRegression()
    regs.fit(x_train, y_train)
    y_hat = regs.predict(x_test)

    # Evaluation metrics.
    mse = mean_squared_error(y_test, y_hat)
    rmse = np.sqrt(mse)  # RMSE is by definition the square root of the MSE
    mae = mean_absolute_error(y_test, y_hat)
    # R^2 = 1 - MSE / Var(y); np.var is the population variance, so both
    # terms are averaged over the same number of test samples.
    R_squared_Error = 1 - mse / np.var(y_test)

    # Print the evaluation metrics.
    print('mean squared error:%.2f' % (mse))
    print('root mean squared error:%.2f' % (rmse))
    print('mean absolute error:%.2f' % (mae))
    print('R squared Error:%.2f' % (R_squared_Error))

    # Draw the fitted regression line over the data.
    a = regs.a_
    b = regs.b_
    x_plot = np.linspace(4, 8, 50)
    y_plot = x_plot * a + b
    plt.scatter(x, y)
    plt.plot(x_plot, y_plot, color='red')
    plt.show()
# (Extraction residue: a garbled, punctuation-stripped duplicate of the import statements at the top of this file.)

# Original source: https://www.cveoy.top/t/topic/b0cf -- copyright belongs to the author; do not repost or scrape.

# (Site footer advertisement: "Free AI, click here -- no registration or login required.")