import matplotlib.pyplot as plt
import numpy as np
import sklearn.datasets as datasets
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split

# Set the random seed so results are reproducible

np.random.seed(206) # Replace 206 with the last three digits of your student ID

# A hand-rolled simple (single-feature) linear regression implementation

class SimpleLinearRegression():
    """Single-feature linear regression y = a*x + b, fitted by ordinary least squares.

    Attributes:
        a_: fitted slope (None until ``fit`` is called).
        b_: fitted intercept (None until ``fit`` is called).
    """

    # Extraction mangling fixed here: the original source lost the double
    # underscores of ``__init__`` and the ``\`` line continuations in the
    # multi-line asserts below, both of which made the file unrunnable.
    def __init__(self):
        self.a_ = None  # slope
        self.b_ = None  # intercept

    # Fit the model with the closed-form least-squares solution
    def fit(self, x_train, y_train):
        """Estimate slope and intercept from 1-D training arrays.

        Args:
            x_train: 1-D numpy array of feature values.
            y_train: 1-D numpy array of targets, same length as ``x_train``.
        """
        assert (x_train.ndim == 1 and y_train.ndim == 1), \
            'Simple Linear Regression model can only solve single feature training data'
        assert len(x_train) == len(y_train), \
            'the size of x_train must be equal to y_train'
        x_mean = np.mean(x_train)
        y_mean = np.mean(y_train)
        # Slope = cov(x, y) / var(x), computed via dot products of the
        # mean-centered arrays (np.vdot flattens and sums the products).
        self.a_ = np.vdot((x_train - x_mean), (y_train - y_mean)) / np.vdot((x_train - x_mean), (x_train - x_mean))
        self.b_ = y_mean - self.a_ * x_mean

    # Predict targets for a 1-D array of inputs
    def predict(self, input_x):
        """Return predictions for each element of the 1-D array ``input_x``."""
        assert input_x.ndim == 1, \
            'Simple Linear Regression model can only solve single feature data'
        return np.array([self.pred_(x) for x in input_x])

    # Predict a single scalar value
    def pred_(self, x):
        """Return the prediction a_*x + b_ for one scalar ``x``."""
        return self.a_ * x + self.b_

    def __repr__(self):
        return 'SimpleLinearRegressionModel'

if __name__ == '__main__':
    # Load the Boston housing dataset.
    # NOTE(review): load_boston was removed in scikit-learn 1.2; running this
    # requires scikit-learn < 1.2 (or swapping in another data source).
    boston_data = datasets.load_boston()
    print(boston_data['DESCR'])
    x = boston_data['data'][:, 5]  # column 5: average rooms per dwelling (RM) — per load_boston docs
    y = boston_data['target']
    # The target is capped at 50; drop those censored samples.
    x = x[y < 50]
    y = y[y < 50]

    # Visualize the raw data distribution
    plt.scatter(x, y)
    plt.show()

    # Split into training (70%) and test (30%) sets
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

    # Build and fit the model (ordinary least squares)
    regs = SimpleLinearRegression()
    regs.fit(x_train, y_train)

    # Predict on the test set
    y_hat = regs.predict(x_test)

    # Compute evaluation metrics
    rmse = np.sqrt(np.sum((y_hat - y_test) ** 2) / len(x_test))
    mse = mean_squared_error(y_test, y_hat)
    mae = mean_absolute_error(y_test, y_hat)
    R_squared_Error = 1 - mse / np.var(y_test)

    # Print evaluation metrics
    print('mean squared error:%.2f' % (mse))
    print('root mean squared error:%.2f' % (rmse))
    print('mean absolute error:%.2f' % (mae))
    print('R squared Error:%.2f' % (R_squared_Error))

    # Plot the least-squares regression line
    a = regs.a_
    b = regs.b_
    x_plot = np.linspace(4, 8, 50)
    y_plot = x_plot * a + b
    plt.scatter(x, y)
    plt.plot(x_plot, y_plot, color='red', label='Least Squares')

    # Train a second model with batch gradient descent
    n = len(x_train)
    alpha = 0.01  # learning rate
    theta0 = 0  # intercept, initialized to 0
    theta1 = 0  # slope, initialized to 0
    iters = 1000  # number of iterations

    # Simultaneous update: y_pred uses the thetas from the previous step
    for i in range(iters):
        y_pred = theta0 + theta1 * x_train
        theta0 = theta0 - alpha * (1 / n) * np.sum(y_pred - y_train)
        theta1 = theta1 - alpha * (1 / n) * np.sum((y_pred - y_train) * x_train)

    # Plot the gradient-descent fitted line
    x_gd = np.linspace(4, 8, 50)
    y_gd = theta0 + theta1 * x_gd
    plt.plot(x_gd, y_gd, color='green', label='Gradient Descent')

    # Add legend, title and axis labels
    plt.legend()
    plt.title('Simple Linear Regression: Least Squares vs. Gradient Descent')
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.show()
# Python implementation of simple linear regression: least squares vs. gradient descent
#
# Original source: https://www.cveoy.top/t/topic/nlch — copyright belongs to the author.
# Do not repost or scrape.