"""Boston house-price prediction: data preprocessing and linear regression.

Pipeline of this script:
1. load ``./boston_house_prices.csv`` and preview it,
2. draw box plots to inspect the outlier distribution,
3. clip outliers to the 1.5 * IQR fences,
4. train a min-max-scaled linear regression and report the RMSE,
5. persist the fitted model with joblib and reload it for a sanity check.
"""
# Import the required packages.
import math
import os
import shutil

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from joblib import dump, load
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler

# Columns left out of both the box plots and the IQR clipping.
# NOTE(review): CHAS is presumably a 0/1 indicator column; for such a column
# the IQR is 0, so clipping would overwrite every value with one constant.
# Confirm against the dataset.
SKIPPED_COLS = ('CHAS',)

# Number of box plots per row in the overview figure.
PLOTS_PER_ROW = 5


def draw_boxplot(data, skip=SKIPPED_COLS):
    """Draw one small box plot per column of *data* in a single figure.

    Args:
        data: DataFrame whose columns are plotted.
        skip: Column names that are not plotted (CHAS by default).
    """
    data_col = [col for col in data.columns if col not in skip]
    # Keep the original 3 x 5 layout, but grow the grid when there are more
    # than 15 columns instead of failing on an out-of-range subplot index.
    n_rows = max(3, math.ceil(len(data_col) / PLOTS_PER_ROW))

    plt.figure(figsize=(5, 5), dpi=300)
    plt.subplots_adjust(wspace=0.6)
    for i, col_name in enumerate(data_col):
        plt.subplot(n_rows, PLOTS_PER_ROW, i + 1)
        plt.boxplot(
            data[col_name],
            showmeans=True,
            whiskerprops={'color': 'g', 'linewidth': 0.4, 'linestyle': '--'},
            flierprops={'markersize': 0.4},
            meanprops={'markersize': 1},
        )
        plt.title(col_name, fontdict={'size': 5}, pad=2)
        plt.yticks(fontsize=4, rotation=90)
        plt.tick_params(pad=0.5)
        plt.xticks([])

    plt.show()


def clip_outliers_iqr(data, skip=SKIPPED_COLS):
    """Clip every numeric column of *data* to its 1.5 * IQR fences, in place.

    Values above ``Q3 + 1.5 * IQR`` are replaced by that upper fence and
    values below ``Q1 - 1.5 * IQR`` by the lower fence. Each column keeps its
    original dtype.

    Args:
        data: DataFrame to modify.
        skip: Column names that are left untouched.

    Returns:
        The same DataFrame object, for convenience.
    """
    num_features = data.select_dtypes(exclude=['object', 'bool']).columns.tolist()
    for feature in num_features:
        if feature in skip:
            continue
        q1 = data[feature].quantile(q=0.25)
        q3 = data[feature].quantile(q=0.75)
        iqr = q3 - q1
        top = q3 + 1.5 * iqr
        bot = q1 - 1.5 * iqr
        # Series.clip avoids writing through ``.values``, which is a
        # read-only view under pandas copy-on-write.
        clipped = data[feature].clip(lower=bot, upper=top)
        data[feature] = clipped.astype(data[feature].dtype)
    return data


data = pd.read_csv('./boston_house_prices.csv')

# Preview the first 5 rows.
print(data.head())

# Box plots to inspect the outlier distribution.
draw_boxplot(data)

# Handle outliers with the quartile (IQR) rule.
# NOTE(review): this also clips the target column MEDV, as the original did.
clip_outliers_iqr(data)

# Look at the box plots again: outliers have been replaced by the fence values.
draw_boxplot(data)

X = data.drop(['MEDV'], axis=1)
y = data['MEDV']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Scaling and regression are chained so the scaler fitted on the training
# split is actually applied at fit and predict time, and is saved together
# with the model. (The original computed scaled arrays but never used them.)
lr_model = make_pipeline(MinMaxScaler(), LinearRegression())
lr_model.fit(X_train, y_train)

y_pred = lr_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
# The square root of the MSE is the RMSE, so label it accordingly.
print('RMSE:', np.sqrt(mse))

model_path = './linear_model'

# joblib.dump writes a regular file; shutil.rmtree only works on directories,
# so remove whichever kind of entry is left over from a previous run.
if os.path.isdir(model_path):
    shutil.rmtree(model_path)
elif os.path.exists(model_path):
    os.remove(model_path)
dump(lr_model, model_path)

# Reload the persisted model and predict the first test row as a sanity check.
# The original page reported the output array([26.99378702]) for its version
# of the code; the value may differ now that CHAS is no longer clipped.
lr_model = load(model_path)
print(lr_model.predict(X_test[:1]))

波士顿房价预测:数据预处理与线性回归模型训练

原文地址: https://www.cveoy.top/t/topic/p4Y2 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录