# 定义随机森林回归函数

def RandomForest_reg(wheat_file_path, paddy_file_path, sampling_depth, cv_n):
    """Tune, evaluate and persist a random forest regressor over ``cv_n`` folds.

    For every fold index ``cv_i`` in ``range(cv_n)`` the splits returned by
    ``merge_crop_data`` are used to grid-search ``max_features`` and
    ``n_estimators`` against the validation split.  The winning configuration
    is refitted on the training split, saved under ``../model/``, reloaded and
    scored on the test split.  The fold-averaged test metrics are appended as
    one CSV row to ``../result/result_predict_to_20cm.txt``.  Finally a model
    is fitted on the concatenated train + validation + test data, using the
    configuration with the highest validation R2 seen in any fold, and saved
    under ``../final_model/``.

    Args:
        wheat_file_path: Forwarded unchanged to ``merge_crop_data``.
        paddy_file_path: Forwarded unchanged to ``merge_crop_data``.
        sampling_depth: Forwarded to ``merge_crop_data`` and concatenated into
            the model file names, so it has to be a ``str``.
        cv_n: Number of cross-validation folds; must be at least 1.

    Raises:
        ValueError: If ``cv_n`` is smaller than 1.
    """
    if cv_n < 1:
        # Without any fold there is nothing to average and no configuration
        # to train the final model with.
        raise ValueError('cv_n must be at least 1')

    # Per-fold test metrics.
    r2_test_list = []
    rmse_test_list = []
    mae_test_list = []
    rmd_test_list = []  # mean deviation
    ef_test_list = []

    # R2 can be zero or negative, so the "best so far" trackers start at -inf.
    # Starting at 0 left the best parameters unbound (or stale from an earlier
    # fold) whenever no candidate reached a positive validation score.
    best_r2 = float('-inf')
    final_best_max_feature = None
    final_best_n_estimator = None

    for cv_i in range(cv_n):
        x_train, y_train, x_val, y_val, x_test, y_test = merge_crop_data(
            wheat_file_path, paddy_file_path, cv_i, sampling_depth, 2, 1)

        best_r2_val = float('-inf')
        best_max_feature = None
        best_n_estimator = None

        # 'auto' is intentionally not part of the grid: for regressors it
        # meant "use all features" (identical to None) and scikit-learn >= 1.3
        # rejects it.
        for max_feature in ['sqrt', 'log2', None]:
            for n_estimator in np.arange(100, 300, 20):
                regressor = RandomForestRegressor(
                    n_estimators=n_estimator,
                    max_features=max_feature,
                    n_jobs=30)
                regressor.fit(x_train, y_train)
                r2_val = r2_score(y_val, regressor.predict(x_val))
                if r2_val > best_r2_val:
                    best_r2_val = r2_val
                    best_max_feature = max_feature
                    best_n_estimator = n_estimator

        # Remember the configuration that won across all folds.
        if best_r2_val > best_r2:
            best_r2 = best_r2_val
            final_best_max_feature = best_max_feature
            final_best_n_estimator = best_n_estimator

        # Refit this fold's best configuration and persist it.
        regressor = RandomForestRegressor(
            n_estimators=best_n_estimator,
            max_features=best_max_feature,
            n_jobs=30)
        regressor.fit(x_train, y_train)

        # One path for both dump and load; the original loaded from a name
        # without the underscore after "RF" and therefore never found the file.
        # NOTE(review): the empty '' between depth and fold index looks like a
        # lost separator (probably '_') - confirm before renaming files.
        fold_model_path = '../model/RF_' + sampling_depth + '' + str(cv_i) + '.pkl'
        joblib.dump(regressor, fold_model_path)
        regressor = joblib.load(fold_model_path)

        # Predict once and reuse the result for every metric.
        y_pred = regressor.predict(x_test)
        r2_test_list.append(r2_score(y_test, y_pred))
        rmse_test_list.append(np.sqrt(mean_squared_error(y_test, y_pred)))
        mae_test_list.append(mean_absolute_error(y_test, y_pred))
        rmd_test_list.append(math_rmd(y_test, y_pred))
        ef_test_list.append(math_ef(y_test, y_pred))

    # Append the fold-averaged metrics as one CSV row.
    averaged = [
        str(round(np.mean(values), 2))
        for values in (r2_test_list, rmse_test_list, mae_test_list,
                       rmd_test_list, ef_test_list)
    ]
    with open('../result/result_predict_to_20cm.txt', 'a') as f:
        f.write(','.join(['RandomForest'] + averaged) + '\n')

    # Train the final model on all three splits combined.
    # NOTE(review): fold index 1 is hard-coded here; presumably the choice is
    # irrelevant because the splits are concatenated again - confirm.
    x_train, y_train, x_val, y_val, x_test, y_test = merge_crop_data(
        wheat_file_path, paddy_file_path, 1, sampling_depth, 2, 1)
    x_data = np.concatenate((x_train, x_val, x_test), axis=0)
    y_data = np.concatenate((y_train, y_val, y_test), axis=0)

    regressor = RandomForestRegressor(
        n_estimators=final_best_n_estimator,
        max_features=final_best_max_feature,
        n_jobs=30)
    regressor.fit(x_data, y_data)
    joblib.dump(regressor, '../final_model/RF' + sampling_depth + '.pkl')

# 定义支持向量回归函数

def svr_reg(wheat_file_path, paddy_file_path, sampling_depth, cv_n):
    """Tune, evaluate and persist a support vector regressor over ``cv_n`` folds.

    For every fold index ``cv_i`` in ``range(cv_n)`` the splits returned by
    ``merge_crop_data`` are used to pick the SVR kernel with the highest R2 on
    the validation split.  That kernel is refitted on the training split,
    saved under ``../model/``, reloaded and scored on the test split.  The
    fold-averaged test metrics are appended as one CSV row to
    ``../result/result_predict_to_20cm.txt``.  Finally a model is fitted on the
    concatenated train + validation + test data, using the kernel with the
    highest validation R2 seen in any fold, and saved under
    ``../final_model/``.

    Args:
        wheat_file_path: Forwarded unchanged to ``merge_crop_data``.
        paddy_file_path: Forwarded unchanged to ``merge_crop_data``.
        sampling_depth: Forwarded to ``merge_crop_data`` and concatenated into
            the model file names, so it has to be a ``str``.
        cv_n: Number of cross-validation folds; must be at least 1.

    Raises:
        ValueError: If ``cv_n`` is smaller than 1.
    """
    if cv_n < 1:
        # Without any fold there is nothing to average and no kernel to train
        # the final model with.
        raise ValueError('cv_n must be at least 1')

    # Per-fold test metrics.
    r2_test_list = []
    rmse_test_list = []
    mae_test_list = []
    rmd_test_list = []  # mean deviation
    ef_test_list = []

    # R2 can be zero or negative, so the "best so far" trackers start at -inf.
    # Starting at 0 left the best kernel unbound (or stale from an earlier
    # fold) whenever no kernel reached a positive validation score.
    best_r2 = float('-inf')
    best_kernel = None

    for cv_i in range(cv_n):
        x_train, y_train, x_val, y_val, x_test, y_test = merge_crop_data(
            wheat_file_path, paddy_file_path, cv_i, sampling_depth, 2, 1)

        best_r2_val = float('-inf')
        best_kernel_val = None
        for kernel in ['linear', 'poly', 'rbf', 'sigmoid']:
            regressor = SVR(kernel=kernel)
            regressor.fit(x_train, y_train)
            r2_val = r2_score(y_val, regressor.predict(x_val))
            if r2_val > best_r2_val:
                best_r2_val = r2_val
                best_kernel_val = kernel

        # Remember the kernel that won across all folds.
        if best_r2_val > best_r2:
            best_r2 = best_r2_val
            best_kernel = best_kernel_val

        # Refit this fold's best kernel and persist it.
        regressor = SVR(kernel=best_kernel_val)
        regressor.fit(x_train, y_train)

        # One path for both dump and load; the original loaded from a name
        # without the underscore after "svr" and therefore never found the
        # file.
        # NOTE(review): the empty '' between depth and fold index looks like a
        # lost separator (probably '_') - confirm before renaming files.
        fold_model_path = '../model/svr_' + sampling_depth + '' + str(cv_i) + '.pkl'
        joblib.dump(regressor, fold_model_path)
        regressor = joblib.load(fold_model_path)

        # Predict once and reuse the result for every metric.
        y_pred = regressor.predict(x_test)
        r2_test_list.append(r2_score(y_test, y_pred))
        rmse_test_list.append(np.sqrt(mean_squared_error(y_test, y_pred)))
        mae_test_list.append(mean_absolute_error(y_test, y_pred))
        rmd_test_list.append(math_rmd(y_test, y_pred))
        ef_test_list.append(math_ef(y_test, y_pred))

    # Append the fold-averaged metrics as one CSV row.
    averaged = [
        str(round(np.mean(values), 2))
        for values in (r2_test_list, rmse_test_list, mae_test_list,
                       rmd_test_list, ef_test_list)
    ]
    with open('../result/result_predict_to_20cm.txt', 'a') as f:
        f.write(','.join(['svr'] + averaged) + '\n')

    # Train the final model on all three splits combined.
    # NOTE(review): fold index 1 is hard-coded here; presumably the choice is
    # irrelevant because the splits are concatenated again - confirm.
    x_train, y_train, x_val, y_val, x_test, y_test = merge_crop_data(
        wheat_file_path, paddy_file_path, 1, sampling_depth, 2, 1)
    x_data = np.concatenate((x_train, x_val, x_test), axis=0)
    y_data = np.concatenate((y_train, y_val, y_test), axis=0)

    regressor = SVR(kernel=best_kernel)
    regressor.fit(x_data, y_data)
    joblib.dump(regressor, '../final_model/svr' + sampling_depth + '.pkl')

注释代码 def RandomForest_regwheat_file_pathpaddy_file_pathsampling_depthcv_n printRF r2_test_list = rmse_test_list = mae_test_list = rmd_test_list = # 平均偏差 ef_test_list = best_

原文地址: http://www.cveoy.top/t/topic/e3pb 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录