注释代码def DT_regwheat_file_pathpaddy_file_pathsampling_depthcv_n printDT r2_test_list = rmse_test_list = mae_test_list = rmd_test_list = # 平均偏差 ef_test_list = best_r2 = 0 f
# Import the required libraries.
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.tree import DecisionTreeRegressor

try:
    # sklearn.externals.joblib was deprecated in scikit-learn 0.21 and
    # removed in 0.23; fall back to the standalone package when it is gone.
    from sklearn.externals import joblib
except ImportError:
    import joblib

from utils import math_ef, math_rmd, merge_crop_data
# Define the function.
def _select_dt_params(x_train, y_train, x_val, y_val):
    """Grid-search ``min_samples_leaf`` and ``splitter`` on the validation split.

    Returns:
        A ``(r2_val, min_samples_leaf, splitter)`` tuple for the candidate
        with the highest validation R2. The first candidate wins ties.
    """
    best = None
    for min_samples_leaf in (1, 2, 3):
        for splitter in ('best', 'random'):
            candidate = DecisionTreeRegressor(min_samples_leaf=min_samples_leaf,
                                              splitter=splitter)
            candidate.fit(x_train, y_train)
            r2_val = r2_score(y_val, candidate.predict(x_val))
            # R2 can be negative, so the incumbent starts empty instead of at
            # 0; otherwise a fold whose scores are all <= 0 selects nothing.
            if best is None or r2_val > best[0]:
                best = (r2_val, min_samples_leaf, splitter)
    return best


def DT_reg(wheat_file_path, paddy_file_path, sampling_depth, cv_n):
    """Cross-validate a decision-tree regressor and save the resulting models.

    For each of the ``cv_n`` folds the data is loaded with
    ``merge_crop_data``, the hyper-parameters are chosen on the validation
    split, a model is fitted on the training split, saved to
    ``../model/DT_<sampling_depth>_<fold>.pkl`` and scored on the test split.
    The fold-averaged test metrics are appended as one CSV row to
    ``../result/result_predict_to_20cm.txt``. Finally a model using the
    hyper-parameters of the best-scoring fold is fitted on train + validation
    + test data and saved to ``../final_model/DT_<sampling_depth>.pkl``.

    Args:
        wheat_file_path: Forwarded unchanged to ``merge_crop_data``.
        paddy_file_path: Forwarded unchanged to ``merge_crop_data``.
        sampling_depth: Forwarded to ``merge_crop_data`` and embedded in the
            saved model file names.
        cv_n: Number of cross-validation folds; must be at least 1.

    Raises:
        ValueError: If ``cv_n`` is smaller than 1.
    """
    if cv_n < 1:
        # Without any fold there are no metrics to average and no
        # hyper-parameters to build the final model from.
        raise ValueError('cv_n must be at least 1, got {!r}'.format(cv_n))

    # Per-fold test metrics.
    r2_test_list = []
    rmse_test_list = []
    mae_test_list = []
    rmd_test_list = []  # mean deviation
    ef_test_list = []

    depth_tag = str(sampling_depth)
    best_overall = None  # (r2_val, min_samples_leaf, splitter) of the best fold

    # Cross-validation.
    for cv_i in range(cv_n):
        x_train, y_train, x_val, y_val, x_test, y_test = merge_crop_data(
            wheat_file_path, paddy_file_path, cv_i, sampling_depth, 2, 1)

        fold_best = _select_dt_params(x_train, y_train, x_val, y_val)
        if best_overall is None or fold_best[0] > best_overall[0]:
            best_overall = fold_best

        # NOTE(review): the model is refitted rather than reusing the
        # validated candidate; without a fixed random_state the refit may
        # differ slightly from the tree that was scored on validation.
        _, fold_min_samples_leaf, fold_splitter = fold_best
        regressor = DecisionTreeRegressor(min_samples_leaf=fold_min_samples_leaf,
                                          splitter=fold_splitter)
        regressor.fit(x_train, y_train)

        # Save the fold model.
        joblib.dump(regressor, '../model/DT_' + depth_tag + '_' + str(cv_i) + '.pkl')

        # Compute the evaluation metrics from a single prediction pass.
        y_pred = regressor.predict(x_test)
        r2_test_list.append(r2_score(y_test, y_pred))
        rmse_test_list.append(np.sqrt(mean_squared_error(y_test, y_pred)))
        mae_test_list.append(mean_absolute_error(y_test, y_pred))
        rmd_test_list.append(math_rmd(y_test, y_pred))
        ef_test_list.append(math_ef(y_test, y_pred))

    # Append the fold-averaged metrics to the result file.
    averaged = [
        str(round(np.mean(values), 2))
        for values in (r2_test_list, rmse_test_list, mae_test_list,
                       rmd_test_list, ef_test_list)
    ]
    with open('../result/result_predict_to_20cm.txt', 'a') as f:
        f.write('DT,' + ','.join(averaged) + '\n')

    # Merge all splits of one fold and train the final model.
    # NOTE(review): fold index 1 is hard-coded here; presumably the union of
    # train/val/test is the same for every fold -- confirm against
    # merge_crop_data.
    x_train, y_train, x_val, y_val, x_test, y_test = merge_crop_data(
        wheat_file_path, paddy_file_path, 1, sampling_depth, 2, 1)
    x_data = np.concatenate((x_train, x_val, x_test), axis=0)
    y_data = np.concatenate((y_train, y_val, y_test), axis=0)

    _, final_min_samples_leaf, final_splitter = best_overall
    regressor = DecisionTreeRegressor(min_samples_leaf=final_min_samples_leaf,
                                      splitter=final_splitter)
    regressor.fit(x_data, y_data)

    # Save the final model.
    joblib.dump(regressor, '../final_model/DT_' + depth_tag + '.pkl')
原文地址: https://www.cveoy.top/t/topic/e3qj 著作权归作者所有。请勿转载和采集!