# Import the required libraries

import numpy as np
# NOTE: `sklearn.externals.joblib` was removed in scikit-learn 0.23; on newer
# versions this import must become a plain `import joblib`.
from sklearn.externals import joblib
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.tree import DecisionTreeRegressor

from utils import merge_crop_data, math_rmd, math_ef

# Define the function

def _select_dt_hyperparams(x_train, y_train, x_val, y_val):
    """Grid-search decision-tree hyperparameters on a single fold.

    Fits one ``DecisionTreeRegressor`` per combination of ``min_samples_leaf``
    in (1, 2, 3) and ``splitter`` in ('best', 'random') on the training split
    and scores it with R2 on the validation split.

    Returns:
        A tuple ``(best_r2_val, best_min_samples_leaf, best_splitter)`` for
        the combination with the highest validation R2.
    """
    # Start from -inf (not 0) so a winner is always selected, even when every
    # candidate scores a non-positive R2 on the validation split.
    best_r2_val = float('-inf')
    # Fall back to the first grid point if no score is comparable (e.g. NaN).
    best_min_samples_leaf = 1
    best_splitter = 'best'
    for min_samples_leaf_se in (1, 2, 3):
        for splitter_se in ('best', 'random'):
            candidate = DecisionTreeRegressor(min_samples_leaf=min_samples_leaf_se,
                                              splitter=splitter_se)
            candidate.fit(x_train, y_train)
            r2_val = r2_score(y_val, candidate.predict(x_val))
            if r2_val > best_r2_val:
                best_r2_val = r2_val
                best_min_samples_leaf = min_samples_leaf_se
                best_splitter = splitter_se
    return best_r2_val, best_min_samples_leaf, best_splitter


def DT_reg(wheat_file_path, paddy_file_path, sampling_depth, cv_n):
    """Train, evaluate and persist decision-tree regressors over ``cv_n`` folds.

    For each fold the data is obtained from ``merge_crop_data``, the
    hyperparameters are chosen on the validation split, a model is fitted on
    the training split, saved under ``../model/`` and scored on the test
    split. The mean test metrics are appended as one CSV row to
    ``../result/result_predict_to_20cm.txt``. Finally a model using the
    hyperparameters with the best validation R2 across all folds is fitted on
    train + validation + test data and saved under ``../final_model/``.

    Args:
        wheat_file_path: Forwarded unchanged to ``merge_crop_data``.
        paddy_file_path: Forwarded unchanged to ``merge_crop_data``.
        sampling_depth: Forwarded to ``merge_crop_data`` and embedded in the
            saved model file names (converted with ``str``).
        cv_n: Number of cross-validation folds; must be at least 1.

    Raises:
        ValueError: If ``cv_n`` is smaller than 1.
    """
    if cv_n < 1:
        raise ValueError('cv_n must be at least 1, got ' + str(cv_n))

    # Per-fold test metrics.
    r2_test_list = []
    rmse_test_list = []
    mae_test_list = []
    rmd_test_list = []  # described in the original comment as "mean deviation"
    ef_test_list = []

    # Best validation R2 seen across all folds and the settings that produced it.
    best_r2 = float('-inf')
    final_best_min_samples_leaf = 1
    final_best_splitter = 'best'

    # Run the cross-validation.
    for cv_i in range(cv_n):
        # The trailing arguments (2, 1) are passed through as in the original;
        # their meaning is defined by merge_crop_data.
        x_train, y_train, x_val, y_val, x_test, y_test = merge_crop_data(
            wheat_file_path, paddy_file_path, cv_i, sampling_depth, 2, 1)

        best_r2_val, best_min_samples_leaf, best_splitter = _select_dt_hyperparams(
            x_train, y_train, x_val, y_val)
        if best_r2_val > best_r2:
            best_r2 = best_r2_val
            final_best_min_samples_leaf = best_min_samples_leaf
            final_best_splitter = best_splitter

        regressor = DecisionTreeRegressor(min_samples_leaf=best_min_samples_leaf,
                                          splitter=best_splitter)
        regressor.fit(x_train, y_train)

        # Save the fold model. The original reloaded it right away; the
        # in-memory estimator is equivalent, so the reload is dropped.
        joblib.dump(regressor, '../model/DT_' + str(sampling_depth) + '_' + str(cv_i) + '.pkl')

        # Predict once and reuse the result for every metric.
        y_pred = regressor.predict(x_test)
        r2_test_list.append(r2_score(y_test, y_pred))
        rmse_test_list.append(np.sqrt(mean_squared_error(y_test, y_pred)))
        mae_test_list.append(mean_absolute_error(y_test, y_pred))
        rmd_test_list.append(math_rmd(y_test, y_pred))
        ef_test_list.append(math_ef(y_test, y_pred))

    # Append the mean test metrics to the result file as one CSV row.
    metric_lists = (r2_test_list, rmse_test_list, mae_test_list, rmd_test_list, ef_test_list)
    row = 'DT,' + ','.join(str(round(np.mean(values), 2)) for values in metric_lists) + '\n'
    with open('../result/result_predict_to_20cm.txt', 'a') as f:
        f.write(row)

    # Merge all splits and train the final model.
    # NOTE(review): fold index 1 is hard-coded here, as in the original;
    # presumably any fold's three splits together cover the full data set.
    # Confirm against merge_crop_data.
    x_train, y_train, x_val, y_val, x_test, y_test = merge_crop_data(
        wheat_file_path, paddy_file_path, 1, sampling_depth, 2, 1)
    x_data = np.concatenate((x_train, x_val, x_test), axis=0)
    y_data = np.concatenate((y_train, y_val, y_test), axis=0)
    regressor = DecisionTreeRegressor(min_samples_leaf=final_best_min_samples_leaf,
                                      splitter=final_best_splitter)
    regressor.fit(x_data, y_data)

    # Save the final model.
    joblib.dump(regressor, '../final_model/DT_' + str(sampling_depth) + '.pkl')

# [Garbled fragment kept for reference] Annotate the code: def DT_reg(wheat_file_path, paddy_file_path, sampling_depth, cv_n) ... print DT; r2_test_list, rmse_test_list, mae_test_list, rmd_test_list (mean deviation), ef_test_list, best_r2 = 0 ...

# [Garbled fragment kept for reference] Annotate the code: def DT_reg(wheat_file_path, paddy_file_path, sampling_depth, cv_n) ... print DT; r2_test_list, rmse_test_list, mae_test_list, rmd_test_list (mean deviation), ef_test_list, best_r2 = 0 ...

# Import the required libraries

# Define the function