# Energy consumption forecasting and confidence-interval analysis based on a TSK fuzzy clustering model

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import skfuzzy as fuzz
from scipy.stats import t

# Compositional data matrix: 38 observations (rows) by 4 components (columns).
# The column order matches the labels given to the feature DataFrame built
# from this matrix further down: Coal, Gas, Others, Petroleum.
# NOTE(review): the rows look like consecutive yearly shares that sum to
# roughly 1 -- confirm the period and units against the data source.
data = np.array([[0.758, 0.171, 0.049, 0.022],
                 [0.758, 0.172, 0.047, 0.023],
                 [0.762, 0.17, 0.047, 0.021],
                 [0.762, 0.17, 0.047, 0.021],
                 [0.76, 0.171, 0.047, 0.021],
                 [0.762, 0.166, 0.051, 0.021],
                 [0.761, 0.171, 0.048, 0.02],
                 [0.757, 0.175, 0.049, 0.019],
                 [0.747, 0.182, 0.052, 0.019],
                 [0.75, 0.174, 0.057, 0.019],
                 [0.746, 0.175, 0.061, 0.018],
                 [0.747, 0.18, 0.055, 0.018],
                 [0.715, 0.204, 0.062, 0.017],
                 [0.696, 0.215, 0.067, 0.022],
                 [0.68, 0.232, 0.066, 0.022],
                 [0.661, 0.246, 0.068, 0.025],
                 [0.653, 0.243, 0.077, 0.027],
                 [0.661, 0.234, 0.078, 0.027],
                 [0.702, 0.201, 0.074, 0.023],
                 [0.702, 0.199, 0.076, 0.023],
                 [0.724, 0.178, 0.074, 0.024],
                 [0.724, 0.175, 0.074, 0.027],
                 [0.725, 0.17, 0.075, 0.03],
                 [0.715, 0.167, 0.084, 0.034],
                 [0.716, 0.164, 0.085, 0.035],
                 [0.692, 0.174, 0.094, 0.04],
                 [0.702, 0.168, 0.084, 0.046],
                 [0.685, 0.17, 0.097, 0.048],
                 [0.674, 0.171, 0.102, 0.053],
                 [0.658, 0.173, 0.113, 0.056],
                 [0.638, 0.184, 0.12, 0.058],
                 [0.622, 0.187, 0.13, 0.061],
                 [0.606, 0.189, 0.136, 0.069],
                 [0.59, 0.189, 0.145, 0.076],
                 [0.577, 0.19, 0.153, 0.08],
                 [0.569, 0.188, 0.159, 0.084],
                 [0.559, 0.186, 0.167, 0.088],
                 [0.562, 0.179, 0.175, 0.084]])

# Build the feature matrix (LCC method): every row is the change between two
# consecutive observations, i.e. data[i + 1] - data[i]. np.diff along axis 0
# computes exactly that without a manual index loop.
feature_matrix = np.diff(data, axis=0)

# Wrap the features in a DataFrame with one named column per component.
df = pd.DataFrame(feature_matrix, columns=['Coal', 'Gas', 'Others', 'Petroleum'])

# Target variable: the Petroleum column (index 3) of the later observation
# in each consecutive pair.
target = data[1:, 3]

# Split BEFORE scaling. Fitting the scaler on all rows would let the min/max
# of the held-out rows influence the training features (data leakage).
# The split depends only on the number of rows and random_state, so the same
# rows end up in the train and test sets as when splitting scaled data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    df, target, test_size=0.13, random_state=42
)

# Normalisation: learn the min/max on the training rows only, then apply the
# same transformation to the test rows and to the full feature matrix.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Scaled version of every feature row, kept for the forecasting step that
# starts from the most recent row (df_scaled[-1]).
df_scaled = scaler.transform(df)

class TSK_FS():
    # ... (implementation omitted here; the original note says it is the same
    # as the code shown previously)
    # NOTE(review): a class body that contains only a comment is not valid
    # Python, so the omitted implementation must be restored before this
    # script can run. The script constructs the class with the keyword
    # arguments n_cluster and C and calls fit(X, y) and predict(X) on it.

# Instantiate the TSK model with n_cluster=20 and C=0.1 and train it on the
# training split.
model = TSK_FS(n_cluster=20, C=0.1)
model.fit(X_train, y_train)

# Predict the held-out test samples.
y_pred = model.predict(X_test)

# CRMSE is reported as the square root of the mean squared error between the
# test targets and the test predictions.
test_mse = mean_squared_error(y_test, y_pred)
crmse = np.sqrt(test_mse)
print('CRMSE:', crmse)

# Forecast the Petroleum value for the next 10 years by feeding the model its
# own output, starting from the most recent scaled feature row.
num_years = 10
X_future = df_scaled[-1][np.newaxis, :]

y_future = []
for _ in range(num_years):
    y_pred_future = model.predict(X_future)
    y_future.append(y_pred_future[0])
    # Slide the feature row one position to the left and put the new
    # prediction in the last slot, keeping a single-row 2-D input.
    # NOTE(review): the feature columns are scaled row-to-row differences of
    # four separate components, while the appended value is an unscaled model
    # output; shifting them like a lag window mixes their meanings. Confirm
    # that this recursion is what is intended.
    remaining = X_future[0, 1:]
    newest = np.ravel(y_pred_future)
    X_future = np.concatenate((remaining, newest)).reshape(1, -1)

# 95% confidence interval for the 10 forecast Petroleum values, derived from
# the errors observed on the test split.
confidence = 0.95
n = len(y_test)

# Two-sided Student-t critical value with n - 1 degrees of freedom.
t_value = t.ppf((1 + confidence) / 2, df=n - 1)

# Spread of the test errors: root of the summed squared residuals over n - 1.
residuals = y_pred - y_test
std_dev = np.sqrt(np.sum(np.square(residuals)) / (n - 1))

# NOTE(review): t * s / sqrt(n) is the half-width of an interval for the MEAN
# test error, and the same margin is applied to every forecast horizon.
# Confirm that this is the intended uncertainty measure for individual
# forecasts.
margin_of_error = (t_value * std_dev) / np.sqrt(n)

lower_bound = [forecast - margin_of_error for forecast in y_future]
upper_bound = [forecast + margin_of_error for forecast in y_future]

print('未来 10 年的 Petroleum 预测值 (95% 置信区间):')
for year, (y_pred_future, lb, ub) in enumerate(zip(y_future, lower_bound, upper_bound), start=1):
    # zip stops at the shortest input, so at most len(y_future) rows are printed.
    print(f'Year {year}: {y_pred_future:.3f} ({lb:.3f} - {ub:.3f})')