# TSK模糊推理系统预测煤炭未来趋势：代码示例和置信区间计算

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
import skfuzzy as fuzz
from scipy.stats import t

# Composition data matrix: each row holds four component shares
# (named Coal, Petroleum, Others, Gas when the DataFrame is built below).
# NOTE(review): the bare `...` inside the literal is a placeholder for rows
# that were elided from this listing -- restore the full data before running.
data = np.array([[0.758, 0.171, 0.049, 0.022],
                 [0.758, 0.172, 0.047, 0.023],
                 [0.762, 0.17, 0.047, 0.021],
                 ...
                 [0.559, 0.186, 0.167, 0.088],
                 [0.562, 0.179, 0.175, 0.084]])

# Model inputs: difference between each row and the row before it.
# NOTE(review): the original note here reads "for the LCC method change 1 to
# 2/3/4"; presumably that refers to the row offset `1` used in the two slices
# below and in `target` -- confirm before changing it.
feature_matrix = data[1:] - data[:-1]

# Wrap the differences in a DataFrame with one named column per component.
df = pd.DataFrame(feature_matrix, columns=['Coal', 'Petroleum', 'Others', 'Gas'])

# Target: the second column of `data`, aligned with the later row of each
# difference pair.
# NOTE(review): the second column is the one labelled 'Petroleum' above --
# confirm this is the intended series.
target = data[1:, 1]

# Split BEFORE scaling so the scaler never sees the held-out rows; fitting
# MinMaxScaler on the full matrix would leak the test-set min/max into
# training and into the reported test metrics.  The split indices depend only
# on the sample count and random_state, so they match a split of scaled data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    df, target, test_size=0.13, random_state=42)

# Min-max scaling learnt from the training rows only, then applied everywhere.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full scaled feature matrix (same scaler), kept for code that needs the most
# recent observation as a forecasting seed.
df_scaled = scaler.transform(df)

class TSK_FS():
    """First-order TSK fuzzy system fitted by ridge-regularised least squares.

    Fuzzy C-means clustering of the training inputs yields one rule per
    cluster (a centre plus a membership-weighted per-feature spread).  Each
    sample is expanded into rule-weighted copies of ``[x, 1]`` and the
    consequent parameters are obtained in closed form.

    Parameters
    ----------
    n_cluster : int
        Number of fuzzy clusters (rules).
    C : float
        Ridge penalty added to the diagonal of the normal equations.
    random_state : int or None
        Seed forwarded to ``fuzz.cmeans`` so that repeated fits give the same
        clustering.  ``None`` (default) keeps the unseeded behaviour.
    """

    def __init__(self, n_cluster=20, C=0.1, random_state=None):
        self.n_cluster = n_cluster
        self.lamda = C
        self.random_state = random_state
        self.trained = False

    def fit(self, X_train, y_train):
        """Cluster ``X_train`` and solve for the rule consequent parameters.

        Parameters
        ----------
        X_train : array-like, shape (n_samples, n_features)
        y_train : array-like with ``n_samples`` entries

        Raises
        ------
        AssertionError
            If ``X_train`` and ``y_train`` have different sample counts.
        """
        X_train = np.asarray(X_train, dtype=float)
        n_samples = X_train.shape[0]
        assert (n_samples == len(y_train)), 'X_train and y_train samples num must be same'

        centers, delta = self.__fcm__(X_train, self.n_cluster)
        self.centers = centers
        self.delta = delta

        # Ridge solution: pg = (Xg^T Xg + lambda * I)^+ Xg^T y
        xg = self.__gaussian_feature__(X_train, centers, delta)
        gram = np.dot(xg.T, xg)
        regularised = gram + self.lamda * np.eye(gram.shape[0])
        self.pg = np.linalg.pinv(regularised).dot(xg.T).dot(y_train)
        self.trained = True

    def predict(self, X_test):
        """Return the model output for every row of ``X_test``.

        Raises
        ------
        AssertionError
            If called before :meth:`fit`.
        """
        assert(self.trained), 'Error when predict, use fit first!'
        xg_test = self.__gaussian_feature__(X_test, self.centers, self.delta)
        return xg_test.dot(self.pg)

    def __fcm__(self, data, n_cluster):
        """Run fuzzy C-means and derive each cluster's per-feature spread.

        Returns
        -------
        centers : ndarray, shape (n_cluster, n_features)
        delta : ndarray, shape (n_cluster, n_features)
            Membership-weighted mean squared deviation from the centre.
        """
        data = np.asarray(data, dtype=float)
        # cmeans expects features along the first axis, hence the transpose.
        # getattr keeps instances created without `random_state` working.
        centers, mem, *_ = fuzz.cmeans(
            data.T, n_cluster, 2.0, error=1e-5, maxiter=200,
            seed=getattr(self, 'random_state', None))

        # (cluster, sample, feature) squared deviations, weighted by membership.
        sq_dev = (data[np.newaxis, :, :] - centers[:, np.newaxis, :]) ** 2
        weighted = mem[:, :, np.newaxis] * sq_dev
        delta = weighted.sum(axis=1) / mem.sum(axis=1, keepdims=True)
        return centers, delta

    def __gaussian_feature__(self, data, centers, delta):
        """Expand samples into normalised-firing-strength-weighted features.

        For each rule ``i`` the firing strength is
        ``exp(-sum_j (x_j - c_ij)^2 / delta_ij)``, normalised over rules.  The
        output row is the concatenation over rules of ``mu_i * [x, 1]``, so it
        has ``n_cluster * (n_features + 1)`` columns.
        """
        n_cluster = self.n_cluster
        data = np.asarray(data, dtype=float)
        centers = np.asarray(centers, dtype=float)[:n_cluster]
        delta = np.asarray(delta, dtype=float)[:n_cluster]
        n_samples = data.shape[0]
        n_rules = centers.shape[0]

        # A feature with zero spread would give 0/0; floor the denominator.
        safe_delta = np.maximum(delta, np.finfo(float).eps)

        diff = data[:, np.newaxis, :] - centers[np.newaxis, :, :]
        log_fire = -np.sum(diff ** 2 / safe_delta[np.newaxis, :, :], axis=2)

        # Subtracting the row maximum leaves the normalised ratios unchanged
        # but stops every strength underflowing to 0 (-> 0/0) for samples far
        # from all centres; the nearest rule then dominates instead.
        log_fire = log_fire - np.max(log_fire, axis=1, keepdims=True)
        mu_a = np.exp(log_fire)
        mu_a = mu_a / np.sum(mu_a, axis=1, keepdims=True)

        # Append the bias column, then weight [x, 1] by each rule's strength.
        data_1 = np.concatenate((data, np.ones([n_samples, 1])), axis=1)
        data_fs = (mu_a[:, :, np.newaxis] * data_1[:, np.newaxis, :]).reshape(
            n_samples, n_rules * data_1.shape[1])

        # Any remaining NaN (e.g. NaN in the input) is replaced by 1e-5.
        data_fs = np.where(np.isnan(data_fs), 1e-5, data_fs)
        return data_fs

# Build the TSK model (20 clusters, C=0.1) and train it on the training split.
model = TSK_FS(n_cluster=20, C=0.1)
model.fit(X_train, y_train)

# Predict the held-out samples and show the raw predictions.
y_pred = model.predict(X_test)
print('预测结果:', y_pred)

# Test-set error metrics: root-mean-square error and mean absolute
# percentage error (in percent).
test_mse = mean_squared_error(y_test, y_pred)
crmse = np.sqrt(test_mse)
relative_error = (y_test - y_pred) / y_test
cmape = np.mean(np.abs(relative_error)) * 100

print('CRMSE:', crmse)
print('CMAPE:', cmape)

# Ten-step forecast, seeded with the last scaled feature row.
# NOTE(review): each step drops the first feature and appends the model's raw
# output to the (scaled) feature vector -- confirm this is the intended way to
# roll the input forward.
num_years = 10
X_future = df_scaled[-1].reshape(1, -1)
y_future = []
while len(y_future) < num_years:
    y_pred_future = model.predict(X_future)
    y_future.append(y_pred_future[0])
    rolled = np.concatenate((X_future[:, 1:].ravel(), np.ravel(y_pred_future)))
    X_future = rolled.reshape(1, -1)

# 95% interval around each forecast, derived from the test-set residuals.
# NOTE(review): the half-width is t * s / sqrt(n), i.e. the same value for
# every horizon -- confirm that this is the interval that is wanted.
confidence = 0.95
n = len(y_test)
upper_quantile = (1 + confidence) / 2
t_value = t.ppf(upper_quantile, n - 1)
squared_residuals = (y_pred - y_test) ** 2
std_dev = np.sqrt(np.sum(squared_residuals) / (n - 1))
margin_of_error = t_value * std_dev / np.sqrt(n)

lower_bound = []
upper_bound = []
for point in y_future:
    lower_bound.append(point - margin_of_error)
    upper_bound.append(point + margin_of_error)

# NOTE(review): the label below says "Coal", while the target defined earlier
# in this file is the second data column -- confirm which series is meant.
print('未来10年的Coal预测值:')
for year, (y_pred_future, lb, ub) in enumerate(
        zip(y_future, lower_bound, upper_bound), start=1):
    print(f'Year {year}: {y_pred_future:.3f} ({lb:.3f} - {ub:.3f})')
# 这段代码将预测Coal未来10年的值，并计算每个预测值的95%置信区间。希望对你有帮助！