使用CLR转换预测未来10年成分数据的置信区间
如果您想将代码中的ILR转换改为CLR(Centered Log-Ratio)转换,请使用以下代码进行修改:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import skfuzzy as fuzz
class TSK_FS:
    """Zero-order Takagi-Sugeno-Kang fuzzy regressor with fuzzy c-means rules.

    Each fuzzy c-means cluster becomes one rule.  A rule has a Gaussian
    antecedent (centre and width) and a constant consequent equal to the
    membership-weighted mean of the training targets.  Predictions are the
    firing-strength-weighted average of the rule consequents.

    Attributes set by ``fit``:
        centers: rule centres, shape (n_cluster, n_features).
        delta: rule widths, shape (n_cluster,).
        w: rule consequents, shape (n_cluster,) for a 1-D target or
            (n_targets, n_cluster) for a 2-D target.
    """

    # Fuzzifier ``m`` handed to fuzzy c-means; also used when estimating widths.
    _fuzzifier = 2

    def __init__(self, n_cluster, C):
        """Store the hyper-parameters.

        Args:
            n_cluster: number of clusters, i.e. number of fuzzy rules.
            C: exponent applied to the cluster memberships before they are
                renormalised and used to average the targets.
        """
        self.n_cluster = n_cluster
        self.C = C

    def __gaussian_feature__(self, data, centers, delta):
        """Return the Gaussian firing strength of every rule for every sample.

        Args:
            data: samples, shape (n_samples, n_features).
            centers: rule centres, shape (n_cluster, n_features).
            delta: rule widths, shape (n_cluster,).

        Returns:
            Array of shape (n_samples, n_cluster) whose entry (i, j) is
            ``exp(-||data[i] - centers[j]||**2 / (2 * delta[j]**2))``.
        """
        data = np.asarray(data, dtype=float)
        centers = np.asarray(centers, dtype=float)
        delta = np.asarray(delta, dtype=float)
        # Broadcasting builds all sample/centre differences in one step.
        diff = data[:, np.newaxis, :] - centers[np.newaxis, :, :]
        sq_dist = (diff ** 2).sum(axis=2)
        return np.exp(-sq_dist / (2 * delta ** 2))

    def _fit_rules(self, X_train, y_train, centers, u):
        """Derive rule widths and consequents from a fuzzy partition.

        Args:
            X_train: training samples, shape (n_samples, n_features).
            y_train: training targets, shape (n_samples,) or
                (n_samples, n_targets).
            centers: cluster centres, shape (n_cluster, n_features).
            u: membership matrix, shape (n_samples, n_cluster).
        """
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train, dtype=float)
        centers = np.asarray(centers, dtype=float)
        u = np.asarray(u, dtype=float)

        # Memberships raised to C and renormalised per sample.
        powered = u ** self.C
        mu_a = powered / np.sum(powered, axis=1, keepdims=True)

        # Rule width: membership-weighted RMS distance of the samples to the
        # cluster centre.  Fuzzy c-means itself does not return any width.
        diff = X_train[:, np.newaxis, :] - centers[np.newaxis, :, :]
        sq_dist = (diff ** 2).sum(axis=2)
        fuzzy_weight = u ** self._fuzzifier
        delta = np.sqrt((fuzzy_weight * sq_dist).sum(axis=0) / fuzzy_weight.sum(axis=0))
        # A zero width would turn the Gaussian exponent into 0/0.
        delta = np.maximum(delta, 1e-12)

        # Rule consequent: membership-weighted mean of the targets.
        w = (mu_a.T @ y_train).T / mu_a.sum(axis=0)

        self.centers = centers
        self.delta = delta
        self.w = w

    def fit(self, X_train, y_train):
        """Cluster the inputs and learn one consequent per rule.

        Args:
            X_train: training samples, shape (n_samples, n_features).
            y_train: training targets, shape (n_samples,) or
                (n_samples, n_targets).

        Raises:
            ValueError: if ``X_train`` is not 2-D or the sample counts differ.
        """
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train, dtype=float)
        if X_train.ndim != 2:
            raise ValueError("X_train must be a 2-D array of shape (n_samples, n_features)")
        if y_train.shape[0] != X_train.shape[0]:
            raise ValueError("X_train and y_train must contain the same number of samples")

        # cmeans takes data as (n_features, n_samples) and returns seven
        # values (cntr, u, u0, d, jm, p, fpc); only the centres and the final
        # partition matrix are needed here.
        cntr, u, *_ = fuzz.cluster.cmeans(
            X_train.T, self.n_cluster, self._fuzzifier,
            error=0.005, maxiter=1000, init=None)

        # u comes back as (n_cluster, n_samples); the rules need it per sample.
        self._fit_rules(X_train, y_train, cntr, u.T)

    def predict(self, X_test):
        """Predict targets for new samples.

        Args:
            X_test: samples, shape (n_samples, n_features).

        Returns:
            Array of shape (n_samples,) for a model fitted on a 1-D target, or
            (n_samples, n_targets) for a 2-D target.
        """
        firing = self.__gaussian_feature__(X_test, self.centers, self.delta)
        total = firing.sum(axis=1, keepdims=True)
        # Far from every centre all firing strengths underflow to zero; fall
        # back to equal rule weights there instead of dividing by zero.
        has_support = total > 0
        normalized = np.where(
            has_support,
            firing / np.where(has_support, total, 1.0),
            1.0 / firing.shape[1],
        )
        return np.dot(normalized, self.w.T)
# Compositional data matrix: 38 rows by 4 columns of shares; each row sums to
# approximately 1.  Later code labels the first three columns 'Coal',
# 'Petroleum' and 'Others' and uses the last column as the target.
# NOTE(review): rows are presumably consecutive yearly observations - confirm.
data = np.array([[0.758, 0.171, 0.049, 0.022],
[0.758, 0.172, 0.047, 0.023],
[0.762, 0.17, 0.047, 0.021],
[0.762, 0.17, 0.047, 0.021],
[0.76, 0.171, 0.047, 0.021],
[0.762, 0.166, 0.051, 0.021],
[0.761, 0.171, 0.048, 0.02],
[0.757, 0.175, 0.049, 0.019],
[0.747, 0.182, 0.052, 0.019],
[0.75, 0.174, 0.057, 0.019],
[0.746, 0.175, 0.061, 0.018],
[0.747, 0.18, 0.055, 0.018],
[0.715, 0.204, 0.062, 0.017],
[0.696, 0.215, 0.067, 0.022],
[0.68, 0.232, 0.066, 0.022],
[0.661, 0.246, 0.068, 0.025],
[0.653, 0.243, 0.077, 0.027],
[0.661, 0.234, 0.078, 0.027],
[0.702, 0.201, 0.074, 0.023],
[0.702, 0.199, 0.076, 0.023],
[0.724, 0.178, 0.074, 0.024],
[0.724, 0.175, 0.074, 0.027],
[0.725, 0.17, 0.075, 0.03],
[0.715, 0.167, 0.084, 0.034],
[0.716, 0.164, 0.085, 0.035],
[0.692, 0.174, 0.094, 0.04],
[0.702, 0.168, 0.084, 0.046],
[0.685, 0.17, 0.097, 0.048],
[0.674, 0.171, 0.102, 0.053],
[0.658, 0.173, 0.113, 0.056],
[0.638, 0.184, 0.12, 0.058],
[0.622, 0.187, 0.13, 0.061],
[0.606, 0.189, 0.136, 0.069],
[0.59, 0.189, 0.145, 0.076],
[0.577, 0.19, 0.153, 0.08],
[0.569, 0.188, 0.159, 0.084],
[0.559, 0.186, 0.167, 0.088],
[0.562, 0.179, 0.175, 0.084]])
# Forecast the last column's share for the next ten rows (years) and report a
# 95 % prediction interval for each horizon.
#
# The model maps CLR features to a single target value, so it cannot generate
# its own future feature vectors.  Instead of recursing, one model is trained
# per horizon h on pairs (features of row t, target of row t + h) and then
# applied to the last observed row.  This expects `model.predict` to return
# one value per sample.
n_future_years = 10
z_score = 1.96  # two-sided 95 % quantile of the standard normal distribution

# CLR transform of the three-part sub-composition: the log of each part minus
# the mean of the logs of the SAME row (i.e. log(part / row geometric mean)).
# All shares must be strictly positive for the logarithm to exist.
log_parts = np.log(data[:, :-1])
clr_matrix = log_parts - log_parts.mean(axis=1, keepdims=True)

# Feature matrix as a DataFrame
df = pd.DataFrame(clr_matrix, columns=['Coal', 'Petroleum', 'Others'])

# Target variable: the last column of the composition
target = data[:, -1]

# Scale every feature to [0, 1]
scaler = MinMaxScaler()
df_scaled = scaler.fit_transform(df)

# Features of the last observed row; kept 2-D because predict expects a matrix.
latest_features = df_scaled[-1:]

future_pred = np.zeros(n_future_years)
std_residuals = np.zeros(n_future_years)
for step in range(1, n_future_years + 1):
    # Pair the features of row t with the target observed `step` rows later.
    X_lagged = df_scaled[:-step]
    y_ahead = target[step:]

    X_train, X_test, y_train, y_test = train_test_split(
        X_lagged, y_ahead, test_size=0.13, random_state=42)

    model = TSK_FS(n_cluster=20, C=0.1)
    model.fit(X_train, y_train)

    # Hold-out residuals estimate the forecast error at this horizon.
    residuals = y_test - np.ravel(model.predict(X_test))
    std_residuals[step - 1] = np.std(residuals, ddof=1)
    future_pred[step - 1] = np.ravel(model.predict(latest_features))[0]

# Prediction interval for a single future value: the residual spread is NOT
# divided by sqrt(n); that would describe the uncertainty of a mean instead.
margin_of_error = z_score * std_residuals
# A share of a composition cannot leave [0, 1].
lower_bound = np.clip(future_pred - margin_of_error, 0.0, 1.0)
upper_bound = np.clip(future_pred + margin_of_error, 0.0, 1.0)

# Print the interval for each of the next ten years
print('未来十年的预测置信区间:')
for year, (lower, upper) in enumerate(zip(lower_bound, upper_bound), start=1):
    print(f'年份: {year}, 下界: {lower}, 上界: {upper}')
在以上代码中,我们使用CLR(Centered Log-Ratio)转换替换了ILR转换。CLR转换对每个样本的各成分取对数,再减去该样本各成分对数的均值(等价于将每个成分除以该样本各成分的几何均值后取对数)。
非常抱歉之前的错误带来的困扰,希望以上修正的代码能够帮助您计算预测未来10年成分数据的置信区间。如果您还有任何其他问题,请随时提问。
原文地址: https://www.cveoy.top/t/topic/6hy 著作权归作者所有。请勿转载和采集!