# 使用网格搜索优化SARIMA模型超参数预测月平均气温
# 导入所需的库
from math import sqrt # 平方根函数
from multiprocessing import cpu_count # 获取CPU核心数
from joblib import Parallel # 并行计算库
from joblib import delayed # 延迟执行函数库
from warnings import catch_warnings # 捕获警告信息的库
from warnings import filterwarnings # 过滤警告信息的库
from statsmodels.tsa.statespace.sarimax import SARIMAX # SARIMA模型库
from sklearn.metrics import mean_squared_error # 均方根误差库
from pandas import read_csv # 读取csv文件的库
# SARIMA模型的单步预测
def sarima_forecast(history, config):
    """Fit a SARIMA model on ``history`` and return a one-step forecast.

    Args:
        history: Observations seen so far, oldest first.
        config: Triple ``(order, seasonal_order, trend)`` forwarded to
            ``SARIMAX``.

    Returns:
        The first element of the prediction requested for index
        ``len(history)``.
    """
    order, seasonal_order, trend = config
    next_step = len(history)
    # Stationarity/invertibility enforcement is switched off so that the
    # configuration is fitted as given.
    fitted = SARIMAX(
        history,
        order=order,
        seasonal_order=seasonal_order,
        trend=trend,
        enforce_stationarity=False,
        enforce_invertibility=False,
    ).fit(disp=False)
    # start == end == len(history): ask for a single step only.
    forecast = fitted.predict(next_step, next_step)
    return forecast[0]
# 均方根误差(RMSE)
def measure_rmse(actual, predicted):
    """Return the root mean squared error of ``predicted`` against ``actual``."""
    mse = mean_squared_error(actual, predicted)
    return sqrt(mse)
# 将单变量数据集拆分为训练集和测试集
def train_test_split(data, n_test):
    """Split a univariate series into a leading train and trailing test part.

    Args:
        data: Sliceable sequence that supports ``len()`` (list, array, ...).
        n_test: Number of trailing observations to place in the test part.

    Returns:
        Tuple ``(train, test)`` where ``test`` holds the last ``n_test``
        items of ``data`` and ``train`` holds everything before them. If
        ``n_test`` exceeds ``len(data)``, ``train`` is empty and ``test`` is
        all of ``data``.

    Raises:
        ValueError: If ``n_test`` is negative.
    """
    if n_test < 0:
        raise ValueError("n_test must be non-negative, got %r" % (n_test,))
    # Slice at an explicit index: ``data[:-0]`` is ``data[:0]``, which would
    # hand back an empty train part and put everything into the test part
    # when n_test == 0.
    split = max(len(data) - n_test, 0)
    return data[:split], data[split:]
# 逐步验证单变量数据的预测结果
def walk_forward_validation(data, n_test, cfg):
    """Score one configuration with walk-forward validation.

    The last ``n_test`` observations are forecast one at a time. After each
    forecast the true observation is appended to the history, so the next
    model fit sees it.

    Args:
        data: The full series, oldest observation first.
        n_test: Number of trailing observations to forecast.
        cfg: Model configuration passed through to ``sarima_forecast``.

    Returns:
        The RMSE between the test observations and their forecasts.
    """
    train, test = train_test_split(data, n_test)
    # Copy into a list so appending observations never touches ``data``.
    history = list(train)
    predictions = []
    for observed in test:
        # Forecast first, then reveal the true value for the next step.
        predictions.append(sarima_forecast(history, cfg))
        history.append(observed)
    return measure_rmse(test, predictions)
# 评估模型,如果出错则返回None
def score_model(data, n_test, cfg, debug=False):
    """Evaluate one configuration and return ``(key, rmse)``.

    Args:
        data: The full series handed to ``walk_forward_validation``.
        n_test: Number of trailing observations used for testing.
        cfg: Model configuration to evaluate.
        debug: When true, warnings are shown and any exception raised during
            evaluation propagates to the caller.

    Returns:
        Tuple ``(key, result)`` where ``key`` is ``str(cfg)`` and ``result``
        is the RMSE, or ``None`` when evaluation failed in non-debug mode.
    """
    key = str(cfg)
    result = None
    if debug:
        # Debug mode: show every warning and fail loudly on errors.
        result = walk_forward_validation(data, n_test, cfg)
    else:
        try:
            # Warnings are silenced during the search because they would
            # clutter the progress output.
            with catch_warnings():
                filterwarnings('ignore')
                result = walk_forward_validation(data, n_test, cfg)
        except Exception:
            # A configuration that cannot be evaluated is reported as None.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit still stop the search.
            result = None
    # Only successfully scored configurations are printed.
    if result is not None:
        print(' > Model[%s] %.3f' % (key, result))
    return (key, result)
# 网格搜索配置列表
def grid_search(data, cfg_list, n_test, parallel=True):
    """Score every configuration and return the results sorted by error.

    Args:
        data: The full series handed to ``score_model``.
        cfg_list: Iterable of model configurations to evaluate.
        n_test: Number of trailing observations used for testing.
        parallel: When true, configurations are evaluated through joblib
            with one job per CPU core; otherwise they run sequentially.

    Returns:
        List of ``(key, rmse)`` tuples in ascending RMSE order.
        Configurations whose score is ``None`` are left out.
    """
    if parallel:
        executor = Parallel(n_jobs=cpu_count(), backend='multiprocessing')
        tasks = (delayed(score_model)(data, n_test, cfg) for cfg in cfg_list)
        scores = executor(tasks)
    else:
        scores = [score_model(data, n_test, cfg) for cfg in cfg_list]
    # Drop configurations that produced no score; identity comparison is the
    # correct test for None.
    scores = [entry for entry in scores if entry[1] is not None]
    # Lowest error first.
    scores.sort(key=lambda entry: entry[1])
    return scores
# 创建一组要尝试的SARIMA配置
def sarima_configs(seasonal=(0,)):
    """Build the list of SARIMA configurations to try.

    Args:
        seasonal: Iterable of seasonal period values ``m`` to include.
            Defaults to the single value ``0``.

    Returns:
        List of ``[(p, d, q), (P, D, Q, m), t]`` entries covering every
        combination of the parameter grids below, with ``m`` varying fastest
        and ``p`` slowest.
    """
    # Imported locally so this function carries its own dependency.
    from itertools import product

    p_params = [0, 1, 2]
    d_params = [0, 1]
    q_params = [0, 1, 2]
    t_params = ['n', 'c', 't', 'ct']
    P_params = [0, 1, 2]
    D_params = [0, 1]
    Q_params = [0, 1, 2]
    m_params = list(seasonal)
    # product() iterates with the right-most argument varying fastest, which
    # matches nesting the loops in this argument order.
    return [
        [(p, d, q), (P, D, Q, m), t]
        for p, d, q, t, P, D, Q, m in product(
            p_params, d_params, q_params, t_params,
            P_params, D_params, Q_params, m_params,
        )
    ]
if __name__ == '__main__':
    # Load the dataset; the first CSV column is used as the index.
    frame = read_csv('monthly-mean-temp.csv', header=0, index_col=0)
    # Keep only the most recent five years (5 * 12 rows).
    data = frame.values[-(5 * 12):]
    # Number of trailing observations held out for testing.
    n_test = 12
    # Candidate configurations for seasonal periods 0 and 12.
    cfg_list = sarima_configs(seasonal=[0, 12])
    # Run the search sequentially.
    scores = grid_search(data, cfg_list, n_test, parallel=False)
    print('done')
    # Show the three lowest-error configurations with their errors.
    top_three = scores[:3]
    for entry in top_three:
        print(*entry)
# 原文地址: https://www.cveoy.top/t/topic/eeR6 著作权归作者所有。请勿转载和采集!