import pandas as pd; from pmdarima.arima import auto_arima; from datetime import datetime, timedelta; from sklearn.metrics import mean_absolute_error, mean_squared_error; from sklearn.preprocessing import LabelEncoder …
在进行以下操作之前,需要先检查selected_product数据是否为空,如果为空需要跳过该商品的预测。解决方法如下:
- 在进行selected_product数据处理前,加入以下代码:
if selected_product.empty: continue
- 在添加到结果DataFrame中之前,加入以下代码:
if merged_df.empty: continue
完整代码如下:
from datetime import datetime, timedelta

import pandas as pd
from pmdarima.arima import auto_arima
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import LabelEncoder
# 读取需求数据和订单数据
# Load the order history used for fitting and the table of item codes to forecast.
order_train = pd.read_csv('data1/order_train1.csv')
predict_sku = pd.read_csv('data1/predict_sku1.csv')
# 根据商品编号进行预测并保存结果到result1.xlsx中
# Forecast January-March 2019 demand for every item code in predict_sku and
# collect one result row per item in res_df.
res_columns = [
    'sales_region_code',
    'item_code',
    '2019年1月预测需求量',
    '2019年2月预测需求量',
    '2019年3月预测需求量',
]

# The forecast window ends in March 2019 and spans three calendar months,
# one per demand column in res_columns.
FORECAST_END_YEAR = 2019
FORECAST_END_MONTH = 3
N_FORECAST_MONTHS = 3


def _forecast_monthly_demand(monthly_sales, n_periods):
    """Fit a seasonal ARIMA model on monthly totals and forecast ahead.

    Args:
        monthly_sales: Series of ordered quantity per calendar month,
            in chronological order.
        n_periods: Number of months to forecast past the end of the series.

    Returns:
        A list of ``n_periods`` forecast values, earliest month first.
    """
    # m=12 with D=1 models a yearly cycle on monthly data.
    # NOTE(review): presumably auto_arima raises when the history is too
    # short for one seasonal difference -- confirm, and decide whether such
    # items should be skipped instead.
    model = auto_arima(
        monthly_sales.values,
        start_p=0, start_q=0, max_p=5, max_q=5, m=12,
        start_P=0, seasonal=True, D=1, trace=True,
        error_action='ignore', suppress_warnings=True, stepwise=True,
    )
    # list() gives plain positional access whether predict() returns an
    # ndarray or a Series.
    return list(model.predict(n_periods=n_periods))


# The channel-name encoding does not depend on the item, so it is done once
# here rather than being refit on the full frame for every item.
order_train['sales_chan_name'] = LabelEncoder().fit_transform(order_train['sales_chan_name'])

result_rows = []
for item_code in predict_sku['item_code'].unique():
    selected_product = order_train.loc[order_train['item_code'] == item_code].copy()

    # Items with no order history cannot be modelled; skip them.
    if selected_product.empty:
        continue

    # Dates are parsed on the per-item copy only; nothing reads them back
    # from order_train, so the shared frame is left untouched.
    selected_product['order_date'] = pd.to_datetime(selected_product['order_date'])
    sales_region_code = selected_product.iloc[0]['sales_region_code']

    # Total ordered quantity per calendar month (month-end timestamps).
    monthly_sales = selected_product.groupby(
        pd.Grouper(key='order_date', freq='M')
    )['ord_qty'].sum()

    # The model forecasts from the item's last observed month. If that month
    # is earlier than December 2018, forecast across the gap so that the last
    # three steps land on January-March 2019.
    last_month = monthly_sales.index[-1]
    months_to_window_end = (
        (FORECAST_END_YEAR - last_month.year) * 12
        + (FORECAST_END_MONTH - last_month.month)
    )
    horizon = max(months_to_window_end, N_FORECAST_MONTHS)
    forecast = _forecast_monthly_demand(monthly_sales, horizon)

    # Each demand column is one calendar month, so the three monthly
    # forecasts fill the row directly.
    result_rows.append([sales_region_code, item_code] + forecast[-N_FORECAST_MONTHS:])

# Build the frame once from the collected rows instead of growing it row by row.
res_df = pd.DataFrame(result_rows, columns=res_columns)
# 保存结果到文件
# Write the collected forecasts to an Excel workbook, omitting the row index.
res_df.to_excel('result1.xlsx', index=False)
原文地址: https://www.cveoy.top/t/topic/clgm 著作权归作者所有。请勿转载和采集!