In [1]:
#XGBOOST_NO_TUNING
#Private Score
#12.79118
#Public Score
#14.03881



import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from datetime import datetime
from time import time
from datetime import timedelta
In [2]:
start = time()

1.导入数据

In [3]:
train = pd.read_csv('train.csv', nrows=None)
test = pd.read_csv('test.csv', nrows=None).set_index('id')

2.数据探索

2.1.检查空值与重复

In [4]:
train.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 913000 entries, 0 to 912999
Data columns (total 4 columns):
date     913000 non-null object
store    913000 non-null int64
item     913000 non-null int64
sales    913000 non-null int64
dtypes: int64(3), object(1)
memory usage: 27.9+ MB
In [5]:
test.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 45000 entries, 0 to 44999
Data columns (total 3 columns):
date     45000 non-null object
store    45000 non-null int64
item     45000 non-null int64
dtypes: int64(2), object(1)
memory usage: 1.4+ MB
In [6]:
#去重操作
train.drop_duplicates(subset=['date', 'store', 'item'], keep='first', inplace=True)
test.drop_duplicates(subset=['date', 'store', 'item'], keep='first', inplace=True)
In [7]:
train.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 913000 entries, 0 to 912999
Data columns (total 4 columns):
date     913000 non-null object
store    913000 non-null int64
item     913000 non-null int64
sales    913000 non-null int64
dtypes: int64(3), object(1)
memory usage: 34.8+ MB
In [8]:
test.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 45000 entries, 0 to 44999
Data columns (total 3 columns):
date     45000 non-null object
store    45000 non-null int64
item     45000 non-null int64
dtypes: int64(2), object(1)
memory usage: 1.4+ MB

2.2. 检查分布

In [9]:
#将date列转换为datetime数据类型
train.date=pd.to_datetime(train.date)
#test.date=pd.to_datetime(test.date)
In [10]:
plt.hist(train.sales,bins=231)
Out[10]:
(array([1.0000e+00, 4.0000e+00, 2.5000e+01, 9.1000e+01, 2.1500e+02,
        4.0400e+02, 7.9500e+02, 1.3360e+03, 2.0780e+03, 2.7810e+03,
        3.6020e+03, 4.5760e+03, 5.7360e+03, 6.6780e+03, 7.7020e+03,
        8.6910e+03, 9.8330e+03, 1.0552e+04, 1.1379e+04, 1.1982e+04,
        1.2423e+04, 1.3051e+04, 1.3413e+04, 1.3956e+04, 1.3812e+04,
        1.4027e+04, 1.4212e+04, 1.4231e+04, 1.4164e+04, 1.4178e+04,
        1.4283e+04, 1.4130e+04, 1.3818e+04, 1.3963e+04, 1.3774e+04,
        1.3591e+04, 1.3689e+04, 1.3685e+04, 1.3376e+04, 1.3121e+04,
        1.2809e+04, 1.2775e+04, 1.2669e+04, 1.2818e+04, 1.2536e+04,
        1.2368e+04, 1.1966e+04, 1.1897e+04, 1.1835e+04, 1.1809e+04,
        1.1580e+04, 1.1468e+04, 1.1251e+04, 1.0887e+04, 1.0864e+04,
        1.0648e+04, 1.0501e+04, 1.0352e+04, 1.0185e+04, 9.9920e+03,
        9.7620e+03, 9.5720e+03, 9.4030e+03, 9.2050e+03, 9.2020e+03,
        8.7350e+03, 8.5940e+03, 8.6260e+03, 8.2320e+03, 8.2080e+03,
        7.9600e+03, 7.6500e+03, 7.5760e+03, 7.2480e+03, 7.3090e+03,
        7.0200e+03, 6.7510e+03, 6.5770e+03, 6.4430e+03, 6.2950e+03,
        6.0440e+03, 5.7860e+03, 5.6710e+03, 5.4880e+03, 5.4330e+03,
        5.2280e+03, 5.0770e+03, 4.8790e+03, 4.8390e+03, 4.5850e+03,
        4.4500e+03, 4.2080e+03, 3.9650e+03, 4.1370e+03, 3.7910e+03,
        3.6240e+03, 3.6400e+03, 3.4150e+03, 3.4460e+03, 3.1610e+03,
        3.1020e+03, 2.9590e+03, 2.8510e+03, 2.8520e+03, 2.7070e+03,
        2.5510e+03, 2.4510e+03, 2.4120e+03, 2.2790e+03, 2.1650e+03,
        2.1950e+03, 1.9780e+03, 1.8550e+03, 1.8420e+03, 1.6760e+03,
        1.7880e+03, 1.5930e+03, 1.5060e+03, 1.4770e+03, 1.3240e+03,
        1.2860e+03, 1.2770e+03, 1.2330e+03, 1.1910e+03, 1.1120e+03,
        1.0510e+03, 9.8200e+02, 9.2400e+02, 8.7700e+02, 9.4200e+02,
        7.9200e+02, 7.9700e+02, 6.8500e+02, 6.5800e+02, 6.3200e+02,
        5.8000e+02, 5.7700e+02, 5.3300e+02, 5.3000e+02, 4.7000e+02,
        4.4800e+02, 4.5500e+02, 4.0200e+02, 3.7600e+02, 3.5300e+02,
        2.9800e+02, 3.0000e+02, 2.9000e+02, 2.9300e+02, 2.2400e+02,
        2.3300e+02, 2.2600e+02, 2.1100e+02, 1.9800e+02, 1.7900e+02,
        1.6100e+02, 1.5700e+02, 1.3300e+02, 1.4100e+02, 1.0300e+02,
        1.1900e+02, 9.5000e+01, 8.0000e+01, 1.0300e+02, 8.1000e+01,
        7.7000e+01, 7.1000e+01, 6.1000e+01, 6.2000e+01, 6.7000e+01,
        4.0000e+01, 4.0000e+01, 2.8000e+01, 4.3000e+01, 4.3000e+01,
        3.4000e+01, 2.4000e+01, 2.4000e+01, 2.5000e+01, 1.5000e+01,
        2.3000e+01, 2.2000e+01, 1.4000e+01, 1.7000e+01, 1.5000e+01,
        7.0000e+00, 9.0000e+00, 1.3000e+01, 7.0000e+00, 6.0000e+00,
        9.0000e+00, 8.0000e+00, 3.0000e+00, 3.0000e+00, 5.0000e+00,
        2.0000e+00, 4.0000e+00, 3.0000e+00, 3.0000e+00, 1.0000e+00,
        2.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 3.0000e+00,
        3.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00,
        1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        1.0000e+00]),
 array([  0.,   1.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.,  10.,
         11.,  12.,  13.,  14.,  15.,  16.,  17.,  18.,  19.,  20.,  21.,
         22.,  23.,  24.,  25.,  26.,  27.,  28.,  29.,  30.,  31.,  32.,
         33.,  34.,  35.,  36.,  37.,  38.,  39.,  40.,  41.,  42.,  43.,
         44.,  45.,  46.,  47.,  48.,  49.,  50.,  51.,  52.,  53.,  54.,
         55.,  56.,  57.,  58.,  59.,  60.,  61.,  62.,  63.,  64.,  65.,
         66.,  67.,  68.,  69.,  70.,  71.,  72.,  73.,  74.,  75.,  76.,
         77.,  78.,  79.,  80.,  81.,  82.,  83.,  84.,  85.,  86.,  87.,
         88.,  89.,  90.,  91.,  92.,  93.,  94.,  95.,  96.,  97.,  98.,
         99., 100., 101., 102., 103., 104., 105., 106., 107., 108., 109.,
        110., 111., 112., 113., 114., 115., 116., 117., 118., 119., 120.,
        121., 122., 123., 124., 125., 126., 127., 128., 129., 130., 131.,
        132., 133., 134., 135., 136., 137., 138., 139., 140., 141., 142.,
        143., 144., 145., 146., 147., 148., 149., 150., 151., 152., 153.,
        154., 155., 156., 157., 158., 159., 160., 161., 162., 163., 164.,
        165., 166., 167., 168., 169., 170., 171., 172., 173., 174., 175.,
        176., 177., 178., 179., 180., 181., 182., 183., 184., 185., 186.,
        187., 188., 189., 190., 191., 192., 193., 194., 195., 196., 197.,
        198., 199., 200., 201., 202., 203., 204., 205., 206., 207., 208.,
        209., 210., 211., 212., 213., 214., 215., 216., 217., 218., 219.,
        220., 221., 222., 223., 224., 225., 226., 227., 228., 229., 230.,
        231.]),
 <a list of 231 Patch objects>)
In [11]:
plt.figure(figsize=(12,8))
plt.xlim(train.sales.min(), train.sales.max()*1.1)
sns.boxplot(x=train.sales)

print(max(train.sales))
print(min(train.sales))
231
0
In [12]:
sales_Q1=train.sales.quantile(0.25)
sales_Q3=train.sales.quantile(0.75)
sales_IQR=sales_Q3-sales_Q1
print(sales_Q1-1.5*sales_IQR,sales_Q3+1.5*sales_IQR)
print(sales_Q1-3*sales_IQR,sales_Q3+3*sales_IQR)
# 按箱型图理论,在Q3+1.5IQR和Q1-1.5IQR区间以外在Q3+1.5IQR和Q1-1.5IQR以内的是温和的异常值(mild outliers),
# 在Q3+1.5IQR和Q1-1.5IQR以内的是极端的异常值(extreme outliers)
# -30.0 130.0
# -90.0 190.0
-30.0 130.0
-90.0 190.0
In [13]:
#温和异常值有11967条数据
len(train[train.sales>130])
Out[13]:
11967
In [14]:
len(train[train.sales>190])
Out[14]:
50
In [15]:
train.sales.value_counts()[-30:]
#极端异常值有50条数据
#但似乎出现这样的数据也正常

#但这只是根据经验的归纳,查看销量数据,这些所谓极端异常值也并不很离群,所以不做处理。
Out[15]:
187    13
186     9
190     9
191     8
185     7
188     7
189     6
194     5
196     4
1       4
198     3
197     3
204     3
205     3
193     3
192     3
200     2
195     2
231     1
214     1
199     1
201     1
202     1
203     1
206     1
207     1
208     1
209     1
210     1
0       1
Name: sales, dtype: int64
In [16]:
#将唯一销量0改为1,以便损失函数或度量函数可采用mape
train.loc[train[train.sales==0].index,'sales']=1

销售数据呈偏态分布,按箱型图理论,有50/913000销量数据在极端异常值,有(11967-50)/913000数据处于温和异常值。但这是一种经验理论,而且处于所谓极端异常值数据相互间并没有相隔很远,因此我认为这些销量是正常的,故不作处理。

2.3. 时间序列查看趋势

In [17]:
train_copy = train.copy()
date_mean = train_copy.groupby(['date'])['sales'].mean().reset_index()
del train_copy
In [18]:
plt.rcParams['figure.figsize'] = (12, 6)
fig, ax = plt.subplots()

ax.plot(date_mean.date,date_mean.sales,'r',label='Sales')
ax.set_xlabel('Date')
ax.set_ylabel('Sales')
ax.set_title("Sales_date_mean")
ax.legend()
plt.show()
E:\toolkits.win\anaconda\envs\ml\lib\site-packages\pandas\plotting\_converter.py:129: FutureWarning: Using an implicitly registered datetime converter for a matplotlib plotting method. The converter was registered by pandas on import. Future versions of pandas will require you to explicitly register matplotlib converters.

To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()
  warnings.warn(msg, FutureWarning)
In [19]:
plt.rcParams['figure.figsize'] = (12, 6)
fig, ax = plt.subplots()

ax.plot(date_mean.date[-365:],date_mean.sales[-365:],'r',label='Sales')
ax.set_xlabel('Date')
ax.set_ylabel('Sales')
ax.set_title("Sales_date_mean")
ax.legend()
plt.show()
In [20]:
plt.rcParams['figure.figsize'] = (12, 6)
fig, ax = plt.subplots()

ax.plot(date_mean.date[-31:],date_mean.sales[-31:],'r',label='Sales')
ax.set_xlabel('Date')
ax.set_ylabel('Sales')
ax.set_title("Sales_date_mean")
ax.legend()
plt.show()
In [21]:
plt.rcParams['figure.figsize'] = (12, 6)
fig, ax = plt.subplots()

ax.plot(date_mean.date[-14:],date_mean.sales[-14:],'r',label='Sales')
ax.set_xlabel('Date')
ax.set_ylabel('Sales')
ax.set_title("Sales_date_mean")
ax.legend()
plt.show()
In [22]:
date_mean.insert(1,'year',date_mean.date.dt.year)
date_mean.insert(2,'quarter',date_mean.date.dt.quarter)
date_mean.head()
Out[22]:
date year quarter sales
0 2013-01-01 2013 1 27.392
1 2013-01-02 2013 1 27.356
2 2013-01-03 2013 1 28.976
3 2013-01-04 2013 1 31.354
4 2013-01-05 2013 1 32.474
In [23]:
year_mean = date_mean.groupby(['year'])['sales'].mean().reset_index()
year_mean
Out[23]:
year sales
0 2013 43.513660
1 2014 50.057441
2 2015 52.256915
3 2016 56.596503
4 2017 58.815014
In [24]:
plt.rcParams['figure.figsize'] = (12, 6)
fig, ax = plt.subplots()

ax.plot(year_mean.year,year_mean.sales,'r',label='Sales')
ax.set_xlabel('Year')
ax.set_ylabel('Sales')
ax.set_title("Sales_year_mean")
ax.legend()
plt.show()
In [25]:
quarter1 = date_mean[date_mean.quarter==1]
quarter1.tail()
Out[25]:
date year quarter sales
1546 2017-03-27 2017 1 42.026
1547 2017-03-28 2017 1 49.386
1548 2017-03-29 2017 1 48.948
1549 2017-03-30 2017 1 52.786
1550 2017-03-31 2017 1 55.666
In [26]:
year_quarter1_mean = quarter1.groupby(['year'])['sales'].mean().reset_index()
year_quarter1_mean
Out[26]:
year sales
0 2013 34.037844
1 2014 39.097911
2 2015 40.772911
3 2016 44.137956
4 2017 45.807600
In [27]:
plt.rcParams['figure.figsize'] = (12, 6)
fig, ax = plt.subplots()

ax.plot(year_quarter1_mean.year,year_quarter1_mean.sales,'r',label='Sales')
ax.set_xlabel('Year')
ax.set_ylabel('Sales')
ax.set_title("Sales_year_quarter1_mean")
ax.legend()
plt.show()
In [28]:
del date_mean,year_mean,quarter1,year_quarter1_mean

可以看出销量呈现明显年周期和星期周期性。

3.构造更多特征

显然,只有日期、店铺分类、商品分类并不足以解释销量的信息。按照信息熵理论,这三个变量的信息熵远小于销量的信息熵。时序数据最大的信息来源于过往数据,应利用起来。但是,利用往期数据谨防泄露(leakage)。 我将增加三种类型的特征:

  1. 时间特征

即将date转化成year、month、day等。

  1. 平移特征

平移特征也成为lag特征,通过将对象value进行平移操作,获取的特征。通过shift函数实现(含有多列,shift多有不变,可直接date列+periods)。若periods参数小于90,那么2018年3月31时候的平移特征将含有2018年某天销量,这天的销量是不知道的。所以为保证能进行20018年头三个月的预测,其periods参数必须大于等于90。假设浪费一年数据用来构造特征,那么periods也应小于等于365.

  1. 窗口特征

即窗口内取平均值、最大值和最小值等。通过rolling().mean()等实现。rolling函数默认将mean等结果显示在窗口末尾,那么必须进行shiift(periods>=90,含有多列,shift多有不变,可直接date列+periods),且periods+window-1<=365。

  1. 分类平均 与第三类特征不同,不再沿着日期平均,这次沿着分类特征平均。

  2. 不同商店、货物差值

  3. 与窗口平均比较的极大值、极小值标记

In [29]:
df_all = train
df_all = df_all.sort_values(by=['store','item','date'])
del train  #,test
In [30]:
sales = df_all.pop('sales')
df_all.insert(0,'sales',sales)
In [31]:
df_all.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 913000 entries, 0 to 912999
Data columns (total 4 columns):
sales    913000 non-null int64
date     913000 non-null datetime64[ns]
store    913000 non-null int64
item     913000 non-null int64
dtypes: datetime64[ns](1), int64(3)
memory usage: 34.8 MB
In [32]:
df_all.store = df_all.store.astype(np.int8)
df_all.item = df_all.item.astype(np.int8)
df_all.sales = df_all.sales.astype(np.int16)
In [33]:
from sklearn import metrics as mr
print(mr.mutual_info_score(df_all.sales,df_all.sales))
for i in df_all.columns[1:]:
    print(i, mr.mutual_info_score(df_all.sales,df_all.loc[:,i]))
4.668257396303383
date 0.25756156006867154
store 0.08080738800839304
item 0.469412480597298
In [34]:
#add_window
def add_window(df,window=2):
    df_copy = df.copy()
    df_copy = df_copy[['sales','date','store','item']]
    df_copy = df_copy.set_index('date')
    df_copy_roll = df_copy.groupby(['store','item'])['sales'].rolling(window=window).mean().reset_index()
      
    df_copy_roll['sales'] = df_copy_roll['sales'].astype(np.float32)
    
    new_name='sales_window{0}_mean'.format(window)
    df_copy_roll.rename(columns={'sales':new_name}, inplace=True)
    df = df.merge(df_copy_roll,on=['date','store','item'],how='left')
    del df_copy,df_copy_roll,new_name
    return df
In [35]:
#add_store_group
def add_store_group(df):
    df_copy = df.copy()
    df_copy = df_copy[['sales','date','store','item']]
    df_copy = df_copy.groupby(['date','store'])['sales'].mean().reset_index()
      
    df_copy['sales'] = df_copy['sales'].astype(np.float32)
    
    new_name='sales_store_mean'
    df_copy.rename(columns={'sales':new_name}, inplace=True)
    df = df.merge(df_copy,on=['date','store'],how='left')
    del df_copy,new_name
    return df
In [36]:
#add_item_group
def add_item_group(df):
    df_copy = df.copy()
    df_copy = df_copy[['sales','date','store','item']]
    df_copy = df_copy.groupby(['date','item'])['sales'].mean().reset_index()
      
    df_copy['sales'] = df_copy['sales'].astype(np.float32)
    
    new_name='sales_item_mean'
    df_copy.rename(columns={'sales':new_name}, inplace=True)
    df = df.merge(df_copy,on=['date','item'],how='left')
    del df_copy,new_name
    return df    
In [37]:
df_all = add_store_group(df_all)
df_all = add_item_group(df_all)

df_all = add_window(df_all,2)
df_all = add_window(df_all,3)
df_all = add_window(df_all,4)
df_all = add_window(df_all,5)
df_all = add_window(df_all,6)
df_all = add_window(df_all,7)
In [38]:
def add_lag(df,periods,lag_col='sales'):
    #df:columns:date,store,item,sales and so on
    #periods:periods to shift,
    #for this very project,90<=periods<=365
    #maybe periods=i*week or i*month
    df_shift = df.copy()
    df_shift = df_shift[['date','store','item',lag_col]]
    #from datetime import timedelta
    df_shift.date = df_shift.date+timedelta(periods)
    new_name=lag_col+'_lag{0}'.format(periods)
    df_shift.rename(columns={lag_col:new_name}, inplace=True)
    df = df.merge(df_shift,on=['date','store','item'],how='left')
    del df_shift,new_name
    return df
In [39]:
column_window = df_all.columns[-6:]
window_lag_list = [358]+list(range(357,200,-7))+[91,90]
other_lag_list = list(range(365,357,-1))+list(range(357,200,-7))+[91,90]
In [40]:
#   sales_window2_mean\3\4\5\6\7
ts = time()
for j in column_window:
    for i in window_lag_list:  #358,300,-7
        df_all = add_lag(df_all,i,lag_col=j)
    df_all.drop(j,axis=1,inplace=True)
time() - ts
Out[40]:
157.29692840576172
In [41]:
ts = time()
for i in other_lag_list:  #365,250,-1
    df_all = add_lag(df_all,i,lag_col='sales')
time() - ts
Out[41]:
42.25670599937439
In [42]:
ts = time()
for i in other_lag_list:  #364,200,-7
    df_all = add_lag(df_all,i,lag_col='sales_store_mean')
df_all.drop('sales_store_mean',axis=1,inplace=True)
time() - ts
Out[42]:
84.6533715724945
In [43]:
ts = time()
for i in other_lag_list:   #364,200,-7
    df_all = add_lag(df_all,i,lag_col='sales_item_mean')
df_all.drop('sales_item_mean',axis=1,inplace=True)
time() - ts
Out[43]:
98.7822482585907
In [44]:
def add_datetime_properties(df,position=1,add_year=True,add_month=True,add_dayofweek=True,
                            add_day=True,add_weekofyear=True):
                            

    
    if add_year==True:
        df.insert(position,'year',df.date.dt.year)
        df.year = df.year.astype(np.int16)

    
    if add_month==True:
        df.insert(position,'month',df.date.dt.month)
        df.month = df.month.astype(np.int8)     
        
        
    if add_day==True:
        df.insert(position,'day',df.date.dt.day)
        df.day = df.day.astype(np.int8)             
        
        
    if add_dayofweek==True:
        df.insert(position,'dayofweek',df.date.dt.dayofweek)
        df.dayofweek = df.dayofweek.astype(np.int8)
        
    if add_weekofyear==True:
        df.insert(position,'weekofyear',df.date.dt.weekofyear)
        df.weekofyear = df.weekofyear.astype(np.int8)
       
    return df
In [45]:
#距数据起始时间的月数天数
def total_from_start(df,position=1):        
    #df.insert(position,'month_from_start',df.date.dt.month+(df.date.dt.year-2013)*12)
    #df.month_from_start = df.month_from_start.astype(np.int8) 
    
    from datetime import datetime
    df.insert(position,'day_from_start',(df.date-datetime(2013,1,1)).dt.days)
    df.day_from_start = df.day_from_start.astype(np.int16)
    
    return df
In [46]:
#datetime properties能添加的有17项目,但是2013年初到2017年末共5年数据只有1年是闰年,
#需要用到目标变量构造新的特征,为防止泄露(leakage)
#需要从train中专门分出一个数据集来构造特征(这部分知识在feature-engineering-for-machine-learning机器学习的特征工程alice-zhang)
#所以is_leap_year没必要添加。
df_all=add_datetime_properties(df_all)

df_all=total_from_start(df_all)
In [47]:
def add_history_mean(df,computing,col1,col2,col3,mean_col):
    #col1:month\day
    #col2:weekofyear\dayofweek 
    
    # one column
    for i in col1+col2+col3:
        keys=[i]
        right=computing.groupby(keys)[mean_col].mean()
        right = right.astype(np.float32) #float64 float32 or float16
        right = right.reset_index()
        new_name='sales_history_{0}_mean'.format(keys[0]) 
        right.rename(columns={'sales':new_name}, inplace = True)
        df=pd.merge(df,right, how='left', on=keys,suffixes=('_x', '_y'), copy=False, indicator=False)
        
    #two columns        
    for i in col1+col2:
        for j in col3:
            keys=[i,j]
            right=computing.groupby(keys)[mean_col].mean()
            right = right.astype(np.float32) #float64 float32 or float16
            right = right.reset_index()
            new_name='sales_history_{0}_{1}_mean'.format(keys[0],keys[1]) 
            right.rename(columns={'sales':new_name}, inplace = True)
            df=pd.merge(df,right, how='left', on=keys,suffixes=('_x', '_y'), copy=False, indicator=False)
            
    keys = col1
    right=computing.groupby(keys)[mean_col].mean()
    right = right.astype(np.float32) #float64 float32 or float16
    right = right.reset_index()
    new_name='sales_history_{0}_{1}_mean'.format(keys[0],keys[1]) 
    right.rename(columns={'sales':new_name}, inplace = True)
    df=pd.merge(df,right, how='left', on=keys,suffixes=('_x', '_y'), copy=False, indicator=False)
    
    keys = col2
    right=computing.groupby(keys)[mean_col].mean()
    right = right.astype(np.float32) #float64 float32 or float16
    right = right.reset_index()
    new_name='sales_history_{0}_{1}_mean'.format(keys[0],keys[1]) 
    right.rename(columns={'sales':new_name}, inplace = True)
    df=pd.merge(df,right, how='left', on=keys,suffixes=('_x', '_y'), copy=False, indicator=False)
    
    keys = col3
    right=computing.groupby(keys)[mean_col].mean()
    right = right.astype(np.float32) #float64 float32 or float16
    right = right.reset_index()
    new_name='sales_history_{0}_{1}_mean'.format(keys[0],keys[1]) 
    right.rename(columns={'sales':new_name}, inplace = True)
    df=pd.merge(df,right, how='left', on=keys,suffixes=('_x', '_y'), copy=False, indicator=False)
    
    # three columns
    for i in col1:
        keys=[i]+col3
        right=computing.groupby(keys)[mean_col].mean()
        right = right.astype(np.float32) #float64 float32 or float16
        right = right.reset_index()
        new_name='sales_history_{0}_{1}_{2}_mean'.format(keys[0],keys[1],keys[2]) 
        right.rename(columns={'sales':new_name}, inplace = True)
        df=pd.merge(df,right, how='left', on=keys,suffixes=('_x', '_y'), copy=False, indicator=False)
        
    for i in col2:
        keys=[i]+col3
        right=computing.groupby(keys)[mean_col].mean()
        right = right.astype(np.float32) #float64 float32 or float16
        right = right.reset_index()
        new_name='sales_history_{0}_{1}_{2}_mean'.format(keys[0],keys[1],keys[2]) 
        right.rename(columns={'sales':new_name}, inplace = True)
        df=pd.merge(df,right, how='left', on=keys,suffixes=('_x', '_y'), copy=False, indicator=False)
        
    for i in col3:
        keys=[i]+col1
        right=computing.groupby(keys)[mean_col].mean()
        right = right.astype(np.float32) #float64 float32 or float16
        right = right.reset_index()
        new_name='sales_history_{0}_{1}_{2}_mean'.format(keys[0],keys[1],keys[2]) 
        right.rename(columns={'sales':new_name}, inplace = True)
        df=pd.merge(df,right, how='left', on=keys,suffixes=('_x', '_y'), copy=False, indicator=False)
        
    for i in col3:
        keys=[i]+col2
        right=computing.groupby(keys)[mean_col].mean()
        right = right.astype(np.float32) #float64 float32 or float16
        right = right.reset_index()
        new_name='sales_history_{0}_{1}_{2}_mean'.format(keys[0],keys[1],keys[2]) 
        right.rename(columns={'sales':new_name}, inplace = True)
        df=pd.merge(df,right, how='left', on=keys,suffixes=('_x', '_y'), copy=False, indicator=False)
        

    #four columns
    keys = col1+col3
    right=computing.groupby(keys)[mean_col].mean()
    right = right.astype(np.float32) #float64 float32 or float16
    right = right.reset_index()
    new_name='sales_history_{0}_{1}_{2}_{3}_mean'.format(keys[0],keys[1],keys[2],keys[3]) 
    right.rename(columns={'sales':new_name}, inplace = True)
    df=pd.merge(df,right, how='left', on=keys,suffixes=('_x', '_y'), copy=False, indicator=False)
    
    keys = col2+col3
    right=computing.groupby(keys)[mean_col].mean()
    right = right.astype(np.float32) #float64 float32 or float16
    right = right.reset_index()
    new_name='sales_history_{0}_{1}_{2}_{3}_mean'.format(keys[0],keys[1],keys[2],keys[3]) 
    right.rename(columns={'sales':new_name}, inplace = True)
    df=pd.merge(df,right, how='left', on=keys,suffixes=('_x', '_y'), copy=False, indicator=False)
    
    return df
In [48]:
computing_2013 = df_all[(df_all.year==2013)]

df_all = add_history_mean(df_all,computing_2013,['weekofyear', 'dayofweek'], ['day', 'month'], ['store', 'item'],'sales')
In [49]:
def get_mpe(df,window):
    #df:用来计算窗口平均的历史数据,必须含有date、store、item、sales列
    #window:窗口平均的期数
    #return:mpe,含有date、store、item、mape_window3(比如窗口期数为3)
    #其中mpe列为窗口平均与销量的差值除以销量。
    for i in range(1,11):
        for j in range(1,51):
            copy = df[(df.store==i)&(df.item==j)]
            copy = copy[['date','sales']]
            copy = copy.set_index('date')
            s_i_mpe = (copy.rolling(window=window).mean()-copy)/copy
            s_i_mpe = s_i_mpe.fillna(0)
            s_i_mpe.insert(1,'store',i)
            s_i_mpe.insert(2,'item',j)
            if 'mpe' not in locals() and 'mpe' not in globals():  #判断是否存在mpe
                mpe = s_i_mpe
            else:
                mpe = pd.concat([mpe,s_i_mpe])
    mpe = mpe.reset_index()
    mpe = mpe.sort_values(by=['sales'])
    sales = mpe.pop('sales')
    mpe.insert(mpe.shape[1],'sales',sales)
    new_name = 'mpe_window{0}'.format(window)
    mpe.rename(columns={'sales':new_name}, inplace=True)
    del copy,s_i_mpe,new_name
    return mpe
In [50]:
ts = time()
mpe_window2 = get_mpe(computing_2013,2)
mpe_window3 = get_mpe(computing_2013,3)
mpe_window4 = get_mpe(computing_2013,4)
mpe_window5 = get_mpe(computing_2013,5)
mpe_window6 = get_mpe(computing_2013,6)
mpe_window7 = get_mpe(computing_2013,7)

del computing_2013
print(time()-ts)
20.278087377548218
In [51]:
def add_extrme(df,mpe,large_rate=0.1,small_rate=0.1,month_day_concat=True,weekofyear_dayofweek_concat=True):
    #df:需要添加列的时间数据,
    #mpe:即get_mpe的返回函数,需要注意的是,mpe的计算应采用不参与训练的数据计算。
    #参考其它时期数据有泄露的嫌疑,但似乎也不尽然???
    
    #large_percent:极大值占的比例
    #small_percent:极小值占的比例
    #month_day_concat:是否按月、日合并
    #weekofyear_dayofweek_concat:是否按一年的周数、星期合并
    #return:df:添加了mpe极大极小值对应的日期对应的月、日的标记,极大值标为1,极小值标为-1,其他值标为0,
    #若mpe参考了不止一年数据,则对应标记取平均再四舍五入到整数。
    extrme_large = mpe[-int(len(mpe)*large_rate):]
    extrme_large.insert(1,'extrme',0.999)
    extrme_small = mpe[:int(len(mpe)*small_rate)]
    extrme_small.insert(1,'extrme',-0.999)
    extrme = pd.concat([extrme_large,extrme_small],axis=0)
    extrme = extrme[['date','store','item','extrme']]
    if month_day_concat is True:
        extrme_1 = extrme.copy()
        extrme_1.insert(1,'month',extrme_1.date.dt.month)
        extrme_1.insert(1,'day',extrme_1.date.dt.day)
        extrme_1 = extrme_1.drop(['date'],axis=1)
        extrme_1_grouped = extrme_1.groupby(['month','day','store','item'])['extrme'].mean().reset_index()
        extrme_1_grouped.extrme = extrme_1_grouped.extrme.round()
        
        df = df.merge(extrme_1_grouped,on=['month','day','store','item'],how='left')
        df.extrme = df.extrme.fillna(0)
        df.extrme = df.extrme.astype(np.int8)
        new_name='sales_extrme_'+mpe.columns[-1]+\
        '_month_day_concat_large_{0}_thousandths_small_{1}_thousandths'.\
        format(int(1000*large_rate),int(1000*small_rate))
        df.rename(columns={'extrme':new_name}, inplace=True)
    
    if weekofyear_dayofweek_concat is True:
        extrme_2 = extrme.copy()
        extrme_2.insert(1,'weekofyear',extrme_2.date.dt.weekofyear)
        extrme_2.insert(1,'dayofweek',extrme_2.date.dt.dayofweek)
        extrme_2 = extrme_2.drop(['date'],axis=1)
        extrme_2_grouped = extrme_2.groupby(['weekofyear','dayofweek','store','item'])['extrme'].mean().reset_index()
        extrme_2_grouped.extrme = extrme_2_grouped.extrme.round()
        
        df = df.merge(extrme_2_grouped,on=['weekofyear','dayofweek','store','item'],how='left')
        df.extrme = df.extrme.fillna(0)
        df.extrme = df.extrme.astype(np.int8)
        new_name='sales_extrme_'+mpe.columns[-1]+\
        '_weekofyear_dayofweek_concat_large_{0}_thousandths_small_{1}_thousandths'.\
        format(int(1000*large_rate),int(1000*small_rate))
        df.rename(columns={'extrme':new_name}, inplace=True)
    

    del extrme_large,extrme_small,extrme,extrme_1,extrme_2,new_name
    return df
In [52]:
rate_list = [2*10**(-2)*i for i in range(1,26)]
In [53]:
ts = time()
for i in rate_list:
    df_all = add_extrme(df_all,mpe_window2,i,i)
print(time()-ts)
174.90803980827332
In [54]:
ts = time()
for i in rate_list: #1,51
    df_all = add_extrme(df_all,mpe_window3,i,i)
print(time()-ts)
184.1297345161438
In [55]:
ts = time()
for i in rate_list:
    df_all = add_extrme(df_all,mpe_window4,i,i)
print(time()-ts)
195.97084546089172
In [56]:
ts = time()
for i in rate_list:
    df_all = add_extrme(df_all,mpe_window5,i,i)
print(time()-ts)
208.39550590515137
In [57]:
ts = time()
for i in rate_list:
    df_all = add_extrme(df_all,mpe_window6,i,i)
print(time()-ts)
222.22090816497803
In [58]:
ts = time()
for i in rate_list:
    df_all = add_extrme(df_all,mpe_window7,i,i)
print(time()-ts)
232.0511507987976
In [59]:
def add_mpe(df,mpe,month_day_concat=True,weekofyear_dayofweek_concat=True):
    #df:需要添加列的时间数据,
    #mpe:即get_mpe的返回函数,需要注意的是,mpe的计算应采用不参与训练的2013年的数据计算。
    #参考其它时期数据有泄露的嫌疑,但似乎也不尽然???
    
    #large_percent:极大值占的比例
    #small_percent:极小值占的比例
    #month_day_concat:是否按月、日合并
    #weekofyear_dayofweek_concat:是否按一年的周数、星期合并
    #return:df:添加了不参与训练的历史数据窗口平均与实际值的mpe
    #若mpe参考了不止一年数据,则对应标记取平均。
    
    if month_day_concat is True:
        mpe_1 = mpe.copy()
        mpe_1.insert(1,'month',mpe_1.date.dt.month)
        mpe_1.insert(1,'day',mpe_1.date.dt.day)
        mpe_1 = mpe_1.drop(['date'],axis=1)
        mpe_1_grouped = mpe_1.groupby(['month','day','store','item'])[mpe.columns[-1]].mean().reset_index()
        mpe_1_grouped[mpe.columns[-1]] = mpe_1_grouped[mpe.columns[-1]].round()
        mpe_1_grouped[mpe.columns[-1]] = mpe_1_grouped[mpe.columns[-1]].astype(np.float32)
        
        df = df.merge(mpe_1_grouped,on=['month','day','store','item'],how='left')
        df[mpe.columns[-1]] = df[mpe.columns[-1]].fillna(0)
        df[mpe.columns[-1]] = df[mpe.columns[-1]].astype(np.float32)
        new_name='sales_'+mpe.columns[-1]+'_month_day_concat'
        df.rename(columns={mpe.columns[-1]:new_name}, inplace=True)
    
    if weekofyear_dayofweek_concat is True:
        mpe_2 = mpe.copy()
        mpe_2.insert(1,'weekofyear',mpe_2.date.dt.month)
        mpe_2.insert(1,'dayofweek',mpe_2.date.dt.day)
        mpe_2 = mpe_2.drop(['date'],axis=1)
        mpe_2_grouped = mpe_2.groupby(['weekofyear','dayofweek','store','item'])[mpe.columns[-1]].mean().reset_index()
        mpe_2_grouped[mpe.columns[-1]] = mpe_2_grouped[mpe.columns[-1]].round()
        mpe_2_grouped[mpe.columns[-1]] = mpe_2_grouped[mpe.columns[-1]].astype(np.float32)
        
        df = df.merge(mpe_2_grouped,on=['weekofyear','dayofweek','store','item'],how='left')
        df[mpe.columns[-1]] = df[mpe.columns[-1]].fillna(0)
        df[mpe.columns[-1]] = df[mpe.columns[-1]].astype(np.float32)
        new_name='sales_'+mpe.columns[-1]+'_weekofyear_dayofweek_concat'
        df.rename(columns={mpe.columns[-1]:new_name}, inplace=True)
    

    del mpe_1,mpe_1_grouped,mpe_2,mpe_2_grouped,new_name
    return df
In [60]:
df_all = add_mpe(df_all,mpe_window2)
df_all = add_mpe(df_all,mpe_window3)
df_all = add_mpe(df_all,mpe_window4)
df_all = add_mpe(df_all,mpe_window5)
df_all = add_mpe(df_all,mpe_window6)
df_all = add_mpe(df_all,mpe_window7)
In [61]:
del mpe_window2,mpe_window3,mpe_window4,mpe_window5,mpe_window6,mpe_window7
In [62]:
from sklearn import metrics as mr
print(mr.mutual_info_score(df_all.sales,df_all.sales))
for i in df_all.columns[1:]:
    #print(i,df_all.loc[:,i].shape)
    print(i, mr.mutual_info_score(df_all.sales,df_all.loc[:,i]))
4.668257396303383
day_from_start 0.25756156006867154
weekofyear 0.07134755789114708
dayofweek 0.030077094301948566
day 0.0034466399745189747
month 0.06965465974410086
year 0.020220463612645005
date 0.25756156006867154
store 0.08080738800839304
item 0.469412480597298
sales_window2_mean_lag358 1.5818288716989122
sales_window2_mean_lag357 1.6831127159719836
sales_window2_mean_lag350 1.6350739167481605
sales_window2_mean_lag343 1.5912761813132787
sales_window2_mean_lag336 1.5494028843646415
sales_window2_mean_lag329 1.4991327779432337
sales_window2_mean_lag322 1.4491514437009096
sales_window2_mean_lag315 1.4025640385711766
sales_window2_mean_lag308 1.3594745180085976
sales_window2_mean_lag301 1.316946844586016
sales_window2_mean_lag294 1.2780753003551122
sales_window2_mean_lag287 1.2424093976663562
sales_window2_mean_lag280 1.2099545324991507
sales_window2_mean_lag273 1.180408408676773
sales_window2_mean_lag266 1.1498798248381195
sales_window2_mean_lag259 1.1212810842636065
sales_window2_mean_lag252 1.0947243618157527
sales_window2_mean_lag245 1.0689798123717382
sales_window2_mean_lag238 1.0372286743334367
sales_window2_mean_lag231 1.0041273542209084
sales_window2_mean_lag224 0.9734171446850924
sales_window2_mean_lag217 0.9448675895664939
sales_window2_mean_lag210 0.9188283720826048
sales_window2_mean_lag203 0.8953533130886092
sales_window2_mean_lag91 0.7821253661696093
sales_window2_mean_lag90 0.7896705070456269
sales_window3_mean_lag358 1.5975813447792404
sales_window3_mean_lag357 1.6896834734394277
sales_window3_mean_lag350 1.6413411940793379
sales_window3_mean_lag343 1.5965343315065859
sales_window3_mean_lag336 1.5541402275467033
sales_window3_mean_lag329 1.504917471794832
sales_window3_mean_lag322 1.454773872414574
sales_window3_mean_lag315 1.4087401415073935
sales_window3_mean_lag308 1.36587270924997
sales_window3_mean_lag301 1.3241861185078099
sales_window3_mean_lag294 1.285600648695084
sales_window3_mean_lag287 1.2501567121896684
sales_window3_mean_lag280 1.2179203102909315
sales_window3_mean_lag273 1.1886960746873088
sales_window3_mean_lag266 1.1588680688881097
sales_window3_mean_lag259 1.1299187994946185
sales_window3_mean_lag252 1.1032340022088782
sales_window3_mean_lag245 1.0773799333100553
sales_window3_mean_lag238 1.0463124427750694
sales_window3_mean_lag231 1.013265622090837
sales_window3_mean_lag224 0.9823744185984783
sales_window3_mean_lag217 0.9539473296514177
sales_window3_mean_lag210 0.9274222104979758
sales_window3_mean_lag203 0.9040160611226646
sales_window3_mean_lag91 0.785703963041584
sales_window3_mean_lag90 0.8016737869060284
sales_window4_mean_lag358 1.614227233532631
sales_window4_mean_lag357 1.685131314487
sales_window4_mean_lag350 1.6378749634633276
sales_window4_mean_lag343 1.5940638474003332
sales_window4_mean_lag336 1.5527833902465922
sales_window4_mean_lag329 1.5060259442889503
sales_window4_mean_lag322 1.456690800074901
sales_window4_mean_lag315 1.4111876755662005
sales_window4_mean_lag308 1.3689201942262672
sales_window4_mean_lag301 1.3274954152073641
sales_window4_mean_lag294 1.2890322696483998
sales_window4_mean_lag287 1.2541984610372234
sales_window4_mean_lag280 1.221931025543949
sales_window4_mean_lag273 1.1920222796930475
sales_window4_mean_lag266 1.162710068513564
sales_window4_mean_lag259 1.133908588648825
sales_window4_mean_lag252 1.106142941760595
sales_window4_mean_lag245 1.0801375053304991
sales_window4_mean_lag238 1.0508330370465544
sales_window4_mean_lag231 1.0183216372942496
sales_window4_mean_lag224 0.9880292517548344
sales_window4_mean_lag217 0.9595279902318443
sales_window4_mean_lag210 0.9327321619754819
sales_window4_mean_lag203 0.9098247460520611
sales_window4_mean_lag91 0.7862745690676864
sales_window4_mean_lag90 0.8057987870475269
sales_window5_mean_lag358 1.6417894021679984
sales_window5_mean_lag357 1.6883484530043273
sales_window5_mean_lag350 1.6422030411579094
sales_window5_mean_lag343 1.5993304523495422
sales_window5_mean_lag336 1.5583885172894338
sales_window5_mean_lag329 1.5133185262057423
sales_window5_mean_lag322 1.4646116756342589
sales_window5_mean_lag315 1.4190870422211095
sales_window5_mean_lag308 1.3765204389893206
sales_window5_mean_lag301 1.3359888458498754
sales_window5_mean_lag294 1.296694607255348
sales_window5_mean_lag287 1.26194823080588
sales_window5_mean_lag280 1.228459442897908
sales_window5_mean_lag273 1.1984226868234389
sales_window5_mean_lag266 1.1697680848860237
sales_window5_mean_lag259 1.1405958426713725
sales_window5_mean_lag252 1.1134092134594757
sales_window5_mean_lag245 1.0869252834607936
sales_window5_mean_lag238 1.0583099824913225
sales_window5_mean_lag231 1.0265664061285218
sales_window5_mean_lag224 0.9962166787091773
sales_window5_mean_lag217 0.9678145076014026
sales_window5_mean_lag210 0.941004265426489
sales_window5_mean_lag203 0.9180981152945399
sales_window5_mean_lag91 0.7912986741403103
sales_window5_mean_lag90 0.8061558259996683
sales_window6_mean_lag358 1.6756354389297694
sales_window6_mean_lag357 1.7039871166963099
sales_window6_mean_lag350 1.6583259397936752
sales_window6_mean_lag343 1.615049711858078
sales_window6_mean_lag336 1.5743600179350936
sales_window6_mean_lag329 1.5306374807354137
sales_window6_mean_lag322 1.480702803173524
sales_window6_mean_lag315 1.4350286487308594
sales_window6_mean_lag308 1.3917724274895353
sales_window6_mean_lag301 1.350279975256295
sales_window6_mean_lag294 1.31092202346269
sales_window6_mean_lag287 1.2747861396095725
sales_window6_mean_lag280 1.2417318824191907
sales_window6_mean_lag273 1.2111315185390659
sales_window6_mean_lag266 1.1822208559328884
sales_window6_mean_lag259 1.1531260040041509
sales_window6_mean_lag252 1.1255944777339533
sales_window6_mean_lag245 1.099384863671342
sales_window6_mean_lag238 1.0718904083439242
sales_window6_mean_lag231 1.0400493273755662
sales_window6_mean_lag224 1.0097518786448882
sales_window6_mean_lag217 0.9807061134329887
sales_window6_mean_lag210 0.9535852660878792
sales_window6_mean_lag203 0.9306736208160657
sales_window6_mean_lag91 0.8028630548418284
sales_window6_mean_lag90 0.8090219238299597
sales_window7_mean_lag358 1.7348628549647245
sales_window7_mean_lag357 1.7278838239268683
sales_window7_mean_lag350 1.6812411347998464
sales_window7_mean_lag343 1.6375193860275081
sales_window7_mean_lag336 1.5960712621156852
sales_window7_mean_lag329 1.5519346837686758
sales_window7_mean_lag322 1.5017476530969995
sales_window7_mean_lag315 1.4548370616203565
sales_window7_mean_lag308 1.410503781489876
sales_window7_mean_lag301 1.3687369027866776
sales_window7_mean_lag294 1.3281329652929423
sales_window7_mean_lag287 1.2921426857598437
sales_window7_mean_lag280 1.2580140869626733
sales_window7_mean_lag273 1.2266506095929062
sales_window7_mean_lag266 1.198661186271476
sales_window7_mean_lag259 1.1692645923930365
sales_window7_mean_lag252 1.1418187279955099
sales_window7_mean_lag245 1.1159138232096257
sales_window7_mean_lag238 1.0881343440456437
sales_window7_mean_lag231 1.056112731274407
sales_window7_mean_lag224 1.0253505549380035
sales_window7_mean_lag217 0.9958780527789427
sales_window7_mean_lag210 0.9690266506586949
sales_window7_mean_lag203 0.945779329273259
sales_window7_mean_lag91 0.8186567122521188
sales_window7_mean_lag90 0.8183530130869048
sales_lag365 1.5428288759284008
sales_lag364 1.7068511445957883
sales_lag363 1.5315984104714726
sales_lag362 1.4806073030403253
sales_lag361 1.4382807421365642
sales_lag360 1.4327079074402345
sales_lag359 1.4645385770254322
sales_lag358 1.503751374609271
sales_lag357 1.6584978717407601
sales_lag350 1.6129129137220468
sales_lag343 1.5711449149975987
sales_lag336 1.5305942914199868
sales_lag329 1.4801007583190342
sales_lag322 1.4312517525422406
sales_lag315 1.3854360372780898
sales_lag308 1.3432742723237394
sales_lag301 1.3009491644175712
sales_lag294 1.2619717301772868
sales_lag287 1.2260398326143893
sales_lag280 1.1930243990206506
sales_lag273 1.1630805421684223
sales_lag266 1.1328133453833638
sales_lag259 1.1044512490371061
sales_lag252 1.0781569912057931
sales_lag245 1.0531779375282477
sales_lag238 1.021478766392992
sales_lag231 0.9891550425260099
sales_lag224 0.9593802341016074
sales_lag217 0.9311642947824079
sales_lag210 0.9054883583290618
sales_lag203 0.882182360749077
sales_lag91 0.7711659939150688
sales_lag90 0.6799305447530023
sales_store_mean_lag365 1.2753794613022602
sales_store_mean_lag364 1.3080227902997017
sales_store_mean_lag363 1.265671440829788
sales_store_mean_lag362 1.249870546703326
sales_store_mean_lag361 1.2371947735127273
sales_store_mean_lag360 1.2362439990914063
sales_store_mean_lag359 1.2439757443307855
sales_store_mean_lag358 1.2534714428273797
sales_store_mean_lag357 1.2849805626716382
sales_store_mean_lag350 1.2607102984999885
sales_store_mean_lag343 1.23917024497986
sales_store_mean_lag336 1.2164080229034957
sales_store_mean_lag329 1.1903939282260685
sales_store_mean_lag322 1.1657420682804367
sales_store_mean_lag315 1.1413847904211798
sales_store_mean_lag308 1.1179637737869246
sales_store_mean_lag301 1.0946444557708037
sales_store_mean_lag294 1.0719827734888283
sales_store_mean_lag287 1.0496511365892147
sales_store_mean_lag280 1.0288597000800035
sales_store_mean_lag273 1.0083679018729739
sales_store_mean_lag266 0.9883057360664351
sales_store_mean_lag259 0.9683254643193558
sales_store_mean_lag252 0.948557720419333
sales_store_mean_lag245 0.9289491805703382
sales_store_mean_lag238 0.909165241860856
sales_store_mean_lag231 0.88946556008664
sales_store_mean_lag224 0.8699308094228969
sales_store_mean_lag217 0.8503555853794695
sales_store_mean_lag210 0.8324374015686551
sales_store_mean_lag203 0.8135267267297044
sales_store_mean_lag91 0.5767116691487356
sales_store_mean_lag90 0.556274004959555
sales_item_mean_lag365 1.4872211429093138
sales_item_mean_lag364 1.5946990910619132
sales_item_mean_lag363 1.463005738024913
sales_item_mean_lag362 1.427312601859022
sales_item_mean_lag361 1.3991142871756184
sales_item_mean_lag360 1.3988682366217524
sales_item_mean_lag359 1.4261419945516431
sales_item_mean_lag358 1.4537661171301226
sales_item_mean_lag357 1.5561775114842726
sales_item_mean_lag350 1.5188740282855477
sales_item_mean_lag343 1.484073220389683
sales_item_mean_lag336 1.449696506255099
sales_item_mean_lag329 1.4098125200860918
sales_item_mean_lag322 1.3699649658310094
sales_item_mean_lag315 1.332614999879127
sales_item_mean_lag308 1.2976207967383357
sales_item_mean_lag301 1.2625138481104479
sales_item_mean_lag294 1.229726648951559
sales_item_mean_lag287 1.1986519370678796
sales_item_mean_lag280 1.1701886462884084
sales_item_mean_lag273 1.142982904018747
sales_item_mean_lag266 1.1155650408524214
sales_item_mean_lag259 1.0890404415841137
sales_item_mean_lag252 1.0633631393252287
sales_item_mean_lag245 1.0386147309086262
sales_item_mean_lag238 1.011037362438915
sales_item_mean_lag231 0.9840659938202266
sales_item_mean_lag224 0.9576935647546022
sales_item_mean_lag217 0.9323673102262462
sales_item_mean_lag210 0.9092823522982376
sales_item_mean_lag203 0.8882553912601927
sales_item_mean_lag91 0.7354926500650864
sales_item_mean_lag90 0.666398591126863
sales_history_weekofyear_mean 0.08782930698141703
sales_history_dayofweek_mean 0.030077094301948566
sales_history_day_mean 0.0034466399745189747
sales_history_month_mean 0.06965465974410086
sales_history_store_mean 0.08080738800839304
sales_history_item_mean 0.469412480597298
sales_history_weekofyear_store_mean 0.21953950631919206
sales_history_weekofyear_item_mean 0.7436885190780576
sales_history_dayofweek_store_mean 0.12401652660193628
sales_history_dayofweek_item_mean 0.5564892799881955
sales_history_day_store_mean 0.1052727031215333
sales_history_day_item_mean 0.5450252116429011
sales_history_month_store_mean 0.17894585878296027
sales_history_month_item_mean 0.6661012170729359
sales_history_weekofyear_dayofweek_mean 0.14787268049987484
sales_history_day_month_mean 0.10334354483935156
sales_history_store_item_mean 0.6901952697313661
sales_history_weekofyear_store_item_mean 0.9585142015172402
sales_history_dayofweek_store_item_mean 0.8814446097788212
sales_history_day_store_item_mean 0.7471214078496499
sales_history_month_store_item_mean 1.0667371195422752
sales_history_store_weekofyear_dayofweek_mean 0.35664433630431197
sales_history_item_weekofyear_dayofweek_mean 0.7833272921241553
sales_history_store_day_month_mean 0.29090700391794627
sales_history_item_day_month_mean 0.6244040177161247
sales_history_weekofyear_dayofweek_store_item_mean 1.2360346591846927
sales_history_day_month_store_item_mean 1.0228071962425083
sales_extrme_mpe_window2_month_day_concat_large_20_thousandths_small_20_thousandths 0.015212307602020565
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_20_thousandths_small_20_thousandths 0.01997965848850855
sales_extrme_mpe_window2_month_day_concat_large_40_thousandths_small_40_thousandths 0.01828024686392657
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_40_thousandths_small_40_thousandths 0.025621428393252762
sales_extrme_mpe_window2_month_day_concat_large_60_thousandths_small_60_thousandths 0.018603039422871243
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_60_thousandths_small_60_thousandths 0.027582484480738356
sales_extrme_mpe_window2_month_day_concat_large_80_thousandths_small_80_thousandths 0.018081927710443334
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_80_thousandths_small_80_thousandths 0.028239226281625278
sales_extrme_mpe_window2_month_day_concat_large_100_thousandths_small_100_thousandths 0.017633003885116266
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_100_thousandths_small_100_thousandths 0.02897644666669359
sales_extrme_mpe_window2_month_day_concat_large_120_thousandths_small_120_thousandths 0.017512852556258673
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_120_thousandths_small_120_thousandths 0.029932275904200386
sales_extrme_mpe_window2_month_day_concat_large_140_thousandths_small_140_thousandths 0.01698754990668503
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_140_thousandths_small_140_thousandths 0.030244469017131154
sales_extrme_mpe_window2_month_day_concat_large_160_thousandths_small_160_thousandths 0.01638278685892023
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_160_thousandths_small_160_thousandths 0.030266041467366493
sales_extrme_mpe_window2_month_day_concat_large_180_thousandths_small_180_thousandths 0.016450609834076042
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_180_thousandths_small_180_thousandths 0.030813340148728982
sales_extrme_mpe_window2_month_day_concat_large_200_thousandths_small_200_thousandths 0.01650274397494867
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_200_thousandths_small_200_thousandths 0.03130432338305259
sales_extrme_mpe_window2_month_day_concat_large_220_thousandths_small_220_thousandths 0.01633644534114337
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_220_thousandths_small_220_thousandths 0.031003116230695887
sales_extrme_mpe_window2_month_day_concat_large_240_thousandths_small_240_thousandths 0.014989035069389386
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_240_thousandths_small_240_thousandths 0.02931639745614659
sales_extrme_mpe_window2_month_day_concat_large_260_thousandths_small_260_thousandths 0.01474192077945883
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_260_thousandths_small_260_thousandths 0.028576379412660814
sales_extrme_mpe_window2_month_day_concat_large_280_thousandths_small_280_thousandths 0.013605560245606907
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_280_thousandths_small_280_thousandths 0.026571290823032695
sales_extrme_mpe_window2_month_day_concat_large_300_thousandths_small_300_thousandths 0.012354691565596009
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_300_thousandths_small_300_thousandths 0.024514456459519177
sales_extrme_mpe_window2_month_day_concat_large_320_thousandths_small_320_thousandths 0.011623431765179545
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_320_thousandths_small_320_thousandths 0.022871743709217386
sales_extrme_mpe_window2_month_day_concat_large_340_thousandths_small_340_thousandths 0.01032414961386577
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_340_thousandths_small_340_thousandths 0.020603708201828788
sales_extrme_mpe_window2_month_day_concat_large_360_thousandths_small_360_thousandths 0.00908664331229992
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_360_thousandths_small_360_thousandths 0.01842229820027593
sales_extrme_mpe_window2_month_day_concat_large_380_thousandths_small_380_thousandths 0.008278374649357358
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_380_thousandths_small_380_thousandths 0.01689273274513347
sales_extrme_mpe_window2_month_day_concat_large_400_thousandths_small_400_thousandths 0.007219547505801948
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_400_thousandths_small_400_thousandths 0.014876568775984084
sales_extrme_mpe_window2_month_day_concat_large_420_thousandths_small_420_thousandths 0.0059019145798464255
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_420_thousandths_small_420_thousandths 0.012583131529787371
sales_extrme_mpe_window2_month_day_concat_large_440_thousandths_small_440_thousandths 0.0033260197485903698
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_440_thousandths_small_440_thousandths 0.008662877602837824
sales_extrme_mpe_window2_month_day_concat_large_460_thousandths_small_460_thousandths 0.003468119852805167
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_460_thousandths_small_460_thousandths 0.007920178867112625
sales_extrme_mpe_window2_month_day_concat_large_480_thousandths_small_480_thousandths 0.0067941256731543845
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_480_thousandths_small_480_thousandths 0.010646103037510742
sales_extrme_mpe_window2_month_day_concat_large_500_thousandths_small_500_thousandths 0.0030217515093261915
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_500_thousandths_small_500_thousandths 0.007760692658491546
sales_extrme_mpe_window3_month_day_concat_large_20_thousandths_small_20_thousandths 0.015526787298153377
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_20_thousandths_small_20_thousandths 0.01922407096846642
sales_extrme_mpe_window3_month_day_concat_large_40_thousandths_small_40_thousandths 0.01855658011407716
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_40_thousandths_small_40_thousandths 0.024268033920328076
sales_extrme_mpe_window3_month_day_concat_large_60_thousandths_small_60_thousandths 0.018711697187290897
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_60_thousandths_small_60_thousandths 0.025819169672630322
sales_extrme_mpe_window3_month_day_concat_large_80_thousandths_small_80_thousandths 0.018060900344876566
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_80_thousandths_small_80_thousandths 0.02664277822393813
sales_extrme_mpe_window3_month_day_concat_large_100_thousandths_small_100_thousandths 0.017022284825154266
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_100_thousandths_small_100_thousandths 0.026469607869647867
sales_extrme_mpe_window3_month_day_concat_large_120_thousandths_small_120_thousandths 0.016449130428501862
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_120_thousandths_small_120_thousandths 0.027209294645532813
sales_extrme_mpe_window3_month_day_concat_large_140_thousandths_small_140_thousandths 0.0157860857780883
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_140_thousandths_small_140_thousandths 0.02755351723384873
sales_extrme_mpe_window3_month_day_concat_large_160_thousandths_small_160_thousandths 0.015304608108076545
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_160_thousandths_small_160_thousandths 0.028261220772346628
sales_extrme_mpe_window3_month_day_concat_large_180_thousandths_small_180_thousandths 0.01534546154764925
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_180_thousandths_small_180_thousandths 0.029198778976733307
sales_extrme_mpe_window3_month_day_concat_large_200_thousandths_small_200_thousandths 0.014720484703605638
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_200_thousandths_small_200_thousandths 0.029323055328452738
sales_extrme_mpe_window3_month_day_concat_large_220_thousandths_small_220_thousandths 0.014190541564711794
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_220_thousandths_small_220_thousandths 0.02911838472896596
sales_extrme_mpe_window3_month_day_concat_large_240_thousandths_small_240_thousandths 0.013418217650663552
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_240_thousandths_small_240_thousandths 0.028520863955555323
sales_extrme_mpe_window3_month_day_concat_large_260_thousandths_small_260_thousandths 0.01282395966018192
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_260_thousandths_small_260_thousandths 0.0280273727246588
sales_extrme_mpe_window3_month_day_concat_large_280_thousandths_small_280_thousandths 0.012321389176211318
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_280_thousandths_small_280_thousandths 0.0272889485058289
sales_extrme_mpe_window3_month_day_concat_large_300_thousandths_small_300_thousandths 0.011151783691910966
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_300_thousandths_small_300_thousandths 0.025431976050171886
sales_extrme_mpe_window3_month_day_concat_large_320_thousandths_small_320_thousandths 0.010500820579983628
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_320_thousandths_small_320_thousandths 0.024283753686053206
sales_extrme_mpe_window3_month_day_concat_large_340_thousandths_small_340_thousandths 0.009865774237801548
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_340_thousandths_small_340_thousandths 0.023133672997118868
sales_extrme_mpe_window3_month_day_concat_large_360_thousandths_small_360_thousandths 0.008628243910296917
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_360_thousandths_small_360_thousandths 0.02108953417723299
sales_extrme_mpe_window3_month_day_concat_large_380_thousandths_small_380_thousandths 0.007644665603312087
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_380_thousandths_small_380_thousandths 0.019407347618076838
sales_extrme_mpe_window3_month_day_concat_large_400_thousandths_small_400_thousandths 0.00673419509719919
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_400_thousandths_small_400_thousandths 0.01758928969884081
sales_extrme_mpe_window3_month_day_concat_large_420_thousandths_small_420_thousandths 0.005905294918388536
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_420_thousandths_small_420_thousandths 0.016129534073204808
sales_extrme_mpe_window3_month_day_concat_large_440_thousandths_small_440_thousandths 0.005105594582850818
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_440_thousandths_small_440_thousandths 0.014601718994740892
sales_extrme_mpe_window3_month_day_concat_large_460_thousandths_small_460_thousandths 0.003371819624396426
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_460_thousandths_small_460_thousandths 0.011527757766376216
sales_extrme_mpe_window3_month_day_concat_large_480_thousandths_small_480_thousandths 0.005966398569944602
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_480_thousandths_small_480_thousandths 0.013962100465293266
sales_extrme_mpe_window3_month_day_concat_large_500_thousandths_small_500_thousandths 0.003312229287821155
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_500_thousandths_small_500_thousandths 0.011501981353459347
sales_extrme_mpe_window4_month_day_concat_large_20_thousandths_small_20_thousandths 0.015030779224067623
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_20_thousandths_small_20_thousandths 0.018177054775703347
sales_extrme_mpe_window4_month_day_concat_large_40_thousandths_small_40_thousandths 0.018189738939540984
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_40_thousandths_small_40_thousandths 0.023322397058424864
sales_extrme_mpe_window4_month_day_concat_large_60_thousandths_small_60_thousandths 0.01741274451795787
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_60_thousandths_small_60_thousandths 0.023906936294092462
sales_extrme_mpe_window4_month_day_concat_large_80_thousandths_small_80_thousandths 0.016915595641059357
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_80_thousandths_small_80_thousandths 0.024581842370751943
sales_extrme_mpe_window4_month_day_concat_large_100_thousandths_small_100_thousandths 0.015869989394191597
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_100_thousandths_small_100_thousandths 0.02474544477764317
sales_extrme_mpe_window4_month_day_concat_large_120_thousandths_small_120_thousandths 0.014893785253403815
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_120_thousandths_small_120_thousandths 0.024875230627361633
sales_extrme_mpe_window4_month_day_concat_large_140_thousandths_small_140_thousandths 0.01408858709743048
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_140_thousandths_small_140_thousandths 0.025274424936542042
sales_extrme_mpe_window4_month_day_concat_large_160_thousandths_small_160_thousandths 0.013145320782080725
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_160_thousandths_small_160_thousandths 0.025257316512898153
sales_extrme_mpe_window4_month_day_concat_large_180_thousandths_small_180_thousandths 0.012409733517044719
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_180_thousandths_small_180_thousandths 0.025576911917680887
sales_extrme_mpe_window4_month_day_concat_large_200_thousandths_small_200_thousandths 0.01178972091596051
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_200_thousandths_small_200_thousandths 0.025783471849813035
sales_extrme_mpe_window4_month_day_concat_large_220_thousandths_small_220_thousandths 0.01105543626031499
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_220_thousandths_small_220_thousandths 0.025691185420664567
sales_extrme_mpe_window4_month_day_concat_large_240_thousandths_small_240_thousandths 0.01019711771647156
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_240_thousandths_small_240_thousandths 0.02536614758085974
sales_extrme_mpe_window4_month_day_concat_large_260_thousandths_small_260_thousandths 0.009655562101376596
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_260_thousandths_small_260_thousandths 0.02514781050996355
sales_extrme_mpe_window4_month_day_concat_large_280_thousandths_small_280_thousandths 0.00885299082166008
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_280_thousandths_small_280_thousandths 0.024497941819825026
sales_extrme_mpe_window4_month_day_concat_large_300_thousandths_small_300_thousandths 0.008344901652075644
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_300_thousandths_small_300_thousandths 0.02419876873096241
sales_extrme_mpe_window4_month_day_concat_large_320_thousandths_small_320_thousandths 0.007764741204254976
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_320_thousandths_small_320_thousandths 0.02330987561005013
sales_extrme_mpe_window4_month_day_concat_large_340_thousandths_small_340_thousandths 0.0071696155205118635
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_340_thousandths_small_340_thousandths 0.022433771064299
sales_extrme_mpe_window4_month_day_concat_large_360_thousandths_small_360_thousandths 0.00644490240713952
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_360_thousandths_small_360_thousandths 0.021244087728200453
sales_extrme_mpe_window4_month_day_concat_large_380_thousandths_small_380_thousandths 0.005633740003148018
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_380_thousandths_small_380_thousandths 0.019942814267273096
sales_extrme_mpe_window4_month_day_concat_large_400_thousandths_small_400_thousandths 0.005108557067238912
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_400_thousandths_small_400_thousandths 0.018845606744526865
sales_extrme_mpe_window4_month_day_concat_large_420_thousandths_small_420_thousandths 0.00446479673417627
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_420_thousandths_small_420_thousandths 0.01762059097335061
sales_extrme_mpe_window4_month_day_concat_large_440_thousandths_small_440_thousandths 0.003951416262473731
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_440_thousandths_small_440_thousandths 0.01670479711663497
sales_extrme_mpe_window4_month_day_concat_large_460_thousandths_small_460_thousandths 0.003536272225879329
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_460_thousandths_small_460_thousandths 0.01572448359316855
sales_extrme_mpe_window4_month_day_concat_large_480_thousandths_small_480_thousandths 0.0035895056705657173
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_480_thousandths_small_480_thousandths 0.015481726675359332
sales_extrme_mpe_window4_month_day_concat_large_500_thousandths_small_500_thousandths 0.0036424720050536813
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_500_thousandths_small_500_thousandths 0.0159174719743109
sales_extrme_mpe_window5_month_day_concat_large_20_thousandths_small_20_thousandths 0.014678635481098608
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_20_thousandths_small_20_thousandths 0.017651028463705132
sales_extrme_mpe_window5_month_day_concat_large_40_thousandths_small_40_thousandths 0.017000824764258484
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_40_thousandths_small_40_thousandths 0.0217736906387108
sales_extrme_mpe_window5_month_day_concat_large_60_thousandths_small_60_thousandths 0.01692236083515477
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_60_thousandths_small_60_thousandths 0.02311286755185133
sales_extrme_mpe_window5_month_day_concat_large_80_thousandths_small_80_thousandths 0.016490949852784934
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_80_thousandths_small_80_thousandths 0.0241059766554772
sales_extrme_mpe_window5_month_day_concat_large_100_thousandths_small_100_thousandths 0.014821336950199
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_100_thousandths_small_100_thousandths 0.02380765678968567
sales_extrme_mpe_window5_month_day_concat_large_120_thousandths_small_120_thousandths 0.013819224469392622
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_120_thousandths_small_120_thousandths 0.023862096158467946
sales_extrme_mpe_window5_month_day_concat_large_140_thousandths_small_140_thousandths 0.012819964123826
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_140_thousandths_small_140_thousandths 0.02399600185411558
sales_extrme_mpe_window5_month_day_concat_large_160_thousandths_small_160_thousandths 0.011936163074573639
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_160_thousandths_small_160_thousandths 0.024194088823890205
sales_extrme_mpe_window5_month_day_concat_large_180_thousandths_small_180_thousandths 0.010824631540160704
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_180_thousandths_small_180_thousandths 0.02411340640634406
sales_extrme_mpe_window5_month_day_concat_large_200_thousandths_small_200_thousandths 0.010011729958928454
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_200_thousandths_small_200_thousandths 0.024135184144373576
sales_extrme_mpe_window5_month_day_concat_large_220_thousandths_small_220_thousandths 0.009423486242204755
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_220_thousandths_small_220_thousandths 0.02426774485178402
sales_extrme_mpe_window5_month_day_concat_large_240_thousandths_small_240_thousandths 0.008832619210435816
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_240_thousandths_small_240_thousandths 0.024208060943086918
sales_extrme_mpe_window5_month_day_concat_large_260_thousandths_small_260_thousandths 0.008073485820995756
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_260_thousandths_small_260_thousandths 0.023741755918198444
sales_extrme_mpe_window5_month_day_concat_large_280_thousandths_small_280_thousandths 0.007421870767888353
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_280_thousandths_small_280_thousandths 0.023357434015543854
sales_extrme_mpe_window5_month_day_concat_large_300_thousandths_small_300_thousandths 0.006739091703463419
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_300_thousandths_small_300_thousandths 0.022953704472834142
sales_extrme_mpe_window5_month_day_concat_large_320_thousandths_small_320_thousandths 0.006112186874413307
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_320_thousandths_small_320_thousandths 0.022309172982987666
sales_extrme_mpe_window5_month_day_concat_large_340_thousandths_small_340_thousandths 0.005590850349655419
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_340_thousandths_small_340_thousandths 0.02194178709130319
sales_extrme_mpe_window5_month_day_concat_large_360_thousandths_small_360_thousandths 0.005122919993132261
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_360_thousandths_small_360_thousandths 0.021345236950764946
sales_extrme_mpe_window5_month_day_concat_large_380_thousandths_small_380_thousandths 0.004652719386091483
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_380_thousandths_small_380_thousandths 0.020588036996139876
sales_extrme_mpe_window5_month_day_concat_large_400_thousandths_small_400_thousandths 0.004257184612818339
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_400_thousandths_small_400_thousandths 0.019878326358521037
sales_extrme_mpe_window5_month_day_concat_large_420_thousandths_small_420_thousandths 0.0038556845892035495
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_420_thousandths_small_420_thousandths 0.01909558976042143
sales_extrme_mpe_window5_month_day_concat_large_440_thousandths_small_440_thousandths 0.0034676565717966347
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_440_thousandths_small_440_thousandths 0.018164303938485615
sales_extrme_mpe_window5_month_day_concat_large_460_thousandths_small_460_thousandths 0.0031654338741826586
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_460_thousandths_small_460_thousandths 0.017326447064173158
sales_extrme_mpe_window5_month_day_concat_large_480_thousandths_small_480_thousandths 0.0031264124108463533
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_480_thousandths_small_480_thousandths 0.01648836233598016
sales_extrme_mpe_window5_month_day_concat_large_500_thousandths_small_500_thousandths 0.0031873099329844065
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_500_thousandths_small_500_thousandths 0.017365587198026975
sales_extrme_mpe_window6_month_day_concat_large_20_thousandths_small_20_thousandths 0.01514568017870424
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_20_thousandths_small_20_thousandths 0.017482930103950196
sales_extrme_mpe_window6_month_day_concat_large_40_thousandths_small_40_thousandths 0.017542646417369752
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_40_thousandths_small_40_thousandths 0.021721786747940473
sales_extrme_mpe_window6_month_day_concat_large_60_thousandths_small_60_thousandths 0.017458385271470443
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_60_thousandths_small_60_thousandths 0.02317488273199843
sales_extrme_mpe_window6_month_day_concat_large_80_thousandths_small_80_thousandths 0.01664366295446592
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_80_thousandths_small_80_thousandths 0.023617730502016456
sales_extrme_mpe_window6_month_day_concat_large_100_thousandths_small_100_thousandths 0.016011098702036826
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_100_thousandths_small_100_thousandths 0.024341424468656114
sales_extrme_mpe_window6_month_day_concat_large_120_thousandths_small_120_thousandths 0.014507647858484237
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_120_thousandths_small_120_thousandths 0.024145931735182313
sales_extrme_mpe_window6_month_day_concat_large_140_thousandths_small_140_thousandths 0.013282478249139421
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_140_thousandths_small_140_thousandths 0.02433978057773376
sales_extrme_mpe_window6_month_day_concat_large_160_thousandths_small_160_thousandths 0.012358574060183068
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_160_thousandths_small_160_thousandths 0.02456871765056813
sales_extrme_mpe_window6_month_day_concat_large_180_thousandths_small_180_thousandths 0.01148473327878155
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_180_thousandths_small_180_thousandths 0.024625136735304837
sales_extrme_mpe_window6_month_day_concat_large_200_thousandths_small_200_thousandths 0.010578788930019024
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_200_thousandths_small_200_thousandths 0.024444403043114297
sales_extrme_mpe_window6_month_day_concat_large_220_thousandths_small_220_thousandths 0.009669439345233963
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_220_thousandths_small_220_thousandths 0.02435184315692
sales_extrme_mpe_window6_month_day_concat_large_240_thousandths_small_240_thousandths 0.00860000580951398
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_240_thousandths_small_240_thousandths 0.023811797715995654
sales_extrme_mpe_window6_month_day_concat_large_260_thousandths_small_260_thousandths 0.007991731024593836
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_260_thousandths_small_260_thousandths 0.023751558396165717
sales_extrme_mpe_window6_month_day_concat_large_280_thousandths_small_280_thousandths 0.00721228334763445
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_280_thousandths_small_280_thousandths 0.023265767905862322
sales_extrme_mpe_window6_month_day_concat_large_300_thousandths_small_300_thousandths 0.0065573631256911425
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_300_thousandths_small_300_thousandths 0.022797719560758614
sales_extrme_mpe_window6_month_day_concat_large_320_thousandths_small_320_thousandths 0.006023691063430106
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_320_thousandths_small_320_thousandths 0.022470144936407737
sales_extrme_mpe_window6_month_day_concat_large_340_thousandths_small_340_thousandths 0.005399177047170137
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_340_thousandths_small_340_thousandths 0.02166062473995825
sales_extrme_mpe_window6_month_day_concat_large_360_thousandths_small_360_thousandths 0.004772100955272829
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_360_thousandths_small_360_thousandths 0.020708526345277478
sales_extrme_mpe_window6_month_day_concat_large_380_thousandths_small_380_thousandths 0.0042067310878244665
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_380_thousandths_small_380_thousandths 0.01990751421272923
sales_extrme_mpe_window6_month_day_concat_large_400_thousandths_small_400_thousandths 0.0037793976614893857
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_400_thousandths_small_400_thousandths 0.0193496278794787
sales_extrme_mpe_window6_month_day_concat_large_420_thousandths_small_420_thousandths 0.003292544727523843
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_420_thousandths_small_420_thousandths 0.01842952775519452
sales_extrme_mpe_window6_month_day_concat_large_440_thousandths_small_440_thousandths 0.003037251876661773
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_440_thousandths_small_440_thousandths 0.017848190199303067
sales_extrme_mpe_window6_month_day_concat_large_460_thousandths_small_460_thousandths 0.002791639438074609
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_460_thousandths_small_460_thousandths 0.016990789940521507
sales_extrme_mpe_window6_month_day_concat_large_480_thousandths_small_480_thousandths 0.003071943587140911
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_480_thousandths_small_480_thousandths 0.016674741675744162
sales_extrme_mpe_window6_month_day_concat_large_500_thousandths_small_500_thousandths 0.002230806516230318
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_500_thousandths_small_500_thousandths 0.015494148243635937
sales_extrme_mpe_window7_month_day_concat_large_20_thousandths_small_20_thousandths 0.015236928952861023
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_20_thousandths_small_20_thousandths 0.017236816028123107
sales_extrme_mpe_window7_month_day_concat_large_40_thousandths_small_40_thousandths 0.017760873780388108
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_40_thousandths_small_40_thousandths 0.021456966968765114
sales_extrme_mpe_window7_month_day_concat_large_60_thousandths_small_60_thousandths 0.017789173216797516
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_60_thousandths_small_60_thousandths 0.023151210596345267
sales_extrme_mpe_window7_month_day_concat_large_80_thousandths_small_80_thousandths 0.017145184065365048
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_80_thousandths_small_80_thousandths 0.024065365985392773
sales_extrme_mpe_window7_month_day_concat_large_100_thousandths_small_100_thousandths 0.016431063373541506
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_100_thousandths_small_100_thousandths 0.02479311394493324
sales_extrme_mpe_window7_month_day_concat_large_120_thousandths_small_120_thousandths 0.01516666353558165
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_120_thousandths_small_120_thousandths 0.025076081455191385
sales_extrme_mpe_window7_month_day_concat_large_140_thousandths_small_140_thousandths 0.014273354480975086
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_140_thousandths_small_140_thousandths 0.025399253360402566
sales_extrme_mpe_window7_month_day_concat_large_160_thousandths_small_160_thousandths 0.013230235340203489
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_160_thousandths_small_160_thousandths 0.025516893517171756
sales_extrme_mpe_window7_month_day_concat_large_180_thousandths_small_180_thousandths 0.012416858061325923
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_180_thousandths_small_180_thousandths 0.02569010767892093
sales_extrme_mpe_window7_month_day_concat_large_200_thousandths_small_200_thousandths 0.011245845100811763
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_200_thousandths_small_200_thousandths 0.025254258034564072
sales_extrme_mpe_window7_month_day_concat_large_220_thousandths_small_220_thousandths 0.0105127177160828
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_220_thousandths_small_220_thousandths 0.024959342518745434
sales_extrme_mpe_window7_month_day_concat_large_240_thousandths_small_240_thousandths 0.009609298831843405
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_240_thousandths_small_240_thousandths 0.024455294494253106
sales_extrme_mpe_window7_month_day_concat_large_260_thousandths_small_260_thousandths 0.00886126963146134
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_260_thousandths_small_260_thousandths 0.024138496812166094
sales_extrme_mpe_window7_month_day_concat_large_280_thousandths_small_280_thousandths 0.007792579914125382
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_280_thousandths_small_280_thousandths 0.023285511945337456
sales_extrme_mpe_window7_month_day_concat_large_300_thousandths_small_300_thousandths 0.007142065810935336
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_300_thousandths_small_300_thousandths 0.02288363710156495
sales_extrme_mpe_window7_month_day_concat_large_320_thousandths_small_320_thousandths 0.00626824421090016
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_320_thousandths_small_320_thousandths 0.021936755189772943
sales_extrme_mpe_window7_month_day_concat_large_340_thousandths_small_340_thousandths 0.005699365597048846
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_340_thousandths_small_340_thousandths 0.021167458739884355
sales_extrme_mpe_window7_month_day_concat_large_360_thousandths_small_360_thousandths 0.004884464064357861
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_360_thousandths_small_360_thousandths 0.02031308766895239
sales_extrme_mpe_window7_month_day_concat_large_380_thousandths_small_380_thousandths 0.004226274623172549
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_380_thousandths_small_380_thousandths 0.01935279069035402
sales_extrme_mpe_window7_month_day_concat_large_400_thousandths_small_400_thousandths 0.0036700003315205346
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_400_thousandths_small_400_thousandths 0.018441824300850347
sales_extrme_mpe_window7_month_day_concat_large_420_thousandths_small_420_thousandths 0.0031660249579953197
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_420_thousandths_small_420_thousandths 0.017465814697763715
sales_extrme_mpe_window7_month_day_concat_large_440_thousandths_small_440_thousandths 0.0028150110565658957
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_440_thousandths_small_440_thousandths 0.016786703995044214
sales_extrme_mpe_window7_month_day_concat_large_460_thousandths_small_460_thousandths 0.0027025425772909924
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_460_thousandths_small_460_thousandths 0.01608496798357035
sales_extrme_mpe_window7_month_day_concat_large_480_thousandths_small_480_thousandths 0.002855285731913743
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_480_thousandths_small_480_thousandths 0.015525488988027381
sales_extrme_mpe_window7_month_day_concat_large_500_thousandths_small_500_thousandths 0.0017782669428385986
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_500_thousandths_small_500_thousandths 0.013753822337555874
sales_mpe_window2_month_day_concat 0.006593603844642813
sales_mpe_window2_weekofyear_dayofweek_concat 0.0016380656008927797
sales_mpe_window3_month_day_concat 0.008558692190518444
sales_mpe_window3_weekofyear_dayofweek_concat 0.002333332488525078
sales_mpe_window4_month_day_concat 0.009596815824306545
sales_mpe_window4_weekofyear_dayofweek_concat 0.0024249434636324806
sales_mpe_window5_month_day_concat 0.011135415292879713
sales_mpe_window5_weekofyear_dayofweek_concat 0.0025792455575072344
sales_mpe_window6_month_day_concat 0.012193943279402708
sales_mpe_window6_weekofyear_dayofweek_concat 0.0025385538528652973
sales_mpe_window7_month_day_concat 0.013253649157970627
sales_mpe_window7_weekofyear_dayofweek_concat 0.0025706810960258435
In [63]:
df_all.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 913000 entries, 0 to 912999
Columns: 604 entries, sales to sales_mpe_window7_weekofyear_dayofweek_concat
dtypes: datetime64[ns](1), float32(261), float64(33), int16(3), int8(306)
memory usage: 1.4 GB
In [64]:
df_all = df_all.sort_values(by=['store','item','date'])

df_all.to_pickle('./catboost/df_all_cat.pkl')
del df_all

4.模型训练

为什么采用catboost? 集成方法中boost方法也许是最有效的,那么有3个选择catboost、lightgbm和xgboost。 此3个包运算速度依次递减,而提升算法又是较耗时的。 而且,catboost和lightgbm可以处理分类特征,不必在数据准备阶段预处理。

In [65]:
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 100)

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split
from catboost import CatBoostRegressor, Pool, cv

import time
import sys
import gc
import pickle
import gzip
sys.version_info
Out[65]:
sys.version_info(major=3, minor=6, micro=7, releaselevel='final', serial=0)
In [66]:
#导入数据
df_all = pd.read_pickle('./catboost/df_all_cat.pkl')
df_all = df_all.sort_values(by=['store','item','date'])
In [67]:
from datetime import datetime
train = df_all[(df_all.date>=datetime(2014,1,1))&(df_all.date<=datetime(2016,12,31))]
train = train.sort_values(by=['date','store','item',])#似乎has_time=True时候需要按date顺序排列,且重排第一顺序为date

validation = df_all[(df_all.date>=datetime(2017,1,1))&(df_all.date<=datetime(2017,12,31))]
validation = validation.sort_values(by=['date','store','item',])

q1_2014 = df_all[(df_all.date>=datetime(2014,1,1))&(df_all.date<=datetime(2014,3,31))]
q1_2014 = q1_2014.sort_values(by=['date','store','item',])

q1_2015 = df_all[(df_all.date>=datetime(2015,1,1))&(df_all.date<=datetime(2015,3,31))]
q1_2015 = q1_2015.sort_values(by=['date','store','item',])

q1_2016 = df_all[(df_all.date>=datetime(2016,1,1))&(df_all.date<=datetime(2016,3,31))]
q1_2016 = q1_2016.sort_values(by=['date','store','item',])

q1_2017 = df_all[(df_all.date>=datetime(2017,1,1))&(df_all.date<=datetime(2017,3,31))]
q1_2017 = q1_2017.sort_values(by=['date','store','item',])
In [68]:
train.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 548000 entries, 365 to 912634
Columns: 604 entries, sales to sales_mpe_window7_weekofyear_dayofweek_concat
dtypes: datetime64[ns](1), float32(261), float64(33), int16(3), int8(306)
memory usage: 855.0 MB
In [69]:
validation.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 182500 entries, 1461 to 912999
Columns: 604 entries, sales to sales_mpe_window7_weekofyear_dayofweek_concat
dtypes: datetime64[ns](1), float32(261), float64(33), int16(3), int8(306)
memory usage: 284.7 MB
In [70]:
q1_2014.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 45000 entries, 365 to 911628
Columns: 604 entries, sales to sales_mpe_window7_weekofyear_dayofweek_concat
dtypes: datetime64[ns](1), float32(261), float64(33), int16(3), int8(306)
memory usage: 70.2 MB
In [71]:
q1_2015.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 45000 entries, 730 to 911993
Columns: 604 entries, sales to sales_mpe_window7_weekofyear_dayofweek_concat
dtypes: datetime64[ns](1), float32(261), float64(33), int16(3), int8(306)
memory usage: 70.2 MB
In [72]:
q1_2016.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 45500 entries, 1095 to 912359
Columns: 604 entries, sales to sales_mpe_window7_weekofyear_dayofweek_concat
dtypes: datetime64[ns](1), float32(261), float64(33), int16(3), int8(306)
memory usage: 71.0 MB
In [73]:
q1_2017.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 45000 entries, 1461 to 912724
Columns: 604 entries, sales to sales_mpe_window7_weekofyear_dayofweek_concat
dtypes: datetime64[ns](1), float32(261), float64(33), int16(3), int8(306)
memory usage: 70.2 MB
In [74]:
columns = list(train.columns)
time_features_names = ['day_from_start']
time_features = [columns.index(time_features_names[i]) for i in range(len(time_features_names))]

cat_features_names = ['dayofweek', 'weekofyear', 'day', 'month', 'store','item']
cat_features = [columns.index(cat_features_names[i]) for i in range(len(cat_features_names))]

auxiliary_features_names = ['date']
auxiliary_features = [columns.index(auxiliary_features_names[i]) for i in range(len(auxiliary_features_names))]
In [75]:
time_features
Out[75]:
[1]
In [76]:
cat_features
Out[76]:
[3, 2, 4, 5, 8, 9]
In [77]:
auxiliary_features
Out[77]:
[7]
In [78]:
train.to_csv('./catboost/train_pool.csv',index=False)
validation.to_csv('./catboost/validation_pool.csv',index=False)
q1_2014.to_csv('./catboost/q1_2014_pool.csv',index=False)
q1_2015.to_csv('./catboost/q1_2015_pool.csv',index=False)
q1_2016.to_csv('./catboost/q1_2016_pool.csv',index=False)
q1_2017.to_csv('./catboost/q1_2017_pool.csv',index=False)
In [79]:
def catboostCD(fname=None, 
               time_features=None, time_features_names=None, 
               cat_features=None, cat_features_names=None,
               auxiliary_features=None,auxiliary_features_names=None,
               weight_features=None,weight_features_names=None,
               sep='\t',pred=False):
    with open(fname,"w") as fout:
        if pred is False:
            fout.write('0{0}Label'.format(sep))
        else:
            fout.write('0{0}Auxiliary'.format(sep))
        if time_features is not None:
            fout.write(''.join(['\n{0}{1}Timestamp{1}{2}'.format(el[0], sep, el[1]) for el in zip(time_features, time_features_names)]))
        if cat_features is not None:
            fout.write(''.join(['\n{0}{1}Categ{1}{2}'.format(el[0], sep, el[1]) for el in zip(cat_features, cat_features_names)]))
        if auxiliary_features is not None:
            fout.write(''.join(['\n{0}{1}Auxiliary{1}{2}'.format(el[0], sep, el[1]) for el in zip(auxiliary_features, 
                                                                                              auxiliary_features_names)]))
        if weight_features is not None:
            fout.write(''.join(['\n{0}{1}Weight{1}{2}'.format(el[0], sep, el[1]) for el in zip(weight_features, 
                                                                                              weight_features_names)]))
In [80]:
catboostCD('./catboost/train_pool.cd',
           time_features,time_features_names,
           cat_features,cat_features_names,
           auxiliary_features,auxiliary_features_names,
           pred=False)
catboostCD('./catboost/validation_pool.cd',
           time_features,time_features_names,
           cat_features,cat_features_names,
           auxiliary_features,auxiliary_features_names,
           pred=False)
catboostCD('./catboost/q1_2014_pool.cd',
           time_features,time_features_names,
           cat_features,cat_features_names,
           auxiliary_features,auxiliary_features_names,
           pred=False)
catboostCD('./catboost/q1_2015_pool.cd',
           time_features,time_features_names,
           cat_features,cat_features_names,
           auxiliary_features,auxiliary_features_names,
           pred=False)
catboostCD('./catboost/q1_2016_pool.cd',
           time_features,time_features_names,
           cat_features,cat_features_names,
           auxiliary_features,auxiliary_features_names,
           pred=False)
catboostCD('./catboost/q1_2017_pool.cd',
           time_features,time_features_names,
           cat_features,cat_features_names,
           auxiliary_features,auxiliary_features_names,
           pred=False)
In [81]:
from datetime import datetime
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from catboost import CatBoostRegressor, Pool, cv

from sklearn.model_selection import train_test_split

from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
#import os
In [82]:
train_pool = Pool(
    './catboost/train_pool.csv', 
    delimiter=',', 
    column_description='./catboost/train_pool.cd',
    has_header=True
)
validation_pool = Pool(
    './catboost/validation_pool.csv', 
    delimiter=',', 
    column_description='./catboost/validation_pool.cd',
    has_header=True
)
q1_2014_pool = Pool(
    './catboost/q1_2014_pool.csv', 
    delimiter=',', 
    column_description='./catboost/q1_2014_pool.cd',
    has_header=True
)
q1_2015_pool = Pool(
    './catboost/q1_2015_pool.csv', 
    delimiter=',', 
    column_description='./catboost/q1_2015_pool.cd',
    has_header=True
)
q1_2016_pool = Pool(
    './catboost/q1_2016_pool.csv', 
    delimiter=',', 
    column_description='./catboost/q1_2016_pool.cd',
    has_header=True
)
q1_2017_pool = Pool(
    './catboost/q1_2017_pool.csv', 
    delimiter=',', 
    column_description='./catboost/q1_2017_pool.cd',
    has_header=True
)
In [83]:
del train,q1_2014,q1_2015,q1_2016,q1_2017,df_all   #,validation ,q1_2014

调参感想: 用hyperopt包可以网格调参,注意采用GPU计算,要不然极其耗时!

eval_metric按竞赛要求采用smape,如果loss_function采用rmse的话会造成sales小的时候smape大,而销量存在123月10、11、12月小,那么loss_function采用rmse是不合适的。查询可用的loss_function,mape是恰当的。但不知怎的,mape训练极其慢,而且似乎采用GPU不支持mape损失函数。

关于损失函数,rmse将使得预测值与目标值的差值的绝对值接近。

假设预测值与目标值的差值绝对值一定,按smape度量,反映起来是目标值大者smape小,目标值小者smape大。

多次建立模型发现:2014-2016年数据训练,其smape约12.4,2017年smape可为11.8,每年第一季度的smape则在14上下。说明若能找到更适应度量指标的目标函数使smape平均化,2017第一季度的smape可以更加下降。

catboost自带的损失函数中mape关注误差的比例,接近smape,但是无奈其更新很慢。

设想构造一个关注误差比例的损失函数,其将达到2017年的smape约等于12.000,比如损失函数为: $\sqrt{\frac{\sum_{i}^{N}(1-\frac{a_{i}}{t_{i}})^{2}w_i}{\sum_{i}^{N}w_i}}$,其将惩罚误差比例过大者,这样,预计2018年第一季度的smape将在12.00附近。

虽然catboost支持自定义函数,但不推荐用python代码而推荐c++代码,详见https://github.com/catboost/catboost/issues/521https://github.com/catboost/tutorials/blob/master/custom_loss/custom_metric_tutorial.mdhttps://tech.yandex.com/catboost/doc/dg/concepts/python-usages-examples-docpage/#custom-objective-function

In [84]:
import hyperopt

def hyperopt_objective(params):
    model = CatBoostRegressor(task_type = 'GPU',
                              loss_function='RMSE', 
                              eval_metric='RMSE', #SMAPE not support by GPU
                              random_seed=42,
                              logging_level='Silent',
                              iterations=500,
                              learning_rate=params['learning_rate'],
                              depth = params['depth'],
                              l2_leaf_reg=params['l2_leaf_reg'],
                              random_strength=params['random_strength'],
                              bagging_temperature=params['bagging_temperature'],
                              border_count=int(params['border_count']),
                              has_time=bool(params['has_time']))
    
    
    model.fit(
    train_pool,
    eval_set=[(q1_2017_pool), #2017q1
              ])    
    
    best_smape = np.min(model.evals_result_['validation_0']['RMSE'])
    
    return best_smape # as hyperopt minimises
In [85]:
from numpy.random import RandomState
from time import time
ts = time()
params_space = {
    'learning_rate': hyperopt.hp.uniform('learning_rate', 1e-3, 5e-1), #default 0.03
    'depth':  hyperopt.hp.randint('depth', 10),  #default 6
    'l2_leaf_reg': hyperopt.hp.qloguniform('l2_leaf_reg', 0, 2, 1),  #default 3
    'random_strength':  hyperopt.hp.loguniform('random_strength', -5, 5),   #default 1
    'bagging_temperature':  hyperopt.hp.loguniform('bagging_temperature', -5, 5),   #default 1
    'border_count':  hyperopt.hp.qloguniform('border_count', 0, 4.853, 1),   #default 128GPU 256CPU
    'has_time':  hyperopt.hp.choice('has_time', [0, 1]),   
}

trials = hyperopt.Trials()

best = hyperopt.fmin(
    hyperopt_objective,
    space=params_space,
    algo=hyperopt.tpe.suggest,
    max_evals=300,   #300
    trials=trials,
    rstate=RandomState(123)
)

print(time()-ts)
print(best)
100%|██████████| 300/300 [2:51:39<00:00, 38.53s/it, best loss: 6.828264380092825]  
10299.033137083054
{'bagging_temperature': 0.9973890983953271, 'border_count': 22.0, 'depth': 5, 'has_time': 1, 'l2_leaf_reg': 2.0, 'learning_rate': 0.23449206385636862, 'random_strength': 0.15737694912029557}
In [86]:
learning_rate = best['learning_rate']
iterations = 2000
early_stopping_rounds = int(iterations/40)
print('learning_rate:',learning_rate)
print('iterations:',iterations)
print('early_stopping_rounds:',early_stopping_rounds)

model_1 = CatBoostRegressor(task_type = 'GPU',
                          loss_function='RMSE',
                          eval_metric='SMAPE',
                          random_seed=42,
                          verbose=early_stopping_rounds,
                          iterations=iterations,
                          learning_rate=learning_rate,
                          depth = best['depth'],
                          l2_leaf_reg=best['l2_leaf_reg'],
                          random_strength=best['random_strength'],
                          bagging_temperature=best['bagging_temperature'],
                          border_count=int(best['border_count']),
                          has_time=bool(best['has_time']))

# Fit model
model_1.fit(
    train_pool,
    eval_set=[(q1_2017_pool),
              ],
    use_best_model=True,
    early_stopping_rounds=early_stopping_rounds,
    plot=True)
learning_rate: 0.23449206385636862
iterations: 2000
early_stopping_rounds: 50
Metric SMAPE is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time
0:	learn: 120.9562447	test: 122.2423256	best: 122.2423256 (0)	total: 75.2ms	remaining: 2m 30s
50:	learn: 12.8503057	test: 13.7267155	best: 13.7267155 (50)	total: 3.39s	remaining: 2m 9s
100:	learn: 12.6604353	test: 13.5217038	best: 13.5217038 (100)	total: 6.75s	remaining: 2m 6s
150:	learn: 12.5808594	test: 13.4399236	best: 13.4398285 (149)	total: 10.1s	remaining: 2m 3s
200:	learn: 12.5323648	test: 13.3989362	best: 13.3989362 (200)	total: 13.5s	remaining: 2m
250:	learn: 12.4950803	test: 13.3664679	best: 13.3663259 (249)	total: 16.8s	remaining: 1m 57s
300:	learn: 12.4687967	test: 13.3463381	best: 13.3463381 (300)	total: 20.1s	remaining: 1m 53s
350:	learn: 12.4465211	test: 13.3350348	best: 13.3350348 (350)	total: 23.5s	remaining: 1m 50s
400:	learn: 12.4295972	test: 13.3240810	best: 13.3240810 (400)	total: 26.9s	remaining: 1m 47s
450:	learn: 12.4135546	test: 13.3228977	best: 13.3223194 (420)	total: 30.3s	remaining: 1m 44s
bestTest = 13.32231942
bestIteration = 420
Shrink model to first 421 iterations.
Out[86]:
<catboost.core.CatBoostRegressor at 0x136ae74a2e8>
In [87]:
learning_rate = 0.1
iterations = int(2000*best['learning_rate']/learning_rate)
early_stopping_rounds = int(iterations/40) 
print('learning_rate:',learning_rate)
print('iterations:',iterations)
print('early_stopping_rounds:',early_stopping_rounds)

model_2 = CatBoostRegressor(task_type = 'GPU',
                          loss_function='RMSE',
                          eval_metric='SMAPE',
                          random_seed=42,
                          verbose=early_stopping_rounds,
                          iterations=iterations,
                          learning_rate=learning_rate,
                          depth = best['depth'],
                          l2_leaf_reg=best['l2_leaf_reg'],
                          random_strength=best['random_strength'],
                          bagging_temperature=best['bagging_temperature'],
                          border_count=int(best['border_count']),
                          has_time=bool(best['has_time']))

# Fit model
model_2.fit(
    train_pool,
    eval_set=[(q1_2017_pool),
              ],
    use_best_model=True,
    early_stopping_rounds=early_stopping_rounds,
    plot=True)
learning_rate: 0.1
iterations: 4689
early_stopping_rounds: 117
Metric SMAPE is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time
0:	learn: 161.7161831	test: 162.3766654	best: 162.3766654 (0)	total: 67.1ms	remaining: 5m 14s
117:	learn: 12.7155361	test: 13.5709689	best: 13.5709689 (117)	total: 8.01s	remaining: 5m 10s
234:	learn: 12.5919127	test: 13.4484830	best: 13.4484830 (234)	total: 16s	remaining: 5m 4s
351:	learn: 12.5405638	test: 13.4038824	best: 13.4038824 (351)	total: 24s	remaining: 4m 55s
468:	learn: 12.5026739	test: 13.3810967	best: 13.3810967 (468)	total: 31.9s	remaining: 4m 46s
585:	learn: 12.4762018	test: 13.3799598	best: 13.3678893 (559)	total: 39.7s	remaining: 4m 38s
bestTest = 13.36788927
bestIteration = 559
Shrink model to first 560 iterations.
Out[87]:
<catboost.core.CatBoostRegressor at 0x136806c32e8>
In [88]:
learning_rate = 0.03
iterations = int(2000*best['learning_rate']/learning_rate)
early_stopping_rounds = int(iterations/40)
print('learning_rate:',learning_rate)
print('iterations:',iterations)
print('early_stopping_rounds:',early_stopping_rounds)

model_3 = CatBoostRegressor(task_type = 'GPU',
                          loss_function='RMSE',
                          eval_metric='SMAPE',
                          random_seed=42,
                          verbose=early_stopping_rounds,
                          iterations=iterations,
                          learning_rate=learning_rate,
                          depth = best['depth'],
                          l2_leaf_reg=best['l2_leaf_reg'],
                          random_strength=best['random_strength'],
                          bagging_temperature=best['bagging_temperature'],
                          border_count=int(best['border_count']),
                          has_time=bool(best['has_time']))

# Fit model
model_3.fit(
    train_pool,
    eval_set=[(q1_2017_pool),
              ],
    use_best_model=True,
    early_stopping_rounds=early_stopping_rounds,
    plot=True)
learning_rate: 0.03
iterations: 15632
early_stopping_rounds: 390
Metric SMAPE is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time
0:	learn: 187.6284377	test: 187.8478337	best: 187.8478337 (0)	total: 72.6ms	remaining: 18m 55s
390:	learn: 12.6758197	test: 13.5281846	best: 13.5281846 (390)	total: 26.4s	remaining: 17m 7s
780:	learn: 12.5607571	test: 13.4106706	best: 13.4106706 (780)	total: 52.7s	remaining: 16m 41s
1170:	learn: 12.5133971	test: 13.3697410	best: 13.3697155 (1169)	total: 1m 19s	remaining: 16m 18s
1560:	learn: 12.4836919	test: 13.3538867	best: 13.3538867 (1560)	total: 1m 45s	remaining: 15m 53s
1950:	learn: 12.4601188	test: 13.3419564	best: 13.3419564 (1950)	total: 2m 12s	remaining: 15m 27s
2340:	learn: 12.4407869	test: 13.3331274	best: 13.3331274 (2340)	total: 2m 39s	remaining: 15m 2s
2730:	learn: 12.4239943	test: 13.3284768	best: 13.3284551 (2715)	total: 3m 6s	remaining: 14m 39s
3120:	learn: 12.4085507	test: 13.3233653	best: 13.3232824 (3117)	total: 3m 33s	remaining: 14m 14s
3510:	learn: 12.3939158	test: 13.3195970	best: 13.3195970 (3510)	total: 4m	remaining: 13m 48s
3900:	learn: 12.3802231	test: 13.3162927	best: 13.3162927 (3900)	total: 4m 27s	remaining: 13m 23s
4290:	learn: 12.3673427	test: 13.3151152	best: 13.3151089 (4289)	total: 4m 54s	remaining: 12m 57s
4680:	learn: 12.3553498	test: 13.3139383	best: 13.3137916 (4575)	total: 5m 21s	remaining: 12m 31s
5070:	learn: 12.3434517	test: 13.3130250	best: 13.3128047 (5024)	total: 5m 48s	remaining: 12m 5s
5460:	learn: 12.3318341	test: 13.3123570	best: 13.3118034 (5379)	total: 6m 15s	remaining: 11m 39s
bestTest = 13.31180337
bestIteration = 5379
Shrink model to first 5380 iterations.
Out[88]:
<catboost.core.CatBoostRegressor at 0x136806c3a58>
In [89]:
learning_rate = 0.01
iterations = int(2000*best['learning_rate']/learning_rate)
early_stopping_rounds = int(iterations/40)
print('learning_rate:',learning_rate)
print('iterations:',iterations)
print('early_stopping_rounds:',early_stopping_rounds)

model_4 = CatBoostRegressor(task_type = 'GPU',
                          loss_function='RMSE',
                          eval_metric='SMAPE',
                          random_seed=42,
                          verbose=early_stopping_rounds,
                          iterations=iterations,
                          learning_rate=learning_rate,
                          depth = best['depth'],
                          l2_leaf_reg=best['l2_leaf_reg'],
                          random_strength=best['random_strength'],
                          bagging_temperature=best['bagging_temperature'],
                          border_count=int(best['border_count']),
                          has_time=bool(best['has_time']))

# Fit model
model_4.fit(
    train_pool,
    eval_set=[(q1_2017_pool),
              ],
    use_best_model=True,
    early_stopping_rounds=early_stopping_rounds,
    plot=True)
learning_rate: 0.01
iterations: 46898
early_stopping_rounds: 1172
Metric SMAPE is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time
0:	learn: 195.7821841	test: 195.8574539	best: 195.8574539 (0)	total: 70.9ms	remaining: 55m 23s
1172:	learn: 12.6683498	test: 13.5131565	best: 13.5131565 (1172)	total: 1m 19s	remaining: 51m 21s
2344:	learn: 12.5526822	test: 13.3963876	best: 13.3963876 (2344)	total: 2m 37s	remaining: 49m 56s
3516:	learn: 12.5062891	test: 13.3561450	best: 13.3561450 (3516)	total: 3m 57s	remaining: 48m 45s
4688:	learn: 12.4778417	test: 13.3392934	best: 13.3392676 (4633)	total: 5m 16s	remaining: 47m 29s
5860:	learn: 12.4560891	test: 13.3289082	best: 13.3289030 (5859)	total: 6m 35s	remaining: 46m 11s
7032:	learn: 12.4367122	test: 13.3214773	best: 13.3214543 (7031)	total: 7m 54s	remaining: 44m 51s
8204:	learn: 12.4202419	test: 13.3162977	best: 13.3162639 (8202)	total: 9m 14s	remaining: 43m 32s
9376:	learn: 12.4055063	test: 13.3126110	best: 13.3126110 (9376)	total: 10m 33s	remaining: 42m 13s
10548:	learn: 12.3911809	test: 13.3089877	best: 13.3089816 (10547)	total: 11m 52s	remaining: 40m 54s
11720:	learn: 12.3777618	test: 13.3072797	best: 13.3072797 (11720)	total: 13m 11s	remaining: 39m 36s
12892:	learn: 12.3649843	test: 13.3062944	best: 13.3062349 (12450)	total: 14m 30s	remaining: 38m 16s
14064:	learn: 12.3525869	test: 13.3052956	best: 13.3050143 (13795)	total: 15m 50s	remaining: 36m 57s
15236:	learn: 12.3403968	test: 13.3040751	best: 13.3040732 (15235)	total: 17m 9s	remaining: 35m 39s
16408:	learn: 12.3288047	test: 13.3028955	best: 13.3028805 (16403)	total: 18m 28s	remaining: 34m 20s
17580:	learn: 12.3173912	test: 13.3029374	best: 13.3028477 (17542)	total: 19m 50s	remaining: 33m 5s
18752:	learn: 12.3063179	test: 13.3023721	best: 13.3022334 (18625)	total: 21m 11s	remaining: 31m 48s
19924:	learn: 12.2953005	test: 13.3020586	best: 13.3017768 (19773)	total: 22m 33s	remaining: 30m 32s
bestTest = 13.30177676
bestIteration = 19773
Shrink model to first 19774 iterations.
Out[89]:
<catboost.core.CatBoostRegressor at 0x136806c3358>
In [90]:
model_list = [model_1,model_2,model_3,model_4]
score_list = [i.get_best_score()['validation_0']['SMAPE'] for i in model_list]
best_score = min(score_list)
best_model = model_list[score_list.index(best_score)]
print(best_score)
13.301776762020786
In [91]:
best_model.eval_metrics(validation_pool, 'SMAPE')['SMAPE'][-1]
Out[91]:
11.860766606767626
In [92]:
best_model.eval_metrics(q1_2014_pool, 'SMAPE')['SMAPE'][-1]
Out[92]:
14.645917711865364
In [93]:
best_model.eval_metrics(q1_2015_pool, 'SMAPE')['SMAPE'][-1]
Out[93]:
14.091883155407277
In [94]:
best_model.eval_metrics(q1_2016_pool, 'SMAPE')['SMAPE'][-1]
Out[94]:
13.476791353148277

模型重要度可用来解释哪些特征比较重要,其计算方式见于https://tech.yandex.com/catboost/doc/dg/concepts/fstr-docpage/#fstr

In [95]:
feature_importances = best_model.get_feature_importance(train_pool)
validation = pd.read_csv('./catboost/validation_pool.csv')
feature_names = validation.columns[3:]
In [96]:
for score, name in sorted(zip(feature_importances, feature_names), reverse=True):
    print('{}: {}'.format(name, score))
sales_history_month_store_item_mean: 33.225506148917546
sales_lag364: 13.338587960860407
sales_history_dayofweek_store_item_mean: 7.6009961964024475
sales_lag357: 6.301990669362181
sales_history_weekofyear_dayofweek_store_item_mean: 4.752763186062749
sales_lag350: 3.7618202648058516
sales_item_mean_lag364: 3.568488305673506
sales_store_mean_lag364: 2.5861397565078135
day: 2.4986122499377923
sales_window7_mean_lag357: 2.4681943412885734
sales_window2_mean_lag357: 2.027140007770657
sales_history_dayofweek_mean: 1.7312688453760838
sales_history_dayofweek_item_mean: 1.408258755336699
sales_lag343: 1.3822053549737476
year: 1.273346500621811
sales_history_dayofweek_store_mean: 1.0466549659984028
sales_history_item_weekofyear_dayofweek_mean: 1.0117778658589305
sales_history_weekofyear_dayofweek_mean: 0.9780220337066374
date: 0.962899467278183
sales_history_store_weekofyear_dayofweek_mean: 0.8861207148970165
sales_lag336: 0.6014774082185084
sales_window4_mean_lag357: 0.5461416670931637
sales_history_month_item_mean: 0.46887735878493336
sales_history_month_mean: 0.44857476007482633
sales_history_store_item_mean: 0.3463118744866554
sales_window3_mean_lag357: 0.26093769731481486
sales_history_month_store_mean: 0.23394314295618493
sales_lag91: 0.19847855149040308
item: 0.1569375480443672
sales_item_mean_lag357: 0.1514118345660494
sales_item_mean_lag91: 0.12956964032981566
sales_history_item_mean: 0.12011415436621471
month: 0.11024423936338822
sales_window5_mean_lag357: 0.09039350112976616
sales_lag365: 0.08867806049219339
sales_history_day_store_item_mean: 0.08560721882472551
sales_store_mean_lag365: 0.08540885882735667
sales_window2_mean_lag358: 0.05637223146351748
dayofweek: 0.05344649523686936
sales_history_weekofyear_store_item_mean: 0.052427549806879206
sales_history_weekofyear_mean: 0.051155251285754495
sales_window5_mean_lag91: 0.04909705949456874
sales_window7_mean_lag350: 0.04769847897591316
sales_lag329: 0.042074750466433254
sales_history_store_mean: 0.04122994144749839
sales_window4_mean_lag358: 0.03914487092185996
sales_window7_mean_lag329: 0.03704024739931782
sales_history_day_item_mean: 0.034629153095521684
sales_store_mean_lag91: 0.0335091169063377
sales_item_mean_lag365: 0.03328333001095288
sales_history_store_day_month_mean: 0.028842578179854257
sales_history_weekofyear_store_mean: 0.02842348670130895
sales_history_day_mean: 0.02769155371827591
sales_history_day_store_mean: 0.027373145592025024
sales_lag362: 0.026951121830930135
sales_window7_mean_lag336: 0.026607756109187673
sales_lag238: 0.026378465893736026
sales_window7_mean_lag343: 0.024849406661552987
store: 0.024491619063014888
sales_history_item_day_month_mean: 0.02401725895216608
sales_history_day_month_store_item_mean: 0.022791365348252587
sales_lag203: 0.02233450823732845
sales_lag322: 0.022020252656001
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_360_thousandths_small_360_thousandths: 0.021741443682946768
sales_lag245: 0.021084700283087864
sales_item_mean_lag361: 0.019258434179216587
sales_window3_mean_lag91: 0.01909559205123863
sales_lag359: 0.018935171925093874
sales_lag266: 0.018640032380433328
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_400_thousandths_small_400_thousandths: 0.017923923684650546
sales_item_mean_lag363: 0.01763036051258916
sales_window3_mean_lag266: 0.017493909116867008
sales_item_mean_lag329: 0.016699531616684044
sales_item_mean_lag90: 0.016426026621885918
sales_item_mean_lag362: 0.016424930811050925
sales_lag363: 0.01526769101919245
sales_store_mean_lag361: 0.015181350842500222
sales_window5_mean_lag358: 0.015117050923383247
sales_item_mean_lag259: 0.015068917193643491
sales_window5_mean_lag273: 0.015028180898931121
sales_store_mean_lag203: 0.014824927829994897
sales_lag90: 0.014412722540355201
sales_lag259: 0.014218932781241476
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_300_thousandths_small_300_thousandths: 0.014154941233546995
sales_window5_mean_lag238: 0.014110484031488579
sales_lag280: 0.014094738604269604
sales_store_mean_lag280: 0.01396095210828416
sales_history_weekofyear_item_mean: 0.013922997093660239
sales_lag224: 0.013466825861714682
sales_item_mean_lag350: 0.013440356668791677
sales_item_mean_lag359: 0.013435980629830025
sales_store_mean_lag362: 0.013271337483975696
sales_lag252: 0.013143880719026866
sales_lag308: 0.013023585816671939
sales_window6_mean_lag329: 0.01293162996803953
sales_lag294: 0.012697002672870956
sales_lag301: 0.01267398565335604
sales_item_mean_lag336: 0.012634592555822464
sales_store_mean_lag90: 0.012579636855972836
sales_window5_mean_lag329: 0.012563650675384013
sales_window2_mean_lag203: 0.012245112934312375
sales_item_mean_lag224: 0.012106519385306925
sales_window6_mean_lag358: 0.012104872642615455
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_220_thousandths_small_220_thousandths: 0.011767367594750656
sales_store_mean_lag336: 0.01149337008864761
sales_window3_mean_lag252: 0.011476526656848951
sales_window2_mean_lag91: 0.011444034897223418
sales_window2_mean_lag90: 0.011184894410027561
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_440_thousandths_small_440_thousandths: 0.011113241476991179
sales_item_mean_lag343: 0.011061564939911318
sales_window3_mean_lag315: 0.011058562472290005
sales_window7_mean_lag91: 0.011008440701113422
sales_lag360: 0.010816210475994806
sales_lag287: 0.010739415804978066
sales_lag315: 0.010570237596377237
sales_lag231: 0.010560566237092752
sales_lag361: 0.010494595357084958
sales_item_mean_lag360: 0.010437150889180027
sales_window3_mean_lag203: 0.010403007797622252
sales_window3_mean_lag287: 0.010229482086671349
sales_store_mean_lag363: 0.010197812934923792
sales_window4_mean_lag350: 0.009816408262040747
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_280_thousandths_small_280_thousandths: 0.009617148500598086
sales_window2_mean_lag308: 0.009614567219667374
sales_item_mean_lag203: 0.009411907042553971
sales_window3_mean_lag358: 0.009333393855427651
sales_window3_mean_lag336: 0.009309750274244822
sales_lag210: 0.0092388615809278
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_380_thousandths_small_380_thousandths: 0.00918954053619245
sales_window4_mean_lag343: 0.009162844899140153
sales_lag273: 0.009078451384018167
sales_history_day_month_mean: 0.009070655272431538
sales_window4_mean_lag329: 0.009030664803163762
sales_window6_mean_lag350: 0.008887543778294916
sales_window7_mean_lag203: 0.00881266288237939
sales_window2_mean_lag329: 0.008693189894473685
sales_item_mean_lag231: 0.008675861700425627
sales_lag217: 0.008513538735968575
sales_window7_mean_lag238: 0.008481941489952583
sales_window7_mean_lag210: 0.008459962921824209
sales_window2_mean_lag252: 0.008396475672711373
sales_window7_mean_lag315: 0.00835026641375162
sales_window6_mean_lag203: 0.008339101601144839
sales_window7_mean_lag224: 0.008312399480775085
sales_store_mean_lag245: 0.00824314066334226
sales_window2_mean_lag210: 0.008212920917567881
sales_window6_mean_lag90: 0.008211966230083328
sales_store_mean_lag358: 0.008170346257847537
sales_store_mean_lag357: 0.008146921432597246
sales_store_mean_lag360: 0.008095455731713472
sales_window3_mean_lag308: 0.008093899469133832
sales_window5_mean_lag336: 0.008069271026093802
sales_window2_mean_lag301: 0.007991160956493826
sales_lag358: 0.007961326621503536
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_200_thousandths_small_200_thousandths: 0.00786205245721362
sales_window3_mean_lag301: 0.007842280840234583
sales_window3_mean_lag238: 0.00779943894793812
sales_window3_mean_lag343: 0.007672421382030075
sales_window2_mean_lag294: 0.007668954428330628
sales_window6_mean_lag224: 0.007552452839415531
sales_store_mean_lag231: 0.007538295166068751
sales_window6_mean_lag315: 0.007528838201987366
sales_store_mean_lag308: 0.007513676784680801
sales_item_mean_lag217: 0.007499723733879129
sales_window3_mean_lag294: 0.007484971281518623
sales_window3_mean_lag231: 0.0074665852071439005
sales_window3_mean_lag273: 0.00744990083408055
sales_window7_mean_lag252: 0.00736656061066292
sales_window3_mean_lag90: 0.007365979509356207
sales_window4_mean_lag210: 0.007312122405295552
sales_window5_mean_lag343: 0.007288218641793414
sales_window6_mean_lag343: 0.0072663054730006974
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_240_thousandths_small_240_thousandths: 0.007228207292395689
sales_window4_mean_lag231: 0.007171236071155193
sales_item_mean_lag358: 0.007142799394623589
sales_window4_mean_lag91: 0.007100539722606158
sales_window6_mean_lag336: 0.0070454775537740965
sales_window6_mean_lag301: 0.007043408779956962
sales_window6_mean_lag238: 0.00703627497575198
sales_window2_mean_lag266: 0.007018443742788935
sales_window5_mean_lag217: 0.006994787780064972
sales_window3_mean_lag210: 0.006952839395556522
sales_item_mean_lag238: 0.0068336219744481175
sales_window2_mean_lag280: 0.006794866963148865
sales_window7_mean_lag245: 0.006781620395245074
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_380_thousandths_small_380_thousandths: 0.00674376629485102
sales_window5_mean_lag231: 0.006732052800545642
sales_window5_mean_lag259: 0.0067165213942739915
sales_store_mean_lag259: 0.00671150273268474
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_420_thousandths_small_420_thousandths: 0.00670635319534175
sales_window7_mean_lag217: 0.006673401944500535
sales_item_mean_lag315: 0.006631609683228273
sales_window2_mean_lag273: 0.006571674333682457
sales_window6_mean_lag245: 0.006537631226364109
sales_window6_mean_lag357: 0.006531597176584247
sales_window6_mean_lag273: 0.006526762251531504
sales_window6_mean_lag322: 0.006507641457271996
sales_window3_mean_lag245: 0.006391054658126251
sales_window2_mean_lag336: 0.006364065068758303
sales_store_mean_lag294: 0.006338129558519998
sales_window2_mean_lag322: 0.006311086656154959
sales_window2_mean_lag231: 0.006220709113616492
sales_window2_mean_lag245: 0.006212978097770002
sales_window7_mean_lag90: 0.006159000292087311
sales_window2_mean_lag224: 0.0061478250916567985
sales_store_mean_lag210: 0.006116052213101978
sales_window5_mean_lag308: 0.006113259076719554
sales_window4_mean_lag224: 0.006102762204743509
sales_window6_mean_lag91: 0.005995546141712456
sales_window4_mean_lag252: 0.005991840015573163
sales_window2_mean_lag350: 0.005974583627328098
sales_window2_mean_lag315: 0.005962146692586001
sales_item_mean_lag322: 0.005885626707254993
sales_window3_mean_lag259: 0.0058462644153501496
sales_window5_mean_lag350: 0.005799298902606357
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_200_thousandths_small_200_thousandths: 0.005746337096959566
sales_window3_mean_lag329: 0.005700491276713739
sales_window2_mean_lag287: 0.0056842955003345375
sales_window2_mean_lag238: 0.005668191938283891
sales_store_mean_lag350: 0.005664964007854678
sales_window5_mean_lag252: 0.005655929657023448
sales_window7_mean_lag358: 0.005653587755583094
sales_item_mean_lag266: 0.005641190903125821
sales_window5_mean_lag280: 0.0055991035379855374
sales_store_mean_lag252: 0.005594129582494134
sales_window4_mean_lag301: 0.005571566460414923
sales_window7_mean_lag322: 0.005554243204249537
sales_window5_mean_lag245: 0.005540110313339261
sales_window7_mean_lag301: 0.005532014866000805
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_500_thousandths_small_500_thousandths: 0.00550482768792356
sales_window4_mean_lag203: 0.005501487281179091
sales_item_mean_lag273: 0.00548171113016481
sales_window6_mean_lag252: 0.005479851740555014
sales_window3_mean_lag322: 0.005477798570343424
sales_store_mean_lag238: 0.005442048919854945
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_400_thousandths_small_400_thousandths: 0.005429362733891789
sales_window3_mean_lag350: 0.005414630732963445
sales_window5_mean_lag90: 0.0053484524242115835
sales_window4_mean_lag90: 0.005345406130850225
sales_item_mean_lag245: 0.005331929608850668
sales_item_mean_lag252: 0.005315486628503919
sales_window4_mean_lag238: 0.005283032361140492
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_460_thousandths_small_460_thousandths: 0.005238684215795418
sales_store_mean_lag343: 0.005211814530695297
sales_window3_mean_lag224: 0.005141251956762925
sales_window4_mean_lag217: 0.005140902781762191
sales_window3_mean_lag280: 0.005107306989261718
sales_window6_mean_lag217: 0.0051012544316404815
sales_store_mean_lag273: 0.00508407706354375
sales_window5_mean_lag266: 0.005047164374042338
sales_window5_mean_lag224: 0.005040761468355054
sales_window6_mean_lag210: 0.005008293177496238
sales_window5_mean_lag203: 0.004964067718390156
sales_store_mean_lag359: 0.004936248051849901
sales_window5_mean_lag322: 0.004876510671106752
sales_window4_mean_lag322: 0.004859123262227183
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_440_thousandths_small_440_thousandths: 0.004852478491906867
sales_window7_mean_lag280: 0.004843147561116777
sales_item_mean_lag210: 0.004838430506313581
sales_window2_mean_lag343: 0.004812637314851545
sales_window7_mean_lag294: 0.004811707981440674
sales_window4_mean_lag336: 0.004785943750213106
sales_store_mean_lag322: 0.0047807974221678266
sales_window4_mean_lag273: 0.004739139604646331
sales_window7_mean_lag231: 0.004674513139558337
sales_window3_mean_lag217: 0.004664998511956681
sales_window4_mean_lag308: 0.004637071320984797
sales_window5_mean_lag210: 0.00463210807321996
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_220_thousandths_small_220_thousandths: 0.004478521793809425
sales_window6_mean_lag287: 0.0044766843156907096
sales_window4_mean_lag266: 0.004420703363418543
sales_window7_mean_lag287: 0.004418654883734558
sales_window4_mean_lag245: 0.004356289553422069
sales_window6_mean_lag231: 0.004343427341335544
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_420_thousandths_small_420_thousandths: 0.0042773305359260425
sales_store_mean_lag329: 0.0042278813561056235
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_220_thousandths_small_220_thousandths: 0.004152872076920307
sales_window6_mean_lag259: 0.0041504541196164075
sales_window2_mean_lag217: 0.0041176248727100165
sales_store_mean_lag266: 0.00409507281661889
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_500_thousandths_small_500_thousandths: 0.004075568451953078
sales_window6_mean_lag280: 0.00405457035238745
sales_window2_mean_lag259: 0.004031290954488838
sales_store_mean_lag224: 0.004025504624279502
sales_window4_mean_lag287: 0.004011819888026024
sales_item_mean_lag308: 0.003987575508093116
sales_window5_mean_lag287: 0.003965471026749241
sales_window7_mean_lag259: 0.003913434530403426
sales_item_mean_lag301: 0.003896294300007214
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_100_thousandths_small_100_thousandths: 0.003840160853041324
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_420_thousandths_small_420_thousandths: 0.0038146017093953787
sales_window4_mean_lag315: 0.003788337892227787
sales_window4_mean_lag259: 0.003773786638513613
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_340_thousandths_small_340_thousandths: 0.003772117423459945
sales_window7_mean_lag273: 0.003744201693478795
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_340_thousandths_small_340_thousandths: 0.003741688518001488
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_240_thousandths_small_240_thousandths: 0.0036581918347293766
sales_window5_mean_lag315: 0.0035508959622241004
sales_store_mean_lag217: 0.003540679547585224
sales_window5_mean_lag301: 0.003493288651058428
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_260_thousandths_small_260_thousandths: 0.0034322712594431946
sales_window5_mean_lag294: 0.003431398011741658
sales_window4_mean_lag280: 0.003399944215184105
sales_store_mean_lag287: 0.0033576486116829985
sales_window6_mean_lag294: 0.0032926024680473946
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_260_thousandths_small_260_thousandths: 0.0031845210129686653
sales_item_mean_lag280: 0.0031710158857967334
sales_window7_mean_lag266: 0.003116569580116091
sales_window7_mean_lag308: 0.0031019737562865608
sales_item_mean_lag287: 0.003088274344589063
sales_window6_mean_lag266: 0.0030169947436253287
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_280_thousandths_small_280_thousandths: 0.002974571333523574
sales_window6_mean_lag308: 0.0029489222915430654
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_340_thousandths_small_340_thousandths: 0.0029124388588236813
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_360_thousandths_small_360_thousandths: 0.002880784071830106
sales_window4_mean_lag294: 0.002758260427917005
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_160_thousandths_small_160_thousandths: 0.002591431599498445
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_320_thousandths_small_320_thousandths: 0.0025682090002413756
sales_item_mean_lag294: 0.002541135706112082
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_180_thousandths_small_180_thousandths: 0.002449725537866448
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_380_thousandths_small_380_thousandths: 0.002414155273777346
sales_store_mean_lag315: 0.0023851483177029023
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_300_thousandths_small_300_thousandths: 0.00227460937847158
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_260_thousandths_small_260_thousandths: 0.0021293618762267415
sales_extrme_mpe_window3_month_day_concat_large_80_thousandths_small_80_thousandths: 0.0020790844062931867
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_40_thousandths_small_40_thousandths: 0.0020255143288136746
sales_extrme_mpe_window3_month_day_concat_large_320_thousandths_small_320_thousandths: 0.002021729383263069
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_160_thousandths_small_160_thousandths: 0.0020104888761485495
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_380_thousandths_small_380_thousandths: 0.002006554969311398
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_60_thousandths_small_60_thousandths: 0.0019788622914179307
sales_extrme_mpe_window3_month_day_concat_large_200_thousandths_small_200_thousandths: 0.0019466581522484864
sales_extrme_mpe_window2_month_day_concat_large_400_thousandths_small_400_thousandths: 0.0018390706664335097
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_140_thousandths_small_140_thousandths: 0.0018072827422824012
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_220_thousandths_small_220_thousandths: 0.0018057775021705074
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_80_thousandths_small_80_thousandths: 0.0017945702270429225
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_100_thousandths_small_100_thousandths: 0.0017762367492171025
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_280_thousandths_small_280_thousandths: 0.0017751094164105425
sales_store_mean_lag301: 0.0017689638548615918
sales_extrme_mpe_window6_month_day_concat_large_380_thousandths_small_380_thousandths: 0.0017663581622833205
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_140_thousandths_small_140_thousandths: 0.0017457926069074092
sales_extrme_mpe_window4_month_day_concat_large_460_thousandths_small_460_thousandths: 0.0017237171804262304
sales_extrme_mpe_window3_month_day_concat_large_100_thousandths_small_100_thousandths: 0.001693623513983949
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_420_thousandths_small_420_thousandths: 0.0016922281129205376
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_80_thousandths_small_80_thousandths: 0.0016628836562055153
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_320_thousandths_small_320_thousandths: 0.0016593332091523689
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_440_thousandths_small_440_thousandths: 0.0015257124693731554
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_160_thousandths_small_160_thousandths: 0.0015193562961382152
sales_extrme_mpe_window3_month_day_concat_large_220_thousandths_small_220_thousandths: 0.0015126504620123276
sales_extrme_mpe_window2_month_day_concat_large_420_thousandths_small_420_thousandths: 0.0015095231220903775
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_500_thousandths_small_500_thousandths: 0.0015049719081477876
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_140_thousandths_small_140_thousandths: 0.0014962135757601162
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_200_thousandths_small_200_thousandths: 0.001485792227848993
sales_extrme_mpe_window3_month_day_concat_large_380_thousandths_small_380_thousandths: 0.001450682643492056
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_180_thousandths_small_180_thousandths: 0.0014488056184627763
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_120_thousandths_small_120_thousandths: 0.0014398874405614703
sales_extrme_mpe_window4_month_day_concat_large_360_thousandths_small_360_thousandths: 0.001420703820620157
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_220_thousandths_small_220_thousandths: 0.0014101973983849853
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_80_thousandths_small_80_thousandths: 0.0014028465547926494
sales_mpe_window2_month_day_concat: 0.001391477725660233
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_20_thousandths_small_20_thousandths: 0.0013880374917058843
sales_extrme_mpe_window7_month_day_concat_large_40_thousandths_small_40_thousandths: 0.0013870449774897663
sales_extrme_mpe_window3_month_day_concat_large_180_thousandths_small_180_thousandths: 0.0013704764262172952
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_140_thousandths_small_140_thousandths: 0.0013564964983284822
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_80_thousandths_small_80_thousandths: 0.001345549004991132
sales_extrme_mpe_window2_month_day_concat_large_200_thousandths_small_200_thousandths: 0.0013197897350183388
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_320_thousandths_small_320_thousandths: 0.001318635732801848
sales_extrme_mpe_window2_month_day_concat_large_180_thousandths_small_180_thousandths: 0.0013122778230325368
sales_extrme_mpe_window2_month_day_concat_large_120_thousandths_small_120_thousandths: 0.0013060260500763617
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_440_thousandths_small_440_thousandths: 0.0012881369725010088
sales_extrme_mpe_window2_month_day_concat_large_220_thousandths_small_220_thousandths: 0.0012804331477546595
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_120_thousandths_small_120_thousandths: 0.0012299570384092613
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_160_thousandths_small_160_thousandths: 0.0012124259518688542
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_420_thousandths_small_420_thousandths: 0.0012021994319547057
sales_extrme_mpe_window4_month_day_concat_large_60_thousandths_small_60_thousandths: 0.0011943039746371911
sales_extrme_mpe_window4_month_day_concat_large_320_thousandths_small_320_thousandths: 0.001173263427848543
sales_extrme_mpe_window2_month_day_concat_large_280_thousandths_small_280_thousandths: 0.001165547805961642
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_100_thousandths_small_100_thousandths: 0.0011552294636563145
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_40_thousandths_small_40_thousandths: 0.0011464302187120556
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_360_thousandths_small_360_thousandths: 0.0011354491561553822
sales_extrme_mpe_window6_month_day_concat_large_400_thousandths_small_400_thousandths: 0.001119690614240887
sales_extrme_mpe_window2_month_day_concat_large_360_thousandths_small_360_thousandths: 0.0011185204432558951
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_300_thousandths_small_300_thousandths: 0.0011019849742815873
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_300_thousandths_small_300_thousandths: 0.0010889149069915082
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_280_thousandths_small_280_thousandths: 0.0010745235225034788
sales_extrme_mpe_window2_month_day_concat_large_40_thousandths_small_40_thousandths: 0.0010666406425423997
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_460_thousandths_small_460_thousandths: 0.0010408857967729584
sales_extrme_mpe_window2_month_day_concat_large_440_thousandths_small_440_thousandths: 0.0010390671848466274
sales_extrme_mpe_window5_month_day_concat_large_100_thousandths_small_100_thousandths: 0.0009998078022137302
sales_extrme_mpe_window4_month_day_concat_large_220_thousandths_small_220_thousandths: 0.0009883723349600865
sales_extrme_mpe_window5_month_day_concat_large_40_thousandths_small_40_thousandths: 0.0009742756260814728
sales_extrme_mpe_window6_month_day_concat_large_280_thousandths_small_280_thousandths: 0.0009532065179690468
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_320_thousandths_small_320_thousandths: 0.0009414712804807504
sales_extrme_mpe_window2_month_day_concat_large_160_thousandths_small_160_thousandths: 0.0009368648340012032
sales_extrme_mpe_window4_month_day_concat_large_480_thousandths_small_480_thousandths: 0.0009174819603100196
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_280_thousandths_small_280_thousandths: 0.0008904263745688766
sales_extrme_mpe_window7_month_day_concat_large_240_thousandths_small_240_thousandths: 0.0008869558954620182
sales_extrme_mpe_window2_month_day_concat_large_480_thousandths_small_480_thousandths: 0.0008797700355056566
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_380_thousandths_small_380_thousandths: 0.0008671178968741725
sales_extrme_mpe_window2_month_day_concat_large_60_thousandths_small_60_thousandths: 0.0008647752007935268
sales_extrme_mpe_window6_month_day_concat_large_160_thousandths_small_160_thousandths: 0.0008557734473113772
sales_extrme_mpe_window4_month_day_concat_large_260_thousandths_small_260_thousandths: 0.000841614467261428
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_420_thousandths_small_420_thousandths: 0.0008306730378340078
sales_extrme_mpe_window4_month_day_concat_large_40_thousandths_small_40_thousandths: 0.0008193604053740301
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_140_thousandths_small_140_thousandths: 0.0008180909678121501
sales_extrme_mpe_window2_month_day_concat_large_80_thousandths_small_80_thousandths: 0.0008132584932994114
sales_extrme_mpe_window5_month_day_concat_large_440_thousandths_small_440_thousandths: 0.0008070736119360156
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_260_thousandths_small_260_thousandths: 0.0007789784395792989
sales_extrme_mpe_window2_month_day_concat_large_300_thousandths_small_300_thousandths: 0.0007601536772125415
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_180_thousandths_small_180_thousandths: 0.00075140568368855
sales_extrme_mpe_window6_month_day_concat_large_60_thousandths_small_60_thousandths: 0.0007402198150588691
sales_extrme_mpe_window2_month_day_concat_large_140_thousandths_small_140_thousandths: 0.000732590212318793
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_480_thousandths_small_480_thousandths: 0.0007222569197624848
sales_mpe_window7_month_day_concat: 0.0007220218576012816
sales_extrme_mpe_window7_month_day_concat_large_100_thousandths_small_100_thousandths: 0.0007141977269325802
sales_extrme_mpe_window2_month_day_concat_large_260_thousandths_small_260_thousandths: 0.0007082058256752768
sales_extrme_mpe_window2_month_day_concat_large_100_thousandths_small_100_thousandths: 0.000707609407861191
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_400_thousandths_small_400_thousandths: 0.000701195749265976
sales_extrme_mpe_window3_month_day_concat_large_360_thousandths_small_360_thousandths: 0.0006972000433791898
sales_extrme_mpe_window3_month_day_concat_large_480_thousandths_small_480_thousandths: 0.000696958556346802
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_200_thousandths_small_200_thousandths: 0.0006889691255944579
sales_extrme_mpe_window5_month_day_concat_large_320_thousandths_small_320_thousandths: 0.0006851254100479423
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_180_thousandths_small_180_thousandths: 0.0006827878960782073
sales_extrme_mpe_window3_month_day_concat_large_160_thousandths_small_160_thousandths: 0.0006809964730126628
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_100_thousandths_small_100_thousandths: 0.0006744734104203455
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_480_thousandths_small_480_thousandths: 0.0006716325427881586
sales_extrme_mpe_window5_month_day_concat_large_240_thousandths_small_240_thousandths: 0.0006684925288380075
sales_extrme_mpe_window3_month_day_concat_large_420_thousandths_small_420_thousandths: 0.0006627528994622854
sales_extrme_mpe_window4_month_day_concat_large_280_thousandths_small_280_thousandths: 0.0006624732781314725
sales_extrme_mpe_window3_month_day_concat_large_120_thousandths_small_120_thousandths: 0.0006561612756837592
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_40_thousandths_small_40_thousandths: 0.0006539774129919113
sales_extrme_mpe_window7_month_day_concat_large_260_thousandths_small_260_thousandths: 0.0006467604343222167
sales_extrme_mpe_window7_month_day_concat_large_300_thousandths_small_300_thousandths: 0.0006442714709702695
sales_extrme_mpe_window5_month_day_concat_large_400_thousandths_small_400_thousandths: 0.0006439780376401152
sales_extrme_mpe_window5_month_day_concat_large_160_thousandths_small_160_thousandths: 0.0006393636395494849
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_260_thousandths_small_260_thousandths: 0.0006381165388035843
sales_extrme_mpe_window6_month_day_concat_large_40_thousandths_small_40_thousandths: 0.0006376354006790957
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_440_thousandths_small_440_thousandths: 0.0006346987791465605
sales_extrme_mpe_window3_month_day_concat_large_400_thousandths_small_400_thousandths: 0.0006326848914331274
sales_extrme_mpe_window3_month_day_concat_large_460_thousandths_small_460_thousandths: 0.0006316608710228261
sales_extrme_mpe_window4_month_day_concat_large_180_thousandths_small_180_thousandths: 0.0006200510805884393
sales_extrme_mpe_window2_month_day_concat_large_20_thousandths_small_20_thousandths: 0.0006075627090807337
sales_extrme_mpe_window5_month_day_concat_large_220_thousandths_small_220_thousandths: 0.0006013135233084581
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_60_thousandths_small_60_thousandths: 0.0005922239175481612
sales_extrme_mpe_window4_month_day_concat_large_200_thousandths_small_200_thousandths: 0.0005897670565891882
sales_extrme_mpe_window6_month_day_concat_large_300_thousandths_small_300_thousandths: 0.000581789005864939
sales_extrme_mpe_window3_month_day_concat_large_240_thousandths_small_240_thousandths: 0.0005782896575459019
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_440_thousandths_small_440_thousandths: 0.0005778869138437162
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_20_thousandths_small_20_thousandths: 0.0005768411006130001
sales_extrme_mpe_window7_month_day_concat_large_360_thousandths_small_360_thousandths: 0.0005764414758133586
sales_extrme_mpe_window2_month_day_concat_large_240_thousandths_small_240_thousandths: 0.0005751765687928265
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_300_thousandths_small_300_thousandths: 0.0005619296000907902
sales_extrme_mpe_window4_month_day_concat_large_380_thousandths_small_380_thousandths: 0.0005519065705300448
sales_extrme_mpe_window6_month_day_concat_large_220_thousandths_small_220_thousandths: 0.0005483174495620255
sales_extrme_mpe_window3_month_day_concat_large_260_thousandths_small_260_thousandths: 0.0005443427868405526
sales_extrme_mpe_window5_month_day_concat_large_260_thousandths_small_260_thousandths: 0.0005438554233222465
sales_extrme_mpe_window5_month_day_concat_large_460_thousandths_small_460_thousandths: 0.0005297821471026847
sales_extrme_mpe_window3_month_day_concat_large_280_thousandths_small_280_thousandths: 0.0005293256622925683
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_180_thousandths_small_180_thousandths: 0.0005258080130152348
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_20_thousandths_small_20_thousandths: 0.0005189155894360321
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_460_thousandths_small_460_thousandths: 0.0005164087310249743
sales_extrme_mpe_window7_month_day_concat_large_20_thousandths_small_20_thousandths: 0.0005155106565818766
sales_extrme_mpe_window7_month_day_concat_large_60_thousandths_small_60_thousandths: 0.0005121777919379136
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_220_thousandths_small_220_thousandths: 0.0005099897415202599
sales_extrme_mpe_window4_month_day_concat_large_440_thousandths_small_440_thousandths: 0.0005066323375351337
sales_extrme_mpe_window6_month_day_concat_large_320_thousandths_small_320_thousandths: 0.0005013710988462306
sales_extrme_mpe_window3_month_day_concat_large_340_thousandths_small_340_thousandths: 0.0004999005724216623
sales_extrme_mpe_window5_month_day_concat_large_180_thousandths_small_180_thousandths: 0.0004994884900675438
sales_extrme_mpe_window3_month_day_concat_large_60_thousandths_small_60_thousandths: 0.0004979638164424547
sales_extrme_mpe_window7_month_day_concat_large_280_thousandths_small_280_thousandths: 0.0004947393700616261
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_340_thousandths_small_340_thousandths: 0.0004926896260811105
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_120_thousandths_small_120_thousandths: 0.0004921609721768429
sales_extrme_mpe_window5_month_day_concat_large_60_thousandths_small_60_thousandths: 0.0004916284776186904
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_160_thousandths_small_160_thousandths: 0.0004912174622198543
sales_extrme_mpe_window6_month_day_concat_large_360_thousandths_small_360_thousandths: 0.0004832813483178661
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_300_thousandths_small_300_thousandths: 0.00046568384855265365
sales_extrme_mpe_window4_month_day_concat_large_340_thousandths_small_340_thousandths: 0.00046213837074457747
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_180_thousandths_small_180_thousandths: 0.000456289393458909
sales_extrme_mpe_window4_month_day_concat_large_400_thousandths_small_400_thousandths: 0.0004549772942893196
sales_extrme_mpe_window7_month_day_concat_large_120_thousandths_small_120_thousandths: 0.0004538767229312423
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_240_thousandths_small_240_thousandths: 0.00045345804245159796
sales_extrme_mpe_window5_month_day_concat_large_380_thousandths_small_380_thousandths: 0.0004510708310788006
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_400_thousandths_small_400_thousandths: 0.00044940835476054856
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_100_thousandths_small_100_thousandths: 0.0004492242296955676
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_40_thousandths_small_40_thousandths: 0.00044910289498995015
sales_extrme_mpe_window7_month_day_concat_large_380_thousandths_small_380_thousandths: 0.0004428539111975355
sales_extrme_mpe_window5_month_day_concat_large_140_thousandths_small_140_thousandths: 0.00044272455634049605
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_480_thousandths_small_480_thousandths: 0.0004297815518559853
sales_extrme_mpe_window7_month_day_concat_large_320_thousandths_small_320_thousandths: 0.00042876336605659446
sales_extrme_mpe_window7_month_day_concat_large_420_thousandths_small_420_thousandths: 0.0004244410409655576
sales_extrme_mpe_window6_month_day_concat_large_420_thousandths_small_420_thousandths: 0.00041909111184294735
sales_extrme_mpe_window3_month_day_concat_large_140_thousandths_small_140_thousandths: 0.0004116825942201701
sales_extrme_mpe_window3_month_day_concat_large_40_thousandths_small_40_thousandths: 0.0004114420329010942
sales_extrme_mpe_window7_month_day_concat_large_140_thousandths_small_140_thousandths: 0.0004098583908409933
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_100_thousandths_small_100_thousandths: 0.0004072775416665761
sales_extrme_mpe_window6_month_day_concat_large_340_thousandths_small_340_thousandths: 0.0004047836955671627
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_200_thousandths_small_200_thousandths: 0.0004008167570672225
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_80_thousandths_small_80_thousandths: 0.00039921123917639024
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_40_thousandths_small_40_thousandths: 0.0003915764326299558
sales_extrme_mpe_window7_month_day_concat_large_460_thousandths_small_460_thousandths: 0.00038813190624268636
sales_extrme_mpe_window7_month_day_concat_large_200_thousandths_small_200_thousandths: 0.0003869950465988504
sales_extrme_mpe_window7_month_day_concat_large_180_thousandths_small_180_thousandths: 0.00038464294768822605
sales_extrme_mpe_window4_month_day_concat_large_100_thousandths_small_100_thousandths: 0.00037887302441506687
sales_extrme_mpe_window5_month_day_concat_large_120_thousandths_small_120_thousandths: 0.0003788421134829423
sales_extrme_mpe_window2_month_day_concat_large_320_thousandths_small_320_thousandths: 0.0003774930812062606
sales_mpe_window6_month_day_concat: 0.0003765915966064618
sales_extrme_mpe_window4_month_day_concat_large_20_thousandths_small_20_thousandths: 0.00037639701946136237
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_320_thousandths_small_320_thousandths: 0.0003760209469960476
sales_extrme_mpe_window5_month_day_concat_large_200_thousandths_small_200_thousandths: 0.0003744117796193422
sales_extrme_mpe_window6_month_day_concat_large_440_thousandths_small_440_thousandths: 0.0003710840446152225
sales_extrme_mpe_window2_month_day_concat_large_380_thousandths_small_380_thousandths: 0.00036645380896618666
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_40_thousandths_small_40_thousandths: 0.00036368012796808043
sales_extrme_mpe_window6_month_day_concat_large_180_thousandths_small_180_thousandths: 0.00035923132977371393
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_60_thousandths_small_60_thousandths: 0.0003576361001951079
sales_extrme_mpe_window3_month_day_concat_large_300_thousandths_small_300_thousandths: 0.0003542353114799952
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_360_thousandths_small_360_thousandths: 0.0003524159834882784
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_460_thousandths_small_460_thousandths: 0.0003503795388241595
sales_extrme_mpe_window6_month_day_concat_large_240_thousandths_small_240_thousandths: 0.00034121173472838047
sales_extrme_mpe_window4_month_day_concat_large_420_thousandths_small_420_thousandths: 0.0003341084655631443
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_140_thousandths_small_140_thousandths: 0.00033325025534822325
sales_extrme_mpe_window2_month_day_concat_large_460_thousandths_small_460_thousandths: 0.00033314380416538884
sales_extrme_mpe_window6_month_day_concat_large_120_thousandths_small_120_thousandths: 0.00032931510565138386
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_240_thousandths_small_240_thousandths: 0.00032516733468130346
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_360_thousandths_small_360_thousandths: 0.00032007475170968544
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_200_thousandths_small_200_thousandths: 0.0003197607317146526
sales_extrme_mpe_window5_month_day_concat_large_280_thousandths_small_280_thousandths: 0.00031091827374118064
sales_extrme_mpe_window7_month_day_concat_large_80_thousandths_small_80_thousandths: 0.00031055561504055767
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_260_thousandths_small_260_thousandths: 0.0003095623981441389
sales_extrme_mpe_window7_month_day_concat_large_340_thousandths_small_340_thousandths: 0.0003093489724230579
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_240_thousandths_small_240_thousandths: 0.00030324264203820077
sales_extrme_mpe_window6_month_day_concat_large_480_thousandths_small_480_thousandths: 0.00028921249250090807
sales_extrme_mpe_window4_month_day_concat_large_300_thousandths_small_300_thousandths: 0.0002872827763420565
sales_extrme_mpe_window6_month_day_concat_large_260_thousandths_small_260_thousandths: 0.0002866926924624104
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_460_thousandths_small_460_thousandths: 0.00028307732639488216
sales_extrme_mpe_window7_month_day_concat_large_440_thousandths_small_440_thousandths: 0.00027889496055886765
sales_extrme_mpe_window5_month_day_concat_large_480_thousandths_small_480_thousandths: 0.00027517015567943347
sales_mpe_window4_month_day_concat: 0.00027111449543523015
sales_extrme_mpe_window6_month_day_concat_large_460_thousandths_small_460_thousandths: 0.0002682829138581026
sales_extrme_mpe_window6_month_day_concat_large_140_thousandths_small_140_thousandths: 0.0002677821994827352
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_120_thousandths_small_120_thousandths: 0.000262760693577816
sales_extrme_mpe_window3_weekofyear_dayofweek_concat_large_60_thousandths_small_60_thousandths: 0.000260251706939414
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_340_thousandths_small_340_thousandths: 0.00025698440988725416
sales_extrme_mpe_window4_month_day_concat_large_140_thousandths_small_140_thousandths: 0.00025370708053714704
sales_extrme_mpe_window5_month_day_concat_large_420_thousandths_small_420_thousandths: 0.00024896705523734487
sales_extrme_mpe_window3_month_day_concat_large_440_thousandths_small_440_thousandths: 0.0002479663340688765
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_480_thousandths_small_480_thousandths: 0.0002477244372146764
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_460_thousandths_small_460_thousandths: 0.000246547590129374
sales_extrme_mpe_window4_month_day_concat_large_120_thousandths_small_120_thousandths: 0.0002461433757163415
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_500_thousandths_small_500_thousandths: 0.0002448785433088419
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_320_thousandths_small_320_thousandths: 0.00023788824635904534
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_380_thousandths_small_380_thousandths: 0.00023741383926725168
sales_extrme_mpe_window7_month_day_concat_large_220_thousandths_small_220_thousandths: 0.0002346648156912803
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_280_thousandths_small_280_thousandths: 0.0002345902812081139
sales_extrme_mpe_window5_month_day_concat_large_80_thousandths_small_80_thousandths: 0.00022820860025899679
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_500_thousandths_small_500_thousandths: 0.00022737148661054204
sales_extrme_mpe_window4_month_day_concat_large_240_thousandths_small_240_thousandths: 0.00022590674837875968
sales_extrme_mpe_window2_month_day_concat_large_500_thousandths_small_500_thousandths: 0.00022424284077827438
sales_extrme_mpe_window5_month_day_concat_large_360_thousandths_small_360_thousandths: 0.0002215119173949909
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_120_thousandths_small_120_thousandths: 0.00021920308826165795
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_60_thousandths_small_60_thousandths: 0.0002188117357949417
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_480_thousandths_small_480_thousandths: 0.00021254343908823567
sales_extrme_mpe_window5_month_day_concat_large_300_thousandths_small_300_thousandths: 0.0002118193576981517
sales_extrme_mpe_window3_month_day_concat_large_20_thousandths_small_20_thousandths: 0.00021156532055688227
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_160_thousandths_small_160_thousandths: 0.00021075511097507963
sales_extrme_mpe_window6_month_day_concat_large_200_thousandths_small_200_thousandths: 0.0002098505010032307
sales_extrme_mpe_window6_month_day_concat_large_100_thousandths_small_100_thousandths: 0.00020950638546018648
sales_extrme_mpe_window4_month_day_concat_large_80_thousandths_small_80_thousandths: 0.00019280657268714728
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_20_thousandths_small_20_thousandths: 0.00018347631642576695
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_400_thousandths_small_400_thousandths: 0.0001786699663140216
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_340_thousandths_small_340_thousandths: 0.00017202347921795754
sales_extrme_mpe_window6_month_day_concat_large_20_thousandths_small_20_thousandths: 0.00016912259626703527
sales_extrme_mpe_window5_month_day_concat_large_500_thousandths_small_500_thousandths: 0.00016817936211618775
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_80_thousandths_small_80_thousandths: 0.00016478116594444473
sales_extrme_mpe_window7_month_day_concat_large_400_thousandths_small_400_thousandths: 0.00016143743342628253
sales_extrme_mpe_window7_weekofyear_dayofweek_concat_large_240_thousandths_small_240_thousandths: 0.00015513380909737512
sales_extrme_mpe_window4_month_day_concat_large_500_thousandths_small_500_thousandths: 0.0001546521326241343
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_20_thousandths_small_20_thousandths: 0.00013875843503509752
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_120_thousandths_small_120_thousandths: 0.00013453498141391643
sales_mpe_window5_month_day_concat: 0.00012579880493527188
sales_extrme_mpe_window4_month_day_concat_large_160_thousandths_small_160_thousandths: 0.00012477901717758386
sales_extrme_mpe_window3_month_day_concat_large_500_thousandths_small_500_thousandths: 0.000123559094255025
sales_mpe_window4_weekofyear_dayofweek_concat: 0.00011411596368017415
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_60_thousandths_small_60_thousandths: 0.0001139969965125205
sales_mpe_window6_weekofyear_dayofweek_concat: 0.00011169954345835037
sales_extrme_mpe_window7_month_day_concat_large_480_thousandths_small_480_thousandths: 0.00010734747824849314
sales_extrme_mpe_window6_month_day_concat_large_80_thousandths_small_80_thousandths: 0.00010677928702961476
sales_extrme_mpe_window6_weekofyear_dayofweek_concat_large_360_thousandths_small_360_thousandths: 0.00010436847593365105
sales_extrme_mpe_window7_month_day_concat_large_500_thousandths_small_500_thousandths: 0.00010367676841031651
sales_extrme_mpe_window7_month_day_concat_large_160_thousandths_small_160_thousandths: 0.00010106507979832609
sales_extrme_mpe_window5_month_day_concat_large_20_thousandths_small_20_thousandths: 8.38394629342132e-05
sales_mpe_window7_weekofyear_dayofweek_concat: 8.328990622957476e-05
sales_mpe_window2_weekofyear_dayofweek_concat: 7.825377783813853e-05
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_480_thousandths_small_480_thousandths: 7.57998826527488e-05
sales_extrme_mpe_window6_month_day_concat_large_500_thousandths_small_500_thousandths: 7.479637315004657e-05
sales_mpe_window3_month_day_concat: 6.947671155366378e-05
sales_extrme_mpe_window5_month_day_concat_large_340_thousandths_small_340_thousandths: 6.753260021351272e-05
sales_extrme_mpe_window2_weekofyear_dayofweek_concat_large_20_thousandths_small_20_thousandths: 6.433613079356328e-05
sales_extrme_mpe_window5_weekofyear_dayofweek_concat_large_400_thousandths_small_400_thousandths: 5.453669235313152e-05
sales_mpe_window3_weekofyear_dayofweek_concat: 5.2380850295598536e-05
sales_extrme_mpe_window2_month_day_concat_large_340_thousandths_small_340_thousandths: 5.1315949705615953e-05
sales_extrme_mpe_window4_weekofyear_dayofweek_concat_large_500_thousandths_small_500_thousandths: 2.895520394022357e-05
sales_mpe_window5_weekofyear_dayofweek_concat: 2.878083748039715e-05
In [97]:
#feature_importances条形图,前29个重要特征
#特征重要度的计算细节详见https://tech.yandex.com/catboost/doc/dg/concepts/fstr-docpage/#fstr
ind = np.arange(len(sorted(zip(feature_importances, feature_names), reverse=True)))[:29]
fearture_importances = [score for score, name in sorted(zip(feature_importances, feature_names), reverse=True)][:29]
fearture_names = [name for score, name in sorted(zip(feature_importances, feature_names), reverse=True)][:29]

plt.rcParams['figure.figsize'] = (12, 18)
fig, ax = plt.subplots()

ax.barh(ind, fearture_importances, height=0.7, align='center', color='#AAAAAA', tick_label=fearture_names)    
ax.set_yticks(ind)
ax.set_xlabel('fearture_importances')
ax.set_ylabel('Feature Name')
ax.invert_yaxis()
ax.set_title('fearture_importances')

plt.show()

5.模型运用

In [98]:
validation_preds = best_model.predict(validation_pool)
In [99]:
validation.insert(validation.shape[1],'pred',validation_preds)
validation = validation[['date','store','item','sales','pred']]
In [100]:
#经存储成csv,ds又变为str了,转成datetime
validation.date=pd.to_datetime(validation.date)
In [101]:
#随机选择2个商店、3种货物作图
a = list(np.random.choice(range(1,11), size=2, replace=False))
b = list(np.random.choice(range(1,51), size=3, replace=False))
for (s,i),group in validation.groupby(['store','item']):
    if s in a and i in b:
        plt.rcParams['figure.figsize'] = (12, 6)
        fig, ax = plt.subplots()
        
        ax.plot(group.iloc[:,0],group.iloc[:,-2],'b',label='Sales')
        ax.plot(group.iloc[:,0],group.iloc[:,-1],'r',label='Preds')
        ax.set_xlabel('Date')
        ax.set_ylabel('Sales')
        ax.set_title("Store_{0},Item_{1}'s erverday sales and preds".format(s,i))
        ax.legend()
        plt.show()
In [102]:
print(time()-start)
14634.349162101746
In [ ]: