import pandas as pd
import numpy as np
import matplotlib.pylab as plt
# Load the raw CSV (only the first 11856 rows are read).
df_raw = pd.read_csv(
    'http://www.guofei.site/StatisticsBlog/origin_files/timeseries/Train_SU63ISt.csv',
    nrows=11856,
)
# Index the frame by the parsed timestamps so it can be resampled by calendar day.
df_raw.index = pd.to_datetime(df_raw.Datetime, format='%d-%m-%Y %H:%M')
# Select the numeric 'Count' column *before* aggregating. Calling mean() on the
# whole frame would also try to average the text 'Datetime' column, which raises
# TypeError on pandas >= 2.0 (older versions silently dropped such columns).
ts = df_raw['Count'].resample('D').mean()
ts.name = 'data'
# The first 433 daily values are the training set; the remainder is held out.
train = ts[0:433]
test = ts[433:]
# Both series are drawn with the same title before the figure is shown.
train.plot(title='Daily Ridership')
test.plot(title='Daily Ridership')
plt.show()
Naive Method:简单地将最近一个观测值作为下一期的预测值来做预测 $\hat y_{t+1}=y_t$
# Naive forecast: every timestamp in the test window is predicted with the
# last value observed in the training series.
from sklearn.metrics import r2_score

last_observed = train.iloc[-1]
y_hat = pd.Series(last_observed, index=test.index, name=test.name, dtype=test.dtype)

for series, label in ((train, 'Train'), (test, 'Test'), (y_hat, 'Naive Forecast')):
    plt.plot(series.index, series, label=label)
plt.legend(loc='best')
plt.title("Naive Forecast")
plt.show()

# r-squared of the forecast against the held-out values (bare expression:
# the value is not printed when this runs as a plain script).
r2_score(y_true=test, y_pred=y_hat)
Simple Average:用历史全部观测值的均值作为下一期的预测值 $\hat y_{t+1}=\dfrac{1}{t}\sum_{i=1}^{t}y_i$
# Simple-average forecast: the mean of the whole training series is used as
# the prediction for every timestamp in the test window.
from sklearn.metrics import r2_score

train_mean = train.mean()
y_hat = pd.Series(
    np.full(len(test), train_mean),
    index=test.index,
    name=test.name,
    dtype=test.dtype,
)

plot_specs = [
    (train, 'Train'),
    (test, 'Test'),
    (y_hat, 'avg_forecast'),
]
for series, label in plot_specs:
    plt.plot(series.index, series, label=label)
plt.legend(loc='best')
plt.show()

# r-squared of the forecast against the held-out values (bare expression:
# the value is not printed when this runs as a plain script).
r2_score(y_true=test, y_pred=y_hat)
移动平均法用近期固定期数 $p$ 的历史观测值的均值作为下一期的预测值 $\hat y_i=\dfrac{1}{p}(y_{i-1}+y_{i-2}+\cdots+y_{i-p})$
# Moving-average forecast: the mean of the last `window` training observations
# is used as the prediction for every timestamp in the test window.
from sklearn.metrics import r2_score

window = 60
rolling_means = train.rolling(window).mean()
latest_mean = rolling_means.iloc[-1]
y_hat = pd.Series(latest_mean, index=test.index, name=test.name, dtype=test.dtype)

for series, label in (
    (train, 'Train'),
    (test, 'Test'),
    (y_hat, 'Moving Average Forecast'),
):
    plt.plot(series.index, series, label=label)
plt.legend(loc='best')
plt.show()

# r-squared of the forecast against the held-out values (bare expression:
# the value is not printed when this runs as a plain script).
r2_score(y_true=test, y_pred=y_hat)
加权移动平均法 $\hat y_i=\dfrac{1}{m}\sum_{k=1}^{m} w_k y_{i-k}$。下面的代码使用简单指数平滑(Simple Exponential Smoothing),其权重随观测值距今的期数按指数衰减。
# Simple exponential smoothing with the smoothing level given explicitly
# (0.6) and parameter optimisation switched off.
from sklearn.metrics import r2_score
from statsmodels.tsa import holtwinters

ses_spec = holtwinters.SimpleExpSmoothing(train)
model = ses_spec.fit(smoothing_level=0.6, optimized=False)

# Start from a copy of the test series and overwrite its values with the
# forecast for as many steps as there are test observations.
horizon = len(test)
y_hat = test.copy()
y_hat[:] = model.forecast(horizon)

for series, label in (
    (train, 'Train'),
    (test, 'Test'),
    (y_hat, 'Simple Exponential Smoothing'),
):
    plt.plot(series.index, series, label=label)
plt.legend(loc='best')
plt.show()

# r-squared of the forecast against the held-out values (bare expression:
# the value is not printed when this runs as a plain script).
r2_score(y_true=test, y_pred=y_hat)
# Decompose the training series and plot the result.
import statsmodels.api as sm

decompose = sm.tsa.seasonal_decompose
model = decompose(train)

# The decomposed components: trend, seasonal part and residual.
components = (model.trend, model.seasonal, model.resid)

model.plot()
plt.show()
# Holt's linear method with explicitly supplied smoothing parameters.
from sklearn.metrics import r2_score

# `smoothing_trend` is the current name of the keyword formerly spelled
# `smoothing_slope`; the old spelling is deprecated since statsmodels 0.12.
model = sm.tsa.Holt(train).fit(smoothing_level=0.3, smoothing_trend=0.1)

# forecast() returns a new series, so y_hat is bound directly to it; the
# former `y_hat = test.copy()` was overwritten before ever being read.
y_hat = model.forecast(len(test))

plt.plot(train.index, train, label='Train')
plt.plot(test.index, test, label='Test')
plt.plot(y_hat.index, y_hat, label='Holt_linear')
plt.legend(loc='best')
plt.show()

# r-squared of the forecast against the held-out values (bare expression:
# the value is not printed when this runs as a plain script).
r2_score(test, y_hat)
# Holt-Winters exponential smoothing with additive trend and additive
# seasonality over a period of 7 observations.
import statsmodels.api as sm
from sklearn.metrics import r2_score

model = sm.tsa.ExponentialSmoothing(
    train,
    seasonal_periods=7,
    trend='add',
    seasonal='add',
).fit()

# forecast() returns a new series, so y_hat is bound directly to it; the
# former `y_hat = test.copy()` was overwritten before ever being read.
y_hat = model.forecast(len(test))

plt.plot(train.index, train, label='Train')
plt.plot(test.index, test, label='Test')
plt.plot(y_hat.index, y_hat, label='Holt_Winter')
plt.legend(loc='best')
plt.show()

# r-squared of the forecast against the held-out values (bare expression:
# the value is not printed when this runs as a plain script).
r2_score(test, y_hat)
# Seasonal ARIMA fitted on the training series with order (2, 1, 4) and
# seasonal order (0, 1, 1, 7).
import statsmodels.api as sm
from sklearn.metrics import r2_score

model = sm.tsa.statespace.SARIMAX(
    train,
    order=(2, 1, 4),
    seasonal_order=(0, 1, 1, 7),
).fit()

# forecast() returns a new series, so y_hat is bound directly to it; the
# former `y_hat = test.copy()` was overwritten before ever being read.
y_hat = model.forecast(len(test))

plt.plot(train.index, train, label='Train')
plt.plot(test.index, test, label='Test')
plt.plot(y_hat.index, y_hat, label='SARIMA')
plt.legend(loc='best')
plt.show()

# r-squared of the forecast against the held-out values (bare expression:
# the value is not printed when this runs as a plain script).
r2_score(test, y_hat)