Short-Long Term Forecasting Approaches

Short-Long Term Forecasting Approaches

Notebook under construction: future improvements of the project.

So far we have been looking for the best model to predict the weather variables over a forecasting period of 15 days, but the best model in this scenario might not be the best at forecasting the next day, the next two days, or the next five days. It would be interesting to find the best model for a short-term forecasting period (say, 1-5 days) and the best one for a long-term period (6-15 days, for instance).

We have incorporated the score_window parameter into the KFold_score_time_series function to perform this last suggested task.

# Univariate PyTS case: a LinearRegressionTS estimator on series 'T', with the
# train data looked up by series name and lag.
series_name = 'T'
lag = 20
estimator = LinearRegressionTS()
X = X_train_sk[series_name][lag]
y = Y_train_sk[series_name][lag]

# Keyword arguments shared by the three cross-validation runs below; only
# score_window changes from one run to the next.
cv_options = dict(scoring=mean_absolute_error, framework='PyTS')

# score_1 -> score_window=None (presumably scores the whole test window;
#            confirm against KFold_score_time_series)
# score_2 -> score_window=range(6, 15)
# score_3 -> score_window=[0, 1, 7, 14]
# NOTE(review): if score_window holds 0-based step indices (as [0, 1, 7, 14]
# suggests), range(6, 15) covers steps 7-15, not 6-15 -- confirm the intent.
score_1, score_2, score_3 = [
    KFold_score_time_series(estimator, X, y, n_splits, inner_test_window,
                            **cv_options, score_window=window)
    for window in (None, range(6, 15), [0, 1, 7, 14])
]

# The three scores are expected to differ from one another.
print(score_1, score_2, score_3)

# PyTS 'multiple' approach: a VAR estimator fed with the multi-series train
# set and scored on the series named by `level`.
series_name = 'T'
estimator = VAR()
X = X_multi_train
y = Y_train_st[series_name]

# Keyword arguments shared by the three cross-validation runs below; only
# score_window changes from one run to the next.
cv_options = dict(scoring=mean_absolute_error, framework='PyTS',
                  approach='multiple', level=series_name)

# Windows scored, in order: None, range(6, 15) and [0, 1, 2, 7, 14].
# NOTE(review): the last window includes step 2, unlike the [0, 1, 7, 14]
# used in the other cells of this notebook -- confirm this is intentional.
score_1, score_2, score_3 = [
    KFold_score_time_series(estimator, X, y, n_splits, inner_test_window,
                            **cv_options, score_window=window)
    for window in (None, range(6, 15), [0, 1, 2, 7, 14])
]

# The three scores are expected to differ from one another.
print(score_1, score_2, score_3)

# Skforecast univariate case: an autoregressive forecaster (20 lags) wrapping
# a 15-tree random forest with a fixed seed, applied to series 'T'.
series_name = 'T'
forest = RandomForestRegressor(random_state=123, n_estimators=15)
estimator = ForecasterAutoreg(regressor=forest, lags=20)
X = X_train[series_name]
y = Y_train[series_name]

# Keyword arguments shared by the three cross-validation runs below; only
# score_window changes from one run to the next.
cv_options = dict(scoring=mean_absolute_error, framework='Skforecast',
                  approach='univariate')

# Windows scored, in order: None, range(6, 15) and [0, 1, 7, 14].
score_1, score_2, score_3 = [
    KFold_score_time_series(estimator, X, y, n_splits, inner_test_window,
                            **cv_options, score_window=window)
    for window in (None, range(6, 15), [0, 1, 7, 14])
]

# The three scores are expected to differ from one another.
print(score_1, score_2, score_3)

# Skforecast multivariate case: the forecaster is built for `steps` equal to
# the inner test window and targets the series named by `level`.
series_name = 'T'
forest = RandomForestRegressor(random_state=123, n_estimators=10)
estimator = ForecasterAutoregMultiVariate(
    regressor=forest,
    lags=20,
    steps=inner_test_window,
    level=series_name,
)
X = X_multi_train
y = Y_train[series_name]

# Keyword arguments shared by the three cross-validation runs below; only
# score_window changes from one run to the next.
cv_options = dict(scoring=mean_absolute_error, framework='Skforecast',
                  approach='multivariate')

# Windows scored, in order: None, range(6, 15) and [0, 1, 7, 14].
score_1, score_2, score_3 = [
    KFold_score_time_series(estimator, X, y, n_splits, inner_test_window,
                            **cv_options, score_window=window)
    for window in (None, range(6, 15), [0, 1, 7, 14])
]

# The three scores are expected to differ from one another.
print(score_1, score_2, score_3)

# Skforecast 'multiple' approach: one multi-series forecaster (20 lags, 10-tree
# random forest with a fixed seed) scored on the series passed as `level`.
series_name = 'T'
forest = RandomForestRegressor(random_state=123, n_estimators=10)
estimator = ForecasterAutoregMultiSeries(regressor=forest, lags=20)
X = X_multi_train
y = Y_train[series_name]

# Keyword arguments shared by the three cross-validation runs below; only
# score_window changes from one run to the next.
cv_options = dict(scoring=mean_absolute_error, framework='Skforecast',
                  approach='multiple', level=series_name)

# Windows scored, in order: None, range(6, 15) and [0, 1, 7, 14].
score_1, score_2, score_3 = [
    KFold_score_time_series(estimator, X, y, n_splits, inner_test_window,
                            **cv_options, score_window=window)
    for window in (None, range(6, 15), [0, 1, 7, 14])
]

# The three scores are expected to differ from one another.
print(score_1, score_2, score_3)