Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Codes used for geneating the forecasts #9

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.Rproj.user
.Rhistory
.RData
.Ruserdata
13 changes: 13 additions & 0 deletions M4-methods.Rproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX
Binary file added ThiyangaTalagala/m4competition_report.pdf
Binary file not shown.
123 changes: 123 additions & 0 deletions ThiyangaTalagala/src/code_daily.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
## ---- load-pkgs
library(tidyverse)
library(forecast)
library(Mcomp)
library(forecTheta)
# devtools::install_github("thiyangt/seer")
library(seer)
# devtools::install_github("robjhyndman/tsfeatures")
library(tsfeatures)
library(foreach)

## ---- load-data
data(M4)
M4_daily <- subset(M4, "daily")

## --- convert the time series into suitable msts object
M4_daily_msts <- lapply(M4_daily, function(temp){
temp$x <- convert_msts(temp$x, "daily")
return(temp)
})

## ---- load-rmConstantSeries
M4_daily_constant_train <- sapply(M4_daily_msts, function(temp){
ts1 <- temp$x
training <- head_ts(ts1, h=14)
if (is.constant(training)==TRUE){print(temp$st)}
})
# D2085

# split the M4 daily series into training and test
names_m4_use_d <- names(M4_daily_rm)
set.seed(8)
index_test_d <- sample(names_m4_use_d, 226)
save(index_test_d, file="data/daily/index_test_d.rda")
M4_training_daily <- M4_daily_rm[!names(M4_daily_rm) %in% index_test_d]
length(M4_training_daily) # 4000
save(M4_training_daily, file="data/daily/M4_training_daily.rda")
M4_test_daily <- M4_daily_rm[names(M4_daily_rm) %in% index_test_d]
length(M4_test_daily) #100
save(M4_test_daily, file="data/daily/M4_test_daily.rda")


# simulation
set.seed(8) #
M4Dmstlets <- lapply(M4_daily_msts, sim_mstlbased, Future=TRUE, Nsim=10, extralength=14, Combine=FALSE, mtd="ets")

set.seed(8)#
M4Dmstlarima <- lapply(M4_daily_msts, sim_mstlbased, Future=TRUE, Nsim=10, extralength=14, Combine=FALSE, mtd="arima")


# convert to msts object
set.seed(8)
M4Dets_msts <- lapply(M4_daily_msts, sim_mstlbased, Future=TRUE, Nsim=5, extralength=14, Combine=FALSE, mtd="ets")
M4simets_daily_msts <- lapply(M4Dets_msts, function(temp){
lapply(temp, convert_msts, category="daily")})


set.seed(8)
M4Darima_msts <- lapply(M4_daily_msts, sim_mstlbased, Future=TRUE, Nsim=5, extralength=14, Combine=FALSE, mtd="arima")
M4simarima_daily_msts <- lapply(M4Darima_msts, function(temp){
lapply(temp, convert_msts, category="daily")})


# Calculate features: M4-training
M4D_train <- lapply(M4_training_daily, function(temp){temp$x})
featuresM4D_training <- cal_features(M4D_train, seasonal=TRUE, h=14, m=7, lagmax=8L, database="other", highfreq=TRUE)
# labels M4-training
data_train <- lapply(M4_training_daily, function(temp){temp$x})
m4daily_train <- fcast_accuracy(data_train,
models=c("rw", "rwd", "wn",
"theta", "stlar", "nn", "snaive", "mstlets","mstlarima", "tbats"),
database="other",
h=14, accuracyFun=cal_m4measures, length_out=2)
m0 <- m4daily_train$accuracy
ARIMA <- rep("arima", 4000)
ETS <- rep("ets", 4000)
acc_list <- list(accuracy=m0, ARIMA=ARIMA, ETS=ETS)
M4D_ms <- cal_medianscaled(acc_list)
M4D_training <- prepare_trainingset(accuracy_set=M4D_ms,
feature_set=featuresM4D_training)
m4dtraining <- M4D_training$trainingset

# classlabel for simulated data
M4Dmstlets <- lapply(M4simets_daily_msts, fcast_accuracy,
models=c("rw", "rwd", "wn",
"theta", "stlar", "nn", "snaive", "mstlets","mstlarima", "tbats"),
database="other",
h=14, accuracyFun=cal_m4measures, length_out=2)
m4d_accuracy <- lapply(M4Dmstlets, function(temp){temp$accuracy})
m4d_mat <- do.call(rbind, m4d_accuracy)
accuracy <- m4d_mat
ARIMA <- rep("arima", 42270)
ETS <- rep("ets", 42270)
accsim_list_sim <- list(accuracy=accuracy, ARIMA=ARIMA, ETS=ETS)
M4D_msim <- cal_medianscaled(accsim_list_sim)

# features - simulated data
M4D_ets <- lapply(M4Dmstlets, function(temp){
lapply(temp, function(temp){convert_msts(temp, "daily")})})
features_M4Dets <-lapply(M4D_ets, function(temp){
lapply(temp, cal_features,seasonal=TRUE, h=14, m=7, lagmax=8L, database="other", highfreq=TRUE)})
features_M4DS <- lapply(features_M4Dets, function(temp){
do.call(rbind, temp)
})
featuresM4DS <- data.table::rbindlist(features_M4DS, use.names = TRUE, fill = TRUE)
featuresM4DS <- as.data.frame(featuresM4DS)
dim(featuresM4DS) # 42270 27
featuresM4DS$seasonal_strength1[is.na(featuresM4DS$seasonal_strength1)==TRUE] =
featuresM4DS$seasonality[is.na(featuresM4DS$seasonality)==FALSE]
featuresM4DS$seasonal_strength2[is.na(featuresM4DS$seasonal_strength2)==TRUE]=0
dim(featuresM4DS)

featuresM4DS <- featuresM4DS %>% dplyr::select(-dplyr::one_of("seasonality"))
featuresM4DS <- featuresM4DS %>% dplyr::select(-dplyr::one_of("seas_pacf"))

M4Dsim_rf <- prepare_trainingset(accuracy_set=M4D_msim,
feature_set=featuresM4DS)

# combine the data frames for daily series
daily_training <- dplyr::bind_rows(m4dtraining, M4D_training_sim)
daily_training <- daily_training %>% dplyr::select(-dplyr::one_of("seas_pacf"))
save(daily_training, file="data/daily_training.rda")

91 changes: 91 additions & 0 deletions ThiyangaTalagala/src/code_hourly.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
## ---- load-pkgs
library(tidyverse)
library(forecast)
library(Mcomp)
library(forecTheta)
# devtools::install_github("thiyangt/seer")
library(seer)
# devtools::install_github("robjhyndman/tsfeatures")
library(tsfeatures)
library(foreach)

## ---- load-data
data(M4)
M4_hourly <- subset(M4, "hourly")

## --- convert the time series into suitable msts object
M4_hourly_msts <- lapply(M4_hourly, function(temp){
temp$x <- convert_msts(temp$x, "hourly")
return(temp)
})

## ---- load-rmConstantSeries
M4_hourly_constant_train <- sapply(M4_hourly_msts, function(temp){
ts1 <- temp$x
training <- head_ts(ts1, h=48)
if (is.constant(training)==TRUE){print(temp$st)}
})
# No hourly series with constant values

# split the M4 hourly series into training and test
names_m4_use_h <- names(M4_hourly_msts)
set.seed(8)
index_test_h <- sample(names_m4_use_h, 64)
M4_training_hourly <- M4_hourly_msts[!names(M4_hourly_msts) %in% index_test_h]
M4_test_hourly <- M4_hourly_msts[names(M4_hourly_msts) %in% index_test_h]


# simulation
set.seed(8)
M4Hmstlets <- lapply(M4_hourly_msts, sim_mstlbased, Future=TRUE, Nsim=10, extralength=48, Combine=FALSE, mtd="ets")
set.seed(8)
M4Hmstlarima <- lapply(M4_hourly_msts, sim_mstlbased, Future=TRUE, Nsim=10, extralength=48, Combine=FALSE, mtd="arima")

# convert simulate ts to msts
M4Hmstlarima_msts <- lapply(M4Hmstlarima, function(temp){
lapply(temp, function(temp){convert_msts(temp, "hourly")})})

# features
M4H_train <- lapply(M4_training_hourly, function(temp){temp$x})
featuresM4H_training <- cal_features(M4H_train, seasonal=TRUE, h=48, m=24, lagmax=25L, database="other", highfreq=TRUE)

features_m4hmstlets <- lapply(M4Hmstlarima_msts, function(temp){
lapply(temp, cal_features,seasonal=TRUE, h=48, m=24, lagmax=25L, database="other", highfreq=TRUE)})
features_M4H_mstl <- lapply(features_m4hmstlets, function(temp){
do.call(rbind, temp)
})
features_M4Hmstl_DF <- do.call(rbind, features_M4H_mstl)

# Class label
data_train <- lapply(M4_training_hourly, function(temp){temp$x})
M4Htraining_label <- fcast_accuracy(data_train,
models=c("rw", "rwd", "wn", "stlar", "nn", "snaive", "mstlets","mstlarima", "tbats"),
database="other",
h=48, accuracyFun=cal_m4measures, length_out=2)
m0 <- M4Htraining_label$accuracy
ARIMA <- rep("arima", 350)
ETS <- rep("ets", 350)
acc_list <- list(accuracy=m0, ARIMA=ARIMA, ETS=ETS)
M4H_ms <- cal_medianscaled(acc_list)
M4H_training <- prepare_trainingset(accuracy_set=M4H_ms,
feature_set=featuresM4H_training)

# simulated series
m4hets <- lapply(M4Hmstlets, fcast_accuracy,
models=c("rw", "rwd", "wn",
"theta", "stlar", "nn", "snaive", "mstlets","mstlarima", "tbats"),
database="other",
h=48, accuracyFun=cal_m4measures, length_out=2)
accuracy <- m4hets$accuracy
ARIMA <- rep("arima", 4140)
ETS <- rep("ets", 4140)
accsim_list <- list(accuracy=accuracy, ARIMA=ARIMA, ETS=ETS)
M4H_msim <- cal_medianscaled(accsim_list)
M4H_training_sim <- prepare_trainingset(accuracy_set=M4H_msim,
feature_set=features_M4Hmstl_DF)


# combine dataframes
hourly_training <- dplyr::bind_rows(M4H_training$trainingset, M4H_training_sim$trainingset)
save(hourly_training, file="data/hourly_training.rda")

142 changes: 142 additions & 0 deletions ThiyangaTalagala/src/code_monthly.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
## ---- load-pkgs
library(tidyverse)
library(forecast)
library(Mcomp)
library(forecTheta)
# devtools::install_github("thiyangt/seer")
library(seer)
# devtools::install_github("robjhyndman/tsfeatures")
library(tsfeatures)
library(foreach)

## ---- load-data
M1_monthly <- subset(M1, "monthly")
M3_monthly <- subset(M3, "monthly")
data(M4)
M4_monthly <- subset(M4, "monthly")

## ---- load-rmConstantSeries
M4_monthly_constant_train <- sapply(M4_monthly, function(temp){
ts1 <- temp$x
training <- head_ts(ts1, h=18)
if (is.constant(training)==TRUE){print(temp$st)}
})
## for monthly data there is no series with constant values

## ---- extract series number

sn <- sapply(M4_monthly, function(temp){temp$st})
set.seed("27-4-2018")
index_test <- sample(sn, 1000)
M4_monthly_training <- M4_monthly[ !names(M4_monthly) %in% index_test]
length(M4_monthly_training) # 47000
save(M4_monthly_training, file="data/monthly/M4_monthly_training.rda")

M4_monthly_test <- M4_monthly[names(M4_monthly) %in% index_test]
length(M4_monthly_test) #1000
save(M4_monthly_test, file="data/monthly/M4_monthly_test.rda")

# ---- simulation
data(M4)
m4_monthly <- subset(M4, "monthly")
set.seed(8)
M4MAS <- lapply(m4_monthly, sim_etsbased, Future=TRUE, Nsim=5, extralength=18, Combine=FALSE)

M4MES <- lapply(m4_monthly, sim_arimabased, Future=TRUE, Nsim=5, extralength=18, Combine=FALSE)


# ---- classlabel
## M1-monthly
classlabelM1M <- fcast_accuracy(monthly_m1,
models=c("ets", "arima", "rw", "rwd", "wn",
"theta", "stlar", "nn", "snaive", "mstl", "tbats"),
database="Mcomp",
h=18, accuracyFun=cal_m4measures, length_out = 2)

## M3-monthly
classlabelM3M <- fcast_accuracy(monthly_m3,
models=c("ets", "arima", "rw", "rwd", "wn",
"theta", "stlar", "nn", "snaive", "mstl", "tbats"),
database="Mcomp",
h=18, accuracyFun=cal_m4measures, length_out = 2)

## M4-monthly-training
M4Mtraining_fcast_accuracy <- fcast_accuracy(M4_monthly_training,
models=c("ets", "arima", "rw", "rwd", "wn",
"theta", "stlar", "nn", "snaive", "mstl", "tbats"),
database="other",
h=18, accuracyFun=cal_m4measures, length_out = 2)


## M4-monthly_simulate based on ARIMA
M4MAS_fcast_accuracy <- lapply(M4MAS, fcast_accuracy,
models=c("ets", "arima", "rw", "rwd", "wn",
"theta", "stlar", "nn", "snaive", "mstl", "tbats"),
database="other",
h=18, accuracyFun=cal_m4measures, length_out=2)

## M4-monthly_simulate based on ETS
m4_monthly_training <- load("M4/data/M4MES.rda") # length 48000
M4MES_fcast_accuracy <- lapply(data, fcast_accuracy,
models=c("ets", "arima", "rw", "rwd", "wn",
"theta", "stlar", "nn", "snaive", "mstl", "tbats"),
database="other",
h=18, accuracyFun=cal_m4measures, length_out=2)

# ---- features
## M1-monthly
featuresM1M <- cal_features(M1_monthly, seasonal=TRUE, h=18, m=12, lagmax=13L, database="M1", highfreq = FALSE)

## M3-monthly
featuresM3M <- cal_features(M3_monthly, seasonal=TRUE, h=18, m=12, lagmax=13L, database="M3", highfreq = FALSE)

## M4-monthly(training)
M4M_training <- lapply(M4_monthly_training, function(temp){temp$x})
featuresM4M_training <- cal_features(M4M_training, seasonal=TRUE, h=18, m=12, lagmax=13L, database="other", highfreq = FALSE)

# calculate features on simulated data - simulated based on ARIMA
featuresM4MSA <- lapply(M4MAS, function(temp){
lapply(temp, cal_features,seasonal=TRUE, h=18, m=12, lagmax=13L, database="other", highfreq=FALSE)})

# calculate features on simulated data - simulated based on ETS
featuresM4MSE <- lapply(M4MES, function(temp){
lapply(temp, cal_features,seasonal=TRUE, h=18, m=12, lagmax=13L, database="other", highfreq=FALSE)})


# processing class labels for monthly data

# M1 series
classlabelM1M_ms <- cal_medianscaled(classlabelM1M)
m1m_df <- prepare_trainingset(accuracy_set = classlabelM1M_ms, featuresM1M)
m1m_df_training <- m1m_df$training

# M3 series
classlabelM3M_ms <- cal_medianscaled(classlabelM3M)
m3m_df <- prepare_trainingset(accuracy_set = classlabelM3M_ms, featuresM3M)
m3m_df_training <- m3m_df$training

# M4-training set
M4Mtraining_fcast_accuaracy_ms <- cal_medianscaled(M4Mtraining_fcast_accuaracy)
features_training <- featuresM4M_training[1:44650, ]
M4M_trainingset <- prepare_trainingset(accuracy_set=M4Mtraining_fcast_accuaracy_ms,
feature_set=features_training)
M4M_training <- M4M_trainingset$trainingset

# M4 - simulate based on ARIMA
M4MAS_fcast_accuaracy_ms <- cal_medianscaled(M4MAS_fcast_accuaracy)
features_M4MSA <- lapply(featuresM4MSA, function(temp){
do.call(rbind, temp)
})
features_M4MSA_DF <- do.call(rbind, features_M4MSA)
M4MAS_rfset_arima <- prepare_trainingset(accuracy_set=M4MAS_fcast_accuaracy_ms,
feature_set=features_M4MSA_DF)

M4MAS_rfset <- M4MAS_rfset_arima$trainingset

# training set
monthly_training <- dplyr::bind_rows(m1m_df_training, m3m_df_training)
monthly_training <- dplyr::bind_rows(monthly_training,M4M_training)
monthly_training <- dplyr::bind_rows(monthly_training, M4MAS_rfset)
save(monthly_training, file="data/monthly_training.rda")


Loading