version 0.2.0

jobstdavid · May 21, 2024 · a3a64e7 · a3a64e7
1 parent 661cab0
commit a3a64e7
Show file tree

Hide file tree

Showing 9 changed files with 77 additions and 76 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: tsEMOS
 Type: Package
 Title: Time Series based Ensemble Model Output Statistics 
-Version: 0.1.0
+Version: 0.2.0
 Authors@R: c(
     person(given = "David",
            family = "Jobst",
@@ -23,5 +23,5 @@ BugReports: https://github.com/jobstdavid/tsEMOS/issues
 License: GPL-3
 Encoding: UTF-8
 LazyData: true
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
 
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,7 @@
+# tsEMOS 0.2.0 (May 20, 2024)
+
+-   Slightly improved code for `semos`, `dargarchadd_semos`, `dargarchmult_semos`.
+
 # tsEMOS 0.1.0 (February 1, 2024)
 
 -   First release.
diff --git a/R/dargarchadd_semos.R b/R/dargarchadd_semos.R
@@ -172,7 +172,7 @@ dargarchadd_semos <- function(train,
   omega0 <- as.numeric(coef(fit_garch)[1])
   omega1 <- as.numeric(coef(fit_garch)[3])
   omega2 <- as.numeric(coef(fit_garch)[2])
-  coef_garch <- c(omega0, omega1, omega2)
+  coef_garch <- sqrt(c(omega0, omega1, omega2))
 
   # prediction function for the AR(p)-process with n_ahead forecast step
   predict_r <- function(r, mu, a, p, n_ahead) {
@@ -212,21 +212,22 @@ dargarchadd_semos <- function(train,
       r_c <- predict_r(r = r, mu = pars[21], a = pars[22:(21+p)], p = p, n_ahead = 0)
       n <- length(r_c)
       z <- tail(r, n = n) - r_c
+      SIGMA <- tail(SIGMA, n-1)
 
-      # initial values
+      # initial value for sig_c
       sig_c <- sqrt(mean(z^2))
-      sig_s <- tail(SIGMA, n = n)
-
+      # predict z-value for correction
       for (k in 1:(n-1)) {
-        sig_c <- c(sig_c, sqrt(sig_s[k+1]^2 + pars[22+p]^2 + pars[23+p]^2 * sig_c[k]^2 + pars[24+p]^2 * z[k]^2))
+        sig_c <- c(sig_c, sqrt(SIGMA[k]^2 + pars[22+p]^2 + pars[23+p]^2 * sig_c[k]^2 + pars[24+p]^2 * z[k]^2))
       }
+      sig_c <- sig_c[-1]
 
       # adapt length of parameters based on lag p
-      OBS <- obs[-c(1:p)]
-      MU <- MU[-c(1:p)]
+      OBS <- obs[-c(1:(p+1))]
+      MU <- MU[-c(1:(p+1))]
 
       # correct MU and SIGMA
-      MU <- r_c + MU
+      MU <- r_c[-1] + MU
       SIGMA <- sig_c
       # calculate CRPS
       z <- (OBS - MU)/SIGMA
@@ -262,50 +263,37 @@ dargarchadd_semos <- function(train,
 
     # predict z-value for correction
     sig_c <- sqrt(mean(z^2))
-    sig_s <- tail(SIGMA, n = n_r_c)
-
-    for (k in 1:(n_r_c-1)) {
-      sig_c <- c(sig_c, sqrt(sig_s[k+1]^2 + pars[22+p]^2 + pars[23+p]^2 * sig_c[k]^2 + pars[24+p]^2 * z[k]^2))
-    }
-
-    # initial sig_c and z
-    sig_c <- tail(sig_c, n = 1)
-    z <- tail(z, n = 1)
 
     # get necessary test data
-    doy <- c(doy[(n-p+1-n_ahead):n], test[, doy_col])
-    obs <- c(obs[(n-p+1-n_ahead):n], test[, obs_col])
-    m <- c(m[(n-p+1-n_ahead):n], test[, mean_col])
-    s <- c(s[(n-p+1-n_ahead):n], test[, sd_col])
+    doy <- c(doy[(n-(2*n_ahead+p)):n], test[, doy_col])
+    obs <- c(obs[(n-(2*n_ahead+p)):n], test[, obs_col])
+    m <- c(m[(n-(2*n_ahead+p)):n], test[, mean_col])
+    s <- c(s[(n-(2*n_ahead+p)):n], test[, sd_col])
 
     sin1 <- sin(2*pi*doy/365.25)
     sin2 <- sin(4*pi*doy/365.25)
     cos1 <- cos(2*pi*doy/365.25)
     cos2 <- cos(4*pi*doy/365.25)
     doys <- cbind(1, sin1, sin2, cos1, cos2)
 
-    # get updated initial values for sig_c and z for the last iteration in the optimization
-    MU <- doys %*% pars[1:5] + doys %*% pars[6:10]*m
+    MU <- as.vector(doys %*% pars[1:5] + doys %*% pars[6:10]*m)
     SIGMA <- as.vector(exp(doys %*% pars[11:15] + doys %*% pars[16:20]*s))
     r <- as.vector((obs-MU))
-
     # predict r-values for correction
     r_c <- predict_r(r = r, mu = pars[21], a = pars[22:(21+p)], p = p, n_ahead = n_ahead)
     n_r_c <- length(r_c)
-    z <- c(z, # initial z value from above
-           tail(r, n = n_r_c) - r_c)
-
-    # predict z-value for correction
-    sig_s <- tail(SIGMA, n = n_r_c)
+    z <- as.vector((tail(r, n = n_r_c) - r_c))
+    n2 <- nrow(test)
+    SIGMA <- tail(SIGMA, n2)
 
-    for (k in 1:n_r_c) {
-      sig_c <- c(sig_c, sqrt(sig_s[k]^2 + pars[22+p]^2 + pars[23+p]^2 * sig_c[k]^2 + pars[24+p]^2 * z[k]^2))
+    # predict sig_c
+    for (k in 1:n2) {
+      sig_c <- c(sig_c, sqrt(SIGMA[k]^2 + pars[22+p]^2 + pars[23+p]^2 * sig_c[k]^2 + pars[24+p]^2 * z[k]^2))
     }
-    sig_c <- sig_c[-1]
 
     # predicted/correct parameters
-    MU <- r_c + MU[-c(1:(p+n_ahead))]
-    SIGMA <- sig_c
+    MU <- tail(MU, n2) + tail(r_c, n2)
+    SIGMA <- tail(sig_c, n2)
 
   } else {
 

diff --git a/R/dargarchmult_semos.R b/R/dargarchmult_semos.R
@@ -170,7 +170,7 @@ dargarchmult_semos <- function(train,
   omega0 <- as.numeric(coef(fit_garch)[1])
   omega1 <- as.numeric(coef(fit_garch)[3])
   omega2 <- as.numeric(coef(fit_garch)[2])
-  coef_garch <- c(omega0, omega1, omega2)
+  coef_garch <- sqrt(c(omega0, omega1, omega2))
 
   # prediction function for the AR(p)-process with n_ahead forecast step
   predict_r <- function(r, mu, a, p, n_ahead) {
@@ -202,7 +202,7 @@ dargarchmult_semos <- function(train,
 
     optim_fun1 <- function(pars, obs, m, s, doys, p) {
 
-      MU <- doys %*% pars[1:5] + doys %*% pars[6:10]*m
+      MU <- as.vector(doys %*% pars[1:5] + doys %*% pars[6:10]*m)
       SIGMA <- as.vector(exp(doys %*% pars[11:15] + doys %*% pars[16:20]*s))
       r <- as.vector((obs-MU))
 
@@ -211,19 +211,21 @@ dargarchmult_semos <- function(train,
       n <- length(r_c)
       z <- (tail(r, n = n) - r_c)/tail(SIGMA, n = n)
 
-      # predict z-value for correction
+      # initial value for sig_c
       sig_c <- sqrt(mean(z^2))
+      # predict z-value for correction
       for (k in 1:(n-1)) {
         sig_c <- c(sig_c, sqrt(pars[22+p]^2 + pars[23+p]^2 * sig_c[k]^2 + pars[24+p]^2 * z[k]^2))
       }
+      sig_c <- sig_c[-1]
 
       # adapt length of parameters based on lag p
-      OBS <- obs[-c(1:p)]
-      MU <- MU[-c(1:p)]
-      SIGMA <- SIGMA[-c(1:p)]
+      OBS <- obs[-c(1:(p+1))]
+      MU <- MU[-c(1:(p+1))]
+      SIGMA <- SIGMA[-c(1:(p+1))]
 
       # correct MU and SIGMA
-      MU <- r_c + MU
+      MU <- r_c[-1] + MU
       SIGMA <- sig_c * SIGMA
       # calculate CRPS
       z <- (OBS - MU)/SIGMA
@@ -248,29 +250,22 @@ dargarchmult_semos <- function(train,
     pars <- res$par
 
     # get initial values for sig_c and z from the last iteration in the above optimization
-    MU <- doys %*% pars[1:5] + doys %*% pars[6:10]*m
-    SIGMA <- exp(doys %*% pars[11:15] + doys %*% pars[16:20]*s)
+    MU <- as.vector(doys %*% pars[1:5] + doys %*% pars[6:10]*m)
+    SIGMA <- as.vector(exp(doys %*% pars[11:15] + doys %*% pars[16:20]*s))
     r <- as.vector((obs-MU))
 
     r_c <- predict_r(r = r, mu = pars[21], a = pars[22:(21+p)], p = p, n_ahead = 0)
     n_r_c <- length(r_c)
     z <- (tail(r, n = n_r_c) - r_c)/tail(SIGMA, n = n_r_c)
 
+    # initial value for sig_c
     sig_c <- sqrt(mean(z^2))
-    # predict sig_c
-    for (k in 1:(n_r_c-1)) {
-      sig_c <- c(sig_c, sqrt(pars[22+p]^2 + pars[23+p]^2 * sig_c[k]^2 + pars[24+p]^2 * z[k]^2))
-    }
-
-    # initial sig_c and z
-    sig_c <- tail(sig_c, n = 1)
-    z <- tail(z, n = 1)
 
     # get necessary test data
-    doy <- c(doy[(n-p+1-n_ahead):n], test[, doy_col])
-    obs <- c(obs[(n-p+1-n_ahead):n], test[, obs_col])
-    m <- c(m[(n-p+1-n_ahead):n], test[, mean_col])
-    s <- c(s[(n-p+1-n_ahead):n], test[, sd_col])
+    doy <- c(doy[(n-(2*n_ahead+p)):n], test[, doy_col])
+    obs <- c(obs[(n-(2*n_ahead+p)):n], test[, obs_col])
+    m <- c(m[(n-(2*n_ahead+p)):n], test[, mean_col])
+    s <- c(s[(n-(2*n_ahead+p)):n], test[, sd_col])
 
     sin1 <- sin(2*pi*doy/365.25)
     sin2 <- sin(4*pi*doy/365.25)
@@ -279,25 +274,24 @@ dargarchmult_semos <- function(train,
     doys <- cbind(1, sin1, sin2, cos1, cos2)
 
 
-    MU <- doys %*% pars[1:5] + doys %*% pars[6:10]*m
-    SIGMA <- exp(doys %*% pars[11:15] + doys %*% pars[16:20]*s)
+    MU <- as.vector(doys %*% pars[1:5] + doys %*% pars[6:10]*m)
+    SIGMA <- as.vector(exp(doys %*% pars[11:15] + doys %*% pars[16:20]*s))
     r <- as.vector((obs-MU))
     # predict r-values for correction
     r_c <- predict_r(r = r, mu = pars[21], a = pars[22:(21+p)], p = p, n_ahead = n_ahead)
     n_r_c <- length(r_c)
-    z <- c(z, # initial z from above
-           (tail(r, n = n_r_c) - r_c)/tail(SIGMA, n = n_r_c))
+    z <- as.vector((tail(r, n = n_r_c) - r_c)/tail(SIGMA, n = n_r_c))
+    n2 <- nrow(test)
 
     # predict sig_c
-    for (k in 1:n_r_c) {
+    for (k in 1:n2) {
       sig_c <- c(sig_c, sqrt(pars[22+p]^2 + pars[23+p]^2 * sig_c[k]^2 + pars[24+p]^2 * z[k]^2))
     }
-    # remove initial sig_c
-    sig_c <- sig_c[-1]
 
     # predicted/correct parameters
-    MU <- r_c + MU[-c(1:(p+n_ahead))]
-    SIGMA <- sig_c * SIGMA[-c(1:(p+n_ahead))]
+    MU <- tail(r_c, n2) + tail(MU, n2)
+    SIGMA <- tail(sig_c, n2)*tail(SIGMA, n2)
+
 
   } else {
 

diff --git a/R/semos.R b/R/semos.R
@@ -8,6 +8,9 @@
 #' @param obs_col column of the observation variable.
 #' @param mean_col column of the ensemble mean forecast variable.
 #' @param sd_col column of the ensemble standard deviation variable.
+#' @param n_ahead integer corresponding to the forecast ahead time
+#' (0 for ahead times not greater than 24 hours,
+#' 1 for ahead times greater than 24 hours and not greater than 48 hours, and so on).
 #' @param ... unused
 #'
 #' @return A data frame containing the distribution (location and scale) parameters.
@@ -33,7 +36,8 @@
 #'              doy_col = 3,
 #'              obs_col = 9,
 #'              mean_col = 10,
-#'              sd_col = 11)
+#'              sd_col = 11,
+#'              n_ahead = 0)
 #'
 #' # distribution parameters
 #' head(fit)
@@ -51,7 +55,8 @@
 #'              doy_col = 3,
 #'              obs_col = 9,
 #'              mean_col = 10,
-#'              sd_col = 11)
+#'              sd_col = 11,
+#'              n_ahead = 1)
 #'
 #' # distribution parameters
 #' head(fit)
@@ -65,12 +70,14 @@ semos <- function(train,
                   obs_col,
                   mean_col,
                   sd_col,
+                  n_ahead = 0,
                   ...) {
 
   # get necessary training data
   n <- nrow(train)
   doy <- train[, doy_col]
-  obs <- train[, obs_col]
+  # replace values which can not be used for n_ahead by NA
+  obs <- c(train[1:(n-n_ahead), obs_col], rep(NA, n_ahead))
   # impute missing values due to leadtime to be able to calculate approximate residuals
   obs <- na_ma(obs)
   m <- train[, mean_col]

diff --git a/README.Rmd b/README.Rmd
@@ -71,7 +71,8 @@ fit <- semos(train = train,
              doy_col = 3,
              obs_col = 9,
              mean_col = 10,
-             sd_col = 11)
+             sd_col = 11,
+             n_ahead = 0)
 ```
 
 
@@ -128,4 +129,4 @@ fit <- sar_semos(train = train,
 Feel free to contact [[email protected]](mailto:[email protected]) if you have any questions or suggestions.
 
 ## References
-Jobst, D., Möller, A., and Groß, J. (2023). Time Series based Ensemble Model Output Statistics for Temperature Forecasts Postprocessing. https://doi.org/10.48550/arXiv.2402.00555.
+Jobst, D., Möller, A., and Groß, J. (2024). Time Series based Ensemble Model Output Statistics for Temperature Forecasts Postprocessing. https://doi.org/10.48550/arXiv.2402.00555.
diff --git a/README.md b/README.md
@@ -73,7 +73,8 @@ fit <- semos(train = train,
              doy_col = 3,
              obs_col = 9,
              mean_col = 10,
-             sd_col = 11)
+             sd_col = 11,
+             n_ahead = 0)
 ```
 
 ### DAR-SEMOS
@@ -131,6 +132,6 @@ questions or suggestions.
 
 ## References
 
-Jobst, D., Möller, A., and Groß, J. (2023). Time Series based Ensemble
+Jobst, D., Möller, A., and Groß, J. (2024). Time Series based Ensemble
 Model Output Statistics for Temperature Forecasts Postprocessing.
 <https://doi.org/10.48550/arXiv.2402.00555>.
diff --git a/man/semos.Rd b/man/semos.Rd