tidy.mvgam() Sigma splitting; tests

- added a seed to all mvgam_example*'s - added in a new example, mvgam_example6, for an AR with heirarchical correlated residuals model - added a snapshot for tidy.mvgam for this example - Implemented `tidy.mvgam()` Sigma splitting
nicholasjclark · Feb 1, 2025 · dbc2cf4 · dbc2cf4
1 parent 1250b80
commit dbc2cf4
Show file tree

Hide file tree

Showing 5 changed files with 183 additions and 32 deletions.
diff --git a/R/sysdata.rda b/R/sysdata.rda
diff --git a/R/tidier_methods.R b/R/tidier_methods.R
@@ -274,13 +274,59 @@ tidy.mvgam <- function(x, probs = c(0.025, 0.5, 0.975), ...) {
   }
   # END trend random effects
 
-  # OUTPUT --------
+  # Cleanup output --------
   # TODO: might need to put this prior to every bind_rows to avoid rowname dups.
   out <- tibble::rownames_to_column(out, "parameter")
+
+  # Split Sigma in case of hierarchical residual correlations
+  alpha_cor_matches <- grep("alpha_cor", out$parameter, fixed = TRUE)
+  if (length(alpha_cor_matches) > 0) {
+    out <- split_hier_Sigma(object, out)
+  }
+  # END Cleanup output
+
   out
 }
 
 
+#' Helper function to split apart Sigma into its constituent sub-matrixes in
+#' the case of a hierarchical latent process.
+#'
+#' The default MCMC output has dummy parameters filling out Sigma to make it
+#' an nxn matrix. This removes those, and renames the remaining sub-matrixes
+#' to align with the `gr` and `subgr` sizes from `mvgam()`'s `trend_model` argument.
+#'
+#' @param object An object of class `mvgam`.
+#' @param params `tibble` The parameters that are going to be returned by
+#'   `tidy.mvgam()`. Assumed that the columns match what `tidy.mvgam()` will return.
+#'   Specifically, that there is a "parameter" column.
+#' @returns `tibble` The `params`, but with the Sigma parameters split up by `gr`.
+#' @noRd
+split_hier_Sigma <- function(object, params) {
+  params_nonSigma <- dplyr::filter(params, !grepl("^Sigma", parameter))
+  params_Sigma <- dplyr::filter(params, grepl("^Sigma", parameter))
+
+  gr <- object$trend_model$gr
+  subgr <- object$trend_model$subgr
+  gr_levels <- levels(object$obs_data[[gr]])
+  subgr_levels <- levels(object$obs_data[[subgr]])
+  n_gr <- length(gr_levels)
+  n_subgr <- length(subgr_levels)
+
+  # anything besides the dummy params should have non-zero sd
+  params_Sigma <- dplyr::filter(params_Sigma, mean != 0, sd != 0)
+  index_strs <- sub("Sigma", "", params_Sigma$parameter)[1:(n_subgr ** 2)]
+
+  # new names
+  new_names <- paste0("Sigma_",
+                     rep(seq_len(n_gr), each = n_subgr ** 2),
+                     index_strs)
+  params_Sigma["parameter"] <- new_names
+
+  dplyr::bind_rows(params_nonSigma, params_Sigma)
+}
+
+
 #' Augment an mvgam object's data
 #'
 #' Add fits and residuals to the data, implementing the generic `augment` from

diff --git a/tests/mvgam_examples.R b/tests/mvgam_examples.R
@@ -1,7 +1,8 @@
 # Small mvgam examples for testing post-fitting functions such as
 # predict, forecast, hindcast etc...
 testthat::skip_on_cran()
-set.seed(1234)
+SEED = 1234
+set.seed(SEED)
 library(mvgam)
 mvgam_examp_dat <- list(data_train =
                           structure(list(y = c(-0.317295790188251, 0.220334092025582,
@@ -151,7 +152,8 @@ mvgam_example1 <- mvgam(y ~ s(season, k = 5),
                         burnin = 300,
                         samples = 30,
                         chains = 1,
-                        backend = 'rstan')
+                        backend = 'rstan',
+                        seed = SEED)
 
 # Univariate process with trend_formula, trend_map and correlated process errors
 trend_map <- data.frame(series = unique(mvgam_examp_dat$data_train$series),
@@ -165,7 +167,8 @@ mvgam_example2 <- mvgam(y ~ 1,
                         burnin = 300,
                         samples = 30,
                         chains = 1,
-                        backend = 'rstan')
+                        backend = 'rstan',
+                        seed = SEED)
 
 # Multivariate process without trend_formula
 mvgam_example3 <- mvgam(y ~ s(season, k = 5),
@@ -175,7 +178,8 @@ mvgam_example3 <- mvgam(y ~ s(season, k = 5),
                         burnin = 300,
                         samples = 30,
                         chains = 1,
-                        backend = 'rstan')
+                        backend = 'rstan',
+                        seed = SEED)
 
 # Multivariate process with trend_formula and moving averages
 mvgam_example4 <- mvgam(y ~ series,
@@ -186,7 +190,8 @@ mvgam_example4 <- mvgam(y ~ series,
                         burnin = 300,
                         samples = 30,
                         chains = 1,
-                        backend = 'rstan')
+                        backend = 'rstan',
+                        seed = SEED)
 
 # GP dynamic factors (use list format to ensure it works in tests)
 list_data <- list()
@@ -203,7 +208,42 @@ mvgam_example5 <- mvgam(y ~ series + s(season, k = 5),
                         burnin = 300,
                         samples = 30,
                         chains = 1,
-                        backend = 'rstan')
+                        backend = 'rstan',
+                        seed = SEED)
+
+
+# Hierarchical dynamics example adapted from RW documentation example.
+# The difference is that this uses 4 species rather than 3.
+simdat1 <- sim_mvgam(trend_model = VAR(cor = TRUE),
+                     prop_trend = 0.95,
+                     n_series = 4,
+                     mu = c(1, 2, 3, 4))
+simdat2 <- sim_mvgam(trend_model = VAR(cor = TRUE),
+                     prop_trend = 0.95,
+                     n_series = 4,
+                     mu = c(1, 2, 3, 4))
+simdat3 <- sim_mvgam(trend_model = VAR(cor = TRUE),
+                     prop_trend = 0.95,
+                     n_series = 4,
+                     mu = c(1, 2, 3, 4))
+
+simdat_all <- rbind(simdat1$data_train %>%
+                      dplyr::mutate(region = 'qld'),
+                    simdat2$data_train %>%
+                      dplyr::mutate(region = 'nsw'),
+                    simdat3$data_train %>%
+                      dplyr::mutate(region = 'vic')) %>%
+  dplyr::mutate(species = gsub('series', 'species', series),
+                species = as.factor(species),
+                region = as.factor(region)) %>%
+  dplyr::arrange(series, time) %>%
+  dplyr::select(-series)
+
+mvgam_example6 <- mvgam(formula = y ~ species,
+                        trend_model = AR(gr = region, subgr = species),
+                        data = simdat_all,
+                        backend = 'rstan',
+                        seed = SEED)
 
 # Save examples as internal data
 usethis::use_data(
@@ -213,6 +253,7 @@ usethis::use_data(
   mvgam_example3,
   mvgam_example4,
   mvgam_example5,
+  mvgam_example6,
   internal = TRUE,
   overwrite = TRUE
 )