
Protoextrapolate #1112

Open · wants to merge 14 commits into base: main
2 changes: 2 additions & 0 deletions NAMESPACE
@@ -31,6 +31,7 @@ export(run_xml_tests)
export(standardize_iso)
export(unprotect_integer_cols)
importFrom(assertthat,assert_that)
importFrom(assertthat,is.scalar)
importFrom(data.table,data.table)
importFrom(dplyr,anti_join)
importFrom(dplyr,arrange)
@@ -92,3 +93,4 @@ importFrom(tidyr,separate)
importFrom(tidyr,spread)
importFrom(tidyr,unite)
importFrom(utils,capture.output)
importFrom(utils,tail)
5 changes: 5 additions & 0 deletions R/constants.R
@@ -33,6 +33,11 @@ MODEL_BASE_YEARS <- c(1975, 1990, 2005, 2010)
MODEL_FUTURE_YEARS <- seq(2015, 2100, 5)
MODEL_YEARS <- c(MODEL_BASE_YEARS, MODEL_FUTURE_YEARS)

# Intermediate BYU year constant. Needed during early development, when not all
# data sets get extended. The goal is to handle this automatically via a change
# to HISTORICAL_YEARS. Once the BYU skeleton is in place for all chunks, it
# should be possible to simply find and replace BYU_YEAR with max(HISTORICAL_YEARS).
BYU_YEAR <- 2015

# GCAM constants ======================================================================

83 changes: 83 additions & 0 deletions R/extrapolators.R
@@ -0,0 +1,83 @@
# extrapolators.R
# helper extrapolator functions for use in base year changes

#' extrapolate_constant
#'
#' Computes the mean of the last n original-year values (with
#' \code{mean(., na.rm = TRUE)}) and uses this constant value to fill in the NA
#' values corresponding to the extrapolation years at the end of the time series.
#' NOTE that this extrapolator does not touch any of the original data. It ONLY
#' fills in values corresponding to the extrapolation years. It is the user's
#' responsibility to account for this behavior when preparing raw data to be
#' extrapolated.
#'
#' @param x Vector of values with NA's corresponding to the extrapolation years,
#' to be filled in via constant extrapolation of the mean of the last n
#' original-year values.
#' @param n Number of final original-year values to average to produce the
#' fill value. Averaging is done with \code{na.rm = TRUE}.
#' Defaults to \code{n = 1}: the last recorded year's value is used as the
#' constant fill for the tail of missing values corresponding to the
#' extrapolation years.
#' @param numExtrapYrs The number of NA values at the tail end of each vector
#' that correspond to the extrapolation years and will be filled in. Must be an
#' integer; this will always be known for each data set in each chunk.
#' @details Computes the mean of the last n original-year values of input
#' vector x and uses this constant value to fill in the NA values in x that
#' correspond to the added extrapolation years.
#' @return Vector with the trailing extrapolation-year NA values replaced by
#' the specified mean.
#' @importFrom assertthat assert_that is.scalar
#' @importFrom utils tail
#' @author ACS June 2019
extrapolate_constant <- function(x, n = 1, numExtrapYrs) {

# Some assertion tests to make sure working on right data types
assert_that(is.numeric(x))
assert_that(is.scalar(n))
assert_that(is.integer(numExtrapYrs))


# The constant value to fill in all extrapolation year NA's with.
# = mean(. , na.rm = TRUE) of the last n values in the original
# data.
index_last_n_orig_yrs <- (length(x) - numExtrapYrs - n + 1):(length(x) - numExtrapYrs)
meanval <- mean(x[index_last_n_orig_yrs], na.rm = TRUE)


# fill in only the tail end, extrapolation years
# NA values with this constant.
index_extrap_yrs <- (length(x) - numExtrapYrs + 1):length(x)
x[index_extrap_yrs] <- meanval

return(x)
}
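
For illustration, a minimal usage sketch (hypothetical values, not part of the diff):

# The last two recorded values are 2 and 3; their mean (2.5) fills the two
# trailing extrapolation-year NAs. Note that numExtrapYrs must be passed as
# an integer (2L) to satisfy the is.integer() assertion.
extrapolate_constant(c(1, 2, 3, NA, NA), n = 2, numExtrapYrs = 2L)
#> [1] 1.0 2.0 3.0 2.5 2.5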



#' last_n_nonNA
#'
#' Finds the last n non-NA values in an input vector.
#' A convenience function for users who wish to customize
#' their extrapolations beyond the default, or who wish to
#' identify NA values in their original (unextrapolated)
#' data.
#'
#' @param x Vector with some NA values.
#' @param n The number of non-NA values sought.
#' @details Finds the last n non-NA values in an input vector.
#' @return A vector with the last n non-NA values from input
#' vector x.
#' @importFrom assertthat assert_that is.scalar
#' @author ACS June 2019
last_n_nonNA <- function(x, n) {

assert_that(is.scalar(n))

if(n > length(x[!is.na(x)])){
stop('asking for more nonNA years than you have.')
Member commented:

😆
}


return(tail(x[!is.na(x)], n))
}
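
For illustration, a quick hypothetical example:

# Returns the last two non-NA entries, skipping interleaved and trailing NAs.
last_n_nonNA(c(1, NA, 3, NA, 5, NA), n = 2)
#> [1] 3 5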


39 changes: 36 additions & 3 deletions R/zchunk_L100.GDP_hist.R
@@ -17,7 +17,7 @@
#' @importFrom assertthat assert_that
#' @importFrom tibble tibble
#' @importFrom dplyr filter mutate select
#' @importFrom tidyr gather spread
#' @importFrom tidyr gather spread complete
#' @author BBL February 2017
module_socioeconomics_L100.GDP_hist <- function(command, ...) {
if(command == driver.DECLARE_INPUTS) {
@@ -34,13 +34,46 @@ module_socioeconomics_L100.GDP_hist <- function(command, ...) {
usda_gdp_mer <- get_data(all_data, "socioeconomics/USDA_GDP_MER")
assert_that(tibble::is.tibble(usda_gdp_mer))

# Convert to long form, filter to historical years, convert units
# Convert to long form, convert units
usda_gdp_mer %>%
select(-Country) %>%
gather_years %>%
filter(!is.na(value), !is.na(iso)) %>%
mutate(value = value * CONV_BIL_MIL * gdp_deflator(1990, base_year = 2010),
year = as.integer(year)) %>%
year = as.integer(year)) ->
long_iso_year_gdp


# Perform BYU
# Skeleton - constant extrapolation
# BYUcompliant
#
# BYU NOTE - must make sure the units are all 1990 USD (or at least the
# same year basis) so that averaging behaves. Fine here, but worth noting
# across other chunks where money comes up, especially since we drop
# unit information pretty early on in most chunks.
# BYU NOTE - need to think about labeling that the output has had the BYU
# update done, and adding a note about the method?
if(max(long_iso_year_gdp$year) < BYU_YEAR){

extrapyears <- (max(long_iso_year_gdp$year) + 1):BYU_YEAR

# Constant extrapolation operates on a plain numeric vector, so it can be
# applied to any numeric column, regardless of grouping or column name.
# This produces what gets output in this chunk.
long_iso_year_gdp %>%
complete(year = c(year, extrapyears), iso) %>%
group_by(iso) %>%
mutate(value = extrapolate_constant(value, n = 1,
numExtrapYrs = length(extrapyears))) %>%
ungroup ->
long_iso_year_gdp
}
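
For illustration, a hypothetical toy example of this complete()/group_by() pattern (assumes dplyr, tidyr, and the extrapolators above are loaded; the data are made up):

# Two isos recorded through 2013, extended to 2015 by constant extrapolation.
toy <- tibble::tibble(iso = rep(c("usa", "can"), each = 3),
                      year = rep(2011:2013, 2),
                      value = c(10, 11, 12, 5, 6, 7))
extrapyears <- 2014:2015
toy %>%
  complete(year = c(year, extrapyears), iso) %>%
  group_by(iso) %>%
  mutate(value = extrapolate_constant(value, n = 1,
                                      numExtrapYrs = length(extrapyears))) %>%
  ungroup()
# Each iso's 2014 and 2015 rows are filled with its 2013 value.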


# filter to historical years, convert units
long_iso_year_gdp %>%
add_title("Historical GDP downscaled to country (iso)") %>%
add_comments("Units converted to constant 1990 USD") %>%
add_precursors("socioeconomics/USDA_GDP_MER") %>%
42 changes: 42 additions & 0 deletions man/extrapolate_constant.Rd


30 changes: 30 additions & 0 deletions man/last_n_nonNA.Rd


12 changes: 12 additions & 0 deletions tests/testthat/test_oldnew.R
@@ -89,6 +89,18 @@ test_that("matches old data system output", {
olddata <- COMPDATA[[oldf]]
expect_is(olddata, "data.frame", info = paste("No comparison data found for", oldf))

# During the base year update development process, some outputs from the data
# system will be extended and therefore have different dimensions from the
# old comparison data. This causes the old-new test to fail.
# The extended extrapolation years will be dropped from newdata so that the
# comparison can be made to the old data. This also serves to check
# that the extrapolation procedure does not touch the original data.
if(max(newdata$year) == BYU_YEAR){ # one way to check that it's a BYU without flags.
newdata %>%
filter(year <= max(HISTORICAL_YEARS)) ->
Contributor Author commented:
I think the if will do the trick, but this filter is not the right way to handle the correction to get the dimensions to agree. So Travis is still failing.

The problem is that the last year of data in an input file may not actually be max(HISTORICAL_YEARS). For the GDP file, the last recorded year is 2013, not 2010. From the Travis failures, it looks like there are probably 4 more files whose last recorded year happens to be 2015 (BYU_YEAR) rather than 2010, and so they fall into this if statement as well. There could be even more that have a last recorded year like 2012 or 2013 but aren't yet BYU'd to 2015.

I cannot think of an endogenous way to detect what the last recorded year was before extrapolation without adding additional information. I think I have an idea to incorporate it into a new FLAG, with some if statements in each BYU'd zchunk to handle it. That should be fine, but it is also an extra place for the teams to make a mistake, even if we present careful notes and examples. Obviously I'd put some thought into minimizing that.

Alternatively, if we don't think this part of the test will be kept beyond the actual base year updating process, we could also do

olddata %>%
  filter(year <= max(HISTORICAL_YEARS)) ->
  olddata

We will lose a few years of data from the oldnew test, but there is less room for user error.

So, do we want the flag, or the slightly less robust oldnew test?

Contributor commented:
There were only 10 or so that failed. Could we just flag them as FLAG_NO_TEST for now?
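
For illustration, a sketch of that flagging approach (assumes gcamdata's add_flags() helper and FLAG_NO_TEST constant apply here; the output name is hypothetical):

# At the end of an affected chunk's output pipeline, mark the extended
# output so the old-new comparison test skips it for now.
long_iso_year_gdp %>%
  add_flags(FLAG_NO_TEST) ->
  chunk_output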

Contributor commented:
One thing to keep in mind, the FAO Aquastat data are typically written out in (at best) 5-year intervals, with the data falling in years like 2008, 2012, 2017. Often there will be 20-year lapses in a time series. So, at least for that specific data source we don't want to be filtering years prior to filling in the missing data.

Member commented:
@pkyle Thanks! But here we're talking about the test code, not filtering before filling data.

@abigailsnyder @pralitp I'm also leery of adding extra logic/steps the user is responsible for. Another possible way to address this: weaken the oldnew test so it filters newdata to only the years present in olddata before doing the comparison. This would trim off any extrapolated data; the cost is that now it won't detect bad newdata that have extra years for non-BYU reasons.

newdata
}
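
For reference, a minimal sketch of the weakened-test alternative described above (hypothetical, not part of this diff):

# Restrict newdata to the years present in olddata, trimming any
# extrapolated years before the old-new comparison.
newdata %>%
  filter(year %in% unique(olddata$year)) ->
  newdata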

# Finally, test (NB rounding numeric columns to a sensible number of
# digits; otherwise spurious mismatches occur)
# Also first converts integer columns to numeric (otherwise test will