diff --git a/NAMESPACE b/NAMESPACE index 2907ba21f..e4bd5635d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -31,6 +31,7 @@ export(run_xml_tests) export(standardize_iso) export(unprotect_integer_cols) importFrom(assertthat,assert_that) +importFrom(assertthat,is.scalar) importFrom(data.table,data.table) importFrom(dplyr,anti_join) importFrom(dplyr,arrange) @@ -92,3 +93,4 @@ importFrom(tidyr,separate) importFrom(tidyr,spread) importFrom(tidyr,unite) importFrom(utils,capture.output) +importFrom(utils,tail) diff --git a/R/constants.R b/R/constants.R index 3d634d91d..ce9f1ada8 100644 --- a/R/constants.R +++ b/R/constants.R @@ -33,6 +33,11 @@ MODEL_BASE_YEARS <- c(1975, 1990, 2005, 2010) MODEL_FUTURE_YEARS <- seq(2015, 2100, 5) MODEL_YEARS <- c(MODEL_BASE_YEARS, MODEL_FUTURE_YEARS) +# intermediate BYU year constant. Need for early dev where not all data sets get extended. +# Goal would be to have it handled automatically with a change to HISTORICAL_YEARS. +# Should be able to just find and replace BYU_YEAR with max(HISTORICAL_YEARS) once the +# BYU skeleton is in place for all chunks. +BYU_YEAR <- 2015 # GCAM constants ====================================================================== diff --git a/R/extrapolators.R b/R/extrapolators.R new file mode 100644 index 000000000..564827c8e --- /dev/null +++ b/R/extrapolators.R @@ -0,0 +1,83 @@ +# extrapolators.R +# helper extrapolator functions for use in base year changes + +#' extrapolate_constant +#' +#' computes the mean of last n original year values (with +#' \code{mean(., na.rm = TRUE)}) and uses this constant value to fill in NA +#' values corresponding to the extrapolation years at the end of the time series. +#' NOTE that this extrapolator does not touch any of the original data. It ONLY +#' fills in data corresponding to the extrapolation years. It is the user's +#' responsibility to account for this behavior in preparing raw data to be +#' extrapolated. +#' +#' @param x Vector of values with NA's corresponding to the extrapolation years +#' to be filled in via constant extrapolation of the mean of last n original +#' year values. +#' @param n Number of final original year values to be averaged to provide the +#' filler value. Averaging is done with \code{na.rm = TRUE}. +#' Defaults to n = 1: using the last recorded year's value to constantly fill in +#' the tail of vector missing values corresponding to extrapolation years. +#' @param numExtrapYrs The number of NA values at the tail end of each vector that +#' correspond to the extrapolation years and will be filled in. This will always +#' be known for each data set in each chunk. +#' @details Computes the mean of last n original year values of input vector x +#' and uses this constant value to fill in NA values in x that correspond to the +#' added extrapolation years. +#' @return Vector with all NA values replaced with the specified mean. +#' @importFrom assertthat assert_that is.scalar +#' @importFrom utils tail +#' @author ACS June 2019 +extrapolate_constant <- function(x, n=1, numExtrapYrs){ + + # Some assertion tests to make sure working on right data types + assert_that(is.numeric(x)) + assert_that(is.scalar(n)) + assert_that(is.integer(numExtrapYrs)) + + + # The constant value to fill in all extrapolation year NA's with. + # = mean(. , na.rm = TRUE) of the last n values in the original + # data. + index_last_n_orig_yrs <- (length(x) - numExtrapYrs - n + 1):(length(x) - numExtrapYrs) + meanval <- mean(x[index_last_n_orig_yrs], na.rm = TRUE) + + + # fill in only the tail end, extrapolation years + # NA values with this constant. + index_extrap_yrs <- (length(x) - numExtrapYrs + 1):length(x) + x[index_extrap_yrs] <- meanval + + return(x) +} + + + +#' last_n_nonNA +#' +#' finds the last n non-NA values in an input vector. +#' A convenience functions for users who wish to customize +#' their extrapolations beyond the default or who wish to +#' identify NA values in their original (unextrapolated) +#' data. +#' +#' @param x Vector with some NA values +#' @param n The number of non-NA values sought. +#' @details finds the last n non-NA values in an input vector. +#' @return A vector with the last n non-NA values from input +#' vector x. +#' @importFrom assertthat assert_that is.scalar +#' @author ACS June 2019 +last_n_nonNA <- function(x, n){ + + assert_that(is.scalar(n)) + + if(n > length(x[!is.na(x)])){ + stop('asking for more nonNA years than you have.') + } + + + return(tail(x[!is.na(x)], n)) +} + + diff --git a/R/zchunk_L100.GDP_hist.R b/R/zchunk_L100.GDP_hist.R index 9c4322400..be07560c7 100644 --- a/R/zchunk_L100.GDP_hist.R +++ b/R/zchunk_L100.GDP_hist.R @@ -17,7 +17,7 @@ #' @importFrom assertthat assert_that #' @importFrom tibble tibble #' @importFrom dplyr filter mutate select -#' @importFrom tidyr gather spread +#' @importFrom tidyr gather spread complete #' @author BBL February 2017 module_socioeconomics_L100.GDP_hist <- function(command, ...) { if(command == driver.DECLARE_INPUTS) { @@ -34,13 +34,46 @@ module_socioeconomics_L100.GDP_hist <- function(command, ...) { usda_gdp_mer <- get_data(all_data, "socioeconomics/USDA_GDP_MER") assert_that(tibble::is.tibble(usda_gdp_mer)) - # Convert to long form, filter to historical years, convert units + # Convert to long form, convert units usda_gdp_mer %>% select(-Country) %>% gather_years %>% filter(!is.na(value), !is.na(iso)) %>% mutate(value = value * CONV_BIL_MIL * gdp_deflator(1990, base_year = 2010), - year = as.integer(year)) %>% + year = as.integer(year)) -> + long_iso_year_gdp + + + # Perform BYU + # Skeleton - constant extrapolation + # BYUcompliant + # + # BYU NOTE - must make sure the units are all 1990 USD (or at least the + # same year basis) so averaging behaves. Fine here, but worth noting + # across other chunk where money comes up, especially since we drop + # unit information pretty early on in most chunks. + # BYU NOTE - need to think about labeling that the output has BYU + # update done, and a note about the method? + if(max(long_iso_year_gdp$year) < BYU_YEAR){ + + extrapyears <- (max(long_iso_year_gdp$year) + 1):BYU_YEAR + + # Constant extrapolation operating only on + # numeric vector. Can operate on any numeric vector, regardless of grouping + # or column name. + # What gets output in this chunk + long_iso_year_gdp %>% + complete(year = c(year, extrapyears), iso) %>% + group_by(iso) %>% + mutate(value = extrapolate_constant(value, n = 1, + numExtrapYrs = length(extrapyears))) %>% + ungroup -> + long_iso_year_gdp + } + + + # filter to historical years, convert units + long_iso_year_gdp %>% add_title("Historical GDP downscaled to country (iso)") %>% add_comments("Units converted to constant 1990 USD") %>% add_precursors("socioeconomics/USDA_GDP_MER") %>% diff --git a/man/extrapolate_constant.Rd b/man/extrapolate_constant.Rd new file mode 100644 index 000000000..f862775bf --- /dev/null +++ b/man/extrapolate_constant.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/extrapolators.R +\name{extrapolate_constant} +\alias{extrapolate_constant} +\title{extrapolate_constant} +\usage{ +extrapolate_constant(x, n = 1, numExtrapYrs) +} +\arguments{ +\item{x}{Vector of values with NA's corresponding to the extrapolation years +to be filled in via constant extrapolation of the mean of last n original +year values.} + +\item{n}{Number of final original year values to be averaged to provide the +filler value. Averaging is done with \code{na.rm = TRUE}. +Defaults to n = 1: using the last recorded year's value to constantly fill in +the tail of vector missing values corresponding to extrapolation years.} + +\item{numExtrapYrs}{The number of NA values at the tail end of each vector that +correspond to the extrapolation years and will be filled in. This will always +be known for each data set in each chunk.} +} +\value{ +Vector with all NA values replaced with the specified mean. +} +\description{ +computes the mean of last n original year values (with + \code{mean(., na.rm = TRUE)}) and uses this constant value to fill in NA + values corresponding to the extrapolation years at the end of the time series. + NOTE that this extrapolator does not touch any of the original data. It ONLY + fills in data corresponding to the extrapolation years. It is the user's + responsibility to account for this behavior in preparing raw data to be + extrapolated. +} +\details{ +Computes the mean of last n original year values of input vector x +and uses this constant value to fill in NA values in x that correspond to the +added extrapolation years. +} +\author{ +ACS June 2019 +} diff --git a/man/last_n_nonNA.Rd b/man/last_n_nonNA.Rd new file mode 100644 index 000000000..45a3bc33a --- /dev/null +++ b/man/last_n_nonNA.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/extrapolators.R +\name{last_n_nonNA} +\alias{last_n_nonNA} +\title{last_n_nonNA} +\usage{ +last_n_nonNA(x, n) +} +\arguments{ +\item{x}{Vector with some NA values} + +\item{n}{The number of non-NA values sought.} +} +\value{ +A vector with the last n non-NA values from input +vector x. +} +\description{ +finds the last n non-NA values in an input vector. + A convenience functions for users who wish to customize + their extrapolations beyond the default or who wish to + identify NA values in their original (unextrapolated) + data. +} +\details{ +finds the last n non-NA values in an input vector. +} +\author{ +ACS June 2019 +} diff --git a/tests/testthat/test_oldnew.R b/tests/testthat/test_oldnew.R index 8ff23950c..29612bf76 100644 --- a/tests/testthat/test_oldnew.R +++ b/tests/testthat/test_oldnew.R @@ -89,6 +89,18 @@ test_that("matches old data system output", { olddata <- COMPDATA[[oldf]] expect_is(olddata, "data.frame", info = paste("No comparison data found for", oldf)) + # During the base year update development process, some outputs from the data + # system will be extended and therefore have different dimensions from the + # old comparison data. This causes the old-new test to fail. + # The extended extrapolation years will be dropped from the newdata + # so that comparison can be made to the old data. This also serves to check + # that the extrapolation procedure does not touch original data. + if(max(newdata$year) == BYU_YEAR){ # one way to check that it's a BYU without flags. + newdata %>% + filter(year <= max(HISTORICAL_YEARS)) -> + newdata + } + # Finally, test (NB rounding numeric columns to a sensible number of # digits; otherwise spurious mismatches occur) # Also first converts integer columns to numeric (otherwise test will