Skip to content

Commit

Permalink
include mapping
Browse files Browse the repository at this point in the history
  • Loading branch information
Damonamajor committed Nov 18, 2024
1 parent 45accc7 commit 84593d4
Showing 1 changed file with 19 additions and 9 deletions.
28 changes: 19 additions & 9 deletions pipeline/02-assess.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,19 +41,31 @@ lgbm_final_full_recipe <- readRDS(paths$output$workflow_recipe$local)
# Load the data for assessment. This is the universe of condo units
# that need values. Use the trained lightgbm model to estimate a single
# FMV per unit
tic()
assessment_data_pred <- read_parquet(paths$input$assessment$local) %>%
as_tibble() %>%
# Bake the recipe once and store the result in a variable
{ baked_data <- bake(lgbm_final_full_recipe, new_data = .);
# Bake the data first and extract meta strata columns
{ baked_data <- bake(lgbm_final_full_recipe, new_data = ., all_predictors());
mutate(
.,
pred_card_initial_fmv = predict(lgbm_final_full_fit, new_data = baked_data)$.pred,
pred_card_initial_fmv = as.numeric(predict(lgbm_final_full_fit, new_data = baked_data)$.pred),
new_strata_1 = baked_data$meta_strata_1,
new_strata_2 = baked_data$meta_strata_2
)
}

# Build one lookup table per strata variable. Each table pairs the strata value
# taken from the baked data (new_strata_*) with the original meta_strata_*
# value found on the same row. Rows whose original strata is missing are
# dropped, and the result is sorted by the baked value.
mapping_1 <- assessment_data_pred %>%
  distinct(new_strata_1, meta_strata_1) %>%
  filter(!is.na(meta_strata_1)) %>%
  arrange(new_strata_1)

mapping_2 <- assessment_data_pred %>%
  distinct(new_strata_2, meta_strata_2) %>%
  filter(!is.na(meta_strata_2)) %>%
  arrange(new_strata_2)


# The imputation step does not change any existing values. However, the
# lightgbm model requires these values to use a zero-based encoding, so the
# baked output does not match the original input. Because the encoded and
# original values match 1:1, we map the new column back to the old one.
Expand All @@ -64,16 +76,14 @@ strata_mapping_2 <- setNames(mapping_2$meta_strata_2, mapping_2$new_strata_2)
assessment_data_pred <- assessment_data_pred %>%
mutate(
# Binary variable to identify condos which have imputed strata
meta_strata_imputed = ifelse(is.na(meta_strata_1), 1, 0),
meta_strata_is_imputed = ifelse(is.na(meta_strata_1), 1, 0),
# Use mappings to replace meta_strata_1 and meta_strata_2 directly
meta_strata_1 = strata_mapping_1[as.character(new_strata_1)],
meta_strata_2 = strata_mapping_2[as.character(new_strata_2)]
) %>%
select(-c("new_strata_1", "new_strata_2"))
toc()



#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# 3. Post-Modeling Adjustments -------------------------------------------------
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Expand Down Expand Up @@ -171,8 +181,8 @@ assessment_data_merged %>%
select(
meta_year, meta_pin, meta_class, meta_card_num, meta_lline_num,
meta_modeling_group, ends_with("_num_sale"), pred_card_initial_fmv,
all_of(params$model$predictor$all), township_code, meta_strata_1, meta_strata_2,
meta_strata_is_imputed
all_of(params$model$predictor$all),
meta_strata_is_imputed, township_code
) %>%
mutate(
ccao_n_years_exe_homeowner = as.integer(ccao_n_years_exe_homeowner)
Expand Down

0 comments on commit 84593d4

Please sign in to comment.