Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update desk review workbook for 2024 model #23

Merged
merged 7 commits into from
Feb 15, 2024
Binary file modified misc/desk_review_template.xlsx
Binary file not shown.
240 changes: 181 additions & 59 deletions pipeline/07-export.R
Original file line number Diff line number Diff line change
Expand Up @@ -101,26 +101,28 @@ assessment_pin_prepped <- assessment_pin %>%
prior_near_tot <= params$pv$nonlivable_threshold,
0
),
char_type_resd = NA
char_type_resd = NA,
valuations_note = NA # Empty notes field for Valuations to fill out
) %>%
select(
township_code, meta_pin, meta_class, meta_nbhd_code,
property_full_address, loc_cook_municipality_name, meta_pin10,
property_full_address, loc_tax_municipality_name, meta_pin10,
meta_tieback_key_pin, meta_tieback_proration_rate,
prior_near_land, prior_near_bldg, prior_near_tot,
prior_near_land_rate, prior_near_bldg_rate, prior_near_land_pct_total,
pred_pin_final_fmv, pred_pin_final_fmv_land, pred_pin_final_fmv_bldg,
pred_pin_final_fmv_round, land_rate_per_sqft, pred_pin_land_rate_effective,
pred_pin_bldg_rate_effective, pred_pin_land_pct_total,
prior_near_yoy_change_nom, prior_near_yoy_change_pct,
sale_recent_1_date, sale_recent_1_price, sale_recent_1_document_num,
sale_recent_2_date, sale_recent_2_price, sale_recent_2_document_num,
prior_near_yoy_change_nom, prior_near_yoy_change_pct, valuations_note,
sale_recent_1_date, sale_recent_1_price,
sale_recent_1_outlier_type, sale_recent_1_document_num,
sale_recent_2_date, sale_recent_2_price,
sale_recent_2_outlier_type, sale_recent_2_document_num,
char_yrblt, char_total_bldg_sf, char_type_resd, char_land_sf,
char_unit_sf, flag_nonlivable_space, flag_pin10_5yr_num_sale,
flag_common_area, flag_proration_sum_not_1, flag_pin_is_multiland,
flag_land_gte_95_percentile, flag_bldg_gte_95_percentile,
flag_land_gte_95_percentile,
flag_land_value_capped, flag_prior_near_to_pred_unchanged,
flag_pred_initial_to_final_changed,
Comment on lines -121 to -123
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

flag_bldg_gte_95_percentile and flag_pred_initial_to_final_changed appear not to be present anymore in assessment_pin, so I removed them from the workbook flags. They also didn't seem to be present in the workbook template anyway.

flag_prior_near_yoy_inc_gt_50_pct, flag_prior_near_yoy_dec_gt_5_pct
) %>%
mutate(
Expand All @@ -144,7 +146,7 @@ assessment_pin10_prepped <- assessment_pin_prepped %>%
group_by(township_code, meta_pin10, meta_nbhd_code) %>%
summarize(
property_full_address = first(property_full_address),
loc_cook_municipality_name = first(loc_cook_municipality_name),
loc_tax_municipality_name = first(loc_tax_municipality_name),
num_pin_livable = sum(!flag_nonlivable_space),
num_pin_nonlivable = sum(flag_nonlivable_space),
total_tieback_proration_rate = sum(meta_tieback_proration_rate),
Expand Down Expand Up @@ -174,13 +176,72 @@ assessment_pin10_prepped <- assessment_pin_prepped %>%
for (town in unique(assessment_pin_prepped$township_code)) {
message("Now processing: ", town_convert(town))


## 4.1. PIN-Level ------------------------------------------------------------

# Filter building data to specific township
assessment_pin10_filtered <- assessment_pin10_prepped %>%
filter(township_code == town) %>%
select(-township_code)

building_coords <- assessment_pin10_filtered %>%
# Handle a rare edge case where neighborhoods span multiple townships
# and so introduce duplicate pin10s; more details here:
# https://github.com/ccao-data/data-architecture/issues/275
distinct(meta_pin10) %>%
Comment on lines +187 to +190
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think it's OK to leave this bandaid in here for now, or do we need to spend some time resolving the root problem?

tibble::rowid_to_column("building_coord") %>%
select(meta_pin10, building_coord)

pin_sheet_name <- "PIN Detail"
bldg_sheet_name <- "Buildings"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This sheet used to be called Building (PIN10), but unfortunately as I discovered when updating the res model, sheet names can't have spaces if we want to link to them via between-sheet hyperlink formulas.


# Filter overall data to specific township
assessment_pin_filtered <- assessment_pin_prepped %>%
filter(township_code == town) %>%
select(-township_code)
select(-township_code) %>%
left_join(building_coords, by = "meta_pin10") %>%
mutate(
building_coord = ifelse(
is.na(building_coord),
NA,
getCellRefs(data.frame(row = building_coord + 4, column = 1))
)
) %>%
mutate(
meta_pin10 = ifelse(
is.na(building_coord),
NA,
glue::glue(
'=HYPERLINK(@CELL("address",{bldg_sheet_name}!{building_coord}),',
'"{meta_pin10}")'
)
)
) %>%
select(-building_coord)

# Get range of rows in the PIN data + number of header rows
num_head <- 6
pin_row_range <- (num_head + 1):(nrow(assessment_pin_filtered) + num_head)
pin_row_range_w_header <- c(num_head, pin_row_range)
pin_col_range <- 1:50

# Calculate AVs so we can store them as separate, hidden columns for use
# in the neighborhood breakouts pivot table
assessment_pin_avs <- assessment_pin_filtered %>%
tibble::rowid_to_column("row_id") %>%
mutate(
row_id = row_id + num_head,
total_av = glue::glue("=R{row_id} * 0.1"),
av_difference = glue::glue("=(R{row_id} * 0.1) - (K{row_id} * 0.1)")
) %>%
select(total_av, av_difference)

# Make AV fields formulas
class(assessment_pin_avs$total_av) <- c(
class(assessment_pin_avs$total_av), "formula"
)
class(assessment_pin_avs$av_difference) <- c(
class(assessment_pin_avs$av_difference), "formula"
)

# Generate sheet and column headers
model_header <- str_to_title(paste(
Expand All @@ -194,14 +255,13 @@ for (town in unique(assessment_pin_prepped$township_code)) {
.sep = " "
))

pin_sheet_name <- "PIN Detail"
class(assessment_pin_filtered$meta_pin10) <- c(
class(assessment_pin_filtered$meta_pin10), "formula"
)
class(assessment_pin_filtered$meta_pin) <- c(
class(assessment_pin_filtered$meta_pin), "formula"
)

# Get range of rows in the PIN data + number of header rows
pin_row_range <- 7:(nrow(assessment_pin_filtered) + 9)

# Load the excel workbook template from file
wb <- loadWorkbook(here("misc", "desk_review_template.xlsx"))

Expand All @@ -210,12 +270,14 @@ for (town in unique(assessment_pin_prepped$township_code)) {
style_2digit <- createStyle(numFmt = "$#,##0.00")
style_pct <- createStyle(numFmt = "PERCENTAGE")
style_comma <- createStyle(numFmt = "COMMA")
style_link <- createStyle(fontColour = "blue", textDecoration = "underline")

# Add styles to PIN sheet
addStyle(
wb, pin_sheet_name,
style = style_price,
rows = pin_row_range, cols = c(9:11, 15:18, 23, 26, 29), gridExpand = TRUE
rows = pin_row_range,
cols = c(9:11, 15:18, 23, 27, 31, 49, 50), gridExpand = TRUE
)
addStyle(
wb, pin_sheet_name,
Expand All @@ -230,9 +292,44 @@ for (town in unique(assessment_pin_prepped$township_code)) {
addStyle(
wb, pin_sheet_name,
style = style_comma,
rows = pin_row_range, cols = c(32, 34, 35, 37), gridExpand = TRUE
rows = pin_row_range, cols = c(35, 37, 38, 40), gridExpand = TRUE
)
addStyle(
wb, pin_sheet_name,
style = style_link,
rows = pin_row_range, cols = c(6), gridExpand = TRUE
)
addFilter(wb, pin_sheet_name, 6, pin_col_range)

# Format YoY % change column with a range of colors from low to high
conditionalFormatting(
wb, pin_sheet_name,
cols = c(24),
rows = pin_row_range,
style = c("#F8696B", "#FFFFFF", "#00B0F0"),
rule = c(-1, 0, 1),
type = "colourScale"
)
# Format sale columns such that they are red if the sale has an outlier flag
conditionalFormatting(
wb, pin_sheet_name,
cols = 26:29,
rows = pin_row_range,
style = createStyle(bgFill = "#FF9999"),
rule = '$AB7!=""',
type = "expression"
)
# For some reason vector cols don't work with expressions, so we have
# to duplicate the conditional formatting for the sale outlier flag above
# to apply it to the second range of columns
conditionalFormatting(
wb, pin_sheet_name,
cols = 30:33,
rows = pin_row_range,
style = createStyle(bgFill = "#FF9999"),
rule = '$AF7!=""',
type = "expression"
)
addFilter(wb, pin_sheet_name, 6, 1:44)

# Write PIN-level data to workbook
writeData(
Expand All @@ -244,6 +341,13 @@ for (town in unique(assessment_pin_prepped$township_code)) {
writeFormula(
wb, pin_sheet_name,
assessment_pin_filtered$meta_pin,
startCol = 1,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to specify startCol in these formulas now, since there are two of them; otherwise, the second formula clobbers the first one (which is confusing API behavior, but it's what we have to work with).

startRow = 7
)
writeFormula(
wb, pin_sheet_name,
assessment_pin_filtered$meta_pin10,
startCol = 6,
startRow = 7
)
writeData(
Expand All @@ -263,15 +367,35 @@ for (town in unique(assessment_pin_prepped$township_code)) {
startCol = 15, startRow = 5, colNames = FALSE
)

# Write hidden formulas
writeFormula(
wb, pin_sheet_name,
assessment_pin_avs$total_av,
startCol = 49,
startRow = 7
)
writeFormula(
wb, pin_sheet_name,
assessment_pin_avs$av_difference,
startCol = 50,
startRow = 7
)
setColWidths(
wb, pin_sheet_name,
c(49, 50),
widths = 1,
hidden = c(TRUE, TRUE), ignoreMergedCells = FALSE
)

# 4.2. Building-Level --------------------------------------------------------

# Filter building data to specific township
assessment_pin10_filtered <- assessment_pin10_prepped %>%
filter(township_code == town) %>%
select(-township_code)
# Add a named range for the PIN-level data, which the template will use
# to populate the Neighborhood Breakouts pivot table
createNamedRegion(
wb, pin_sheet_name,
cols = pin_col_range, rows = pin_row_range_w_header,
name = "pin_detail_range", overwrite = TRUE
)

bldg_sheet_name <- "Building (PIN10)"
# 4.2. Building-Level --------------------------------------------------------

# Get range of rows in the building data + number of header rows
bldg_row_range <- 5:(nrow(assessment_pin10_filtered) + 6)
Expand All @@ -294,6 +418,16 @@ for (town in unique(assessment_pin_prepped$township_code)) {
)
addFilter(wb, bldg_sheet_name, 4, 1:13)

# Format YoY % change column with a range of colors from low to high
conditionalFormatting(
wb, bldg_sheet_name,
cols = c(11),
rows = bldg_row_range,
style = c("#F8696B", "#FFFFFF", "#00B0F0"),
rule = c(-1, 0, 1),
type = "colourScale"
)

# Write bldg-level data to workbook
writeData(
wb, bldg_sheet_name, assessment_pin10_filtered,
Expand All @@ -311,22 +445,30 @@ for (town in unique(assessment_pin_prepped$township_code)) {
)

# Save workbook to file based on town name
workbook_name <- glue(
params$assessment$year,
str_replace(town_convert(town), " ", "_"),
"Initial_Model_Values_Condo.xlsx",
.sep = "_"
)
saveWorkbook(
wb,
here(
"output", "desk_review",
glue(
params$assessment$year,
str_replace(town_convert(town), " ", "_"),
"Initial_Model_Values_Condo.xlsx",
.sep = "_"
)
),
wb, here("output", "desk_review", workbook_name),
overwrite = TRUE
)
rm(wb)
}

### NOTE ###
# OpenXLSX is not perfect and messes up the macros and formatting on saved
# workbooks. To finish each workbook, you must manually:

# 1. Open the Neighborhood Breakout sheet and ensure that the values are
# all formatted correctly in the pivot table; if not (e.g. if
# `Average of YoY ∆ %` is formatted as a date when it should be a percentage)
# then manually update the formatting by selecting
# PivotTable Fields > Values > {fieldname} > Value Field Settings... >
# Number Format.




Expand All @@ -341,45 +483,25 @@ upload_data_prepped <- assessment_pin %>%
select(meta_year, meta_pin, meta_card_num, meta_lline_num),
by = c("meta_year", "meta_pin")
) %>%
mutate(meta_pin10 = str_sub(meta_pin, 1, 10)) %>%
group_by(meta_pin10, meta_tieback_proration_rate) %>%
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By my reading, the group_by statements are no longer necessary in this query, since we don't need to report pred_pin10_final_fmv_bldg; but I could be wrong!

# For PINs missing an individual building value, fill with the average of
# PINs with the same proration rate in the building. This is super rare,
# maybe 1 PIN out of every 100K. It happens mostly because of mis-coded nbhds
mutate(
# For PINs missing an individual building value, fill with the average of
# PINs with the same proration rate in the building. This is super rare,
# maybe 1 PIN out of every 100K. It happens mostly because of mis-coded
# nbhds
pred_pin_final_fmv_bldg = ifelse(
is.na(pred_pin_final_fmv_bldg),
mean(pred_pin_final_fmv_bldg, na.rm = TRUE),
pred_pin_final_fmv_bldg
)
) %>%
group_by(meta_pin10) %>%
mutate(
# Sum the building value of each PIN to the building total value
pred_pin10_final_fmv_bldg = sum(pred_pin_final_fmv_bldg, na.rm = TRUE),

# Hotfix for adjusting the total building value such that bldg_total *
# proration_rate = unit_value. Only applies to buildings where rates don't
# sum to 100%
pred_pin10_final_fmv_bldg = round(
pred_pin10_final_fmv_bldg * (1 / sum(
meta_tieback_proration_rate,
na.rm = TRUE
))
),

# For any missing LLINE values, simply fill with 1
meta_lline_num = replace_na(meta_lline_num, 1)
) %>%
ungroup() %>%
select(
township_code = meta_township_code,
PARID = meta_pin,
CARD = meta_card_num,
LLINE = meta_lline_num,
USER18 = pred_pin10_final_fmv_bldg,
USER20 = meta_tieback_proration_rate,
OVRRCNLD = pred_pin_final_fmv_bldg
MV = pred_pin_final_fmv_bldg
) %>%
arrange(township_code, PARID)

Expand Down
Loading