Skip to content

Commit

Permalink
Add info from outlier reason 2 and clean up graphs (#69)
Browse files Browse the repository at this point in the history
* Add main changes

* Remove hard coded triad refs

* Remove quotes

* Incorporate feedback
  • Loading branch information
wagnerlmichael authored Dec 17, 2024
1 parent 4c55cc3 commit 7568264
Showing 1 changed file with 28 additions and 23 deletions.
51 changes: 28 additions & 23 deletions reports/performance/_outliers.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -18,27 +18,28 @@ exclusively on these price-based factors that determine outlier status.
### Count by Type

```{r _outliers_type_breakdown}
# Make new column for a few cells of the outliers section
# Subset important outlier information
training_data <- training_data %>%
mutate(price_outlier_reason = case_when(
# Also grabs outlier reason Non-livable computed in 00-ingest.R
str_detect(sv_outlier_reason1, regex("price|Non-livable", ignore_case = TRUE)) ~ sv_outlier_reason1,
str_detect(sv_outlier_reason2, regex("price", ignore_case = TRUE)) ~ sv_outlier_reason2,
str_detect(sv_outlier_reason3, regex("price", ignore_case = TRUE)) ~ sv_outlier_reason3,
TRUE ~ NA_character_
))
mutate(
outlier_reasons_to_graph = case_when(
sv_is_outlier & !is.na(sv_outlier_reason2) ~
paste0(sv_outlier_reason1, " (", sv_outlier_reason2, ")"),
sv_is_outlier & is.na(sv_outlier_reason2) ~ sv_outlier_reason1,
TRUE ~ NA_character_
)
)
# Determine the axis limit
y_lim_axis_outlier_breakdown <- training_data %>%
filter(sv_is_outlier) %>%
count(price_outlier_reason) %>%
count(outlier_reasons_to_graph) %>%
summarise(max_value = max(n)) %>%
pull(max_value)
training_data %>%
filter(sv_is_outlier) %>%
count(price_outlier_reason) %>%
ggplot(aes(x = reorder(price_outlier_reason, -n), y = n)) +
count(outlier_reasons_to_graph) %>%
ggplot(aes(x = reorder(outlier_reasons_to_graph, -n), y = n)) +
geom_bar(stat = "identity") +
geom_text(aes(label = comma(n)), vjust = -0.5) +
ylim(0, 1.03 * y_lim_axis_outlier_breakdown) +
Expand All @@ -57,19 +58,19 @@ training_data %>%
```{r _outliers_type_breakdown_table}
training_data %>%
filter(sv_is_outlier) %>%
group_by(meta_year, price_outlier_reason) %>%
group_by(meta_year, outlier_reasons_to_graph) %>%
summarise(n = n()) %>%
rename(Year = meta_year) %>%
pivot_wider(id_cols = Year, names_from = price_outlier_reason, values_from = n) %>%
pivot_wider(id_cols = Year, names_from = outlier_reasons_to_graph, values_from = n) %>%
kable() %>%
kable_styling("striped")
training_data %>%
filter(sv_is_outlier) %>%
group_by(meta_year, price_outlier_reason) %>%
group_by(meta_year, outlier_reasons_to_graph) %>%
summarise(n = n(), .groups = "drop") %>%
rename(Year = meta_year) %>%
pivot_wider(id_cols = Year, names_from = price_outlier_reason, values_from = n) %>%
pivot_wider(id_cols = Year, names_from = outlier_reasons_to_graph, values_from = n) %>%
kable() %>%
kable_styling("striped")
```
Expand Down Expand Up @@ -262,6 +263,10 @@ outliers_ratio_comparison <- training_data %>%
) %>%
distinct(meta_township_name, percent, above_below, triad)
axis_limit_outlier_ratio_comparison <- outliers_ratio_comparison %>%
slice_max(percent, n = 1) %>%
pull(percent)
outliers_ratio_comparison %>%
ggplot(aes(x = reorder(meta_township_name, percent), y = percent)) +
labs(
Expand All @@ -270,7 +275,7 @@ outliers_ratio_comparison %>%
geom_bar(stat = "identity", aes(fill = above_below)) +
coord_flip() +
geom_text(aes(label = round(percent, 2)), size = 3.2, hjust = -0.2) +
scale_y_continuous(limits = c(0, 1.5)) +
scale_y_continuous(limits = c(0, 1.1 * axis_limit_outlier_ratio_comparison)) +
theme_minimal() +
theme(
axis.title.y = element_blank(),
Expand Down Expand Up @@ -301,7 +306,7 @@ outliers_table_township_summary <- outliers_table_township_summary %>%
`Med. Sale Price` = median(meta_sale_price, na.rm = TRUE),
`Max. Sale Price` = max(meta_sale_price, na.rm = TRUE),
Count = n(),
.by = c(price_outlier_reason, meta_township_name)
.by = c(outlier_reasons_to_graph, meta_township_name)
) %>%
left_join(
outliers_table_township_summary %>%
Expand All @@ -315,7 +320,7 @@ outliers_table_township_summary <- outliers_table_township_summary %>%
mutate(across(contains("Sale"), dollar)) %>%
relocate(meta_township_name) %>%
dplyr::rename(
"Outlier Type" = price_outlier_reason,
"Outlier Type" = outlier_reasons_to_graph,
"Township Name" = meta_township_name
) %>%
arrange(`Township Name`, desc(Count))
Expand Down Expand Up @@ -346,7 +351,7 @@ outliers_table_class_summary <- outliers_table_class_summary %>%
`Med. Sale Price` = median(meta_sale_price, na.rm = TRUE),
`Max. Sale Price` = max(meta_sale_price, na.rm = TRUE),
Count = n(),
.by = c(price_outlier_reason, meta_class)
.by = c(outlier_reasons_to_graph, meta_class)
) %>%
left_join(
outliers_table_class_summary %>%
Expand All @@ -360,7 +365,7 @@ outliers_table_class_summary <- outliers_table_class_summary %>%
mutate(across(contains("Sale"), dollar)) %>%
relocate(meta_class) %>%
dplyr::rename(
"Outlier Type" = price_outlier_reason,
"Outlier Type" = outlier_reasons_to_graph,
"Class" = meta_class
) %>%
arrange(`Class`, desc(Count))
Expand Down Expand Up @@ -431,7 +436,7 @@ outlier_decile_breakout <- function(data, dec) {
arrange(meta_sale_price) %>%
mutate(decile = ntile(meta_sale_price, 10)) %>%
filter(decile == dec & sv_is_outlier) %>%
group_by(price_outlier_reason) %>%
group_by(outlier_reasons_to_graph) %>%
summarise(count = n()) %>%
ungroup() %>%
slice_max(count, n = 1) %>%
Expand All @@ -441,8 +446,8 @@ outlier_decile_breakout <- function(data, dec) {
arrange(meta_sale_price) %>%
mutate(decile = ntile(meta_sale_price, 10)) %>%
filter(decile == dec & sv_is_outlier) %>%
summarise(count = n(), .by = price_outlier_reason) %>%
ggplot(aes(x = reorder(price_outlier_reason, -count), y = count)) +
summarise(count = n(), .by = outlier_reasons_to_graph) %>%
ggplot(aes(x = reorder(outlier_reasons_to_graph, -count), y = count)) +
labs(
y = "Number of Sales",
x = "Outlier Types"
Expand Down

0 comments on commit 7568264

Please sign in to comment.