Skip to content

Commit

Permalink
Add main changes
Browse files Browse the repository at this point in the history
  • Loading branch information
wagnerlmichael committed Dec 17, 2024
1 parent 1a227fa commit e6c8805
Showing 1 changed file with 34 additions and 27 deletions.
61 changes: 34 additions & 27 deletions reports/performance/_outliers.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -18,27 +18,29 @@ exclusively on these price-based factors that determine outlier status.
### Count by Type

```{r _outliers_type_breakdown}
# Make new column for a few cells of the outliers section
# Subset important outlier information
training_data <- training_data %>%
mutate(price_outlier_reason = case_when(
# Also grabs outlier reason Non-livable computed in 00-ingest.R
str_detect(sv_outlier_reason1, regex("price|Non-livable", ignore_case = TRUE)) ~ sv_outlier_reason1,
str_detect(sv_outlier_reason2, regex("price", ignore_case = TRUE)) ~ sv_outlier_reason2,
str_detect(sv_outlier_reason3, regex("price", ignore_case = TRUE)) ~ sv_outlier_reason3,
TRUE ~ NA_character_
))
mutate(
outlier_reasons_to_graph = if_else(
sv_is_outlier,
paste0(sv_outlier_reason1, " (", sv_outlier_reason2, ")"),
NA_character_
)
) %>%
# Yank NA string values to make output cleaner
mutate(outlier_reasons_to_graph = str_remove_all(outlier_reasons_to_graph, " \\(NA\\)"))
# Determine the axis limit
y_lim_axis_outlier_breakdown <- training_data %>%
filter(sv_is_outlier) %>%
count(price_outlier_reason) %>%
count(outlier_reasons_to_graph) %>%
summarise(max_value = max(n)) %>%
pull(max_value)
training_data %>%
filter(sv_is_outlier) %>%
count(price_outlier_reason) %>%
ggplot(aes(x = reorder(price_outlier_reason, -n), y = n)) +
count(outlier_reasons_to_graph) %>%
ggplot(aes(x = reorder(outlier_reasons_to_graph, -n), y = n)) +
geom_bar(stat = "identity") +
geom_text(aes(label = comma(n)), vjust = -0.5) +
ylim(0, 1.03 * y_lim_axis_outlier_breakdown) +
Expand All @@ -57,19 +59,19 @@ training_data %>%
```{r _outliers_type_breakdown_table}
training_data %>%
filter(sv_is_outlier) %>%
group_by(meta_year, price_outlier_reason) %>%
group_by(meta_year, outlier_reasons_to_graph) %>%
summarise(n = n()) %>%
rename(Year = meta_year) %>%
pivot_wider(id_cols = Year, names_from = price_outlier_reason, values_from = n) %>%
pivot_wider(id_cols = Year, names_from = outlier_reasons_to_graph, values_from = n) %>%
kable() %>%
kable_styling("striped")
training_data %>%
filter(sv_is_outlier) %>%
group_by(meta_year, price_outlier_reason) %>%
group_by(meta_year, outlier_reasons_to_graph) %>%
summarise(n = n(), .groups = "drop") %>%
rename(Year = meta_year) %>%
pivot_wider(id_cols = Year, names_from = price_outlier_reason, values_from = n) %>%
pivot_wider(id_cols = Year, names_from = outlier_reasons_to_graph, values_from = n) %>%
kable() %>%
kable_styling("striped")
```
Expand Down Expand Up @@ -186,7 +188,7 @@ training_data %>%

```{r _outliers_dist_township, fig.height=8, fig.width=7}
training_data %>%
filter(meta_triad_name == run_triad) %>%
filter(meta_triad_name == "North") %>%
mutate(
township_name = ccao::town_convert(meta_township_code),
Category = ifelse(sv_is_outlier, "Outlier", "Not Outlier"),
Expand Down Expand Up @@ -214,7 +216,7 @@ training_data %>%
```{r _outliers_dist_class, fig.height=8, fig.width=7}
training_data %>%
filter(
meta_triad_name == run_triad,
meta_triad_name == "North",
!meta_class %in% c("218", "219")
) %>%
mutate(
Expand Down Expand Up @@ -262,6 +264,11 @@ outliers_ratio_comparison <- training_data %>%
) %>%
distinct(meta_township_name, percent, above_below, triad)
# Largest outlier percentage across townships. The plot below multiplies this
# by 1.1 to set the upper y-axis limit, leaving room for the bar labels.
# slice_max() sorts missing values last (like arrange()), so an NA is only
# returned when no non-missing percent exists; with_ties = FALSE guarantees a
# single value even when several townships share the maximum.
axis_limit_outlier_ratio_comparison <- outliers_ratio_comparison %>%
  slice_max(percent, n = 1, with_ties = FALSE) %>%
  pull(percent)
outliers_ratio_comparison %>%
ggplot(aes(x = reorder(meta_township_name, percent), y = percent)) +
labs(
Expand All @@ -270,7 +277,7 @@ outliers_ratio_comparison %>%
geom_bar(stat = "identity", aes(fill = above_below)) +
coord_flip() +
geom_text(aes(label = round(percent, 2)), size = 3.2, hjust = -0.2) +
scale_y_continuous(limits = c(0, 1.5)) +
scale_y_continuous(limits = c(0, 1.1 * axis_limit_outlier_ratio_comparison)) +
theme_minimal() +
theme(
axis.title.y = element_blank(),
Expand All @@ -292,7 +299,7 @@ outliers_ratio_comparison %>%
# This object is joined to itself using different filters, which is why this
# filtering is applied here rather than below.
outliers_table_township_summary <- training_data %>%
filter(meta_class != "200" & meta_triad_name == run_triad)
filter(meta_class != "200" & meta_triad_name == "North")
outliers_table_township_summary <- outliers_table_township_summary %>%
filter(sv_is_outlier) %>%
Expand All @@ -301,7 +308,7 @@ outliers_table_township_summary <- outliers_table_township_summary %>%
`Med. Sale Price` = median(meta_sale_price, na.rm = TRUE),
`Max. Sale Price` = max(meta_sale_price, na.rm = TRUE),
Count = n(),
.by = c(price_outlier_reason, meta_township_name)
.by = c(outlier_reasons_to_graph, meta_township_name)
) %>%
left_join(
outliers_table_township_summary %>%
Expand All @@ -315,7 +322,7 @@ outliers_table_township_summary <- outliers_table_township_summary %>%
mutate(across(contains("Sale"), dollar)) %>%
relocate(meta_township_name) %>%
dplyr::rename(
"Outlier Type" = price_outlier_reason,
"Outlier Type" = outlier_reasons_to_graph,
"Township Name" = meta_township_name
) %>%
arrange(`Township Name`, desc(Count))
Expand All @@ -337,7 +344,7 @@ outliers_table_township_summary %>%

```{r _outliers_table_class_summary}
outliers_table_class_summary <- training_data %>%
filter(meta_class != "200" & meta_triad_name == run_triad)
filter(meta_class != "200" & meta_triad_name == "North")
outliers_table_class_summary <- outliers_table_class_summary %>%
filter(sv_is_outlier) %>%
Expand All @@ -346,7 +353,7 @@ outliers_table_class_summary <- outliers_table_class_summary %>%
`Med. Sale Price` = median(meta_sale_price, na.rm = TRUE),
`Max. Sale Price` = max(meta_sale_price, na.rm = TRUE),
Count = n(),
.by = c(price_outlier_reason, meta_class)
.by = c(outlier_reasons_to_graph, meta_class)
) %>%
left_join(
outliers_table_class_summary %>%
Expand All @@ -360,7 +367,7 @@ outliers_table_class_summary <- outliers_table_class_summary %>%
mutate(across(contains("Sale"), dollar)) %>%
relocate(meta_class) %>%
dplyr::rename(
"Outlier Type" = price_outlier_reason,
"Outlier Type" = outlier_reasons_to_graph,
"Class" = meta_class
) %>%
arrange(`Class`, desc(Count))
Expand Down Expand Up @@ -431,7 +438,7 @@ outlier_decile_breakout <- function(data, dec) {
arrange(meta_sale_price) %>%
mutate(decile = ntile(meta_sale_price, 10)) %>%
filter(decile == dec & sv_is_outlier) %>%
group_by(price_outlier_reason) %>%
group_by(outlier_reasons_to_graph) %>%
summarise(count = n()) %>%
ungroup() %>%
slice_max(count, n = 1) %>%
Expand All @@ -441,8 +448,8 @@ outlier_decile_breakout <- function(data, dec) {
arrange(meta_sale_price) %>%
mutate(decile = ntile(meta_sale_price, 10)) %>%
filter(decile == dec & sv_is_outlier) %>%
summarise(count = n(), .by = price_outlier_reason) %>%
ggplot(aes(x = reorder(price_outlier_reason, -count), y = count)) +
summarise(count = n(), .by = outlier_reasons_to_graph) %>%
ggplot(aes(x = reorder(outlier_reasons_to_graph, -count), y = count)) +
labs(
y = "Number of Sales",
x = "Outlier Types"
Expand Down

0 comments on commit e6c8805

Please sign in to comment.