From 89586152def23890038023e20c217c46e55737f3 Mon Sep 17 00:00:00 2001
From: Alex Gold
Date: Thu, 2 Jan 2020 19:24:18 +0000
Subject: [PATCH] Fix ETL

---
 ETL/clean_raw/ETL_clean_raw.Rmd | 35 +++++++++++++++++++++++------------
 pkg/R/helper_funcs.R            |  2 +-
 2 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/ETL/clean_raw/ETL_clean_raw.Rmd b/ETL/clean_raw/ETL_clean_raw.Rmd
index c62ee3f..bde2f8c 100644
--- a/ETL/clean_raw/ETL_clean_raw.Rmd
+++ b/ETL/clean_raw/ETL_clean_raw.Rmd
@@ -29,16 +29,27 @@ stats <- dplyr::tbl(con, "bike_station_info")
 
 # Clean data
 ```{r}
-(df <- df_con %>%
-  # Clean data
-  transmute(id = station_id,
-            hour = hour(time),
-            date = date(time),
-            month = month(time),
-            n_bikes = num_bikes_available,
-            dow = to_char(time, "dy")) %>%
-  # Add location
-  left_join(select(stats, id = station_id, lat, lon)) %>%
-  # Write back to db
-  compute("bike_model_data", overwrite = TRUE))
+# Rebuild the table from scratch (force = TRUE tolerates a missing table)
+db_drop_table(con, "bike_model_data", force = TRUE)
+# Mean bikes available per station, date and hour, with station coordinates
+query <- df_con %>%
+  group_by(
+    id = station_id,
+    hour = hour(time),
+    date = date(time),
+    month = month(time),
+    dow = TRIM(to_char(time, "Day"))
+  ) %>%
+  summarize(
+    n_bikes = mean(num_bikes_available, na.rm = TRUE)
+  ) %>%
+  inner_join(
+    select(stats, id = station_id, lat, lon)
+  ) %>%
+  # Render the lazy query to SQL and turn it into CREATE TABLE ... AS SELECT
+  dbplyr::sql_render() %>%
+  stringr::str_replace("SELECT", "CREATE TABLE bike_model_data AS SELECT")
+# dbExecute() runs the statement and frees its result; dbSendQuery() left it open
+DBI::dbExecute(con, query)
+tbl(con, "bike_model_data")
 ```
diff --git a/pkg/R/helper_funcs.R b/pkg/R/helper_funcs.R
index 2b34eaa..8e87ea9 100644
--- a/pkg/R/helper_funcs.R
+++ b/pkg/R/helper_funcs.R
@@ -235,7 +235,7 @@ bike_get_mod_preds <- function(mod, mod_name, test_df, pred_mat_func = NULL) {
                    date = date,
                    # Predictions
                    n_bikes,
-                   preds = predict(mod$model, newdata = pred_mat) %>% round(),
+                   preds = predict(mod$model, newdata = pred_mat),
                    resid = test_df$n_bikes - preds)
 }
 