Skip to content

Commit

Permalink
Fix ETL
Browse files Browse the repository at this point in the history
  • Loading branch information
akgold committed Jan 2, 2020
1 parent a0e85c9 commit 8958615
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 13 deletions.
31 changes: 19 additions & 12 deletions ETL/clean_raw/ETL_clean_raw.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,23 @@ stats <- dplyr::tbl(con, "bike_station_info")

# Clean data
```{r}
# Build the modelling table from the raw readings in `df_con`, attach station
# coordinates from `stats`, and store the result as "bike_model_data".
df <- df_con %>%
  # Keep only the derived modelling columns
  transmute(
    id = station_id,
    hour = hour(time),
    date = date(time),
    month = month(time),
    n_bikes = num_bikes_available,
    dow = to_char(time, "dy")
  ) %>%
  # Attach each station's coordinates (natural join on the shared `id` column)
  left_join(
    select(stats, id = station_id, lat, lon)
  ) %>%
  # Store the result in the database, replacing any existing table
  compute("bike_model_data", overwrite = TRUE)
# Display the resulting table reference in the rendered chunk
df
# Rebuild "bike_model_data": one row per station / date / hour holding the mean
# number of available bikes, plus the station's coordinates.

# Drop any previous copy so the CREATE TABLE below does not collide with it.
db_drop_table(con, "bike_model_data", force = TRUE)

query <- df_con %>%
  group_by(
    id = station_id,
    hour = hour(time),
    date = date(time),
    month = month(time),
    # NOTE(review): TRIM()/to_char() are not R functions; presumably dbplyr
    # passes them through to the database, and TRIM strips the padding the
    # "Day" format adds -- confirm against the backend in use.
    dow = TRIM(to_char(time, "Day"))
  ) %>%
  summarize(
    n_bikes = mean(num_bikes_available, na.rm = TRUE)
  ) %>%
  # `id` is the only column shared by both sides; naming it makes the join key
  # explicit instead of relying on a natural join.
  inner_join(
    select(stats, id = station_id, lat, lon),
    by = "id"
  ) %>%
  dbplyr::sql_render() %>%
  # Turn the rendered SELECT into a CREATE TABLE ... AS statement; only the
  # first (outermost) SELECT is rewritten.
  stringr::str_replace("SELECT", "CREATE TABLE bike_model_data AS SELECT")

# dbExecute() runs the statement and releases its result; dbSendQuery() would
# leave an uncleared result object open on the connection.
DBI::dbExecute(con, query)

# Preview the freshly created table.
tbl(con, "bike_model_data")
```
2 changes: 1 addition & 1 deletion pkg/R/helper_funcs.R
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ bike_get_mod_preds <- function(mod, mod_name, test_df, pred_mat_func = NULL) {
date = date,
# Predictions
n_bikes,
preds = predict(mod$model, newdata = pred_mat) %>% round(),
preds = predict(mod$model, newdata = pred_mat),
resid = test_df$n_bikes - preds)
}

Expand Down

0 comments on commit 8958615

Please sign in to comment.