-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathprocess.R
112 lines (92 loc) · 2.68 KB
/
process.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
library(readr)
library(tidyverse)
library(zoo)
# Report categories to process. Each one is read from
# "./out/<category>_summary.csv" by the loop further down.
categories <- c(
  "shake_intensity", "buildings", "medical",
  "power", "roads_and_bridges", "sewer_and_water"
)
# Display names of the neighbourhoods. The vector is indexed with the `loc`
# column (`locNames[loc]`), so position matters.
# NOTE(review): presumably `loc` is an integer id in 1..19 -- confirm.
locNames <- c(
  "Palace Hills", "Northwest", "Old Town", "Safe Town",       #  1 -  4
  "Southwest", "Downtown", "Wilson Forest", "Scenic Vista",   #  5 -  8
  "Broadview", "Chapparal", "Terrapin", "Pepper Mill",        #  9 - 12
  "Cheddarford", "Easton", "Weston", "Southton",              # 13 - 16
  "Oak Willow", "East Parton", "West Parton"                  # 17 - 19
)
# Accumulators for the combined (all-category) outputs written after the loop.
all_summary <- tibble()
all_aggregated <- tibble()

# Regular 5-minute grid onto which every location's series is expanded.
# It does not depend on the category, so it is built once.
time_min <- as.POSIXct("2020-04-06", tz = "GMT")
time_max <- as.POSIXct("2020-04-11", tz = "GMT")
time_points <- seq(time_min, time_max, by = "5 min")

for (category in categories) {
  # Per-category summary. The code below uses the columns cat, loc, time,
  # time_end, MAP, hdi95.lower and hdi95.upper.
  data_summary <- read_csv(paste0("./out/", category, "_summary.csv")) %>%
    arrange(loc, time) %>%
    # Width of the 95% interval, with the upper bound capped at 10.
    mutate(CIR = pmin(hdi95.upper, 10) - hdi95.lower)

  # Round to 2 d.p. to reduce csv file size.
  # NOTE(review): columns 4:13 are addressed by position; this assumes they
  # are the numeric estimate columns -- confirm against the csv layout.
  data_summary[4:13] <- round(data_summary[4:13], 2)
  data_summary$CIR <- round(data_summary$CIR, 2)

  # One row per (grid time, location) for this category.
  locs <- unique(data_summary$loc)
  data_time <- expand.grid(time = time_points, loc = locs, cat = category)

  processed_data <- data_time %>%
    left_join(data_summary, by = c("cat", "loc", "time")) %>%
    group_by(loc) %>%
    # Seed the first grid row of each location with its own time so the
    # carry-forward below has a starting value. ifelse() drops the POSIXct
    # class, so time_end becomes plain numeric from here on.
    mutate(time_end = ifelse(row_number() == 1, time, time_end)) %>%
    # Carry the last observation forward within each location.
    # NOTE(review): with na.locf's default na.rm = TRUE, rows that still
    # contain NA (e.g. before a location's first report) may be dropped --
    # verify this is intended.
    do(na.locf(.)) %>%
    mutate(
      # Difference between the grid time and the carried-forward time_end,
      # both taken as numeric.
      time_diff = as.numeric(time) - as.numeric(time_end),
      # 0/1 flags bucketing time_diff at 15 * 60 and 60 * 60.
      in15min = ifelse(time_diff <= 15 * 60, 1, 0),
      in60min = ifelse((15 * 60 < time_diff) & (time_diff <= 60 * 60), 1, 0),
      over60min = ifelse(60 * 60 < time_diff, 1, 0),
      locName = locNames[loc]
    ) %>%
    ungroup()

  # Hourly roll-up, using only grid points whose time equals time_end.
  aggregated_data <- processed_data %>%
    filter(time_diff == 0) %>%
    mutate(dateHour = format(time, "%Y-%m-%d %H:00:00")) %>%
    group_by(loc, cat, dateHour) %>%
    summarize(
      maxMAP = max(MAP),
      maxCIR = max(CIR),
      # Mean CIR over the rows that attain the maximum MAP (handles ties).
      CIRatMaxMAP = mean(CIR[which(MAP == max(MAP))])
    ) %>%
    ungroup()

  all_summary <- bind_rows(all_summary, processed_data)
  all_aggregated <- bind_rows(all_aggregated, aggregated_data)

  write.csv(
    processed_data,
    paste0("./out/", category, "_summary_processed.csv"),
    quote = FALSE,
    row.names = FALSE
  )
  # Fix: this file previously received processed_data again, so the
  # per-category aggregated csv never contained the hourly roll-up.
  write.csv(
    aggregated_data,
    paste0("./out/", category, "_summary_aggregated.csv"),
    quote = FALSE,
    row.names = FALSE
  )
}
# Write the results accumulated over all categories: first the processed
# 5-minute series, then the hourly aggregation.
write.csv(
  all_summary,
  file = paste0("./out/", "all", "_summary_processed.csv"),
  quote = FALSE,
  row.names = FALSE
)
write.csv(
  all_aggregated,
  file = paste0("./out/", "all", "_summary_aggregated.csv"),
  quote = FALSE,
  row.names = FALSE
)