-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathglobal mortality rates.R
88 lines (67 loc) · 2.35 KB
/
global mortality rates.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
library(tidyverse)
library(readxl)
# Load the wide table: one row per country/year, one column per cause of death.
mortality <- read_xlsx("Datasets/globalMortality/global_mortality.xlsx")

# Reshape to long form: one row per country/year/cause, with the cause name in
# `CoD` and its value in `Share`. pivot_longer() replaces the superseded
# gather(); the resulting columns are the same, only the row order differs
# (rows are emitted per original row rather than per cause column). The
# analyses in this script group or sort before using the data.
mortality_processed <- mortality %>%
  pivot_longer(
    cols = -c(country, country_code, year),
    names_to = "CoD",
    values_to = "Share"
  )

# List the distinct causes of death present in the data.
mortality_processed %>%
  distinct(CoD)
# What are the most common causes of death across the globe?
# Averages Share per cause over all rows (ignoring NAs) and plots the 15
# causes with the highest average as horizontal bars.
mortality_processed %>%
  group_by(CoD) %>%
  summarise(Avg = mean(Share, na.rm = TRUE)) %>%
  # Order the factor levels by average so the bars appear sorted in the plot.
  mutate(CoD = fct_reorder(CoD, Avg)) %>%
  arrange(desc(Avg)) %>%
  head(n = 15) %>%
  ggplot(mapping = aes(x = CoD, y = Avg, fill = CoD)) +
  geom_col() +
  coord_flip() +
  # Fill colour only distinguishes bars; the axis already names each cause.
  theme(legend.position = "none")
# What are the least common causes of death across the globe?
# Same per-cause average as above, but keeps the 15 causes at the bottom of
# the descending ranking.
mortality_processed %>%
  group_by(CoD) %>%
  summarise(Avg = mean(Share, na.rm = TRUE)) %>%
  # Order the factor levels by average so the bars appear sorted in the plot.
  mutate(CoD = fct_reorder(CoD, Avg)) %>%
  arrange(desc(Avg)) %>%
  tail(n = 15) %>%
  ggplot(mapping = aes(x = CoD, y = Avg, fill = CoD)) +
  geom_col() +
  coord_flip() +
  # Fill colour only distinguishes bars; the axis already names each cause.
  theme(legend.position = "none")
# Which country/year observations exceed the global median share for the same
# cause of death and year by the widest margin? Plots the 20 largest positive
# deviations.
mortality_processed %>%
  group_by(year, CoD) %>%
  # na.rm = TRUE: without it a single missing Share makes the median NA for
  # the whole year/cause group, silently removing that group from the ranking.
  mutate(avg_share = median(Share, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(dev_from_avg = Share - avg_share) %>%
  # Rows with a missing Share have no deviation to rank.
  filter(!is.na(dev_from_avg)) %>%
  arrange(desc(dev_from_avg)) %>%
  head(20) %>%
  # Build and order the bar labels only for the 20 rows that are plotted,
  # instead of reordering a factor with one level per row of the full data.
  mutate(
    unique_name = paste(country, year, CoD),
    unique_name = fct_reorder(unique_name, dev_from_avg)
  ) %>%
  ggplot(aes(unique_name, dev_from_avg)) +
  geom_col(fill = "blue", col = "black") +
  coord_flip()
# Dominated by HIV/AIDS
# How has share changed over the years? ----------------------------------------

# One row per cause of death, with that cause's observations nested in a
# `data` list-column. `year1990` counts years since 1990, so a model intercept
# on it corresponds to the year 1990.
by_cod <- mortality_processed %>%
  mutate(year1990 = year - 1990) %>%
  group_by(CoD) %>%
  nest()
#' Fit a linear trend of Share against years since 1990.
#'
#' @param df A data frame with numeric columns `Share` and `year1990`.
#' @return The fitted `lm` object for `Share ~ year1990`.
mod <- function(df) {
  stats::lm(formula = Share ~ year1990, data = df)
}
# Fit one linear model per cause of death and keep the broom summaries next to
# it: `glance` (model-level statistics), `rsq` (R-squared taken from glance),
# `tidy` (coefficient table) and `augment` (per-observation output).
models <- by_cod %>%
  mutate(
    model = map(data, mod),
    glance = map(model, broom::glance),
    rsq = map_dbl(glance, function(g) g[["r.squared"]]),
    tidy = map(model, broom::tidy),
    augment = map(model, broom::augment)
  )
# Compare each cause's fitted 1990 share (the intercept) with its fitted yearly
# change (the year1990 slope); point size shows the model's R-squared.
models %>%
  unnest(tidy) %>%
  # by_cod is built with group_by(), so drop any grouping before reshaping.
  ungroup() %>%
  select(CoD, term, estimate, rsq) %>%
  # pivot_wider() replaces the superseded spread(): one column per model term,
  # giving `(Intercept)` and `year1990` columns for each cause.
  pivot_wider(names_from = term, values_from = estimate) %>%
  ggplot(aes(x = `(Intercept)`, y = year1990)) +
  geom_point(aes(size = rsq), alpha = 0.5) +
  geom_smooth(se = FALSE) +
  labs(
    x = "1990 Share",
    y = "Yearly Change in Share"
  )
# All of these models have terrible fit