-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path8_Synth.R
153 lines (103 loc) · 4.11 KB
/
8_Synth.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
## Barbara Sianesi
## ([email protected])
## POLICY EVALUATION METHODS Course
##
## PRACTICAL SESSION 8
## SYNTHETIC CONTROL METHOD
## ************************************************************************
## In the spirit of R, if you can improve this script, please let me know :)
#install.packages("Synth")
library(Synth)
library(tidyverse)
#setwd("your working directory")
smoking <- read.csv("smoking.csv")
# need to assign numbers to each state
smoking <- smoking %>%
group_by(state) %>%
mutate(state.num = cur_group_id())
smoking <- as.data.frame(smoking)
# 1. Prep data for synth
dataprep.out <-
dataprep(foo = smoking,
unit.variable = "state.num",
unit.names.variable = "state",
time.variable = "year",
dependent = "cigsale",
predictors = c("lnincome", "retprice", "age15to24"),
special.predictors = list(
list("cigsale", 1988, "mean"),
list("cigsale", 1980, "mean"),
list("cigsale", 1975, "mean"),
list("beer", 1984:1988, "mean")
),
treatment.identifier = 3, # California
controls.identifier = c(1,2, 4:39),
time.predictors.prior = c(1980:1988),
time.optimize.ssr = c(1970:1988), # Pre-treatment periods
time.plot = c(1970:2000)
)
# 2. Construct synthetic control group
synth.out <- synth(dataprep.out)
# 3. Model diagnostics & results
synth.tables <- synth.tab(dataprep.res = dataprep.out, synth.res = synth.out)
print(synth.tables)
# Graph the outcome paths of treated vs synthetic control
path.plot(dataprep.res = dataprep.out,synth.res = synth.out, tr.intake = 1988, Ylab = c(""))
# Graph difference between the two (gaps)
gaps.plot(dataprep.res = dataprep.out,synth.res = synth.out, tr.intake = 1988,
Main = c("Gaps: Treated - Synthetic"),
Ylab = c("")
)
# List California's outcome
dataprep.out$Y1plot
# List Synthetic California's outcome
dataprep.out$Y0plot %*% synth.out$solution.w
# List difference between the two (gaps)
gaps <- dataprep.out$Y1plot-(dataprep.out$Y0plot %*% synth.out$solution.w)
gaps
## Using -tidysynth- (especially useful for inference)
## ************************************************************************
#install.packages('tidysynth')
library(tidysynth)
library(tidyverse)
smoking <- read.csv("smoking.csv")
# Set up everything & construct the synthetic control
smoking_out <-
smoking %>%
# set up the synthetic control object
synthetic_control(outcome = cigsale,
unit = state,
time = year,
i_unit = "California",
i_time = 1988,
generate_placebos = TRUE # for inference later
) %>%
# Generate the aggregate predictors, averaged over appropriate time frames
generate_predictor(time_window = 1980:1988,
ln_income = mean(lnincome, na.rm = TRUE),
ret_price = mean(retprice, na.rm = TRUE),
youth = mean(age15to24, na.rm = TRUE)) %>%
generate_predictor(time_window = 1984:1988, beer_sales = mean(beer, na.rm = TRUE)) %>%
generate_predictor(time_window = 1975, cigsale_1975 = cigsale) %>%
generate_predictor(time_window = 1980, cigsale_1980 = cigsale) %>%
generate_predictor(time_window = 1988, cigsale_1988 = cigsale) %>%
# Generate the fitted weights for the synthetic control
generate_weights(optimization_window = 1970:1988,
margin_ipop = .02, sigf_ipop = 7, bound_ipop = 6 # optimizer options
) %>%
# Generate the synthetic control
generate_control()
# Weighting of units and variables
smoking_out %>% plot_weights()
# Comparability of synthetic control to treated unit
smoking_out %>% grab_balance_table()
# Graph comparing paths
smoking_out %>% plot_trends()
# Graph of difference between the two
smoking_out %>% plot_differences()
# INFERENCE
# Plot placebo runs
smoking_out %>% plot_placebos()
smoking_out %>% plot_placebos(prune = FALSE)
# Plot (post-MSPE / pre-MSP)
smoking_out %>% plot_mspe_ratio()