# This script collects classical estimators of linear models, along with estimators using kernels ---------------------------
using LinearAlgebra # I, Diagonal, det, eigvals, tr
using Statistics # std
using NLsolve # nlsolve, used by liml
using Optim # optimize and Brent, used by min_AIC_iv
# OLS estimator. Include a vector of ones in x if one wants an intercept.
function ols(x, y)
    beta = inv(x' * x) * (x' * y)
    return beta
end
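# Side note (a sketch, not part of the original routines): numerically, solving the
# least squares problem with the backslash operator is preferable to forming an
# explicit inverse, and it returns the same estimate up to floating-point error:
#=
xs = hcat(ones(100), randn(100)) # simulated design with an intercept
ys = xs * [1.0, 2.0] + randn(100)
ols(xs, ys) ≈ xs \ ys # should hold
=#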
# The Two Stage Least Squares (TSLS) estimator. Include a vector of ones in x if one wants an intercept in the second stage; a column of ones is appended to z below, so the first stage always includes one.
function tsls(x, y, z)
    z = hcat(ones(size(z, 1), 1), z)
    pz = z * inv(z' * z) * z'
    beta = inv(x' * pz * x) * (x' * pz * y)
    return beta
end
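# Sanity check (a sketch, assuming simulated x, y, z are in scope): TSLS coincides with
# regressing y on the first-stage fitted values xhat = Pz*x, because
# inv(x'*Pz*x)*x'*Pz*y = inv(xhat'*x)*xhat'*y when Pz is symmetric and idempotent.
#=
z1 = hcat(ones(size(z, 1), 1), z) # the same constant that tsls() appends
xhat = z1 * (z1 \ x) # first-stage fitted values
tsls(x, y, z) ≈ inv(xhat' * x) * (xhat' * y) # should hold
=#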
# Estimates the conditional expectation of x given z by kernel regression, then uses the fitted values in the second-stage equation.
# It uses the KernelEstimator.jl package, found at https://github.com/panlanfeng/KernelEstimator.jl
# It does not include a ridging option for small bandwidths, so NaNs can be returned.
# Here, the bandwidth is chosen automatically via cross-validation.
using KernelEstimator
function np_tsls(x, y, z; option = localconstant)
    xhat = npr(z, x, reg = option, kernel = gaussiankernel) # npr(xdata, ydata, ...) regresses its second argument on its first, so z comes first
    beta = inv(xhat' * x) * (xhat' * y)
    return beta
end
# Here, the bandwidth h is fixed by the user.
# Pkg.add("KernelEstimator")
function np_tsls_h(x, y, z, h; option = localconstant)
    xhat = npr(z, x, reg = option, kernel = gaussiankernel, h = h)
    beta = inv(xhat' * x) * (xhat' * y)
    return beta
end
# The k-class estimator of Theil (1958). It includes OLS (k = 0) and TSLS (k = 1) as special cases.
function kclass(x, y, z; k = 1)
    n = size(y, 1)
    Pz = z * inv(z' * z) * z'
    Mz = Matrix{Float64}(I, n, n) - Pz
    betahat = inv(x' * (Matrix{Float64}(I, n, n) - k * Mz) * x) * (x' * (Matrix{Float64}(I, n, n) - k * Mz) * y)
    return betahat
end
# The Limited Information Maximum Likelihood (LIML) estimator, also a special case of the k-class estimator.
function liml(x, y, z)
    n = size(y, 1)
    Pz = z * inv(z' * z) * z'
    Mz = Matrix{Float64}(I, n, n) - Pz
    w = hcat(y, x)
    # nlsolve expects a vector-valued residual, so the determinant is wrapped in a one-element vector
    obj(k) = [det(w' * w - k[1] * (w' * Mz * w))]
    kliml = nlsolve(obj, [0.5]).zero[1]
    betahat = inv(x' * (Matrix{Float64}(I, n, n) - kliml * Mz) * x) * (x' * (Matrix{Float64}(I, n, n) - kliml * Mz) * y)
    return betahat
end
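# Side note (a sketch, not the root-finding route used above): since the LIML k solves
# det(w'*w - k*w'*Mz*w) = 0, it can equivalently be computed as the smallest eigenvalue
# of inv(w'*Mz*w)*(w'*w), with w and Mz built as inside liml():
#=
kliml = minimum(eigvals(inv(w' * Mz * w) * (w' * w)))
=#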
# The WMDF estimator of Antoine & Lavergne (2014)
function wmdf(x, y, z; intercept = false)
    n = size(y, 1)
    if intercept
        Y1star = hcat(ones(n, 1), x)
    else
        Y1star = x
    end
    Ystar = hcat(y, Y1star)
    z = z ./ repeat(std(z, dims = 1), n, 1) # rescale each instrument to unit standard deviation
    # dnorm (the standard normal density) is assumed to come from "auxiliary functions.jl"
    if size(z, 2) > 1
        Ktilde = ones(n, n)
        for i = 1:size(z, 2)
            K = dnorm(repeat(z[:, i], 1, n) - repeat(z[:, i]', n, 1))
            K = K - Diagonal(K) # zero out the diagonal
            Ktilde = Ktilde .* K
        end
    else
        Ktilde = dnorm(repeat(z, 1, n) - repeat(z', n, 1))
        Ktilde = Ktilde - Diagonal(Ktilde)
    end
    eig = eigvals(inv(Ystar' * Ystar) * (Ystar' * Ktilde * Ystar))
    lambdatilde = minimum(eig)
    lambda_WMDF = (lambdatilde - (1 - lambdatilde) / n) / (1 - (1 - lambdatilde) / n)
    In = Diagonal(vec(ones(n, 1)))
    WMD = inv(Y1star' * (Ktilde - lambdatilde * In) * Y1star) * (Y1star' * (Ktilde - lambdatilde * In) * y)
    WMDF = inv(Y1star' * (Ktilde - lambda_WMDF * In) * Y1star) * (Y1star' * (Ktilde - lambda_WMDF * In) * y)
    return (WMD = WMD, WMDF = WMDF)
end
# Example
#=
include("auxiliary functions.jl")
n = 1000
mu = [0.0, 0]
sigma = [1.0, 0.5, 0.5, 1.0]
sigma = reshape(sigma, 2, 2)
# Generate some data where the coefficient of interest, beta_0, is equal to 1.
data = dgp(n, 1, mu, sigma; design = "linear")
OLS = ols(data.x, data.y)
TSLS = tsls(data.x, data.y, data.z)
LIML = liml(data.x, data.y, data.z)
KCLASS_0 = kclass(data.x, data.y, data.z, k = 0) # coincides with OLS
KCLASS_1 = kclass(data.x, data.y, data.z, k = 1) # coincides with TSLS when z contains the same constant that tsls() appends
WMDF = wmdf(data.x, data.y, data.z, intercept = false)
NP = np_tsls(data.x, data.y, data.z, option = localconstant)
=#
# Estimators computing or using kernels -------------------------------------
# This function computes the Nadaraya-Watson (type = "lc") and local linear (type = "ll") kernel regression estimators.
# A Gaussian kernel is used.
# There is no ridging to handle small bandwidths; instead, a crude numerical adjustment avoids division by 0, which is not ideal.
function lcll(dep, expl; bw = 0.1, type = "lc")
    n = size(expl, 1)
    firsterm = repeat(expl, 1, n)
    secterm = firsterm'
    U = (firsterm - secterm) ./ bw
    K = gaussian(U) # replace with any other kernel function if desired (gaussian is assumed to come from "auxiliary functions.jl")
    if type == "ll"
        Sn1 = sum(K .* (firsterm - secterm), dims = 2)
        Sn1 = repeat(Sn1, 1, n)
        Sn2 = sum(K .* (firsterm - secterm) .^ 2, dims = 2)
        Sn2 = repeat(Sn2, 1, n)
        B = K .* (Sn2 - (firsterm - secterm) .* Sn1)
        SB = sum(B, dims = 2)
        if any(SB .== 0) # some row of weights sums to 0
            println("Careful mate! An adjustment was made to avoid NaN!")
            SB[SB .== 0] .= 0.0001
        end
        L = B ./ repeat(SB, 1, n)
    else
        SB = sum(K, dims = 2)
        if any(SB .== 0) # some row of weights sums to 0
            println("Careful mate! An adjustment was made to avoid NaN!")
            SB[SB .== 0] .= 0.0001
        end
        L = K ./ repeat(SB, 1, n)
    end
    fit = L * dep
    return fit
end
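# Quick usage sketch (assumes the gaussian() kernel from "auxiliary functions.jl" is loaded):
#=
zs = randn(200)
ys = sin.(zs) + 0.1 * randn(200)
fit_lc = lcll(ys, zs, bw = 0.3, type = "lc") # Nadaraya-Watson fit
fit_ll = lcll(ys, zs, bw = 0.3, type = "ll") # local linear fit
=#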
# Estimates the conditional expectation of x given z by kernel regression (via the lcll function above), then uses the fitted values in the second-stage equation.
# It does not include a ridging option for small bandwidths; a crude numerical adjustment avoids division by 0, which is not ideal.
function kernel_tv(x, y, z; option, bw)
    if option != "lc" && option != "ll"
        error("option should be lc or ll! Careful mate!")
    end
    xhat = lcll(x, z, bw = bw, type = option)
    beta = inv(xhat' * x) * (xhat' * y)
    return beta
end
# Example
#=
include("auxiliary functions.jl")
n = 1000
mu = [0.0, 0]
sigma = [1.0, 0.5, 0.5, 1.0]
sigma = reshape(sigma, 2, 2)
data = dgp(n, 1, mu, sigma)
OLS = ols(data.x, data.y)
TSLS = tsls(data.x, data.y, data.z)
LIML = liml(data.x, data.y, data.z)
KCLASS_0 = kclass(data.x, data.y, data.z, k = 0)
KCLASS_1 = kclass(data.x, data.y, data.z, k = 1)
WMDF = wmdf(data.x, data.y, data.z, intercept = true)
NP = np_tsls(data.x, data.y, data.z, option = localconstant)
TV = kernel_tv(data.x, data.y, data.z; option = "ll", bw = 1)
# Note that for a very large bandwidth, the estimator coincides with the TSLS one
TV_500 = kernel_tv(data.x, data.y, data.z; option = "ll", bw = 500)
=#
# Extracts the "hat" matrix of the Nadaraya-Watson or local linear kernel estimator.
function kernel_hat_matrix(dep, expl; bw = 0.1, type = "lc")
    n = size(expl, 1)
    firsterm = repeat(expl, 1, n)
    secterm = firsterm'
    U = (firsterm - secterm) ./ bw
    K = gaussian(U)
    if type == "ll"
        Sn1 = sum(K .* (firsterm - secterm), dims = 2)
        Sn1 = repeat(Sn1, 1, n)
        Sn2 = sum(K .* (firsterm - secterm) .^ 2, dims = 2)
        Sn2 = repeat(Sn2, 1, n)
        B = K .* (Sn2 - (firsterm - secterm) .* Sn1)
        SB = sum(B, dims = 2)
        if any(SB .== 0) # some row of weights sums to 0
            println("Careful mate! An adjustment was made to avoid NaN!")
            SB[SB .== 0] .= 0.0001
        end
        L = B ./ repeat(SB, 1, n)
    else
        SB = sum(K, dims = 2)
        if any(SB .== 0) # some row of weights sums to 0
            println("Careful mate! An adjustment was made to avoid NaN!")
            SB[SB .== 0] .= 0.0001
        end
        L = K ./ repeat(SB, 1, n)
    end
    return L
end
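# Consistency check (a sketch, with ys and zs as in the lcll example above): applying
# the hat matrix to the dependent variable reproduces the lcll fit, since lcll
# computes fit = L*dep internally.
#=
L = kernel_hat_matrix(ys, zs, bw = 0.3, type = "lc")
L * ys ≈ lcll(ys, zs, bw = 0.3, type = "lc") # should hold
=#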
# Implements the AIC criterion for the bandwidth from Hurvich, Simonoff & Tsai (1998)
function hurvic_AIC(dep, expl; bw = 0.1, type = "lc")
    H = kernel_hat_matrix(dep, expl, bw = bw, type = type)
    n = size(dep, 1)
    sig2 = (dep' * (Matrix{Float64}(I, n, n) - H)' * (Matrix{Float64}(I, n, n) - H) * dep)[1] / n # [1] yields a scalar whether dep is a vector or an n-by-1 matrix
    crit = log(sig2) + (1 + tr(H) / n) / (1 - (tr(H) + 2) / n)
    return crit
end
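# A simple way to use the criterion (a sketch, not part of the original code; assumes
# data from the examples above): evaluate it over a bandwidth grid and keep the minimizer.
#=
grid = 0.05:0.05:2.0
crits = [hurvic_AIC(data.y, data.x, bw = h, type = "lc") for h in grid]
h_opt = grid[argmin(crits)]
=#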
# An extension of the AIC criterion of Hurvich, Simonoff & Tsai (1998) to the IV case
function iv_AIC(expl, endo, instru; bw = 0.1, type = "lc")
    n = size(expl, 1)
    H = kernel_hat_matrix(endo, instru, bw = bw, type = type)
    # In the IV case, the effective hat matrix is X*(X'H'X)^(-1)*X'H'
    H = endo * inv(endo' * H' * endo) * endo' * H'
    sig2 = (expl' * (Matrix{Float64}(I, n, n) - H)' * (Matrix{Float64}(I, n, n) - H) * expl)[1] / n
    crit = log(sig2) + (1 + tr(H) / n) / (1 - (tr(H) + 2) / n)
    return crit
end
# Example:
#=
include("auxiliary function.jl")
n = 1000
mu = [0.0, 0]
sigma = [1.0, 0.5, 0.5, 1.0]
sigma = reshape(sigma, 2, 2)
data = dgp(n, 1, mu, sigma)
AIC_h = hurvic_AIC(data.y, data.x, bw = 0.1)
AIC_iv_h = iv_AIC(data.y, data.x, data.z; bw = 0.1, type = "ll")
=#
# Computes the optimal bandwidth by minimizing the iv_AIC criterion. Still in the works, use at your own risk.
function min_AIC_iv(expl, endo, instru; lower = 0.01, upper = 50.0, type = "lc")
    n = size(expl, 1)
    # The criterion is defined inside min_AIC_iv so that it depends on the bandwidth only, which keeps the minimization code simple
    function obj(h)
        H = kernel_hat_matrix(endo, instru, bw = h, type = type)
        # In the IV case, the effective hat matrix is X*(X'H'X)^(-1)*X'H'
        H = endo * inv(endo' * H' * endo) * endo' * H'
        sig2 = (expl' * (Matrix{Float64}(I, n, n) - H)' * (Matrix{Float64}(I, n, n) - H) * expl)[1] / n
        crit = log(sig2) + (1 + tr(H) / n) / (1 - (tr(H) + 2) / n)
        return crit
    end
    # With a classical univariate optimizer
    sol = optimize(obj, lower, upper, Brent())
    opt_obj = Optim.minimum(sol)
    opt_h = Optim.minimizer(sol)
    # Alternatively, with a Differential Evolution algorithm (Pkg.add("BlackBoxOptim")):
    # sol = bboptimize(h -> obj(h[1]); SearchRange = (lower, upper), NumDimensions = 1)
    # opt_obj = best_fitness(sol)
    # opt_h = best_candidate(sol)[1]
    return (opt_obj = opt_obj, opt_h = opt_h)
end
# Computes the optimal bandwidth and estimates the linear IV model with it. Still in the works, use at your own risk.
function TV_kernel_iv(expl, endo, instru; option = "lc")
    if option != "lc" && option != "ll"
        error("option should be lc or ll! Careful mate!")
    end
    sol = min_AIC_iv(expl, endo, instru, lower = 0.0001, upper = 100.0, type = option)
    opt_obj = sol.opt_obj
    opt_h = sol.opt_h
    bt_hat = kernel_tv(endo, expl, instru; option = option, bw = opt_h) # option and bw are keyword arguments of kernel_tv
    return (bt_hat = bt_hat, opt_h = opt_h, opt_obj = opt_obj)
end
# Example:
#=
include("auxiliary functions.jl")
n = 100
mu = [0.0, 0]
sigma = [1.0, 0.5, 0.5, 1.0]
sigma = reshape(sigma, 2, 2)
data = dgp(n, 1, mu, sigma)
iv_AIC(data.y, data.x, data.z; bw = 0.1)
iv_AIC(data.y, data.x, data.z; bw = 0.1, type = "ll")
sol = TV_kernel_iv(data.y, data.x, data.z; option = "ll")
=#