# 계량경제학 강의 R 코드

# 제15장 도구변수 추정

# 15.2 2단계 최소제곱법

datadir <- "http://econ.korea.ac.kr/~chirokhan/book/data"
ivdata <- read.csv(file.path(datadir,"ivdata.csv"))
names(ivdata)
nrow(ivdata)
# OLS
library(lmtest)
coeftest(lm(y~x1+x2,data=ivdata))
# First-stage regression
stage1 <- lm(x2~x1+z2a,data=ivdata)
coeftest(stage1)
ivdata$x2hat <- fitted(stage1)
# Second-stage regression
stage2 <- lm(y~x1+x2hat,data=ivdata)
coeftest(stage2)
pi2 <- lm(x2~x1+z2a,data=ivdata)$coef['z2a']
rf2 <- lm(y~x1+z2a,data=ivdata)$coef['z2a']
rf2/pi2

# 15.3 2SLS 추정량의 분산

# 완전수동 분산 추정

# 2SLS estimation
stage1 <- lm(x2~x1+z2a,data=ivdata)
ivdata$x2hat <- stage1$fitted
stage2 <- lm(y~x1+x2hat,data=ivdata)
# Manual standard error computation
s2wrong <- summary(stage2)$sigma^2
uhat <- resid(stage2)-stage2$coef['x2hat']*resid(stage1)
s2correct <- sum(uhat^2)/stage2$df.resid
v <- vcov(stage2)/s2wrong*s2correct
coeftest(stage2,vcov=v)

# 반자동 분산 추정

# Half-auto
ivdata$v2hat <- resid(stage1)
b2hat <- stage2$coef['x2hat']
ivdata$yadj <- with(ivdata, y-b2hat*v2hat)
stage2a <- lm(yadj~x1+x2hat,data=ivdata)
coeftest(stage2a)

# 완전 자동 도구변수 추정법

# Full-auto
#install.packages('AER')
library(AER)
tsls <- ivreg(y~x1+x2|x1+z2a,data=ivdata)
coeftest(tsls)
library(sandwich)
coeftest(tsls,vcov=vcovHC(tsls,type="HC0"))

# 15.4 2SLS와 관련된 검정들

# (i) 추가적 도구변수들의 관련성 검정: 첫째 단계 F검정

coeftest(stage1)
coeftest(stage1,vcov=vcovHC)
stage1a <- lm(x2~x1+z2a+z2b,data=ivdata)
coeftest(stage1a)
waldtest(stage1a, x2~x1)    # requires lmtest

# (ii) 설명변수 외생성의 검정

stage1 <- lm(x2~x1+z2a,data=ivdata)
ivdata$v2hat <- resid(stage1)
coeftest(lm(y~x1+x2+v2hat,data=ivdata))

# (iii) 도구변수 외생성의 검정

tsls <- ivreg(y~x1+x2|x1+z2a+z2b,data=ivdata)
ivdata$uhat <- tsls$resid
aux <- lm(uhat~x1+z2a+z2b,data=ivdata)
stat <- nrow(ivdata)*summary(aux)$r.sq
stat
qchisq(.95,1)
1-pchisq(stat,1)
stage1 <- lm(x2~x1+z2a+z2b,data=ivdata)
ivdata$x2hat <- stage1$fitted
stage2 <- lm(y~x1+x2hat,data=ivdata)
aux1 <- lm(z2b~x1+x2hat,data=ivdata)
ivdata$w <- aux1$resid*stage2$resid
ivdata$one <- 1
aux2 <- lm(one~w-1,data=ivdata)
stat <- nrow(ivdata)*summary(aux2)$r.sq
stat
1-pchisq(stat,1)

# 15.5 예제: 교육수익률

data(Schooling,package="Ecdat")
nrow(Schooling)
names(Schooling)

# OLS 추정

model <- lwage76~ed76+exp76+smsa76
ols0 <- lm(model,data=Schooling)
coeftest(ols0)      # needs library(lmtest)
confint(ols0,"ed76")

# IQ를 대리변수로 사용

ols1 <- lm(update(model,.~.+iqscore),data=Schooling)
coeftest(ols1)

# 도구변수 추정

stage1 <- lm(update(model,ed76~.-ed76+momed),data=Schooling)
coeftest(stage1)
library(AER)
model <- lwage76~ed76+exp76+smsa76
inst <- ~momed+exp76+smsa76
tsls <- ivreg(model,inst,data=Schooling)
coeftest(tsls)
coeftest(tsls,vcov=vcovHC(tsls,type="HC0"))

# 설명변수의 내생성 검정

Schooling$vhat <- stage1$resid
aux <- lm(update(model, .~.+vhat),data=Schooling)
coeftest(aux)
coeftest(aux,vcov=vcovHC)

# 도구변수들의 외생성 검정(과다식별검정)

model <- lwage76~ed76+exp76+smsa76
inst <- ~momed+exp76+smsa76
tsls <- ivreg(model,inst,data=Schooling)
Schooling$uhat <- tsls$resid     # 2SLS residuals
aux <- lm(update(inst,uhat~.),data=Schooling)
summary(aux)
inst2 <- ~momed+daded+exp76+smsa76
tsls <- ivreg(model,inst2,data=Schooling)
Schooling$uhat <- tsls$resid
aux <- lm(update(inst2,uhat~.),data=Schooling)
stat <- nrow(Schooling)*summary(aux)$r.squared
stat
qchisq(.95,1)
1-pchisq(stat,1)
# first stage regression
stage1 <- lm(ed76~momed+daded+exp76+smsa76,data=Schooling)
Schooling$ed76hat <- stage1$fitted
# regression to get tranformed overidentifying instruments
aux.model <- daded~ed76hat+exp76+smsa76
aux.inst <- ~ed76+exp76+smsa76
w1 <- ivreg(aux.model, aux.inst, data=Schooling)$resid
# multiply w1 to 2SLS residual
model <- lwage76~ed76+exp76+smsa76
inst2 <- ~momed+daded+exp76+smsa76
Schooling$w1u <- w1*ivreg(model, inst2, data=Schooling)$resid
# get the n*R^2 test statistic
Schooling$one <- 1
aux <- lm(one~-1+w1u,data=Schooling)
stat <- nrow(Schooling)*summary(aux)$r.sq
stat
# p-value
1-pchisq(stat,1)

# 15.6 통제함수 방법

# 2SLS
coeftest(ivreg(y~x1+x2|x1+z2a+z2b,data=ivdata))
# control-function approach
ivdata$v2hat <- lm(x2~x1+z2a+z2b,data=ivdata)$resid
coeftest(lm(y~x1+x2+v2hat,data=ivdata))
stage1 <- lm(update(inst2,ed76~.),data=Schooling)
Schooling$vhat <- stage1$resid
cfreg <- lm(update(model,.~.+vhat),data=Schooling)
uhat <- cfreg$resid + cfreg$coef['vhat']*Schooling$vhat
s2u <- sum(uhat^2)/(nrow(Schooling)-4)
coeftest(cfreg,vcov=s2u*summary(cfreg)$cov.unscaled)
ivdata$v2hat <- lm(x2~x1+z2a+z2b,data=ivdata)$resid
cfreg <- lm(y~x1+x2+v2hat,data=ivdata)
# naive (wrong) standard errors
coeftest(cfreg)
# corrected standard errors
va <- vcov(cfreg)
d <- lm(cbind(1,x1,z2a,z2b)~x1+x2+v2hat,data=ivdata)$coef
ivdata$x2rho <- ivdata$x2*cfreg$coef['v2hat']
aux <- lm(x2rho~x1+z2a+z2b,data=ivdata)
vb <- vcov(aux)
v <- va+d%*%vb%*%t(d)
coeftest(cfreg,vcov=v)
# compare to 2SLS
coeftest(ivreg(y~x1+x2|x1+z2a+z2b,data=ivdata))

# 15.7 내생적 설명변수의 제곱항과 상호작용항

coeftest(ivreg(y~x1+x2+I(x2^2)|x1+z2a+I(z2a^2),data=ivdata))
ivdata$v2hat <- lm(x2~x1+z2a+z2b,data=ivdata)$resid
coeftest(lm(y~x1+x2+I(x2^2)+v2hat,data=ivdata))
ivdata$v2hat <- lm(x2~x1+z2a+z2b,data=ivdata)$resid
cfreg <- lm(y~x1+x2+I(x2^2)+v2hat,data=ivdata)
# naive
coeftest(cfreg)
# corrected
va <- vcov(cfreg)
d <- lm(cbind(1,x1,z2a,z2b)~x1+x2+I(x2^2)+v2hat,data=ivdata)$coef
ivdata$x2rho <- ivdata$x2*cfreg$coef['v2hat']
aux <- lm(x2rho~x1+z2a+z2b,data=ivdata)
v <- va+d%*%vcov(aux)%*%t(d)
coeftest(cfreg,vcov=v)
coeftest(ivreg(y~x1*x2|x1*(z2a+z2b),data=ivdata))
ivdata$v2hat <- lm(x2~x1+z2a+z2b,data=ivdata)$resid
coeftest(lm(y~x1*x2+v2hat,data=ivdata))
ivdata$v2hat <- lm(x2~x1+z2a+z2b,data=ivdata)$resid
cfreg <- lm(y~x1*x2+v2hat,data=ivdata)
va <- vcov(cfreg)
d <- lm(cbind(1,x1,z2a,z2b)~x1*x2+v2hat,data=ivdata)$coef
ivdata$x2rho <- ivdata$x2*cfreg$coef['v2hat']
aux <- lm(x2rho~x1+z2a+z2b,data=ivdata)
vb <- vcov(aux)
coeftest(cfreg,vcov=va+d%*%vb%*%t(d))