### caliper  0.25*standard deviations of the propensity score 
# Setup: load MatchIt and its `lalonde` example data set.
# library() with no arguments only lists the installed packages.
library()
library(MatchIt)
# Search the help system for MatchIt and for the lalonde data.
??MatchIt
data(lalonde)
??lalonde
# NOTE(review): attach() is kept only because other sections of this script
# refer to columns by bare name. The attached copy is a snapshot: columns
# added to `lalonde` afterwards are not visible through it.
attach(lalonde)
# fix() opens an interactive data editor, so only call it in a live session;
# in a batch run there is nobody to close the editor.
if (interactive()) fix(lalonde)

# Build 0/1 indicator variables for two levels of `race` and store them both
# as stand-alone vectors (`black`, `hispan`) and as columns of `lalonde`.
# Columns are read from `lalonde` directly rather than through attach().
n <- nrow(lalonde)
n
# Row positions of each group (kept for inspection).
black_sub <- which(lalonde$race == "black")
hispan_sub <- which(lalonde$race == "hispan")

# %in% never yields NA, so a missing race maps to 0, exactly as the
# index-based construction (rep(0, n) + assignment at which(...)) did.
black <- as.numeric(lalonde$race %in% "black")
black
lalonde$black <- black
# fix() needs an interactive editor; skip it in batch runs.
if (interactive()) fix(lalonde)

hispan <- as.numeric(lalonde$race %in% "hispan")
hispan
lalonde$hispan <- hispan
if (interactive()) fix(lalonde)

#### Check initial imbalance

# method = NULL performs no matching: this only constructs a pre-match
# matchit object so that balance can be assessed on the raw sample.
m.out0 <- matchit(
  treat ~ age + educ + race + married + nodegree + re74 + re75,
  data = lalonde,
  method = NULL,
  distance = "glm"
)
m.out0
# Balance summary prior to matching
summary(m.out0)

# Matching can now be performed. The available classes and methods of
# matching are described in vignette("matching-methods").

# 1:1 nearest-neighbor propensity score matching without replacement
m.out1 <- matchit(
  treat ~ age + educ + race + married + nodegree + re74 + re75,
  data = lalonde,
  method = "nearest",
  distance = "glm"
)
m.out1

# Balance after nearest-neighbor matching (matched sample only)
summary(m.out1, un = FALSE)

# Distribution of the propensity scores of matched and unmatched units:
# plot() with type = "jitter".
plot(m.out1, type = "jitter", interactive = FALSE)

# Covariate balance can be examined visually with type = "qq".
plot(
  m.out1,
  type = "qq",
  interactive = FALSE,
  which.xs = c("age", "married", "re75")
)
# Points far from the solid diagonal line mark the regions of the covariate
# distributions that differ between the treatment groups.
# Although married and re75 appear to have improved balance after matching,
# the picture is mixed for age.

####
# If balance is unsatisfactory we can try a different matching method, or
# change the matching algorithm or the distance specification.

# Install optmatch only when it is missing, instead of reinstalling it on
# every run of the script.
if (!requireNamespace("optmatch", quietly = TRUE)) {
  install.packages("optmatch")
}

# Full matching on a probit propensity score
m.out2 <- matchit(
  treat ~ age + educ + race + married + nodegree + re74 + re75,
  data = lalonde,
  method = "full",
  distance = "glm",
  link = "probit"
)
m.out2
# Balance after full matching
summary(m.out2, un = FALSE)
# Love plot of the balance summary
plot(summary(m.out2))
# Love plots are a simple and straightforward way to summarize balance
# visually. See vignette("assessing-balance") for how to customize MatchIt's
# Love plot and how to use cobalt, a package designed specifically for balance
# assessment and reporting that is compatible with MatchIt.


#### ESTIMATING THE TREATMENT EFFECT
# How treatment effects are estimated depends on what form of matching was
# performed; see vignette("estimating-effects").
# After 1:1 matching without replacement (the first matching specification
# above), we can run a simple regression of the outcome on the treatment in
# the matched sample, i.e. including the matching weights.
#
# With continuous outcomes it is often a good idea to also include the
# covariates used in the matching in the effect estimation: doing so can
# provide additional robustness to slight imbalances remaining after the
# matching and can improve precision.

# Extract the matched sample of the 1:1 nearest-neighbor specification.
m.data1 <- match.data(m.out1)
head(m.data1)
# Stata counterpart: psmatch2 (install with: ssc install psmatch2)
#
# A treatment effect can then be estimated in this data set with the standard
# regression functions in R, such as lm() or glm(), being sure to include the
# matching weights (stored in the `weights` variable of the match.data()
# output) in the estimation. Cluster-robust standard errors are recommended
# for most analyses, with pair membership as the clustering variable; the
# lmtest and sandwich packages together make this straightforward.

# Install the helper packages only when they are missing, instead of
# reinstalling them on every run of the script.
for (pkg in c("lmtest", "sandwich")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library("lmtest")   # coeftest()
library("sandwich") # vcovCL()

### M1: nearest neighbor 1:1
fit1 <- lm(
  re78 ~ treat + age + educ + race + married + nodegree + re74 + re75,
  data = m.data1,
  weights = weights
)
summary(fit1)

# Cluster-robust inference, clustering on the matched pair (subclass)
coeftest(fit1, vcov. = vcovCL, cluster = ~subclass)
# The coefficient on treat is the estimated ATT. The other coefficients and
# tests should not be interpreted or reported.
# With 1:1 nearest neighbor matching we failed to achieve balance, so one
# should be cautious about trusting the estimated effect.

### M2: full matching
# Matched sample of the full-matching specification
m.data2 <- match.data(m.out2)
head(m.data2)

# Weighted outcome regression on the matched sample
fit2 <- lm(
  re78 ~ treat + age + educ + race + married + nodegree + re74 + re75,
  data = m.data2,
  weights = weights
)
summary(fit2)
# Cluster-robust inference, clustering on subclass
coeftest(fit2, vcov. = vcovCL, cluster = ~subclass)

# Given the results of these two estimates, we would be inclined to trust the
# one from the second analysis (full matching): better balance was achieved on
# all the variables, which makes the effect estimate less sensitive to the
# form of the outcome model we used.


## Compare the two outcome models side by side
# Install stargazer only when it is missing, instead of reinstalling it on
# every run of the script.
if (!requireNamespace("stargazer", quietly = TRUE)) {
  install.packages("stargazer")
}
??stargazer
# TRUE is spelled out: T is an ordinary variable that can be reassigned.
stargazer::stargazer(fit1, fit2, type = "text", single.row = TRUE)


### Propensity score model with explicit race dummies
names(lalonde)
# Treatment-assignment formula using the `black` and `hispan` indicator
# columns of `lalonde` in place of the `race` factor.
lalonde_model <- treat ~ age + educ + black + hispan + married + nodegree +
  re74 + re75

# Binomial GLM of treatment on the covariates. `data = lalonde` is passed
# explicitly so the variables come from the data frame itself rather than
# from whatever attach() or the global environment happens to provide.
glm_1 <- glm(lalonde_model, family = binomial, data = lalonde)
summary(glm_1)
# Look up the stargazer help, then print the model as a text table.
??stargazer
stargazer::stargazer(glm_1, type = "text", single.row = TRUE)

#### Matching
## 1) Mahalanobis metric matching
# Nearest-neighbor matching (matchit's default method) with `mahvars` given
# and a caliper of 0.25 on the glm-estimated distance.
# NOTE(review): with both `mahvars` and distance = "glm" supplied, MatchIt
# presumably matches on the Mahalanobis distance of `mahvars` within the
# propensity score caliper - confirm against ?method_nearest.
# `mahvars` is written as a one-sided formula, the form documented by MatchIt.

# Without replacement
mahalan <- matchit(
  lalonde_model,
  data = lalonde,
  mahvars = ~ age + educ + nodegree + re74 + re75,
  distance = "glm",
  caliper = 0.25
)
mahalan
summary(mahalan)

# With replacement (TRUE spelled out: T can be reassigned)
mahalan_r <- matchit(
  lalonde_model,
  data = lalonde,
  mahvars = ~ age + educ + nodegree + re74 + re75,
  distance = "glm",
  caliper = 0.25,
  replace = TRUE
)
mahalan_r
summary(mahalan_r)

# Matched sample of the with-replacement specification
mahalan_match <- match.data(mahalan_r)
head(mahalan_match)

### Nearest-neighbor matching with a caliper of 0.1
nnmatch_m <- matchit(
  lalonde_model,
  data = lalonde,
  caliper = 0.1,
  method = "nearest"
)
nnmatch_m
summary(nnmatch_m)

# Matched sample of the caliper nearest-neighbor specification
nn_match <- match.data(nnmatch_m)
head(nn_match)

### Compare outcome regressions: full sample vs. the two matched samples
# reg1: unmatched full sample
reg1 <- lm(
  re78 ~ treat + age + educ + black + hispan + married + nodegree + re74 + re75,
  data = lalonde
)
# reg2: Mahalanobis-matched sample. It was matched WITH replacement, so the
# matching weights returned by match.data() must enter the regression; an
# unweighted fit would ignore how often each control unit was reused.
reg2 <- lm(
  re78 ~ treat + age + educ + black + hispan + married + nodegree + re74 + re75,
  data = mahalan_match,
  weights = weights
)
# reg3: caliper nearest-neighbor matched sample; weights are included for
# consistency with the other matched-sample regressions in this script.
reg3 <- lm(
  re78 ~ treat + age + educ + black + hispan + married + nodegree + re74 + re75,
  data = nn_match,
  weights = weights
)
stargazer::stargazer(reg1, reg2, reg3, type = "text", single.row = TRUE)


####### see https://users.nber.org/~rdehejia/data/.nswdata2.html
# Install Matching only when it is missing, instead of reinstalling it on
# every run of the script.
if (!requireNamespace("Matching", quietly = TRUE)) {
  install.packages("Matching")
}
library(Matching)

# Match() expects X to be a matrix with one column per matching variable.
# Writing X = age + educ + ... would add the covariates together into a
# single numeric vector, so the covariate matrix is built explicitly here.
match_vars <- c(
  "age", "educ", "black", "hispan", "married", "nodegree", "re74", "re75"
)
match_x <- as.matrix(lalonde[, match_vars])

# Average treatment effect on the treated
mymatch1 <- Match(
  Y = lalonde$re78,
  Tr = lalonde$treat,
  X = match_x,
  estimand = "ATT"
)

summary(mymatch1)

# Row indices of the matched treated and control observations
mymatch1$index.treated
mymatch1$index.control

# Average treatment effect on the controls
mymatch2 <- Match(
  Y = lalonde$re78,
  Tr = lalonde$treat,
  X = match_x,
  estimand = "ATC"
)
summary(mymatch2)


# Average treatment effect
mymatch3 <- Match(
  Y = lalonde$re78,
  Tr = lalonde$treat,
  X = match_x,
  estimand = "ATE"
)
summary(mymatch3)