# Propensity-score matching on the Lalonde data (MatchIt, Matching) ----
#
# Workflow: check initial imbalance -> match (nearest neighbour, full,
# Mahalanobis with a caliper) -> assess balance -> estimate the treatment
# effect in the matched samples.
# Caliper used below: 0.25 * standard deviations of the propensity score.
#
# Interactive help (run at the console, not from the script):
#   ??MatchIt, ??lalonde, ??stargazer; library() lists installed packages.

# Setup ----

# Install only what is missing instead of reinstalling on every run.
required_pkgs <- c(
  "MatchIt", "optmatch", "lmtest", "sandwich", "stargazer", "Matching"
)
is_installed <- vapply(
  required_pkgs, requireNamespace, logical(1), quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_pkgs[!is_installed])
}

library(MatchIt)
library(lmtest)   # coeftest()
library(sandwich) # vcovCL()

# Data ----

# Name the package explicitly: Matching ships a different `lalonde` dataset.
data("lalonde", package = "MatchIt")
head(lalonde)

n <- nrow(lalonde)
n

# 0/1 indicators for the race categories (used by the dummy-based models).
lalonde$black <- as.numeric(lalonde$race == "black")
lalonde$hispan <- as.numeric(lalonde$race == "hispan")
table(lalonde$race, lalonde$black)
table(lalonde$race, lalonde$hispan)
head(lalonde)

# Propensity-score model using the `race` factor.
ps_formula <- treat ~ age + educ + race + married + nodegree + re74 + re75

# Outcome model used after matching on `ps_formula`.
outcome_formula <- re78 ~ treat + age + educ + race + married + nodegree +
  re74 + re75

# Check initial imbalance ----

# No matching; constructing a pre-match matchit object.
m.out0 <- matchit(ps_formula, data = lalonde, method = NULL,
                  distance = "glm")
m.out0

# Checking balance prior to matching.
summary(m.out0)

# Matching can be performed with different classes and methods of matching,
# described in vignette("matching-methods").

# 1:1 nearest-neighbour PS matching without replacement ----

m.out1 <- matchit(ps_formula, data = lalonde, method = "nearest",
                  distance = "glm")
m.out1

# Checking balance after NN matching.
summary(m.out1, un = FALSE)

# We can visualize the distribution of propensity scores of those who were
# matched using plot() with type = "jitter".
plot(m.out1, type = "jitter", interactive = FALSE)

# We can visually examine balance on the covariates using plot() with
# type = "qq". Points far from the solid diagonal line are the areas of the
# covariate distributions that differ between the treatment groups.
plot(m.out1, type = "qq", interactive = FALSE,
     which.xs = c("age", "married", "re75"))
# Although married and re75 appear to have improved balance after matching,
# the case is mixed for age.

# We can try a different matching method or make other changes to the
# matching algorithm or distance specification.

# Full matching on a probit PS (requires optmatch) ----

m.out2 <- matchit(ps_formula, data = lalonde, method = "full",
                  distance = "glm", link = "probit")
m.out2

# Checking balance after full matching.
summary(m.out2, un = FALSE)

# Love plots are a simple and straightforward way to summarize balance
# visually. See vignette("assessing-balance") for more information on how to
# customize MatchIt's Love plot and how to use cobalt, a package designed
# specifically for balance assessment and reporting that is compatible with
# MatchIt.
plot(summary(m.out2))

# Estimating the treatment effect ----

# How treatment effects are estimated depends on what form of matching was
# performed. See vignette("estimating-effects").
# After 1:1 matching without replacement (i.e., the first matching
# specification above), we can run a simple regression of the outcome on the
# treatment in the matched sample (i.e., including the matching weights).
# With continuous outcomes, it is often a good idea to also include the
# covariates used in the matching in the effect estimation, as doing so can
# provide additional robustness to slight imbalances remaining after the
# matching and can improve precision.
m.data1 <- match.data(m.out1)
head(m.data1)

# Stata equivalent: psmatch2 (install with `ssc install psmatch2`).

# We can then estimate a treatment effect in this dataset using the standard
# regression functions in R, like lm() or glm(), being sure to include the
# matching weights (stored in the `weights` variable of the match.data()
# output) in the estimation. We recommend using cluster-robust standard
# errors for most analyses, with pair membership as the clustering variable;
# the lmtest and sandwich packages together make this straightforward.

# M1: NN 1:1
fit1 <- lm(outcome_formula, data = m.data1, weights = weights)
summary(fit1)
coeftest(fit1, vcov. = vcovCL, cluster = ~subclass)
# The coefficient on treat is the estimated ATT. The other coefficients and
# tests should not be interpreted or reported.
# With 1:1 nearest neighbor matching, we failed to achieve balance, so one
# should be cautious about trusting the estimated effect.

# M2: full matching
m.data2 <- match.data(m.out2)
head(m.data2)

fit2 <- lm(outcome_formula, data = m.data2, weights = weights)
summary(fit2)
coeftest(fit2, vcov. = vcovCL, cluster = ~subclass)
# Given the results of these two estimates, we would be inclined to trust the
# one resulting from the second analysis, i.e., using full matching, because
# better balance was achieved on all the variables, making the effect
# estimate less sensitive to the form of the outcome model we used.

# Compare M1 and M2.
stargazer::stargazer(fit1, fit2, type = "text", single.row = TRUE)

# Propensity model with explicit race dummies ----

names(lalonde)
lalonde_model <- treat ~ age + educ + black + hispan + married + nodegree +
  re74 + re75

# `data = lalonde` is passed explicitly; nothing relies on attach().
glm_1 <- glm(lalonde_model, data = lalonde, family = binomial)
summary(glm_1)
stargazer::stargazer(glm_1, type = "text", single.row = TRUE)

# Matching ----

# 1) Mahalanobis metric matching within a propensity-score caliper.
mahal_vars <- ~ age + educ + nodegree + re74 + re75

mahalan <- matchit(lalonde_model, data = lalonde, mahvars = mahal_vars,
                   distance = "glm", caliper = 0.25)
mahalan
summary(mahalan)

# Same specification, matching with replacement.
mahalan_r <- matchit(lalonde_model, data = lalonde, mahvars = mahal_vars,
                     distance = "glm", caliper = 0.25, replace = TRUE)
mahalan_r
summary(mahalan_r)

mahalan_match <- match.data(mahalan_r)
head(mahalan_match)

# 2) Nearest-neighbour matching with a tighter caliper.
nnmatch_m <- matchit(lalonde_model, data = lalonde, caliper = 0.1,
                     method = "nearest")
nnmatch_m
summary(nnmatch_m)

nn_match <- match.data(nnmatch_m)
head(nn_match)

# Compare: full sample vs. the two matched samples.
outcome_dummy_formula <- re78 ~ treat + age + educ + black + hispan +
  married + nodegree + re74 + re75

reg1 <- lm(outcome_dummy_formula, data = lalonde)
# Matched samples carry matching weights; with replacement a control can be
# reused, so the weights must enter the regression.
reg2 <- lm(outcome_dummy_formula, data = mahalan_match, weights = weights)
reg3 <- lm(outcome_dummy_formula, data = nn_match, weights = weights)
stargazer::stargazer(reg1, reg2, reg3, type = "text", single.row = TRUE)

# Matching package ----
# See https://users.nber.org/~rdehejia/data/.nswdata2.html

# Attached only here, after `lalonde` has been loaded from MatchIt.
library(Matching)

# Match() expects X to be a matrix with one column per covariate; summing the
# covariates with `+` would collapse them into a single meaningless score.
match_covariates <- with(
  lalonde,
  cbind(age, educ, black, hispan, married, nodegree, re74, re75)
)

mymatch1 <- Match(Y = lalonde$re78, Tr = lalonde$treat,
                  X = match_covariates, estimand = "ATT")
summary(mymatch1)
mymatch1$index.treated
mymatch1$index.control

mymatch2 <- Match(Y = lalonde$re78, Tr = lalonde$treat,
                  X = match_covariates, estimand = "ATC")
summary(mymatch2)

mymatch3 <- Match(Y = lalonde$re78, Tr = lalonde$treat,
                  X = match_covariates, estimand = "ATE")
summary(mymatch3)