*****************************
** start here ***************
*****************************
** Prerequisite: the user-written commands psmatch2, pstest and rbounds
** must be installed before this script is run.

*** Load the data from the web (recommended)
global pfad http://www.barkhof.uni-bremen.de/~mwindzio
use $pfad/propensity_scoresGESIS.dta, clear
*** Alternative: work with a local copy instead
*cd c:\temp\
*use propensity_scoresGESIS_.dta, clear

* First look at the person-year rows
list pid syear pglabgro pgisei in 1/200, clean header(20)

*** Sample restriction: keep only persons with a regularly employed row in 2006.
* The 0/1 condition is maximised within person, so re6 is 1 on every row
* of a person who has at least one qualifying 2006 row.
bysort pid (syear): egen re6 = max(reg_employed == 1 & syear == 2006)
keep if re6 == 1
drop re6

*** Last income: pglabgro in the person's last observed year,
*** counted only if regularly employed in that year
bysort pid (syear): egen y_last = max(syear)
generate last_inc = cond(syear == y_last & reg_employed == 1, pglabgro, 0)
* spread the value to all rows of the person
bysort pid (syear): egen last_income = max(last_inc)
* a person-level maximum of 0 means no usable last income was found
drop if last_income == 0

*** First income: pglabgro in 2006
generate first_inc = cond(syear == 2006, pglabgro, 0)
bysort pid (syear): egen first_income = max(first_inc)
summarize first_income last_income
drop if first_income == 0
* squared first income, divided by 1000
generate first_income2 = first_income^2 / 1000

** vocational degree: 1 if pgbbil01 is anything other than system missing (.)
tabulate pgbbil01
generate voc_degr = (pgbbil01 != .)

** university degree: 1 if pgbbil02 is anything other than system missing (.)
tabulate pgbbil02
generate uni = (pgbbil02 != .)
*** Covariates
tab1 sex geburt
* NOTE(review): female is not created in this script - presumably it is
* already part of the data set; confirm.
tabulate sex female
generate age = syear - geburt
generate age2 = age^2
* migration background: original code 1 becomes 0, codes 2 and 3 become 1
recode migback (1 = 0) (2/3 = 1), generate(mig_back)
tabulate migback mig_back

***************************
** treatment: unemployed **
***************************
tabulate unemp
* once_unemp holds the person-level maximum of unemp on every row of that person
bysort pid (syear): egen once_unemp = max(unemp)
tabulate once_unemp if syear == 2006

list pid syear pglabgro unemp once_unemp in 1/100, clean header(20)
list pid syear pglabgro unemp once_unemp if once_unemp == 1, clean header(20)
list pid syear first_income first_income2 pgisei voc_degr uni last_income in 1/200, clean
list pid syear pglabgro first_income last_income pgisei voc_degr uni once_unemp in 1/200, clean header(20)
list pid pglabgro first_income last_income once_unemp in 1/200, clean header(20)

** now, you have everything we need in one row
* NOTE(review): this leaves exactly one row per person only if the data
* contain no years before 2006 - confirm.
keep if syear <= 2006

** differences in differences: change between first and last income
** for each individual
generate diff_INC = last_income - first_income

* Put the observations into a reproducible random order before matching
set seed 1234
generate rand = runiform()
sort rand

* Covariate list shared by all models below. A global (like pfad) is used
* so that it stays defined when the script is run in pieces.
* NOTE(review): white_collar_qual and married are not created in this
* script - presumably they are already part of the data set; confirm.
global psm_x female age age2 mig_back white_collar_qual married ///
    first_income first_income2 pgisei voc_degr uni

*** Treatment models
logit once_unemp $psm_x
quietly probit once_unemp $psm_x
* probits holds the linear prediction (xb) of the probit model
predict probits, xb

*** tiny difference
psmatch2 once_unemp, pscore(probits) common outcome(last_income) neighbor(5)
psmatch2 once_unemp $psm_x, outcome(last_income) neighbor(5)
psmatch2 once_unemp $psm_x, outcome(last_income) neighbor(5) logit
psmatch2 once_unemp $psm_x, outcome(diff_INC) neighbor(5) logit

* Covariate balance before and after matching
pstest $psm_x, both

*** Rosenbaum Bounds: psmatch2 generates some variables
* _treated:   dummy(1) => observation is treated
* _support:   dummy(1) => observation is in common support area
* _<outcome>: for any treated observation the value of the matched outcome
*             (here _diff_INC, from the psmatch2 run directly above)
*** For the treated observations, compute the difference between their own y
*** and the y of their "matches" from the control group (the "matched outcome"
*** can be based on several matched control cases). Only the treated are used!
generate d_outcome = diff_INC - _diff_INC if _treated == 1 & _support == 1

*** here the Rosenbaum sensitivity analysis: is there still an effect
*** if we assume differences in the odds of a treatment due to unobserved
*** heterogeneity?
rbounds d_outcome, gamma(1(0.1)2)

*** psmatch2 again, without and with a caliper
psmatch2 once_unemp $psm_x, outcome(last_income) neighbor(5) logit
psmatch2 once_unemp $psm_x, outcome(last_income) neighbor(5) logit caliper(0.01)

psmatch2 once_unemp $psm_x, outcome(last_income) neighbor(5) logit
psmatch2 once_unemp $psm_x, outcome(diff_INC) neighbor(5) logit

*** The same kind of models with the built-in teffects psmatch
teffects psmatch (last_income) (once_unemp $psm_x), atet nneighbor(5)

teffects psmatch (diff_INC) (once_unemp $psm_x), atet nneighbor(5)
*tebalance summarize => Stata 14

teffects psmatch (diff_INC) (once_unemp $psm_x), atet nneighbor(5)
*tebalance summarize => Stata 14
*tebalance box

*** further features of teffects psmatch
teffects psmatch (diff_INC) (once_unemp $psm_x), atet nneighbor(5) ///
    caliper(0.5) // maximum difference in pscore allowed

teffects psmatch (diff_INC) (once_unemp $psm_x), atet nneighbor(1) ///
    generate(match) // generates variable that identifies the matches

* remove the match variables from the previous run so the stub can be reused
capture drop match*
* id stores the current observation number
generate id = _n
teffects psmatch (diff_INC) (once_unemp $psm_x), atet nneighbor(5) generate(match)