*****************************
*****************************
** start here ***************
*****************************
*****************************

** install once (user-written commands): ssc install psmatch2 (this package also provides pstest) and ssc install rbounds


*** Load the course data set directly from the web (recommended).
*** The global macro pfad holds the base URL of the data directory.
global pfad "http://www.barkhof.uni-bremen.de/~mwindzio"
use "${pfad}/propensity_scoresGESIS.dta", clear

*** Alternative: work from a local copy (uncomment the next two lines)
*cd c:\temp\
*use propensity_scoresGESIS_.dta, clear

*** First look at the data: person id, survey year, labour income and ISEI
*** for the first 200 observations (header repeated every 20 rows)
list pid syear pglabgro pgisei in 1/200, clean header(20)

* Sample restriction: keep only persons who were regularly employed in 2006.
* Step 1: flag the row that is both from 2006 and regularly employed.
gen reg_emp2006 = (reg_employed == 1 & syear == 2006)

* Step 2: spread that flag over all rows of the same person.
bysort pid (syear): egen re6 = max(reg_emp2006)

* Step 3: drop every person without such a row; the helpers are no longer needed.
drop if re6 != 1
drop reg_emp2006 re6

* Last survey year observed for each person
bysort pid (syear): egen y_last = max(syear)

* last_income: labour income in the person's last observed year, counted only
* if the person is regularly employed in that year; constant within person.
gen last_inc = cond(syear == y_last & reg_employed == 1, pglabgro, 0)
bysort pid (syear): egen last_income = max(last_inc)

* Persons whose person-level maximum is 0 have no usable last income
drop if last_income == 0

* first_income: labour income in 2006, constant within person
gen first_inc = cond(syear == 2006, pglabgro, 0)
bysort pid (syear): egen first_income = max(first_inc)

summarize first_income last_income
* Same rule for the baseline year: no usable 2006 income => drop the person
drop if first_income == 0

* Squared baseline income, divided by 1000 to keep the coefficient readable
gen first_income2 = first_income^2 / 1000

** Covariates for the propensity-score model

** vocational degree: 1 if pgbbil01 holds a value, 0 if it is missing.
** missing() is used instead of "!= ." because "!= ." is also true for the
** extended missing values .a-.z and would code them as "has a degree".
tab pgbbil01
gen voc_degr = !missing(pgbbil01)

** uni: 1 if pgbbil02 holds a value, 0 if it is missing (same rule as above)
tab pgbbil02
gen uni = !missing(pgbbil02)


** check sex and year of birth, and the existing female dummy against sex
tab1 sex geburt
tab sex female

** age in the survey year and its square
gen age = syear - geburt
gen age2 = age^2

** migration background dummy: migback 1 -> 0, 2 or 3 -> 1
** (recode leaves any other value unchanged); cross-check the result
recode migback 1=0 2/3=1, gen(mig_back)
tab migback mig_back
***************************
** treatment: unemployed **
***************************
* once_unemp is the person-level maximum of unemp, i.e. it is 1 on every row
* of a person who has unemp == 1 in at least one year (assuming unemp is 0/1).
tabulate unemp
bysort pid (syear): egen once_unemp = max(unemp)

* Count treated and untreated persons using the 2006 rows only
tabulate once_unemp if syear == 2006


* Inspect the raw panel rows and the treated persons
list pid syear pglabgro unemp once_unemp in 1/100, clean header(20)
list pid syear pglabgro unemp once_unemp if once_unemp == 1, clean header(20)

* Inspect the person-level variables built so far
list pid syear first_income first_income2 pgisei voc_degr uni last_income in 1/200, clean
list pid syear pglabgro first_income last_income pgisei voc_degr uni once_unemp in 1/200, clean header(20)
list pid pglabgro first_income last_income once_unemp in 1/200, clean header(20)

** All person-level variables (first_income, last_income, once_unemp, ...) are
** constant within pid, so one row per person is enough from here on.
** Keep exactly the 2006 row: every remaining person has one, because the
** sample was restricted to persons regularly employed in 2006.
** "drop if syear > 2006" would leave several rows per person whenever the
** data contain waves before 2006; with no earlier waves both are identical.
keep if syear == 2006

** differences in differences: compute diff. between first and last income for each individual
gen diff_INC = last_income - first_income

** Put the observations in a reproducible random order before matching;
** the psmatch2 help advises a random sort order because the result of
** nearest-neighbour matching can depend on the order when scores are tied.
set seed 1234
gen rand = runiform()
sort rand


* Covariates of the treatment model, stored once in a global macro so that
* every command below uses exactly the same list. A global (not a local) is
* used so the commands still work when they are run one at a time.
global ps_covars female age age2 mig_back white_collar_qual married first_income first_income2 pgisei voc_degr uni

* Treatment model shown for inspection
logit once_unemp $ps_covars

* Own probit estimate; predict with xb stores the linear index, not the probability
quietly probit once_unemp $ps_covars
predict probits, xb

*** tiny difference between the next two calls:
*** the first matches on the index predicted above and imposes common support,
*** the second lets psmatch2 estimate the score itself from the same covariates
psmatch2 once_unemp, pscore(probits) common out(last_income) neighbor(5)
psmatch2 once_unemp $ps_covars, out(last_income) n(5)

* Same matching with a logit score model: outcome in levels, then as difference
psmatch2 once_unemp $ps_covars, out(last_income) n(5) logit
psmatch2 once_unemp $ps_covars, out(diff_INC) n(5) logit


* Covariate balance before and after the most recent psmatch2 run
pstest $ps_covars, both

*** Rosenbaum bounds. The most recent psmatch2 run left these variables behind:
* _treated  : dummy(1) => observation is treated
* _support  : dummy(1) => observation is in the common support area
* _diff_INC : for any treatment observation the value of the matched outcome
*             (named after the variable given in out(), here diff_INC)

*** For each treated observation on common support, take the difference between
*** its own outcome and the outcome of its matches from the control group
*** (the matched outcome can be based on several control cases).
*** All other observations are set to missing: only the treated are regarded.
gen d_outcome = cond(_treated == 1 & _support == 1, diff_INC - _diff_INC, .)

*** Rosenbaum sensitivity analysis: is there still an effect if we assume
*** that unobserved heterogeneity changes the odds of treatment?
*** gamma runs from 1 to 2 in steps of 0.1.
rbounds d_outcome, gamma(1(0.1)2)

* Covariate list of the treatment model (defined here as well so that this
* section can be run on its own)
global ps_covars female age age2 mig_back white_collar_qual married first_income first_income2 pgisei voc_degr uni

* Effect of a caliper: the same 5-neighbour logit matching without and with caliper(0.01)
psmatch2 once_unemp $ps_covars, out(last_income) n(5) logit
psmatch2 once_unemp $ps_covars, out(last_income) n(5) logit caliper(0.01)

* Reference results for the comparison with teffects: outcome in levels, then as difference
psmatch2 once_unemp $ps_covars, out(last_income) n(5) logit
psmatch2 once_unemp $ps_covars, out(diff_INC) n(5) logit

* Covariate list of the treatment model (defined here as well so that this
* section can be run on its own)
global ps_covars female age age2 mig_back white_collar_qual married first_income first_income2 pgisei voc_degr uni

* Built-in estimator: ATET with 5 nearest neighbours, outcome in levels
teffects psmatch (last_income) (once_unemp $ps_covars), atet nneighbor(5)

* ... and with the income difference as outcome
teffects psmatch (diff_INC) (once_unemp $ps_covars), atet nneighbor(5)

*tebalance summarize => Stata 14


teffects psmatch (diff_INC) (once_unemp $ps_covars), atet nneighbor(5)

*tebalance summarize => Stata 14
*tebalance box

*** further features of teffects psmatch
* caliper(): maximum difference in pscore allowed
teffects psmatch (diff_INC) (once_unemp $ps_covars), atet nneighbor(5) caliper(0.5)

* gen(): generates variables that identify the matches
teffects psmatch (diff_INC) (once_unemp $ps_covars), atet nneighbor(1) gen(match)
* remove them again so that the stub can be reused below
capture drop match*

* id stores the current observation number
gen id = _n
teffects psmatch (diff_INC) (once_unemp $ps_covars), atet nneighbor(5) gen(match)