/*
HW 4 -- Propensity scores
*/
version 16
webuse set "https://perraillon.com/s/"
webuse "help_1_stata12.dta", clear
set scheme s1mono
cd "H:\Teaching\Methods 2020\lectures\Week 4 ps and did\code"
log using "W4 ps and did.txt", text replace
/// --- Explore
rename i1 ndrinks
rename homeless intervention
label var intervention "1 if received intervention"
recode intervention (0 =1)(1=0)
tab intervention
* keep non-missing for all variables
qui reg pcs intervention age female ndrinks drugrisk
keep if e(sample)
desc
/// --- Regression adjustment
reg pcs intervention age female ndrinks drugrisk
* Different specification
reg pcs i.intervention##c.ndrinks age i.female drugrisk
margins, dydx(intervention)
* Different specification
reg pcs i.intervention##(c.ndrinks##c.ndrinks) i.female drugrisk
margins, dydx(intervention)
/// --- Using a nonparametric model
* nonparametric
npregress kernel pcs i.intervention age i.female ndrinks drugrisk, vce(boot, reps(100))
* note that interactions are not necessary. If you try, Stata tells you:
/*
interactions are unnecessary
You are estimating an arbitrary function of the regressors. Interactions are accounted
for implicitly.
*/
/// --- teffects ra way ATE
* Steps 1 and 2
qui reg pcs age female ndrinks drugrisk if intervention == 1
predict double yhat_t
* Steps 3 and 4
qui reg pcs age female ndrinks drugrisk if intervention == 0
predict double yhat_c
* Step 5: Difference (contrast)
sum yhat_t
local pom_t = r(mean)
sum yhat_c
local pom_c = r(mean)
di `pom_t' - `pom_c'
* Using teffects
teffects ra (pcs age female ndrinks drugrisk) (intervention), ate
*teffects ra (pcs age female ndrinks drugrisk) (intervention), ate aeq
* same as fully interacted model averaging treatment effects
qui reg pcs i.intervention##(c.age i.female c.ndrinks c.drugrisk)
margins, dydx(intervention)
/// --- ATET
* Steps 1 and 2
qui reg pcs age female ndrinks drugrisk if intervention == 1
predict yhat_t1 if intervention == 1
* Steps 3 and 4
qui reg pcs age female ndrinks drugrisk if intervention == 0
predict yhat_t11 if intervention == 1
sum yhat_t1
local pom_t1 = r(mean)
* same as
sum pcs if intervention ==1
sum yhat_t11
local pom_t11 = r(mean)
di `pom_t1' - `pom_t11'
* same as
teffects ra (pcs age female ndrinks drugrisk) (intervention), atet aeq
/// -- Checking overalp
sum age female ndrinks drugrisk if intervention ==1
sum age female ndrinks drugrisk if intervention ==0
corr ndrinks pcs
scatter pcs ndrinks if intervention ==1, color(red) msize(small) || ///
scatter pcs ndrinks if intervention ==0, color(blue) msize(small) ///
legend(off)
graph export pcs_drinks.png, replace
* another example
preserve
use http://www.stata-press.com/data/r13/bweightex.dta, clear
scatter bweight mage if mbsmoke ==1, color(red) msize(small) || ///
scatter bweight mage if mbsmoke ==0, color(blue) msize(small) ///
legend(off)
graph export overlap.png, replace
restore
/// -- Propensity scores
* Estimate the propensity score
qui logit intervention ndrinks age female drugrisk, nolog
predict double pscore if e(sample)
* Calculate statistics to check overlap
tabstat pscore, by(intervention) stats(N mean median min max)
sum age female ndrinks drugrisk if intervention ==0 & pscore < 0.083
* Plot the propensity score
kdensity pscore if intervention ==1, color(red) bw(0.02) ///
addplot(kdensity pscore if intervention ==0, bw(0.02)) legend(off)
graph export ps_kernel.png, replace
* Create weight
gen ipw = 1/pscore if intervention == 1
replace ipw = 1/(1-pscore) if intervention ==0
* Compare with weights
bysort intervention: sum age female ndrinks drugrisk [aweight=ipw]
* Bubble plot
scatter pcs ndrinks [pweight=ipw] if intervention ==1, msymbol(circle_hollow) msize(small) ///
color(red) saving(bubl_treated.gph, replace) title("Treated")
scatter pcs ndrinks [pweight=ipw] if intervention ==0, msymbol(circle_hollow) msize(small) ///
color(blue) saving(bubl_cont.gph, replace) title("Control")
graph combine bubl_treated.gph bubl_cont.gph, col(1) xcommon ysize(10) xsize(8)
graph export buble.png, replace
tabstat ipw, by(intervention) stats(mean median min max)
sum ipw, det
scatter pcs ndrinks [pweight=ipw] if intervention ==1 & ipw > 1.55, msymbol(circle_hollow) msize(small) ///
color(red) saving(bubl_treated1.gph, replace) title("Treated")
scatter pcs ndrinks [pweight=ipw] if intervention ==0 & ipw > 1.55, msymbol(circle_hollow) msize(small) ///
color(blue) saving(bubl_cont1.gph, replace) title("Control")
graph combine bubl_treated1.gph bubl_cont1.gph, col(1) xcommon ysize(10) xsize(8)
graph export buble1.png, replace
/// --- IPW RA with weights
* Regression adjustment with weights
reg pcs intervention age female ndrinks drugrisk [pweight= ipw], robust
*******************************************
* Digression: some intuition about weights
preserve
* make a smaller dataset so changes are easier to see
keep if _n <=20
gen w = 1
* The regression below
reg pcs age female ndrinks
* is the same as regression in which everybody is given the same weights
reg pcs age female ndrinks [pweight=w]
* Now suppose we want the 20th observation to count for 10
replace w = 10 if _n==20
* the model below
reg pcs age female ndrinks [pweight=w]
est sto weighted
* is the same as a model that creates 10 replicas of the 20th observation
* Stata has a command for that: expand
expand 10 if _n==20
reg pcs age female ndrinks
est sto expanded_noweight
est table weighted expanded_noweight, se stats(N)
* The expanded version SEs need to be corrected
restore
***********************************************
/// --- IPW with teffects ipwra
teffects ipwra (pcs age female ndrinks drugrisk) ///
(intervention ndrinks age female drugrisk)
tebalance summarize
*tebalance overid
teffects, aeq
qui teffects ipwra (pcs age female ndrinks drugrisk) ///
(intervention ndrinks age female drugrisk)
teffects overlap, ptl(1)
graph export ps_kernel_te.png, replace
*** improving balance with a more flexible propensity score
qui teffects ipwra (pcs age female ndrinks drugrisk) ///
(intervention c.ndrinks##(c.age i.female c.drugrisk))
tebalance summarize
*tebalance overid
teffects ipwra (pcs age female ndrinks drugrisk) ///
(intervention c.ndrinks##(c.age i.female c.drugrisk##c.drugrisk))
tebalance summarize
tebalance overid
* last one seems better
* Maybe we should just compare where there is overlap?
reg pcs intervention age female ndrinks drugrisk if ndrinks <51, robust
reg pcs i.intervention##(c.age i.female c.ndrinks c.drugrisk) if ndrinks <51, robust
margins, dydx(intervention)
* Could also improve outcome model
teffects ipwra (pcs c.ndrinks##(c.age i.female c.drugrisk)) ///
(intervention c.ndrinks##(c.age i.female c.drugrisk##c.drugrisk))
/// --- Now that we account for lack of overlap, what is the treatment effct for the
* treated
teffects ipwra (pcs age female ndrinks drugrisk) ///
(intervention ndrinks age female drugrisk), atet
* A lot lower and more uncertain (see confidence interval). The treated were healthier
/// --- Matching IPWRA by hand
reg pcs age female ndrinks drugrisk [pweight= ipw] if intervention ==1
predict double pom_t
reg pcs age female ndrinks drugrisk [pweight= ipw] if intervention ==0
predict double pom_c
mean pom_c pom_t
teffects ipwra (pcs age female ndrinks drugrisk) ///
(intervention ndrinks age female drugrisk), pom
log close