/* Methods I, MLE October 2020 */
cd "H:\Teaching\Methods 2020\lectures\Week 6 marginal effects mle\MLE\code"

clear
set seed 1234567
set obs 100

// --- Logistic regression

* Simulate a Bernoulli(0.4) indicator
gen bernie = uniform() < 0.4
*list bernie, c
sum bernie
scalar p = r(mean)

* Log-likelihood evaluated at the sample proportion (the MLE);
* 0.46 is the observed mean of bernie under this seed
di 100*0.46*ln(0.46) + (100-100*0.46)*ln(1-0.46)
* Same, using the stored sample proportion
di 100*p*ln(p) + (100-100*p)*ln(1-p)

* Verify with a constant-only logit
logit bernie
* Convert the constant to the probability scale; invlogit(_b[_cons])
* generalizes the previously hard-coded 1/(1+exp(.1603427))
* (note _b[_cons] = -.1603427, and 1/(1+exp(-b)) = invlogit(b))
di invlogit(_b[_cons])

* Plot the negative log-likelihood as a function of p
twoway function y = -(100*x*ln(x) + (100-100*x)*ln(1-x)), range(0 1) ///
    xtitle("p") ytitle("-Ln P") saving(l100.gph, replace)
graph export l100.png, replace

// --- Logistic response function
twoway function y=exp(x) / (1+ exp(x)), range(-10 10) saving(l1.gph, replace)
twoway function y=exp(-x) / (1+ exp(-x)), range(-10 10) saving(l2.gph, replace)
graph combine l1.gph l2.gph, xsize(20) ysize(10)
graph export lboth.png, replace

// --- Linear model, normal regression with MLE

* Normal outcome with known mean and sd
clear
set seed 1234567
set obs 100
gen ynorm = rnormal(100, 10)
sum ynorm
reg ynorm

sysuse auto, clear
qui reg price weight mpg
ereturn list
qui reg price weight mpg

* Save sample size and SSE
local N = e(N)
local rss = e(rss)
* Closed-form OLS log-likelihood; should match e(ll) from -regress-
local ll = -0.5*`N'*(ln(2*_pi)+ln(`rss'/`N')+1)
display %20.6f `ll'
display %20.6f e(ll)

// --- The mlexp command
mlexp (ln(normalden(price, {xb: weight mpg _cons}, {sigma})))

// --- MLE example
use "H:\Teaching\Methods 2018\DNM book\heus\heus_mepssample.dta", clear
gen lexp = log(exp_tot +1)
reg lexp age female
/*
mlexp (ln(normalden(lexp,{sigma})))
mlexp (ln(normalden(lexp, {b0=-2000}+{b1=120}*age,{sigma=9000}))), diff
*/
mlexp (ln(normalden(lexp, {xb: age female _cons} , {sigma})))

* Same model via a small ml evaluator program; lnsigma is estimated on
* the log scale so sigma = exp(lnsigma) is guaranteed positive
capture program drop lfols
program lfols
    args lnf xb lnsigma
    local y "$ML_y1"
    quietly replace `lnf' = ln(normalden(`y', `xb',exp(`lnsigma')))
end

ml model lf lfols (xb: lexp = age female) (lnsigma:)
ml maximize
* Recover sigma from the estimated log-sigma constant
display exp([lnsigma]_cons)

// ---
// --- Likelihood function is negative: log(x) < 0 for x in (0,1)
twoway function y =log(x), range(-2 2) xline(0 1) yline(0) ///
    color(red) title("y = log(x)")

// --- Likelihood ratio test
use "H:\Teaching\Methods 2017\data\GPA1.dta", clear
rename colGPA colgpa
rename hsGPA hsgpa

* Fit three nested models quietly, keep the stored results
quietly {
    reg colgpa
    est sto m1
    reg colgpa hsgpa
    est sto m2
    reg colgpa hsgpa skipped
    est sto m3
}
est table m1 m2 m3, star stat(r2 r2_a ll bic aic) b(%7.3f)

* LRT of nested models
lrtest m3 m2
lrtest m3 m1

* LRT by hand
qui reg colgpa
est sto m0
scalar ll0 = e(ll)
reg colgpa male campus
est sto m1
scalar ll1 = e(ll)
lrtest m0 m1
* Same statistic by hand: LR = -2*(ll_restricted - ll_unrestricted).
* Parentheses, not square brackets, must group the expression — in Stata
* expressions brackets are reserved for coefficient/equation references.
di -2*(ll0 - ll1)

// --- Logistic vs normal
clear
set seed 123456
set obs 5000
gen u = uniform()
* Inverse-CDF transform: u ~ U(0,1) implies -ln((1-u)/u) ~ standard logistic
gen l = -ln((1 - u)/u)
sum l
* Normal draws matched to the simulated logistic mean and sd
gen n = rnormal(r(mean), r(sd))
* Overlay kernel density estimates of the two samples
kdensity l, bw(0.3) gen(xl dl)
kdensity n, bw(0.3) gen(xn dn)
line dl xl, sort color(red) || line dn xn, sort ///
    title("Logistic (red) vs normal distribution") ytitle("Density") ///
    xtitle("x") legend(off)
graph export logvsnorm.png, replace

// *** New dataset (Wooldridge MROZ via the user-written -bcuse-)
bcuse mroz, clear
lowess inlf nwifeinc, gen(lflow) nograph
scatter inlf nwifeinc, jitter(5) msize(small) || line lflow nwifeinc, sort ///
    legend(off) saving(lblow.gph, replace)
graph export lblow.png, replace

logit inlf nwifeinc, nolog

* LRT: full vs constant-only logit
qui logit inlf nwifeinc, nolog
est sto full
qui logit inlf, nolog
est sto redu
lrtest full redu

* McFadden's pseudo R2 by hand: 1 - ll(model)/ll(null)
qui logit inlf nwifeinc, nolog
scalar ll_cm = e(ll)
qui logit inlf, nolog
scalar ll_n = e(ll)
di 1 - (ll_cm/ll_n)
di "cm: " ll_cm " " "null: " ll_n " " "(ll_cm/ll_n): " (ll_cm/ll_n)

* Indicator for more than a high-school education (0 when educ is missing)
gen hsp = 0
replace hsp = 1 if educ > 12 & educ ~= .
// -- Predictions
* Compare three equivalent ways of getting fitted probabilities from a
* logit, then the probit counterpart.
qui logit inlf hsp, nolog
* Predictions for logit manually: p = 1/(1+exp(-xb))
gen phat_manu = 1/(1+exp(-(_b[_cons] +_b[hsp]*hsp)))
*Same as using the inverse logit function
gen phat_invl = invlogit(_b[_cons] +_b[hsp]*hsp)
* Same as default of predict command (probability of a positive outcome)
predict phat_pred
* All three columns should be identical
sum phat_manu phat_invl phat_pred
*probit counterpart of the same exercise
qui probit inlf hsp, nolog
* use inverse normal: standard normal CDF of the linear index
gen phat1_norm = normal(_b[_cons] + _b[hsp]*hsp)
predict phat1_predprob
* Linear index (z-score) rather than probability
predict zscore, xb
sum phat1* zscore
* NOTE(review): 0.1760512 is a hard-coded value of the probit linear
* index (zscore) pasted from output — confirm which hsp value it
* corresponds to; an expression in _b[...] would be more robust.
di normal(0.1760512)