/* Methods I, MLE October 2020 */
cd "H:\Teaching\Methods 2020\lectures\Week 6 marginal effects mle\MLE\code"

clear
set seed 1234567
set obs 100

// --- Logistic regression

* Simulate a Bernoulli(0.4) indicator
gen bernie = uniform() < 0.4
*list bernie, c
sum bernie
scalar p = r(mean)

* Log-likelihood evaluated at the sample proportion (the MLE);
* 0.46 is the observed mean of bernie under this seed
di 100*0.46*ln(0.46) + (100-100*0.46)*ln(1-0.46)
* Same, using the stored sample proportion
di 100*p*ln(p) + (100-100*p)*ln(1-p)

* Verify with a constant-only logit
logit bernie
* Convert the constant to the probability scale; invlogit(_b[_cons])
* generalizes the previously hard-coded 1/(1+exp(.1603427))
* (note _b[_cons] = -.1603427, and 1/(1+exp(-b)) = invlogit(b))
di invlogit(_b[_cons])

* Plot the negative log-likelihood as a function of p
twoway function y = -(100*x*ln(x) + (100-100*x)*ln(1-x)), range(0 1) ///
    xtitle("p") ytitle("-Ln P") saving(l100.gph, replace)
graph export l100.png, replace

// --- Logistic response function
twoway function y=exp(x) / (1+ exp(x)), range(-10 10) saving(l1.gph, replace)
twoway function y=exp(-x) / (1+ exp(-x)), range(-10 10) saving(l2.gph, replace)
graph combine l1.gph l2.gph, xsize(20) ysize(10)
graph export lboth.png, replace

// --- Linear model, normal regression with MLE

* Normal outcome with known mean and sd
clear
set seed 1234567
set obs 100
gen ynorm = rnormal(100, 10)
sum ynorm
reg ynorm

sysuse auto, clear
qui reg price weight mpg
ereturn list
qui reg price weight mpg

* Save sample size and SSE
local N = e(N)
local rss = e(rss)
* Closed-form OLS log-likelihood; should match e(ll) from -regress-
local ll = -0.5*`N'*(ln(2*_pi)+ln(`rss'/`N')+1)
display %20.6f `ll'
display %20.6f e(ll)

// --- The mlexp command
mlexp (ln(normalden(price, {xb: weight mpg _cons}, {sigma})))

// --- MLE example
use "H:\Teaching\Methods 2018\DNM book\heus\heus_mepssample.dta", clear
gen lexp = log(exp_tot +1)
reg lexp age female
/*
mlexp (ln(normalden(lexp,{sigma})))
mlexp (ln(normalden(lexp, {b0=-2000}+{b1=120}*age,{sigma=9000}))), diff
*/
mlexp (ln(normalden(lexp, {xb: age female _cons} , {sigma})))

* Same model via a small ml evaluator program; lnsigma is estimated on
* the log scale so sigma = exp(lnsigma) is guaranteed positive
capture program drop lfols
program lfols
    args lnf xb lnsigma
    local y "$ML_y1"
    quietly replace `lnf' = ln(normalden(`y', `xb',exp(`lnsigma')))
end

ml model lf lfols (xb: lexp = age female) (lnsigma:)
ml maximize
* Recover sigma from the estimated log-sigma constant
display exp([lnsigma]_cons)

// ---
// --- Likelihood function is negative: log(x) < 0 for x in (0,1)
twoway function y =log(x), range(-2 2) xline(0 1) yline(0) ///
    color(red) title("y = log(x)")

// --- Likelihood ratio test
use "H:\Teaching\Methods 2017\data\GPA1.dta", clear
rename colGPA colgpa
rename hsGPA hsgpa

* Fit three nested models quietly, keep the stored results
quietly {
    reg colgpa
    est sto m1
    reg colgpa hsgpa
    est sto m2
    reg colgpa hsgpa skipped
    est sto m3
}
est table m1 m2 m3, star stat(r2 r2_a ll bic aic) b(%7.3f)

* LRT of nested models
lrtest m3 m2
lrtest m3 m1

* LRT by hand
qui reg colgpa
est sto m0
scalar ll0 = e(ll)
reg colgpa male campus
est sto m1
scalar ll1 = e(ll)
lrtest m0 m1
* Same statistic by hand: LR = -2*(ll_restricted - ll_unrestricted).
* Parentheses, not square brackets, must group the expression — in Stata
* expressions brackets are reserved for coefficient/equation references.
di -2*(ll0 - ll1)

// --- Logistic vs normal
clear
set seed 123456
set obs 5000
gen u = uniform()
* Inverse-CDF transform: u ~ U(0,1) implies -ln((1-u)/u) ~ standard logistic
gen l = -ln((1 - u)/u)
sum l
* Normal draws matched to the simulated logistic mean and sd
gen n = rnormal(r(mean), r(sd))
* Overlay kernel density estimates of the two samples
kdensity l, bw(0.3) gen(xl dl)
kdensity n, bw(0.3) gen(xn dn)
line dl xl, sort color(red) || line dn xn, sort ///
    title("Logistic (red) vs normal distribution") ytitle("Density") ///
    xtitle("x") legend(off)
graph export logvsnorm.png, replace

// *** New dataset (Wooldridge MROZ via the user-written -bcuse-)
bcuse mroz, clear
lowess inlf nwifeinc, gen(lflow) nograph
scatter inlf nwifeinc, jitter(5) msize(small) || line lflow nwifeinc, sort ///
    legend(off) saving(lblow.gph, replace)
graph export lblow.png, replace

logit inlf nwifeinc, nolog

* LRT: full vs constant-only logit
qui logit inlf nwifeinc, nolog
est sto full
qui logit inlf, nolog
est sto redu
lrtest full redu

* McFadden's pseudo R2 by hand: 1 - ll(model)/ll(null)
qui logit inlf nwifeinc, nolog
scalar ll_cm = e(ll)
qui logit inlf, nolog
scalar ll_n = e(ll)
di 1 - (ll_cm/ll_n)
di "cm: " ll_cm " " "null: " ll_n " " "(ll_cm/ll_n): " (ll_cm/ll_n)

* Indicator for more than a high-school education (0 when educ is missing)
gen hsp = 0
replace hsp = 1 if educ > 12 & educ ~= .
// -- Predictions
* Compare three equivalent ways of getting fitted probabilities from a
* logit, then the probit counterpart.
qui logit inlf hsp, nolog
* Predictions for logit manually: p = 1/(1+exp(-xb))
gen phat_manu = 1/(1+exp(-(_b[_cons] +_b[hsp]*hsp)))
*Same as using the inverse logit function
gen phat_invl = invlogit(_b[_cons] +_b[hsp]*hsp)
* Same as default of predict command (probability of a positive outcome)
predict phat_pred
* All three columns should be identical
sum phat_manu phat_invl phat_pred
*probit counterpart of the same exercise
qui probit inlf hsp, nolog
* use inverse normal: standard normal CDF of the linear index
gen phat1_norm = normal(_b[_cons] + _b[hsp]*hsp)
predict phat1_predprob
* Linear index (z-score) rather than probability
predict zscore, xb
sum phat1* zscore
* NOTE(review): 0.1760512 is a hard-coded value of the probit linear
* index (zscore) pasted from output — confirm which hsp value it
* corresponds to; an expression in _b[...] would be more robust.
di normal(0.1760512)