# load the data
eagles=read.delim("eagle CR.txt")
eagles=eagles[complete.cases(eagles),]

#define the dependent variable: breading success
eagly=eagles$succ

names(eagles)

# define the X matrix (with an intercept embedded)
eaglX=cbind(rep(1,length(eagly)),eagles$NND,eagles$quality,eagles$snow,eagles$year)

# estimate the glm logit model
model1.glm=glm(eagly~eaglX-1,family=binomial(link="logit"))

# look at the output of the MLE estimation procedure
summary(model1.glm)

# probability of breading success is given by
#exp(84+4.257*10^(-8) quality - 0.042 year) / [1 + exp( ....)]

range(quality)

mean(eagly,na.rm=T)

# try now to estimate a probit model
model2.glm=glm(eagly~eaglX-1,family=binomial(link="probit"))
summary(model2.glm)

# probability of breading success is given by
# pnorm(52+2.6*10^(-8) quality - 2.6*10^(-2) year)

new=data.frame(c(1,eagles[1,]$NND,eagles[1,]$quality,eagles[1,]$snow,eagles[1,]$year))
new=data.frame(1,1.577,5495916,23,2008)
predict(model1.glm,newdataew,type="response")

# MCMC algorithm
source("progs.4.R")

# there is a function to run a Metropolis-Hastings algorithm called hm1
# it is a random walk algorithm to simulate from a normal target


# the variance of the proposal is 10^(-3)=0.001 very small for this target
test=hm1(10000,1,0.001)

par(mfrow=c(3,1))
plot(test,type="l",xlab="Iterations",ylab="MH chain")

# compare the histogram of the sampled points with the true target density:
# they are very far: this is an indication that we have convergence problems with our sampler
hist(test[8001:10000],nclass=50,prob=TRUE,main="",xlab="x")
curve(dnorm,-3,3,add=TRUE)

# the autocorrelation fct goes to zero only after more than 1000 lags
# this is also an indication f bad exploration of the space
acf(test,lag=1000,main="",ylab="Autocorrelation",ci=F)

# we now change the scale of the proposal to 1000
test=hm1(10000,1,1000)
par(mfrow=c(3,1))

# we now explore the support of the target better (should be bwn +3 and -3)
# but a lot of values are rejected
plot(test,type="l",xlab="Iterations",ylab="MH chain")
hist(test[8001:10000],nclass=50,prob=TRUE,main="",xlab="x")
curve(dnorm,-3,3,add=TRUE)

# the correlation fct shows that we should subsample every 5 or 60 steps approximately
acf(test,lag=100,main="",ylab="Autocorrelation",ci=F)
acf(test,lag=1000,main="",ylab="Autocorrelation",ci=F)

# finally we set the scale of the proposal to 1: this looks much better!
# this is the type of graphs we should aim at
test=hm1(10000,1,1)
par(mfrow=c(3,1))
plot(test,type="l",xlab="Iterations",ylab="MH chain")
hist(test[8001:10000],nclass=50,prob=TRUE,main="",xlab="x")
curve(dnorm,-3,3,add=TRUE)

# the correlation fct shows that we should subsample every 10 or 15 steps approximately
acf(test,lag=100,main="",ylab="Autocorrelation",ci=F)
acf(test,lag=1000,main="",ylab="Autocorrelation",ci=F)

# modify the hm1 function going from a normal proposal to a student-t proposal with 3 df
# note: no need to change the acceptnace probability since the proposal is still symmetric
# so it cancels at the numerator and denominator of the acceptance probability
hm2=function(n,x0,sigma2){
x=rep(0,n)
x[1]=x0
for (i in 2:n){
  y=rt(1,df=3)*sqrt(sigma2)+x[i-1]
  if (runif(1)<=exp(-0.5*(y^2-x[i-1]^2))) x[i]=y else x[i]=x[i-1]
  }
x
}

# let's now try it again
test=hm2(10000,1,0.001)
par(mfrow=c(3,1))
plot(test,type="l",xlab="Iterations",ylab="MH chain")
hist(test[8001:10000],nclass=50,prob=TRUE,main="",xlab="x")
curve(dnorm,-3,3,add=TRUE)

# it looks a little better than with the normal proposal
# but not much.
acf(test,lag=1000,main="",ylab="Autocorrelation",ci=F)


# illustration of the need for burnin:
# starting far away
test=hm1(10000,50,0.01)
par(mfrow=c(3,1))
plot(test,type="l",xlab="Iterations",ylab="MH chain")
hist(test[8001:10000],nclass=50,prob=T,main="",xlab="x")
curve(dnorm,-3,3,add=T)
acf(test,lag=1000,main="",ylab="Autocorrelation",ci=F)
par(mfrow=c(3,1))
plot(test,type="l",xlab="Iterations",ylab="MH chain")
par(new=T);plot(dnorm(test),ty="l",col="steelblue",axes=F)
hist(test[8001:10000],nclass=50,prob=T,main="",xlab="x")
curve(dnorm,-3,3,add=T)
acf(test,lag=1000,main="",ylab="Autocorrelation",ci=F)
#with a different scale
test=hm1(10000,50,0.1)
par(mfrow=c(3,1))
plot(test,type="l",xlab="Iterations",ylab="MH chain")
par(new=T);plot(dnorm(test),ty="l",col="steelblue",axes=F)
hist(test[8001:10000],nclass=50,prob=T,main="",xlab="x")
curve(dnorm,-3,3,add=T)
acf(test,lag=1000,main="",ylab="Autocorrelation",ci=F)

# now we work with the posterior for the eagle dataset
library(MASS)

# remove missing values
eagly=eagly[is.na(eagly)==FALSE]

#checking for more missing
sum(is.na(eagly))
eaglX=eaglX[is.na(eagles$succ)==FALSE,]
length(eagly)
dim(eaglX)

#this is much better, except that it also eliminates rows
#where there is no NA in either eagly or eaglX
eagles=eagles[complete.cases(eagles),]

#Metropolis Hastings with a flat prior
flatprobit=hmflatprobit(10000,eagly,eaglX,1)

# remove the variables that are likely to be non interesting since their confidence intervals seem
# to include zero (look at the histogram of the posterior of the corresponding beta)
flatprobit=hmflatprobit(10000,eagly,eaglX[,c(1,3)],1)

hmflatprobit=function(niter,y,X,scale)
{
# GAUSSIAN RANDOM WALK MH SAMPLER FOR THE PROBIT MODEL
# UNDER FLAT PRIOR
library(mnormt)
p=dim(X)[2]
mod=summary(glm(y~-1+X,family=binomial(link="probit")))
beta=matrix(0,niter,p)
beta[1,]=as.vector(mod$coeff[,1])
Sigma2=as.matrix(mod$cov.unscaled)
for (i in 2:niter)
{
tildebeta=rmnorm(1,beta[i-1,],scale*Sigma2)
llr=probitll(tildebeta,y,X)-probitll(beta[i-1,],y,X)
if (runif(1)<=exp(llr)) beta[i,]=tildebeta else beta[i,]=beta[i-1,]
}
beta
}

# model comparison via BF
eaglX[,3]=eaglX[,3]/10^6
noinfprobit=hmnoinfprobit(10000,eagly,eaglX,1)

mkprob=apply(noinfprobit,2,mean)
vkprob=var(noinfprobit)

# compute the marginal lkh by importance sampling
simk=rmnorm(100000,mkprob,2*vkprob)

usk=probitnoinflpost(simk,eagly,eaglX)-dmnorm(simk,mkprob,2*vkprob,log=TRUE)

# run MCMC on the submodel removing covariante 3 and 4
noinfprobit0=hmnoinfprobit(10000,eagly,eaglX[,3:4],1)

mk0=apply(noinfprobit0,2,mean)
vk0=var(noinfprobit0)
simk0=rmnorm(100000,mk0,2*vk0)

# estimate the marginal under the full null hypothesis
usk0=probitnoinflpost(simk0,eagly,eaglX[,3:4])-dmnorm(simk0,mk0,2*vk0,log=TRUE)

# remove now variable 2 and 4
usk0=probitnoinflpost(simk0,eagly,eaglX[,c(2,4)])-dmnorm(simk0,mk0,2*vk0,log=TRUE)

bf0probit=mean(exp(usk))/mean(exp(usk0))

# log(bf0probit) = -8.148691 so remove these two variables (snow and distance)
rm(mk0,vk0,simk0,usk0)
rm(noinfprobit0)
