#import the data (make sure you have the proper working directory)
ibex=read.delim("ibex horns.txt")

#check the dimension of the data
dim(ibex)

#look at the initial portion of the data
head(ibex)

#remove the last 6 columns (they appear to be empty)
ibex=ibex[,1:12]

#look at the last portion of the data
tail(ibex)

#remove the last 4 lines keping all the columns (these also appear to be empty)
ibex=ibex[1:3050,]

#look at all the data for ibex number 672
ibex[ibex$id==672,]

#we note that this ibex was entered twice by "mistake" (two measurements)
#replace the two almost replica by their average
tempibex=(ibex[1074+2*(0:7),]+ibex[1075+2*(0:7),])/2

#replace the newly created data (the average) in the original dataset
ibex[1074:(1074+7),]=tempibex

#remove the replicated rows
ibex=ibex[-((1074+8):(1074+15)),]

#check that we did the right thing
ibex[ibex$id==672,]

#create the dependent variable (the one that I am to explain): log of ibex growth
y=log(ibex$c.dim)

#create the explanatory variables = a matrix made of columns of covariates
#first create the intercept
ones=rep(1,length(y))

#then put together the other explanatory variables
x=cbind(ones,log(ibex$accr/(1+ibex$accr)),ibex$year,ibex$surv1,ibex$ril)

#eliminate rows with missing values using the command
#that returns a vector of T and F
is.na(ibex$sx)[1070:1085]

#to remove the missing entries (recall to use == when using logical conditions)
ibex[is.na(ibex$sx)==FALSE,]

#test which columns contain missing entries
#and count, for each variable, the number of missing values
apply(is.na(ibex),2,sum)

#fit a classical linear regression model (remove the intercept with the -1 command
#since the intercept is already included in the x matrix
model=lm(y~x-1)

#give names to the columns of the explanatory variables in the x matrix
colnames(x)=c("intercept","age","year","survival","surveyor")
colnames(x)

#now look at the output of the fitted model
summary(model)

#note: we have repated measures here, so we should use nmle : load the package "nmle"
#and run the following command to have a random intercept for each individual
summary(lme(y~x-1,random=~1|id,ibex))

#use the code made for Bayesian Core
source("progs.3.R")
n=length(y)
X=x
k=dim(X)[2]

#usual lse
betahat=inv(t(X)%*%X)%*%t(X)%*%y
betahat

# 1/c = prior sample size
# c=.1 -> prior guess corresponds to 10 obs (strong prior information)
# c=1000 -> very little prior information, the estimators become more and more similar to the MLE

#look at the effect of different c on the posterior expectation of sigma^2
# c=0.1

S2=t(y-X%*%betahat)%*%(y-X%*%betahat)

a=2.1
b=2

M=10*diag(5)
T=inv(inv(M)+inv(t(X)%*%X))
(2*b+S2+t(betahat)%*%T%*%betahat)/(n+2*a-2)

# c=1
M=diag(5)
T=inv(inv(M)+inv(t(X)%*%X))
(2*b+S2+t(betahat)%*%T%*%betahat)/(n+2*a-2)

# c=10
M=0.1*diag(5)
T=inv(inv(M)+inv(t(X)%*%X))
(2*b+S2+t(betahat)%*%T%*%betahat)/(n+2*a-2)

# c=100
M=0.01*diag(5)
T=inv(inv(M)+inv(t(X)%*%X))
(2*b+S2+t(betahat)%*%T%*%betahat)/(n+2*a-2)

# c=1000
M=0.001*diag(5)
T=inv(inv(M)+inv(t(X)%*%X))
(2*b+S2+t(betahat)%*%T%*%betahat)/(n+2*a-2)

#look at the effect of different c on the posterior expectation of beta

# c=1000
M=0.001*diag(5)
b5beta=inv(M+t(X)%*%X)%*%t(X)%*%y
b5beta[1]

# compare with MLE for beta_1
betahat[1]
[1] 9.702976

#if we do not believe much in our prior we should pick c to be very large

# ZELLNER INFORMATIVE G-PRIOR ANALYSIS: POSTERIOR MEAN OF beta FOR c=100
100/101*betahat

# POSTERIOR VARIANCE OF beta FOR c=1000
diag(1000/(n*1001)*as.real(S2+t(betahat)%*%t(X)%*%X%*%betahat/101)*inv(t(X)%*%X))

# compare with formulas on p. 38 with tilde(beta) = 0 (due to the prior mean on beta we picked)
# c has still an impact but at least things are on the right scale since we used X^tX in our prior
# this can be used 9after taking the sqrt) to construct confidence intervals

sqrt(diag(10000/(n*10001)*as.real(S2+t(betahat)%*%t(X)%*%X%*%betahat/10001)*inv(t(X)%*%X)))

# CONFIDENCE INTERVALS
# diagonal elements in the posterior estimate of the variance coariance matrix
# 2.196655093 0.052934235 0.001101347 0.009177800 0.002363016

c=1000
(c/(c+1))*betahat[1]-2.196655093 * qt(0.975,df=n)
(c/(c+1))*betahat[1]+2.196655093 * qt(0.975,df=n)

# since the posterior variance for the intercept
# is so big compared to the variance of the other estimated parameters
# (that are of the order of 0.0 somethings)
# we should not trust the estimated value of the intercept 

#to simulate from a multivariate normal distirbution
# need to load the library MASS
library(MASS)

# to generate a sample from the posterior of beta (notice c = 100 in the sequel)
# first generate the sigma
sigmasample = 1/rgamma(10^4,n/2,(S2+t(betahat)%*% t(X) %*% X %*% betahat/101)/2)
hist(sigmasample,nclass=100)

# then create an empty matrix of betas
betasample=matrix(0,nro=10^4,ncol=5)

#finally fill in the matrix by sampling form the proper posterior distirbution
for (t in 1:10^4) {betasample[t,] =
mvrnorm(1,mu=(100/101)*betahat,Sigma=sigmasample[t]*(100/101)*inv(t(X) %*% X)) }

# and plot the samples form the posterior
par(mfrow=c(2,3))

for (i in 1:5){ hist(betasample[,i],nclass=110,col="grey", prob = TRUE);
                abline(v=(100/101)*betahat[i],lwd=2,col="gold" ) }
hist(sigmasample,n=110,col=" grey" , prob=TRUE,main=expression(sigma^ 2))

# the sampled posterior mean i.e. the bayes estimator is
apply(betasample, 2, mean)

#that compares well with the true estimator 
apply(betasample, 2, mean) - (100/101)*betahat