DICdat=function(n=1000,nburn=100,k=2,data=scan("Galaxy.data")){ # # Plain Gibbs estimation of the Gaussian mixture model # # Tidying things up in Glasgow on 02/11/03_______________ # adding overall density estimate # adding estimated overall MAP estimate of (z,theta) # adding estimated marginal MAP estimate of theta # adding new DIC's and removing old DIC's # # Completing in Paris and Grenoble_________________ # addition of DIC5 09/25/02 [removed on 02/12/03] # addition of DIC2 09/26/02 [removed on 02/13/03] # addition of a burn in stage with nburn steps # estimated log-variances # # addition of DICcomp and DEVcomp 04/10/02 # correction d'une erreur dans order order le 11/02/03 # # Started on Tenerife, 06/07/02______________ # Prior distributions # Dirichlet(alpha) on the weights (p1,...,pk) alpha=rep(1,k) # Nnormal N(xi,sigma^2/enne) on the mu's xi=rep(0,k) enne=rep(0.1,k) # Inverse gamma Ga(nu/2,esse2/2) on the sigma^2's # the nu's must be greater than 2 nu=rep(1.,k) esse2=rep(1,k) # Data obs=data nobs=length(obs) # EM algorithm for initialisation purposes only! emresu=EMalgo(obs,k,25) # DIC criteria DIC1=0 DICobs=0 DICAz=0 DICAt=0 DICB=0 DICE=0 # DICA(t and z) corresponds to the posterior mean deviance # when the complete likelihood is used to define the deviance # DICB corresponds to the deviance taken at the posterior mean theta # Both are related to DIC=c in Flo's manuscript of Feb, 2003..... # Initialisation uno=t(rep(1,nobs)) # vector of 1's of size nobs dos=rep(1,k) # vector of 1's of size k xranJ=seq(min(obs)-0.5,max(obs)+0.5,(1+diff(range(obs)))/1000) nranJ=length(xranJ) trio=t(rep(1,nranJ)) # vector of 1's of size nranJ # normal normalising constant demlog2pi=0.5*nobs*log(2*pi) # More initialisation for empirical joint MAP zbest=0*(1:nobs) pebest=rep(1,k)/k mubest=rep(0,k) sibest=rep(1,k) best=-10000000 # And more for empirical marginal MAP pebesM=emresu$p mubesM=emresu$mu sibesM=emresu$sig compas=2*emresu$like+lprior(emresu$p,emresu$mu,emresu$sig,alpha,esse2,enne,nu,xi) besM=-1000000 # First values of the parameters p=rexp(k) p=p/sum(p) mu=mean(obs)+rnorm(k,0,0.5*diff(range(obs))) sig=var(obs)/rgamma(k,2.,2.) # First allocation vector prob=(p%*%uno)*exp(-0.5*(dos%*%t(obs)-mu%*%uno)^2/sig)/sqrt(sig%*%uno) prob=t(t(prob)/apply(prob,2,sum)) z=(k+1)-apply((runif(nobs)4){ dews=1:(3*k) for (i in 1:(3*k-1)) dews[i]=paste("&",digs[i],sep="") dews[3*k]=paste("&",digs[3*k],"\\",sep="") } print(dews,quote=FALSE) } } xlogx=function(x){ x*log(x+1e-80) } EMalgo=function(sampl,k,nRep){ # EM algorithm for the k component mixture model # nRep = number of repetitions of EM (to kill the # starting value effect) nobs=length(sampl) uno=t(rep(1,nobs)) dos=rep(1,k) hiya=-10000 bestmu=rep(0,k) bestsi=rep(1,k) bestpe=rep(1,k)/k for (i in 1:nRep){ oldlike=-2000000 # Random initialisation : gu=mean(sampl)+1.5*sd(sampl)*rnorm(k) su=-var(sampl)/log(runif(k)) pu=runif(k) pu=pu/sum(pu) for (t in 2:100){ # 100 is the number of EM steps # Allocation probabilities probs=(pu%*%uno)*exp(-0.5*(dos%*%t(sampl)-gu%*%uno)^2/su)/sqrt(su%*%uno) zeds=t(t(probs)/apply(probs,2,sum)) # EM step pu=apply(zeds,1,sum)/nobs gu=(sampl%*%t(zeds)/apply(zeds,1,sum))[1,] su=apply(zeds*(dos%*%t(sampl)-gu%*%uno)^2,1,sum)/apply(zeds,1,sum) # Log-likelihood like=sum(log(apply(probs,2,sum))) # Stopping rule if (oldlike==like) break() oldlike=like } # Update if (like>hiya){ bestmu=gu bestsi=su bestpe=pu hiya=like } } list(mu=gu,sig=su,prob=pu,like=hiya) }