# function performing the Collapsed Gibbs Sampler algorithm # E is the matrix of expression data to be analyzed: one gene per row and one experiment per column; # pi is the matrix specifying the prior probabilities of existence of a binding site for all the transcription factors considered in the uup-stream regions of the genes in E. It has the same number of rows of E and as many columns as transcription factors. # alpha and beta are the hyperparameters of the prior on the gene expression variances. If one value for each is supplied, it is assumed that all genes have the same variance. Otherwise, supply a vector of length equal to the number of genes. # sigmaa is the hyperparameter of the prior on a_ij # sigmap is the hyperparameter of the prior on p_ij # the remaining arguments specify the lenght of the run, if to print on screen the iteration number while executing, the burn-in time (B) and every how many iterations values should be recorded (Print) # Note that this funtion print results to files directly. The names of the file are specified at the end of the function. Results will need to be loaded in R for analysis. To avoid performing costly computations a-posteriori, the values of p tilda and a tilda are computed and stored. Bayesalg<-function(E,pi,alpha,beta,sigmaa,sigmap,run=11000,trace=T,B=1000,Print=10){ library(MASS) N <- nrow(E) M <- ncol(E) L <- ncol(pi) A <- matrix(0,N,L) edges<-pi>0 totA<-sum(edges==T) P <- matrix(rnorm(L*M)*sigmap,L,M) if(length(beta)==1) { sigmasq <- rep(beta/alpha,N) } else { sigmasq<-beta/alpha } Z <- pi missingE <- is.na(E) # precompute the model matrices for the Z AllZ<-vector(length=N,mode="list") nonzeroz<-vector(length=N,mode="list") nz<-rep(0,N) for (i in 1:N){ nonzeroz[[i]] <- pi[i,]!=0 & pi[i,]!=1 nz[i] <- sum(nonzeroz[[i]]) AllZ[[i]] <- matrix(0,2^nz[i],nz[i]) for (j in 1:nz[i]){ AllZ[[i]][,j] <- rep(0:1,c(2^(nz[i]-j),2^(nz[i]-j)))} } # MCMC iterations for (r in 1:run){ #Impute missing E E[missingE] <- (A%*%P +matrix(rnorm(N*M,mean=0,sd=1),N,M)*sqrt(sigmasq))[missingE] #Sample Z for (i in 1:N) { AllProbs <- rep(0,2^nz[i]) tempz <- pi[i,] for (j in 1:2^nz[i]) { tempz[nonzeroz[[i]]] <- AllZ[[i]][j,] if(sum(tempz)==0){AllProbs[j] <-prod((1-pi[i,nonzeroz[[i]]]))} else { PZ <- matrix(P[tempz>0,],ncol=M) varinva <- PZ%*%t(PZ)/sigmasq[i]+diag(nrow(PZ))/sigmaa^2 AllProbs[j] <- prod(pi[i,nonzeroz[[i]]]^tempz[nonzeroz[[i]]])*prod((1-pi[i,nonzeroz[[i]]])^(1-tempz[nonzeroz[[i]]]))/(sqrt(det(varinva))*(sigmaa)^nrow(PZ))*exp(t(PZ%*%E[i,])%*%solve(varinva)%*%PZ%*%E[i,]/(2*sigmasq[i]^2)) } } AllProbs[AllProbs>10^300] <- 10^300 AllProbs<-AllProbs/sum(AllProbs) Z[i,nonzeroz[[i]]] <- AllZ[[i]][sample(2^nz[i],1,prob=AllProbs,replace=T),] } #Sample A for (i in 1:N) { if(sum(Z[i,]>0)==0){A[i,]<-0} else{ PZ <- matrix(P[Z[i,]>0,],ncol=M) vara <- solve(PZ%*%t(PZ)/sigmasq[i]+diag(nrow(PZ))/sigmaa^2) meana <- vara%*%PZ%*%E[i,]/sigmasq[i] A[i,Z[i,]>0] <- mvrnorm(1,meana,vara) A[i,Z[i,]==0] <- 0} } #Sample P AZ <- A AZ[Z==0] <- 0 varp <- solve(t(AZ)%*%(AZ/sigmasq) + diag(L)/sigmap^2) for (t in 1:M) { meanp <- varp%*%t(AZ)%*%(E[,t]/sigmasq) P[,t] <- mvrnorm(1,meanp,varp) } #Sample sigmasq SSE <- apply((E -A%*%P)^2,1,sum) if(length(beta)==1) { sigmasq<-rep(1/rgamma(1,shape=alpha+M*N/2,rate=beta+sum(SSE)/2),N) } else { sigmasq <- 1/rgamma(N,shape=alpha+M/2,rate=beta+SSE/2) } #Output results to files if(r%%Print==0) { write.table(matrix(round(c(r,sum(SSE)),4),1,2), file = "GibbsSSE", append = T, quote = F, sep = ", ", row.names=F, col.names=F) if(r>B) { #create identifiable results IP<-P*(apply(A,2,sum)/apply(A!=0,2,sum)) IA<-t(t(A)*apply(P,1,sum)/M) #write the values to be stored write.table(matrix(round(c(r,as.vector(P)),4),1,M*L+1), file = "GibbsP", append = T, quote = F, sep = ", ", row.names=F, col.names=F) write.table(matrix(round(c(r,as.vector(A[edges==T])),4),1,totA+1), file = "GibbsA", append = T, quote = F, sep = ", ", row.names=F, col.names=F) write.table(matrix(round(c(r,as.vector(IP)),4),1,M*L+1), file = "GibbsIP", append = T, quote = F, sep = ", ", row.names=F, col.names=F) write.table(matrix(round(c(r,as.vector(IA[edges==T])),4),1,totA+1), file = "GibbsIA", append = T, quote = F, sep = ", ", row.names=F, col.names=F) write.table(matrix(round(c(r,as.vector(Z[edges==T])),4),1,totA+1), file = "GibbsZ", append = T, quote = F, sep = ", ", row.names=F, col.names=F) if(length(beta)==1) { write.table(matrix(round(c(r,sigmasq[1]),4),1,2), file = "GibbsSigma", append = T, quote = F, sep = ", ", row.names=F, col.names=F) } else{ write.table(matrix(round(c(r,as.vector(sigmasq)),4),1,N+1), file = "GibbsSigma", append = T, quote = F, sep = ", ", row.names=F, col.names=F) } } } if(trace) print(r) } }