############### # LIBRARIES # ############### library(MASS) library(cluster) library(survival) library(randomForest) library(Hmisc) ######################################################################################### ######################################################################################### if (exists("Rand") ) rm(Rand) Rand <- function(tab,adjust=T) { ########################################################################## # The function computes the (adjusted) Rand index between two partitions # # Copyright Steve Horvath and Luohua Jiang, UCLA, 2003 # ########################################################################## # helper function choosenew <- function(n,k) { n <- c(n); out1 <- rep(0,length(n)); for (i in c(1:length(n)) ){ if ( n[i] 0) { if (i > 1){ xx <- ((RFproxAddcl1 + (RF1prox[c(1:nrow1),c(1:nrow1)]))/i) - (RFproxAddcl1/(i-1)) yy <- mean( c(as.dist((RFproxAddcl1 + (RF1prox[c(1:nrow1),c(1:nrow1)]))/i))) RFprox1Conver[i,2] <- max(abs(c(as.dist(xx)))) RFprox1Conver[i,3] <- mean((c(as.dist(xx)))^2) RFprox1Conver[i,4] <- yy } RFproxAddcl1 <- RFproxAddcl1 + (RF1prox[c(1:nrow1),c(1:nrow1)]) if(imp) { RFimportance1 <- RFimportance1+ 1/no.rep*(RF1$importance) } RFerrrate1 <- RFerrrate1+ 1/no.rep*(RF1$err.rate[no.tree]) } } } # addcl2 if (addcl2) { for (i in c(0:no.rep)) { index1 <- sample(c(1:(2*nrow1))) rep1[index1] <- c(1:(2*nrow1)) datRFsyn <- synthetic2(datRF)[index1,] yy <- datRFsyn[,1] RF2 <- randomForest(factor(yy)~.,data=datRFsyn[,-1], ntree=no.tree, oob.prox=oob.prox1, proximity=TRUE,do.trace=F,mtry=mtry1,importance=imp) collect.garbage() RF2prox <- RF2$proximity[rep1,rep1] if (i > 0) { if (i > 1){ xx <- ((RFproxAddcl2 + (RF2prox[c(1:nrow1),c(1:nrow1)]))/i) - (RFproxAddcl2/(i-1)) yy <- mean( c(as.dist((RFproxAddcl2 + (RF2prox[c(1:nrow1),c(1:nrow1)]))/i))) RFprox2Conver[i,2] <- max(abs(c(as.dist(xx)))) RFprox2Conver[i,3] <- mean((c(as.dist(xx)))^2) RFprox2Conver[i,4] <- yy } RFproxAddcl2 <- RFproxAddcl2 + (RF2prox[c(1:nrow1),c(1:nrow1)]) if(imp) { RFimportance2 <- RFimportance2+ 1/no.rep*(RF2$importance)} RFerrrate2 <- RFerrrate2+ 1/no.rep*(RF2$err.rate[no.tree]) } } } distRFAddcl1 <- cleandist(sqrt(1-RFproxAddcl1/no.rep)) distRFAddcl2 <- cleandist(sqrt(1-RFproxAddcl2/no.rep)) distRF <- list(cl1=NULL, err1=NULL, imp1=NULL, prox1Conver=NULL, cl2=NULL, err2=NULL, imp2=NULL, prox2Conver=NULL) if(addcl1) { distRF$cl1 <- distRFAddcl1 distRF$err1 <- RFerrrate1 if(imp) distRF$imp1 <- RFimportance1 if(proxConver) distRF$prox1Conver <- RFprox1Conver } if(addcl2) { distRF$cl2 <- distRFAddcl2 distRF$err2 <- RFerrrate2 if(imp) distRF$imp2 <- RFimportance2 if(proxConver) distRF$prox2Conver <- RFprox2Conver } distRF }