
# Written for FACS Research Group
# By Sam Wang (kwang@maths.uq.edu.au)
# On 5 Sept, 2007.
# Adapted for X-Cyt
# By Xinli Hu (xinlihu@broadinstitute.org)



# Cluster one tab-separated data file with EmSkew and write the results.
#
# Reads datafiles[id] (with a header row) from `datadir`, optionally
# downsamples it, removes the rows sitting on the minimum of each clustered
# column, restricts clustering to the rows flagged by the `subs` columns (if
# any) and then fits one mixture model per entry of `g`.
#
# For every fit with obj$error == 0 the following files are written into
# `analysisdir`:
#   <name>.<channels>.<dist>.<g>.ret            the fit object, via save()
#   <name>.<mnames>.<dist>.<g>.membership.txt   data + cluster + tau
#                                               (+ one 0/1 column per population)
#   <name>.g<g>.pairplots.png                   pairs plot coloured by cluster
# A fit with a non-zero obj$error is dput() to
#   <name>.<channels>.<dist>.<g>.error
# where <name> is the file name with ".txt" / ".membership" and anything after
# them stripped, and <channels> are the column names of the clustered data.
#
# Arguments:
#   datadir       directory containing the input file.
#   downsample    percentage of rows to keep (values above 100 are capped).
#   datafiles,id  vector of file names and the index of the file to process.
#   g             vector of component counts; one fit is run per value.
#   dim2cluster   columns of the data to cluster on.
#   dist, ncov    passed to EmSkew as `distr` and `ncov`.
#   seed          RNG seed used for downsampling and plot subsampling.
#   analysisdir   directory the output files are written to.
#   init          optional initial values forwarded to EmSkew.
#   mnames        marker names used in the membership file name and as
#                 pair-plot labels; if empty the plot falls back to the
#                 clustered column names.
#   populations   population names; "" entries are ignored.
#   popclus       per population, a comma-separated list of cluster numbers.
#   subs          names of 0/1 annotation columns; rows whose annotations sum
#                 to >= 1 are clustered, the rest are appended to the output
#                 with NA cluster/tau. "" entries are ignored.
#   step, output.prefix, taocut
#                 accepted for interface compatibility; not referenced here.
#
# Side effects: changes the working directory (it is left wherever the last
# setwd() put it) and sets the RNG seed. Returns NULL invisibly.
#
# NOTE(review): EmSkew is not defined in this file; it must be available to
# the caller (presumably from the EMMIXskew package - confirm).
callme <- function(datadir, downsample = 100, datafiles, id, g = c(3:5),
                   dim2cluster, dist = "mvn", ncov = 3, seed = 123456,
                   step = 1, analysisdir, init = NULL, output.prefix, mnames,
                   populations, popclus, taocut = 0.95, subs) {
  setwd(datadir)
  # Seed the RNG so that downsampling and plot subsampling are reproducible.
  set.seed(seed)

  cat("read data file", "\n")
  datafile <- read.table(datafiles[id], header = TRUE, sep = "\t")
  headers <- names(datafile)

  # Downsample to `downsample` percent of the rows (never more than 100%).
  downsample <- min(downsample, 100)
  n_keep <- trunc(downsample / 100 * nrow(datafile))
  picked <- sample.int(nrow(datafile), n_keep)
  dat.full <- as.matrix(datafile[picked, , drop = FALSE])

  # Take out (lower) border cells: for each clustered column in turn, drop the
  # rows equal to that column's current minimum.
  for (d in dim2cluster) {
    column <- dat.full[, d]
    dat.full <- dat.full[which(column != min(column)), , drop = FALSE]
  }

  # Extract the population to cluster. `dex` is clustered; `din` is carried
  # through to the output unclustered. drop = FALSE keeps both as matrices even
  # when only a single row or column is selected.
  subs <- subs[subs != ""]
  has_subs <- length(subs) > 0
  if (has_subs) {
    sub_cols <- unlist(lapply(subs, function(s) which(headers == s)))
    anno_sum <- rowSums(dat.full[, sub_cols, drop = FALSE], na.rm = TRUE)
    din <- dat.full[which(anno_sum < 1), , drop = FALSE]
    dex <- dat.full[which(anno_sum >= 1), , drop = FALSE]
  } else {
    dex <- dat.full
    din <- NULL
  }

  dat <- dex[, dim2cluster, drop = FALSE]
  channel.names <- colnames(dat)

  # Base name for output files: the input name up to ".txt" / ".membership".
  name <- strsplit(datafiles[id], "\\.txt")[[1]][1]
  name <- strsplit(name, "\\.membership")[[1]][1]

  # Drop blank population names. When popclus is parallel to populations,
  # drop the matching entries too so populations[p] and popclus[p] stay
  # aligned; popclus is not touched at all when nothing needs realigning.
  pop_keep <- populations != ""
  if (!anyNA(pop_keep) && any(pop_keep) && !all(pop_keep) &&
    length(popclus) == length(pop_keep)) {
    popclus <- popclus[pop_keep]
  }
  populations <- populations[pop_keep]

  for (h in g) {
    setwd(datadir)
    print(paste("start fitting", h, sep = " "))
    ptm <- proc.time()
    if (is.null(init)) {
      obj <- EmSkew(dat = dat, g = h, distr = dist, ncov = ncov, nrandom = 50)
    } else {
      obj <- EmSkew(
        dat = dat, g = h, distr = dist, ncov = ncov, init = init,
        nrandom = 50
      )
    }
    obj$runtime <- proc.time() - ptm

    fit_stem <- paste(name, paste(channel.names, collapse = "+"), dist, h,
      sep = "."
    )
    setwd(analysisdir)

    if (obj$error != 0) {
      # Failed fit: keep the returned object for inspection and move on.
      dput(obj, paste(fit_stem, "error", sep = "."))
      next
    }

    # 1. write the fit object to a .ret file (read it back with load())
    print("write .ret")
    save(obj, file = paste(fit_stem, "ret", sep = "."))

    # 2. write the per-row results to a txt file
    print("write membership.txt")
    clust <- obj$clust
    tau <- apply(obj$tau, 1, max)

    if (length(populations) > 0) {
      # One 0/1 indicator column per population: 1 when the row's cluster is
      # listed in that population's popclus entry.
      popanno <- matrix(0, nrow = nrow(dat), ncol = length(populations))
      colnames(popanno) <- populations
      for (p in seq_along(populations)) {
        pclus <- as.numeric(strsplit(popclus[p], ",")[[1]])
        popanno[which(clust %in% pclus[!is.na(pclus)]), p] <- 1
      }
      dataout <- cbind(dex, clust, tau, popanno)
    } else {
      dataout <- cbind(dex, clust, tau)
    }

    if (has_subs) {
      # Unclustered rows get NA for cluster, tau and every population column.
      pad <- matrix(NA, nrow = nrow(din), ncol = 2 + length(populations))
      dataoutput <- rbind(dataout, cbind(din, pad))
    } else {
      dataoutput <- dataout
    }
    write.table(dataoutput,
      file = paste(name, paste(mnames, collapse = "+"), dist, h,
        "membership.txt",
        sep = "."
      ),
      sep = "\t",
      col.names = c(headers, "cluster", "tau", populations),
      row.names = FALSE
    )

    # 3. pairplot of (at most 100000 randomly chosen) clustered rows
    print("plotting clusters")
    labels <- if (length(mnames) == 0) channel.names else mnames
    s <- sample.int(nrow(dex), min(100000, nrow(dex)))
    colors <- rainbow(max(clust))
    png(
      filename = paste(name, paste("g", h, collapse = "", sep = ""),
        "pairplots.png",
        sep = "."
      ),
      width = 960, height = 960
    )
    # Close the device even if pairs() fails, so it is never left open.
    tryCatch(
      pairs(dex[s, dim2cluster, drop = FALSE],
        main = name, pch = ".", labels = labels, col = colors[clust[s]],
        cex = 2
      ),
      finally = dev.off()
    )
  }

  invisible(NULL)
}