## Load source code

## Trailing command-line arguments. commandArgs(TRUE) returns a character
## vector with one element per argument.
args <- commandArgs(TRUE)

## Each argument is parsed and evaluated as an R expression at top level, so
## any variable it assigns becomes a global. When nothing was passed, only a
## message is printed.
## NOTE(review): this evaluates arbitrary code from the command line; only run
## the script with trusted arguments.
if (length(args) > 0) {
  for (i in seq_along(args)) {
    eval(parse(text = args[[i]]))
  }
} else {
  print("No arguments supplied.")
}

## Echo the settings the rest of the script reads. These are presumably
## defined by the expressions evaluated above; a missing one stops the script
## here with an "object not found" error.
print(tum)
print(fasta)
print(samp)
print(c)
print(outstem)
print(readsdir)
print(codedir)

## Load the helper functions shipped in codedir.
source(paste0(codedir, "/MiscFunctions.R"))
source(paste0(codedir, "/functions.R"))

##
## Process Reads


## Step 2: for each chr file, sort reads into events for both tumor and normal

 ## Write a two-panel PDF showing where reads align along a TE.
 ##
 ## plotname: path of the PDF file to create.
 ## pos:      numeric vector of signed positions. Values > 0 go to the top
 ##           panel ("TE"), values < 0 go to the bottom panel as absolute
 ##           values ("RC of TE"). Zeros and NAs are ignored.
 ##
 ## Both panels share an x-axis running from 0 to the largest magnitude shown,
 ## so neither strand is cut off. A panel with no values is drawn as an empty
 ## frame instead of failing. The PDF device is closed even if plotting fails.
 ##
 ## Returns the value of dev.off().
 plot_hist <- function(plotname, pos) {
	forward <- pos[which(pos > 0)]
	revcomp <- abs(pos[which(pos < 0)])

	# Shared upper x limit; fall back to 1 when there is nothing to draw.
	shown <- c(forward, revcomp)
	xmax <- if (length(shown) > 0) max(shown) else 1

	draw_panel <- function(values, colour, title, xlabel) {
		if (length(values) > 0) {
			hist(values, col = colour, xlim = c(0, xmax), main = title, xlab = xlabel)
		} else {
			# hist() errors on an empty vector, so draw an empty frame.
			plot(NA, xlim = c(0, xmax), ylim = c(0, 1), main = title,
				xlab = xlabel, ylab = "Frequency")
		}
	}

	pdf(plotname)
	pdf_dev <- dev.cur()
	device_open <- TRUE
	# Close the device on any exit path so a plotting error does not leak it.
	on.exit(if (device_open) dev.off(pdf_dev), add = TRUE)

	par(mfrow = c(2, 1))
	draw_panel(forward, "red", "Where reads align to TE", "Position in TE")
	draw_panel(revcomp, "blue", "Where reads align to RC of TE", "Position in RC of TE")

	device_open <- FALSE
	dev.off(pdf_dev)
}

## Make clusters
##
## Read the parsed alignments for one sample/chromosome, cluster the forward-
## and reverse-clipped reads separately for each TE element, pair the clusters
## up, and return the filtered set.
##
## Arguments:
##   weirdsfile  path of a tab-separated file with a header row. The columns
##               used here are AlignedChromosome, AlignedPosition, WhichClipped
##               and fasta; cluster_reads() reads further columns.
##   readsfile   accepted for interface compatibility; not used in this function.
##   TElengths   named vector; the third "|"-separated field of each name gives
##               the element types that are clustered.
##
## Returns a data frame of clusters ordered by decreasing nreads, or NA when
## the file cannot be read or no clusters were produced (which includes the
## case where the file lacks forward or reverse reads altogether).
get_candidates <- function(weirdsfile, readsfile, TElengths) {
	third_field <- function(x) unlist(strsplit(x, split = "\\|"))[3]
	elements <- unique(vapply(names(TElengths), third_field, character(1), USE.NAMES = FALSE))

	## Read in weird aligning reads
	allweirds <- try(read.delim(weirdsfile, header = TRUE, sep = "\t", stringsAsFactors = FALSE), silent = TRUE)
	if (inherits(allweirds, "try-error")) {
		print("Weirds file does not exist")
		return(NA)
	}

	## Sort by chromosome and start position
	allweirds <- allweirds[order(allweirds[, "AlignedChromosome"], allweirds[, "AlignedPosition"]), ]

	## No quality filtering is applied at present. Earlier versions filtered on
	## blast e-value (< 0.4) and on pairmate mapping quality (> 10); reads whose
	## pairmate was not recorded (duplicate / not primary alignment) are NA there.
	weirds <- allweirds

	## Tag each read with its element type (third field of the fasta name).
	## Assigning the column directly also works when there are no rows.
	weirds$Element <- vapply(weirds[, "fasta"], third_field, character(1), USE.NAMES = FALSE)

	## Split by which end of the read was clipped. No de-duplication of start
	## positions is done here.
	all_forward <- subset(weirds, WhichClipped == 1) # clipped end
	all_reverse <- subset(weirds, WhichClipped == 0) # clipped beginning

	olap <- 10
	gap <- 50
	## Clusters need more than this many reads in total (i.e. at least 3)
	mintotreads <- 2

	allclustered <- NULL
	if (nrow(all_reverse) > 0 && nrow(all_forward) > 0) {
		for (el in elements) {
			print(el)
			forwards <- subset(all_forward, Element == el)
			reverse <- subset(all_reverse, Element == el)
			if (nrow(forwards) == 0 && nrow(reverse) == 0) next

			print("Clustering candidates...")
			fcands <- cluster_reads(forwards, TElengths, 3)
			rcands <- cluster_reads(reverse, TElengths, 3)

			## Nothing to pair for this element: move on to the next one rather
			## than abandoning clusters already collected for other elements.
			if (nrow(fcands) == 0 && nrow(rcands) == 0) next

			clustered <- overlap_cands(fcands, rcands, olap, gap)
			clustered <- subset(clustered, nreads > mintotreads)
			allclustered <- rbind(allclustered, clustered)
		}
	}

	## The loop was skipped or never produced a table: report "no result" with
	## the same NA sentinel used for an unreadable file.
	if (!is.data.frame(allclustered)) {
		print("No clusters found")
		return(NA)
	}

	allclustered <- allclustered[order(allclustered[, "nreads"], decreasing = TRUE), ]

	## Number of " // "-separated entries in TE_counts for each cluster.
	## vapply keeps this an integer vector even when there are no rows.
	count_entries <- function(x) length(unlist(strsplit(x, split = " // ")))
	counts <- vapply(allclustered[, "TE_counts"], count_entries, integer(1), USE.NAMES = FALSE)

	## Drop clusters with exactly as many TE_counts entries as reads
	singlealign <- which(counts / allclustered[, "nreads"] == 1)
	if (length(singlealign) > 0) {
		allclustered <- allclustered[-singlealign, ]
		counts <- counts[-singlealign]
	}

	## Drop clusters with too many alignments (16 or more entries)
	allclustered <- allclustered[which(counts < 16), ]

	print("Done clustering")

	return(allclustered)
}


##
## Overlap forward and reverse clusters
##
## Pairs each forward-clipped candidate with a reverse-clipped candidate whose
## clip position lies in a window around it, then appends the unpaired
## candidates from both sides.
##
## Arguments:
##   fcands, rcands  candidate tables; the body reads the columns chr, clip,
##                   start, stop, nreads, meanqual, minpos, maxpos, TE,
##                   TEcounts and Reads. minpos/maxpos/TEcounts are parsed as
##                   " // "-separated "name:value" entries.
##   olap, gap       window bounds: a reverse candidate qualifies when its clip
##                   is > (forward clip - olap) and <= (forward clip + gap),
##                   and also > the forward clip (see note in the loop).
##
## Returns a data frame with the 29 columns named below. has_partner holds the
## row index of the reverse partner for paired rows.
overlap_cands = function( fcands, rcands, olap, gap ) {
	 print("Overlapping cands...") 
	 
	 # No forward candidates: the result is built from the reverse candidates
	 # alone and returned straight away.
	 if(nrow(fcands) == 0) {
	 	 partner=NA
	 	 rcands = cbind(rcands,partner)
	 	 combined = data.frame(matrix(nrow=nrow(rcands),ncol = 29))
	 	 colnames(combined) = c("chr","clip","start","stop","start_f","stop_f","start_r","stop_r","nreads","nreads_f","nreads_r","TE","bkp_start","bkp_stop","TErange","meanq_f","meanq_r","minpos_f","maxpos_f","minpos_r","maxpos_r","TE_counts","Reads_f","Reads_r","TE_alt","allpos_f","allpos_r","has_partner","overlap")
	 	 combined[,c("chr","clip","start","stop","start_r","stop_r","nreads","nreads_r","meanq_r","TE","TE_counts","Reads_r")] = rcands[,c("chr","clip","start","stop","start","stop","nreads","nreads","meanqual","TE","TEcounts","Reads")]
	
		 # Value of the first " // " entry of minpos / maxpos for each candidate
		 rsingleminposes = as.numeric(sapply(rcands[,"minpos"],function(x) unlist(strsplit(unlist(strsplit(x,split=" // "))[1],split=":"))[2]))
		 rsinglemaxposes = as.numeric(sapply(rcands[,"maxpos"],function(x) unlist(strsplit(unlist(strsplit(x,split=" // "))[1],split=":"))[2]))

		combined[,"TErange"] = paste(rsingleminposes,"-",rsinglemaxposes,sep="")
		combined[,"minpos_r"] = rsingleminposes
		combined[,"maxpos_r"] = rsinglemaxposes
		combined[,"allpos_r"] = paste("Min- ",rcands[,"minpos"],"Max- ",rcands[,"maxpos"])

		# Unpaired candidate: both breakpoint ends are its clip position
		combined[,"bkp_start"] = rcands[,"clip"]
		combined[,"bkp_stop"] = rcands[,"clip"]

	 	 # NOTE(review): counter is assigned here but not read before the return.
	 	 counter=1
	 	 return(combined)

	 } else {
	 	partner=NA
	 	fcands = cbind(fcands,partner)
	 	# One row per candidate is preallocated; rows are filled from the top
	 	# as pairs are found, and counter is the next free row.
	 	combined = matrix(nrow=(nrow(fcands)+nrow(rcands)),ncol = 29)
	 	colnames(combined) = c("chr","clip","start","stop","start_f","stop_f","start_r","stop_r","nreads","nreads_f","nreads_r","TE","bkp_start","bkp_stop","TErange","meanq_f","meanq_r","minpos_f","maxpos_f","minpos_r","maxpos_r","TE_counts","Reads_f","Reads_r","TE_alt","allpos_f","allpos_r","has_partner","overlap")
	 	counter=1

	 	for( i in 1:nrow(fcands) ) { 
	 	
	 		# Reverse candidates inside the window around this forward clip.
	 		# NOTE(review): the test "rcands clip > fcands clip" appears twice;
	 		# for olap >= 0 it makes the "> clip - olap" bound redundant -- confirm intent.
	 		part = which(rcands[,"clip"] <= (fcands[i,"clip"])+gap & rcands[,"clip"] > (fcands[i,"clip"]-olap) 
				& rcands[,"clip"] > fcands[i,"clip"] & rcands[,"clip"] > fcands[i,"clip"] )
	 	
	 	
	 		if(length(part) == 0) { # no partner
	 			next;
	 		}
	 		if(length(part) > 1) {
	 			# Several possible partners: keep the one with the most reads
	 			#print(paste(i,"has multiple partners"))
	 			part = part[which.max(rcands[part,"nreads"])]
	 		}
	 		if(length(part) == 1) {
	 			# NOTE(review): every selected partner has clip > the forward clip,
	 			# so this comparison is FALSE for all of them -- confirm.
	 			overlap = ifelse(rcands[part,"clip"] < fcands[i,"clip"],TRUE,FALSE)
	 			combined[counter,"overlap"] = overlap
	 			fcands[i,"partner"] = part
	 			combined[counter,"has_partner"] = part
	 		
	 			# NOTE(review): maxTE is computed but not used later in this function.
	 			maxTE = which.max(c(fcands[i,"nreads"],rcands[part,"nreads"]))
	 		
	 			## Per-TE matrices for the forward and the reverse candidate:
	 			## one column per TE name, rows minpos / maxpos / nreads, parsed
	 			## from the "name:value // name:value" strings.
	 			minposes = unlist(strsplit(unlist(strsplit(fcands[i,"minpos"],split=" // ")),split=":"))
	 			formatrix = matrix(nrow=3, ncol=length(minposes)/2)
	 			colnames(formatrix) = minposes[seq(1,length(minposes),2)]
	 			rownames(formatrix) = c("minpos","maxpos","nreads")
	 			formatrix["minpos",] = as.numeric(minposes[seq(2,length(minposes),2)])
	 			maxposes = unlist(strsplit(unlist(strsplit(fcands[i,"maxpos"],split=" // ")),split=":"))
				formatrix["maxpos",] = as.numeric(maxposes[seq(2,length(maxposes),2)])
	 			nreadses = unlist(strsplit(unlist(strsplit(fcands[i,"TEcounts"],split=" // ")),split=":"))
	 			formatrix["nreads",] = as.numeric(nreadses[seq(2,length(nreadses),2)])

				minposes = unlist(strsplit(unlist(strsplit(rcands[part,"minpos"],split=" // ")),split=":"))
	 			revmatrix = matrix(nrow=3, ncol=length(minposes)/2)
	 			colnames(revmatrix) = minposes[seq(1,length(minposes),2)]
	 			rownames(revmatrix) = c("minpos","maxpos","nreads")
	 			revmatrix["minpos",] = as.numeric(minposes[seq(2,length(minposes),2)])
	 			maxposes = unlist(strsplit(unlist(strsplit(rcands[part,"maxpos"],split=" // ")),split=":"))
				revmatrix["maxpos",] = as.numeric(maxposes[seq(2,length(maxposes),2)])
	 			nreadses = unlist(strsplit(unlist(strsplit(rcands[part,"TEcounts"],split=" // ")),split=":"))
	 			revmatrix["nreads",] = as.numeric(nreadses[seq(2,length(nreadses),2)])
	 		
	 			# Compare the read counts of the first-listed TE on each side;
	 			# the side with more reads names the TE for the pair. The other
	 			# side's positions come from the same TE name if it is listed,
	 			# else from a TE whose second "|" field matches (recorded in
	 			# TE_alt), else they are NA.
	 			whichforrev = which.max(c(formatrix["nreads",1],revmatrix["nreads",1]))
	 			if(whichforrev == 1) { ## Forward has most reads	
	 				combined[counter,"minpos_f"] = formatrix["minpos",1]
	 				combined[counter,"maxpos_f"] = formatrix["maxpos",1]
	 				combined[counter,"TE"] = colnames(formatrix)[1]
	 				whichrev = which(colnames(revmatrix) == colnames(formatrix)[1])
	 				if(length(whichrev)>0) { ##Same element 
	 					combined[counter,"minpos_r"] = revmatrix["minpos",whichrev]
	 					combined[counter,"maxpos_r"] = revmatrix["maxpos",whichrev]
	 				} else { ## Same family at least
	 					revfamilies = sapply(colnames(revmatrix),function(x) unlist(strsplit(x,split="\\|"))[2])
	 					forfam = unlist(strsplit(colnames(formatrix)[1],split="\\|"))[2]
	 					whichrevfam = which(revfamilies == forfam) 
	 					if(length(whichrevfam)>0) { ## same family
	 						combined[counter,"minpos_r"] = revmatrix["minpos",whichrevfam[1]]
	 						combined[counter,"maxpos_r"] = revmatrix["maxpos",whichrevfam[1]]
	 						combined[counter,"TE_alt"] = colnames(revmatrix)[whichrevfam[1]]
	 					} else {
	 						combined[counter,"minpos_r"] = combined[counter,"maxpos_r"] = NA
	 					}
	 				}
	 			}
	 			if(whichforrev == 2) { ## Reverse has most reads	
	 				combined[counter,"minpos_r"] = revmatrix["minpos",1]
	 				combined[counter,"maxpos_r"] = revmatrix["maxpos",1]
	 				combined[counter,"TE"] = colnames(revmatrix)[1]
	 				whichfor = which(colnames(formatrix) == colnames(revmatrix)[1])
	 				if(length(whichfor)>0) { ##Same element 
	 					combined[counter,"minpos_f"] = formatrix["minpos",whichfor]
	 					combined[counter,"maxpos_f"] = formatrix["maxpos",whichfor]
	 				} else { ## Same family at least
	 					forfamilies = sapply(colnames(formatrix),function(x) unlist(strsplit(x,split="\\|"))[2])
	 					revfam = unlist(strsplit(colnames(revmatrix)[1],split="\\|"))[2]
	 					whichforfam = which(forfamilies == revfam) 
	 					if(length(whichforfam)>0) { ## same family
	 						combined[counter,"minpos_f"] = formatrix["minpos",whichforfam[1]]
	 						combined[counter,"maxpos_f"] = formatrix["maxpos",whichforfam[1]]
	 						combined[counter,"TE_alt"] = colnames(formatrix)[whichforfam[1]]
	 					} else {
	 						combined[counter,"minpos_f"] = combined[counter,"maxpos_f"] = NA
	 					}
	 				}
				}
	 			# TErange spans the smallest min and largest max position of both sides
	 			combined[counter,"TErange"] = paste(min(as.numeric(combined[counter,"minpos_f"]),as.numeric(combined[counter,"minpos_r"]),na.rm=T),"-",
	 				max(as.numeric(combined[counter,"maxpos_f"]),as.numeric(combined[counter,"maxpos_r"]),na.rm=T),sep="")
	 			# TE_alt is only kept when it differs from TE
	 			if( !is.na(combined[counter,"TE_alt"]) & combined[counter,"TE_alt"] == combined[counter,"TE"]) { combined[counter,"TE_alt"]=NA }
	 		
	 			combined[counter,"chr"] = fcands[i,"chr"]
	 			# NOTE(review): "start" is assigned twice in a row, so the first
	 			# value (the forward clip) is overwritten and "clip" is never set
	 			# for paired rows -- presumably the first target was meant to be
	 			# "clip"; confirm.
	 			combined[counter,"start"] = fcands[i,"clip"]
	 			combined[counter,"start"] = fcands[i,"start"]
	 			combined[counter,"stop"] = rcands[part,"stop"]
		 		combined[counter,"start_f"] = fcands[i,"start"]
		 		combined[counter,"stop_f"] = fcands[i,"stop"]
	 			combined[counter,"start_r"] = rcands[part,"start"]
	 			combined[counter,"stop_r"] = rcands[part,"stop"]
	 			combined[counter,"nreads"] = fcands[i,"nreads"]+rcands[part,"nreads"]
	 			combined[counter,"nreads_f"] = fcands[i,"nreads"]
		 		combined[counter,"nreads_r"] = rcands[part,"nreads"]
				combined[counter,"meanq_f"] = fcands[i,"meanqual"]
				combined[counter,"meanq_r"] = rcands[part,"meanqual"]
		 		combined[counter,"allpos_f"] = paste("Min- ",fcands[i,"minpos"],"Max- ",fcands[i,"maxpos"])
		 		combined[counter,"allpos_r"] = paste("Min- ",rcands[part,"minpos"],"Max- ",rcands[part,"maxpos"])	 		
		 		combined[counter,"TE_counts"] = paste("F- ",fcands[i,"TEcounts"],"R- ",rcands[part,"TEcounts"])
		 		combined[counter,"Reads_f"] = fcands[i,"Reads"]
		 		combined[counter,"Reads_r"] = rcands[part,"Reads"]
	 		
	 			 		
		 		# Breakpoint: a single position (reverse clip) when the clusters
		 		# overlap, otherwise the span from forward clip to reverse clip
		 		if(overlap) {
	 				combined[counter,"bkp_start"] = combined[counter,"bkp_stop"] = rcands[part,"clip"]
	 			}
				if(!overlap) {
	 				combined[counter,"bkp_start"] = fcands[i,"clip"]
	 				combined[counter,"bkp_stop"] = 	rcands[part,"clip"]
	 			}
	 			counter=counter+1
	 		}
		}
	}
	# Make into data frame
	combined=data.frame(combined,stringsAsFactors=F)
	
	# Only when there are more than 2 rows: keep rows up to and including the
	# first row whose first column is NA, then convert columns to numeric by
	# position.
	# NOTE(review): by the column order above, indices 12 and 15 are TE and
	# TErange (text), while 11, 14 and 21 (nreads_r, bkp_stop, maxpos_r) are
	# not converted -- confirm the index list matches the column layout.
	if(nrow(combined) > 2) {
		combined = combined[1:which(is.na(combined[,1]))[1],]
		for(i in c(1:10,12:13,15:20))
		combined[,i] = as.numeric(combined[,i])
	} else {
		
	}
	
	## Make sure one-one mapping between fcands and rcands:
	## for each reverse partner used by more than one row, one row -- the one
	## with the fewest forward reads -- is removed.
	toremove = vector()
	nonunique = which(table(combined[,"has_partner"]) > 1)
	for(n in names(nonunique)) {
		whichn = which(combined[,"has_partner"] == n)
		toremove = c(toremove, whichn[which.min(combined[whichn,"nreads_f"])])
	}
	if(length(toremove)>0)
	combined = combined[-toremove,]
	

	
	# Add singleton events to end
	# Forward has no reverse partner
	if(nrow(fcands) > 0) { 
	
	fsingles = subset(fcands,is.na(partner))
	
	print(nrow(combined))	
	print(nrow(fsingles))
	if(nrow(fsingles) > 0) {
		# Target rows for the forward singletons: from row 1 when combined has
		# at most 2 rows and its first cell is NA, otherwise starting ON the
		# last row of combined (which is then overwritten).
		# NOTE(review): this presumably relies on the last row being the NA
		# row kept by the trimming above -- confirm.
		if(nrow(combined)<=2 & is.na(combined[1,1])) { 
			print("in first if")
			startc = 1
			endc = nrow(fsingles)
		} else {
			startc = nrow(combined)
			endc = nrow(combined)+nrow(fsingles)-1
		}
		combined[startc:endc,c("chr","clip","start","stop")] = fsingles[,c("chr","clip","start","stop")]
		combined[startc:endc,c("start_f","stop_f","nreads","nreads_f","meanq_f","TE","TE_counts","Reads_f")] = fsingles[,c("start","stop","nreads","nreads","meanqual","TE","TEcounts","Reads")]
		
		# Value of the first " // " entry of minpos / maxpos for each singleton
		fsingleminposes = as.numeric(sapply(fsingles[,"minpos"],function(x) unlist(strsplit(unlist(strsplit(x,split=" // "))[1],split=":"))[2]))
		fsinglemaxposes = as.numeric(sapply(fsingles[,"maxpos"],function(x) unlist(strsplit(unlist(strsplit(x,split=" // "))[1],split=":"))[2]))

		combined[startc:endc,"TErange"] = paste(fsingleminposes,"-",fsinglemaxposes,sep="")
		combined[startc:endc,"minpos_f"] = fsingleminposes
		combined[startc:endc,"maxpos_f"] = fsinglemaxposes
		combined[startc:endc,"allpos_f"] = paste("Min- ",fsingles[,"minpos"],"Max- ",fsingles[,"maxpos"])
		combined[startc:endc,"bkp_start"] = fsingles[,"clip"]
		combined[startc:endc,"bkp_stop"] = 	fsingles[,"clip"]
		
		}
	}
	# Reverse has no forward partner: every reverse row not recorded as a
	# partner of some forward candidate
	if(nrow(rcands) > 0) {
		rsingles = rcands[setdiff(1:nrow(rcands),fcands[,"partner"]),]

		if(nrow(rsingles) > 0) {
			# NOTE(review): when combined has at most 2 rows the reverse
			# singletons are written starting at row 1 without checking what
			# those rows hold -- confirm existing rows cannot be overwritten.
			if(nrow(combined)<=2 ) { 
				startc = 1
				endc = 1+nrow(rsingles)-1
			} else {
				startc = nrow(combined)+1
				endc = nrow(combined)+nrow(rsingles)
			}
		combined[startc:endc,c("chr","clip","start","stop")] = rsingles[,c("chr","clip","start","stop")]
		combined[startc:endc,c("start_r","stop_r","nreads","nreads_r","meanq_r","TE","TE_counts","Reads_r")] = rsingles[,c("start","stop","nreads","nreads","meanqual","TE","TEcounts","Reads")]
	
		rsingleminposes = as.numeric(sapply(rsingles[,"minpos"],function(x) unlist(strsplit(unlist(strsplit(x,split=" // "))[1],split=":"))[2]))
		rsinglemaxposes = as.numeric(sapply(rsingles[,"maxpos"],function(x) unlist(strsplit(unlist(strsplit(x,split=" // "))[1],split=":"))[2]))

		combined[startc:endc,"TErange"] = paste(rsingleminposes,"-",rsinglemaxposes,sep="")
		combined[startc:endc,"minpos_r"] = rsingleminposes
		combined[startc:endc,"maxpos_r"] = rsinglemaxposes
		combined[startc:endc,"allpos_r"] = paste("Min- ",rsingles[,"minpos"],"Max- ",rsingles[,"maxpos"])

		combined[startc:endc,"bkp_start"] = rsingles[,"clip"]
		combined[startc:endc,"bkp_stop"] = 	rsingles[,"clip"]
	
		}
	}

	return(combined)
	
}


##
## Cluster aligned reads by given threshold
##
## Walks the reads in row order and groups consecutive reads that are on the
## same chromosome and whose ClippedPosition is at most `thresh` beyond the
## previous read's. For each group it records which retroelement (TE)
## sequences the reads aligned to and the positions covered on them.
##
## Arguments:
##   weirds     data frame of reads; uses the columns AlignedChromosome,
##              ClippedPosition, AlignedPosition, AlignedEnd, AlignedQual,
##              fastastart, fastastop, fasta and Name.
##   TElengths  named vector; its names are the TE identifiers that reads in
##              multi-read clusters are tallied against. The first three
##              "|"-separated fields of a read's fasta value must equal one of
##              these names, otherwise indexing fails with a subscript error.
##   thresh     largest allowed step in ClippedPosition inside one cluster.
##
## Returns a data frame with numeric columns chr, clip, start, stop, nreads,
## meanqual and character columns minpos, maxpos, TE, TEcounts, Reads:
##   * multi-read cluster: chr/clip come from its first read, start/stop from
##     its last read, meanqual is the rounded mean AlignedQual, TE is the most
##     frequent TE, and TEcounts/minpos/maxpos list "name: value // " entries
##     by decreasing read count; Reads joins the read names with " | ".
##   * single read: the same columns for that read alone.
## When fewer clusters than reads were produced, one trailing all-NA row is
## kept (existing behaviour that callers in this file are written around).
## An input with no rows returns the empty 11-column matrix unchanged.
cluster_reads <- function(weirds, TElengths, thresh) {
	col_names <- c("chr", "clip", "start", "stop", "nreads", "meanqual",
		"minpos", "maxpos", "TE", "TEcounts", "Reads")
	n_reads <- nrow(weirds)
	cands <- matrix(nrow = n_reads, ncol = length(col_names))
	colnames(cands) <- col_names

	if (n_reads == 0) {
		return(cands)
	}

	# Join the chosen "|"-separated fields of a fasta identifier.
	te_label <- function(fasta_id, fields) {
		paste(unlist(strsplit(fasta_id, split = "\\|"))[fields], collapse = "|")
	}
	# One "name: value // " entry per element.
	entry <- function(label, value) paste0(label, ": ", value, " // ")

	if (n_reads == 1) {
		print("weirds has one row")
		# NOTE(review): this path labels positions with the first two name
		# fields, whereas the loop below uses three -- confirm which is intended.
		short_label <- te_label(weirds[1, "fasta"], 1:2)
		te_span <- weirds[1, c("fastastart", "fastastop")]
		cands[1, 1:6] <- c(weirds[1, "AlignedChromosome"], weirds[1, "ClippedPosition"],
			weirds[1, "AlignedPosition"], weirds[1, "AlignedEnd"], 1, weirds[1, "AlignedQual"])
		cands[1, "TE"] <- te_label(weirds[1, "fasta"], 1:3)
		cands[1, "minpos"] <- entry(short_label, min(te_span))
		cands[1, "maxpos"] <- entry(short_label, max(te_span))
		cands[1, "TEcounts"] <- paste0(short_label, ": 1 // ")
		cands[1, "Reads"] <- paste0(weirds[1, "Name"], " | ")
	} else {
		in_cluster <- FALSE
		next_row <- 1

		for (i in seq_len(n_reads)) {
			if (i %% 10000 == 0) print(i)

			te_span <- weirds[i, c("fastastart", "fastastop")]
			minfasta <- min(te_span)
			maxfasta <- max(te_span)
			fastaname <- te_label(weirds[i, "fasta"], 1:3)

			# Does the next read start a new cluster? Row i + 1 does not exist
			# for the last read, which yields NA and is treated as a break.
			nextbreak <- weirds[i + 1, "AlignedChromosome"] != weirds[i, "AlignedChromosome"] |
				weirds[i + 1, "ClippedPosition"] > (weirds[i, "ClippedPosition"] + thresh)
			closes_here <- is.na(nextbreak) || nextbreak

			if (closes_here && !in_cluster) {
				# A read that is a cluster on its own.
				cands[next_row, 1:6] <- c(weirds[i, "AlignedChromosome"], weirds[i, "ClippedPosition"],
					weirds[i, "AlignedPosition"], weirds[i, "AlignedEnd"], 1, weirds[i, "AlignedQual"])
				cands[next_row, "TE"] <- fastaname
				cands[next_row, "TEcounts"] <- paste0(fastaname, ": 1 // ")
				cands[next_row, "Reads"] <- paste0(weirds[i, "Name"], " | ")
				cands[next_row, "minpos"] <- entry(fastaname, minfasta)
				cands[next_row, "maxpos"] <- entry(fastaname, maxfasta)
				next_row <- next_row + 1
				next
			}

			if (!in_cluster) {
				# First read of a multi-read cluster: start the tallies.
				chr <- weirds[i, "AlignedChromosome"]
				first <- weirds[i, "ClippedPosition"]
				n <- 1
				allReads <- weirds[i, "Name"]
				meanq <- weirds[i, "AlignedQual"] # running sum; divided by n on output
				# Rows: read count, smallest position, largest position per TE.
				TEcounts <- matrix(c(0, Inf, 0), nrow = 3, ncol = length(TElengths),
					dimnames = list(NULL, names(TElengths)))
				TEcounts[1, fastaname] <- TEcounts[1, fastaname] + 1
				TEcounts[2, fastaname] <- minfasta
				TEcounts[3, fastaname] <- maxfasta
				in_cluster <- TRUE
			} else {
				# Another read of the open cluster.
				n <- n + 1
				allReads <- paste0(allReads, " | ", weirds[i, "Name"])
				meanq <- sum(meanq, weirds[i, "AlignedQual"], na.rm = TRUE)
				TEcounts[1, fastaname] <- TEcounts[1, fastaname] + 1
				if (minfasta < TEcounts[2, fastaname]) TEcounts[2, fastaname] <- minfasta
				if (maxfasta > TEcounts[3, fastaname]) TEcounts[3, fastaname] <- maxfasta
			}

			if (closes_here) {
				# Last read of the cluster: write the summary row.
				cands[next_row, 1:6] <- c(chr, first, weirds[i, "AlignedPosition"],
					weirds[i, "AlignedEnd"], n, round(meanq / n))
				cands[next_row, "TE"] <- names(which.max(TEcounts[1, ]))

				# TEs that received reads, most frequent first.
				ranked <- TEcounts[, order(TEcounts[1, ], decreasing = TRUE), drop = FALSE]
				hit <- which(ranked[1, ] > 0)
				allTEs <- allminpos <- allmaxpos <- ""
				if (length(hit) > 0) {
					hit_names <- colnames(ranked)[hit]
					allTEs <- paste0(entry(hit_names, ranked[1, hit]), collapse = "")
					allminpos <- paste0(entry(hit_names, ranked[2, hit]), collapse = "")
					allmaxpos <- paste0(entry(hit_names, ranked[3, hit]), collapse = "")
				}
				cands[next_row, "Reads"] <- allReads
				cands[next_row, "TEcounts"] <- allTEs
				cands[next_row, "minpos"] <- allminpos
				cands[next_row, "maxpos"] <- allmaxpos
				next_row <- next_row + 1
				in_cluster <- FALSE
			}
		}

		# Cut off the unused rows, keeping rows up to and including the first
		# NA row (see the note on the return value). drop = FALSE keeps a
		# matrix even if only one row remains.
		if (any(is.na(cands[, 1]))) {
			cands <- cands[1:which(is.na(cands[, 1]))[1], , drop = FALSE]
		}
	}

	# Make into data frame; the first six columns hold numbers.
	cands <- data.frame(cands, stringsAsFactors = FALSE)
	for (j in 1:(ncol(cands) - 5)) {
		cands[, j] <- as.numeric(cands[, j])
	}

	return(cands)
}




## Step 3: Get clusters
##
## Builds the tumor and normal cluster tables for one chromosome and writes
## each to a tab-separated file named from outstem.
##
## Arguments:
##   samp  sample name, used in the reads-file paths.
##   c     chromosome label, used in every file name.
##
## Reads the globals fasta, outstem and readsdir, and calls get_unique_TEs()
## (not defined in this file) to obtain the TE table.
classify_candidates <- function(samp, c) {
	te_lengths <- get_unique_TEs(fasta)

	## Get candidates from Tumor and Normal
	print("Tumor")
	tumor_clusters <- get_candidates(
		paste0(outstem, "-Tumor.chr", c, ".reads.aligned.txtparsed.txt"),
		paste0(readsdir, samp, "-Tumor.chr", c, ".reads.txt"),
		te_lengths
	)
	print("Normal")
	normal_clusters <- get_candidates(
		paste0(outstem, "-Normal.chr", c, ".reads.aligned.txtparsed.txt"),
		paste0(readsdir, samp, "-Normal.chr", c, ".reads.txt"),
		te_lengths
	)

	# Write out all candidates in each just to check germline calling
	tumor_out <- paste0(outstem, "-Tumor.clusters.chr", c, ".txt")
	print(paste("Writing tumor clusters to", tumor_out))
	write.table(tumor_clusters, file = tumor_out, quote = FALSE, sep = "\t")
	write.table(normal_clusters, file = paste0(outstem, "-Normal.clusters.chr", c, ".txt"), quote = FALSE, sep = "\t")
}




## Script entry point: cluster and write the tumor and normal candidates.
## samp and c are globals, presumably set by the command-line expressions
## evaluated at the top of the script.
classify_candidates ( samp, c)
