####################################################################################################
## Add assembly information to candidate list
####################################################################################################
####################################################################################################

####################################################################################################
## Load arguments

args <- commandArgs(trailingOnly = TRUE)

## Every trailing command-line argument is expected to be an R expression
## (typically an assignment such as candfile='...'); each one is parsed and
## evaluated in turn so that it defines a variable for the rest of the script.
## NOTE(review): eval(parse()) runs whatever text is passed on the command line.
if (length(args) > 0) {
    for (i in seq_along(args)) {
        eval(parse(text = args[[i]]))
    }
} else {
    print("No arguments supplied.")
}

## Echo the settings; refdir gets a trailing slash, and the candidate range is
## converted to numbers after being echoed as given
print(candfile)
print(outassemblystem)
print(outreadsstem)
print(assembledcandfile)
print(codedir)
refdir <- paste0(refdir, "/")
print(refdir)
print(startcand)
startcand <- as.numeric(startcand)
print(endcand)
endcand <- as.numeric(endcand)
print(counter)

## Shared helper functions live next to this script in codedir
source(paste0(codedir, "/MiscFunctions.R"))

## Cutoff compared against column 11 of the BLAST tables further down.
## The first value is immediately superseded by the second.
eval <- 2E-07
eval <- 0.001


####################################################################################################
## Helper function: Read assembled contig file and return names, lengths and sequences
####################################################################################################

## Parse an assembled-contig FASTA table into names, lengths and sequences.
##
## Args:
##   contigs: data frame whose first column holds the FASTA file, one line per
##            row. Header lines are the ones containing ">"; the contig name is
##            the text before " K: " (with ">" removed) and the length is the
##            number after "length: ".
##
## Returns:
##   An unnamed list of three parallel vectors: contig names (character),
##   contig lengths (numeric, NA when the header has no "length: " field) and
##   contig sequences (character, all sequence lines of a contig concatenated).
##   A contig whose header is not followed by any sequence line gets "".
##   When there are no header lines at all, three empty vectors are returned.
read_assembly = function ( contigs ) {
	lines = contigs[,1]
	whichseqinfo = grep(">",lines)
	numEls = length(whichseqinfo)
	if( numEls == 0 )
		return(list(character(0),numeric(0),character(0)))

	seqinfo = lines[whichseqinfo]
	lengths = vapply(seqinfo,function(x) as.numeric(unlist(strsplit(x,split="length: "))[2]),numeric(1))
	names = gsub(">","",vapply(seqinfo,function(x) unlist(strsplit(x,split=" K: "))[1],character(1)))

	## Sequence lines of contig i run from the line after its header up to the
	## line before the next header (or the end of the table for the last contig)
	first = whichseqinfo + 1
	last = c(whichseqinfo[-1] - 1, nrow(contigs))
	seqs = vapply(seq_len(numEls),function(i) {
		## first > last means "no sequence lines"; a:b would count backwards here
		if( first[i] > last[i] )
			return("")
		paste(lines[first[i]:last[i]],collapse="")
	},character(1))

	return(list(names,lengths,seqs))
}


####################################################################################################
## For each candidate, determine best contig and alignment and return row of information
####################################################################################################

## Pick one forward and one reverse contig for a candidate and summarise their alignments.
##
## Args:
##   assembleddir: path prefix of the assembly directory; file names are appended to it
##                 directly with no separator, so it must already end in "/".
##
## Reads from that directory (each read is wrapped in try(), so a missing or unreadable
## file yields a "try-error" object instead of stopping):
##   contigs_f.fa / contigs_r.fa    contig FASTA, parsed with read_assembly()
##   blasted_f.txt / blasted_r.txt  headerless tab-delimited table; columns 1, 2, 7-12 are used
##   blatted_f.txt / blatted_r.txt  headerless tab-delimited table; columns 1, 7, 8 are used
## NOTE(review): the column usage looks like BLAST tabular output (11 = e-value, 12 = bit
## score, 7/8 = query start/end, 9/10 = subject start/end) -- confirm against the
## pipeline step that writes these files.
##
## Also reads the free variable `eval` (not an argument): only BLAST rows whose column 11
## is <= eval are considered.
##
## Returns:
##   A named vector of 20 elements (10 "F_*" and 10 "R_*" fields, see names below).
##   Fields that could not be filled stay NA; all 20 are NA when neither direction has a
##   BLAST row passing the cutoff.
add_contig_info_longest = function ( assembleddir ) {
	rowtoreturn = rep(NA,20)
	names(rowtoreturn) = c("F_contig_length","F_contig_ref_start","F_contig_ref_end","F_contig_TE","F_contig_TE_ambiguity","F_contig_alnstart","F_contig_alnend","F_TE_alnstart","F_TE_alnend","F_contig_seq",
	"R_contig_length","R_contig_ref_start","R_contig_ref_end","R_contig_TE","R_contig_TE_ambiguity","R_contig_alnstart","R_contig_alnend","R_TE_alnstart","R_TE_alnend","R_contig_seq")

	blastedfile_f = paste(assembleddir,"blasted_f.txt",sep="")
	blastedfile_r = paste(assembleddir,"blasted_r.txt",sep="")
	contigsfile_f = paste(assembleddir,"contigs_f.fa",sep="")
	contigsfile_r = paste(assembleddir,"contigs_r.fa",sep="")
	blattedfile_f = paste(assembleddir,"blatted_f.txt",sep="")
	blattedfile_r = paste(assembleddir,"blatted_r.txt",sep="")
	## Row indices (into the per-contig BLAST table) of the chosen hit and of the
	## near-equal "ambiguous" hits, per direction
	best_for = best_rev = ambig_for = ambig_rev = NA
	
	## Read in forward direction contigs and BLAST
	## blastf = BLAST table sorted by ascending column 11; blastj_for = rows passing the cutoff
	contigs_f = try( read.delim(contigsfile_f,as.is=T,sep="\t",header=F),silent=T)
	blasted_f = try( read.delim(blastedfile_f,as.is=T,sep="\t",header=F),silent=T)
	blastf = try ( blasted_f[order(blasted_f[,11]),], silent = T )
	blastj_for = try ( subset(blastf,V11 <= eval), silent = T )
	blatted_f = try( read.delim(blattedfile_f,as.is=T,sep="\t",header=F),silent=T)

	## Read in reverse direction contigs and BLAST
	contigs_r = try( read.delim(contigsfile_r,as.is=T,sep="\t",header=F),silent=T)
	blasted_r = try( read.delim(blastedfile_r,as.is=T,sep="\t",header=F),silent=T)
	blastr = try ( blasted_r[order(blasted_r[,11]),], silent = T)
	blastj_rev = try ( subset(blastr,V11 <= eval), silent = T)
	blatted_r = try( read.delim(blattedfile_r,as.is=T,sep="\t",header=F),silent=T)
	
	## No passing alignments
	if ( (class(blastf) == "try-error" || nrow(blastj_for) == 0)  && (class(blastr) == "try-error" || nrow(blastj_rev) == 0) )
		return(rowtoreturn)
		
	## No passing forward alignments: fill in the reverse fields only
	if ( class(blastf) == "try-error" || nrow(blastj_for) == 0 ) { 
		info = read_assembly(contigs_r)
		names = info[[1]]
		lengths = info[[2]]
		seqs = info[[3]]
		longest = which.max(lengths)
		#longest = 1		
		if( class(blatted_r) == "try-error" ) {
			## No BLAT table: use the contig at index 1 (first contig in the file),
			## overriding the which.max() choice above
			longest = 1
			contig_blast = blastj_rev[which(blastj_rev[,1] == names[longest]),]
		} else { 
			## Longest contig has no passing BLAST row or no BLAT row: fall back to the
			## second longest (this index is NA when there is only one contig)
			if( length(which(blastj_rev[,1] == names[longest])) == 0 ||  length(which(blatted_r[,1]==names[longest])) == 0 )
				longest = order(lengths,decreasing=T)[2]				
		contig_blast = blastj_rev[which(blastj_rev[,1] == names[longest]),]
		}
		#threshold = mean(contig_blast[,12]) + (1.5 * sd(contig_blast[,12]))
		#top = which(contig_blast[,12] >= threshold)
		## "Top" hits: rows whose column 12 is within 95% of the first row's column 12.
		## The first of them is the reported hit, the remaining ones are the ambiguity list.
		threshold = 0.95 * contig_blast[1,12]
		top = which(contig_blast[,12] >= threshold)
		#top = which(contig_blast[,11] == min(contig_blast[,11]))
		best_rev = top[1]
		if(length(top)>1) 
			ambig_rev = top[-1]	

		rowtoreturn["R_contig_length"] = lengths[longest]
		rowtoreturn["R_contig_TE"] = contig_blast[best_rev,2]			
		rowtoreturn["R_contig_TE_ambiguity"] = paste(unique(contig_blast[ambig_rev,2]),collapse="; ")
		rowtoreturn["R_contig_alnstart"] = contig_blast[best_rev,7] ## contig alignment start			
		rowtoreturn["R_contig_alnend"] = contig_blast[best_rev,8] ## contig alignment stop			
		rowtoreturn["R_TE_alnstart"] = contig_blast[best_rev,9] ## TE alignment start			
		rowtoreturn["R_TE_alnend"] = contig_blast[best_rev,10] ## TE alignment stop			
		rowtoreturn["R_contig_seq"] = seqs[longest] ## contig sequence			
		## Reference coordinates come from columns 7/8 of the first BLAT row of the chosen contig
		if(class(blatted_r) != "try-error") {
			whichblat = which(blatted_r[,1]==names[longest])
			rowtoreturn["R_contig_ref_start"] = blatted_r[whichblat[1],7]
			rowtoreturn["R_contig_ref_end"] = blatted_r[whichblat[1],8]
		}
	## No passing reverse alignments: mirror image of the branch above, filling the forward fields
	} else if ( class(blastr) == "try-error" || nrow(blastj_rev) == 0 ) { 
		info = read_assembly(contigs_f)
		names = info[[1]]
		lengths = info[[2]]
		seqs = info[[3]]
		longest = which.max(lengths)		
		#longest = 1
		if( class(blatted_f) == "try-error" ) {
			longest = 1
			contig_blast = blastj_for[which(blastj_for[,1] == names[longest]),]
		} else {
			if( length(which(blastj_for[,1] == names[longest])) == 0 ||  length(which(blatted_f[,1]==names[longest])) == 0 )
				longest = order(lengths,decreasing=T)[2]
			contig_blast = blastj_for[which(blastj_for[,1] == names[longest]),]
		}
		threshold = 0.95 * contig_blast[1,12]
		top = which(contig_blast[,12] >= threshold)
		#top = which(contig_blast[,11] == min(contig_blast[,11]))
		best_for = top[1]
		if(length(top)>1) 
			ambig_for = top[-1]	

		rowtoreturn["F_contig_length"] = lengths[longest]
		rowtoreturn["F_contig_TE"] = contig_blast[best_for,2]			
		rowtoreturn["F_contig_TE_ambiguity"] = paste(unique(contig_blast[ambig_for,2]),collapse="; ")
		rowtoreturn["F_contig_alnstart"] = contig_blast[best_for,7] ## contig alignment start			
		rowtoreturn["F_contig_alnend"] = contig_blast[best_for,8] ## contig alignment stop			
		rowtoreturn["F_TE_alnstart"] = contig_blast[best_for,9] ## TE alignment start			
		rowtoreturn["F_TE_alnend"] = contig_blast[best_for,10] ## TE alignment stop			
		rowtoreturn["F_contig_seq"] = seqs[longest] ## contig sequence			
		if(class(blatted_f) != "try-error") {
			whichblat = which(blatted_f[,1]==names[longest])
			rowtoreturn["F_contig_ref_start"] = blatted_f[whichblat[1],7]
			rowtoreturn["F_contig_ref_end"] = blatted_f[whichblat[1],8]
		}
	## Both directions have passing alignments			
	} else {
		info_f = read_assembly(contigs_f)
		names_f = info_f[[1]]
		lengths_f = info_f[[2]]
		seqs_f = info_f[[3]]
		longest_f = which.max(lengths_f)		
		info_r = read_assembly(contigs_r)
		names_r = info_r[[1]]
		lengths_r = info_r[[2]]
		seqs_r = info_r[[3]]
		longest_r = which.max(lengths_r)		

		## Rank (1 = longest) of the contig currently being tried, per direction
		whichlong_f = whichlong_r = 1
		
		## FORWARD
		## IF BLAT files doesnt exist at all just take longest contig
		## NOTE(review): the code below sets the index to 1 (first contig in the file),
		## not to the which.max() result computed above -- confirm which is intended
		if( class(blatted_f) == "try-error" ) {
			longest_f = 1
			contig_blastf = blastj_for[which(blastj_for[,1] == names_f[longest_f]),]
		} else { 
			## IF longest is not present in blast and BLAT file, go to next longest
			if( length(which(blastj_for[,1] == names_f[longest_f])) == 0 || length(which(blatted_f[,1]==names_f[longest_f])) == 0 ) {
				whichlong_f = whichlong_f+1
				longest_f = order(lengths_f,decreasing=T)[whichlong_f]
				}
				
			## BLAT must come before BLAST (with slight overlap allowed)
			## The test below compares single values (first BLAT row vs first BLAST row), so
			## which() yields either 1 or nothing: the BLAST table is reduced to its first row
			## when the overlap is < 60, and to zero rows otherwise
			contig_blastf = blastj_for[which(blastj_for[,1] == names_f[longest_f]),]
			contig_blatf = blatted_f[which(blatted_f[,1]==names_f[longest_f]),]
			if(nrow(contig_blatf) > 0 && nrow(contig_blastf) > 0 && !is.na(longest_f) ) {
				whichmin = which.min(c(contig_blatf[1,7],contig_blastf[1,7]))
				if(whichmin==1) { ## BLAT BEFORE BLAST
					contig_blastf = contig_blastf[which(contig_blatf[1,8]-contig_blastf[1,7] < 60),]
				} else {## BLAST BEFORE BLAT
					contig_blastf = contig_blastf[which(contig_blastf[1,8]-contig_blatf[1,7] < 60),]
				}
			}
			## Nothing left for this contig: repeat the same check once with the next longest contig
			if(nrow(contig_blastf) == 0) {
				whichlong_f = whichlong_f+1
				longest_f = order(lengths_f,decreasing=T)[whichlong_f]
				contig_blastf = blastj_for[which(blastj_for[,1] == names_f[longest_f]),]
				contig_blatf = blatted_f[which(blatted_f[,1]==names_f[longest_f]),]
				if(nrow(contig_blatf) > 0 && nrow(contig_blastf) > 0 && !is.na(longest_f) ) {
					whichmin = which.min(c(contig_blatf[1,7],contig_blastf[1,7]))
					if(whichmin==1) { ## BLAT BEFORE BLAST
						contig_blastf = contig_blastf[which(contig_blatf[1,8]-contig_blastf[1,7] < 60),]
					} else {## BLAST BEFORE BLAT
						contig_blastf = contig_blastf[which(contig_blastf[1,8]-contig_blatf[1,7] < 60),]
					}
				}
			}
			# If all else fails just go with best alignement
			# NOTE(review): this takes the contig named in the first row of blasted_f, i.e. the
			# table as read from file, not the e-value-sorted blastf -- confirm that is intended
			if(nrow(contig_blastf) == 0) {
				longest_f = which(names_f== blasted_f[1,1])
				contig_blastf = blastj_for[which(blastj_for[,1] == names_f[longest_f]),]
			}
		}
		## REVERSE (same procedure as FORWARD, on the reverse tables)
		if( class(blatted_r) == "try-error" ) {
			longest_r = 1
			contig_blastr = blastj_rev[which(blastj_rev[,1] == names_r[longest_r]),]
		} else { 
			## IF LONGEST IS NOT BLASTED or BLATTED, move on to next longest
			if( length(which(blastj_rev[,1] == names_r[longest_r])) == 0 || length(which(blatted_r[,1]==names_r[longest_r])) == 0 ) {
				whichlong_r = whichlong_r+1
				longest_r = order(lengths_r,decreasing=T)[whichlong_r]
			}
			contig_blastr = blastj_rev[which(blastj_rev[,1] == names_r[longest_r]),]
			contig_blatr = blatted_r[which(blatted_r[,1]==names_r[longest_r]),]
			if(nrow(contig_blatr) > 0 && nrow(contig_blastr) > 0 && !is.na(longest_r)) {
				whichmin = which.min(c(contig_blatr[1,7],contig_blastr[1,7]))
				if(whichmin==1) { ## BLAT BEFORE BLAST
					contig_blastr = contig_blastr[which(contig_blatr[1,8]-contig_blastr[1,7] < 60),]
				} else { ## BLAST BEFORE BLAT
					contig_blastr = contig_blastr[which(contig_blastr[1,8]-contig_blatr[1,7] < 60),]
				}
			}
				if(nrow(contig_blastr) == 0) { ##DO IT AGAIN
					whichlong_r = whichlong_r+1
					longest_r = order(lengths_r,decreasing=T)[whichlong_r]
					contig_blastr = blastj_rev[which(blastj_rev[,1] == names_r[longest_r]),]
					contig_blatr = blatted_r[which(blatted_r[,1]==names_r[longest_r]),]
					if(nrow(contig_blatr) > 0 && nrow(contig_blastr) > 0 && !is.na(longest_r)) {
						whichmin = which.min(c(contig_blatr[1,7],contig_blastr[1,7]))
						if(whichmin==1) { ## BLAT BEFORE BLAST
							contig_blastr = contig_blastr[which(contig_blatr[1,8]-contig_blastr[1,7] < 60),]
						} else { ## BLAST BEFORE BLAT
							contig_blastr = contig_blastr[which(contig_blastr[1,8]-contig_blatr[1,7] < 60),]
						}
					}
				} 
		
			## IF all else fails, just go with best alignment
			if(nrow(contig_blastr) == 0) {
				longest_r = which(names_r== blasted_r[1,1])
				contig_blastr = blastj_rev[which(blastj_rev[,1] == names_r[longest_r]),]
			}
		}
		
		# TOP 0.95percent BLAST scorers
		# (rows whose column 12 is at least 0.95 times the first row's column 12)
		threshold_f = 0.95 * contig_blastf[1,12]
		top_f = which(contig_blastf[,12] >= threshold_f)
		threshold_r = 0.95 * contig_blastr[1,12]
		top_r = which(contig_blastr[,12] >= threshold_r)
	 	
	 		## Choose the reported hit per direction so that, where possible, both directions
	 		## name the same TE (column 2). bestmatch is the smallest column-11 value across
	 		## both tables; whichfor / whichrev are the rows that attain it.
	 		best_for = best_rev = 1				
			bestmatch = min(contig_blastf[,11],contig_blastr[,11])
			whichfor = which(contig_blastf[,11]==bestmatch)
			whichrev = which(contig_blastr[,11]==bestmatch)
			if(length(whichrev)>0) {
				## Overall best value occurs in the reverse table: for each such reverse row look
				## for forward rows with the same TE; the last reverse row that has a forward
				## partner wins. Without any partner both indices stay at row 1.
				formatch = revmatch = 1
				for (r in 1:length(whichrev) ) {
					revm = which(contig_blastf[,2]==contig_blastr[whichrev[r],2])
					if(length(revm)>0) {
						formatch = revm
						revmatch = whichrev[r]
					}
				}
				best_rev = revmatch
				ambig_rev = top_r[-1]   #if(length(whichrev)>0) ambig_rev = whichrev[-revmatch]
				best_for = formatch[1]
				## NOTE(review): ifelse() with a length-1 test returns a single element, so only
				## the first index of the selected branch is kept here -- confirm whether the
				## full index vector was intended
				ambig_for = ifelse(length(formatch)>1, 1:(formatch[1]-1), top_f[-best_for])
			} else if(length(whichfor)>0) {
				## Overall best value occurs only in the forward table: same search with roles swapped
				formatch = revmatch = 1
				for (r in 1:length(whichfor) ) {
					form = which(contig_blastr[,2]==contig_blastf[whichfor[r],2])
					if(length(form)>0) {
						revmatch = form
						formatch = whichfor[r]
					} 
				}
				best_for = formatch
				ambig_for = top_f[-1] #if(length(whichfor)>0) ambig_for = whichfor[-formatch]
				best_rev = revmatch[1]
				## NOTE(review): same single-element ifelse() behaviour as in the branch above
				ambig_rev = ifelse(length(revmatch)>1,1:(revmatch[1]-1), top_r[-best_rev])
			}
	
		
			
		## Fill in table				
		rowtoreturn["F_contig_length"] = lengths_f[longest_f]
		rowtoreturn["F_contig_TE"] = contig_blastf[best_for,2]						
		rowtoreturn["F_contig_TE_ambiguity"] = paste(unique(contig_blastf[ambig_for,2]),collapse="; ")
		rowtoreturn["F_contig_alnstart"] = contig_blastf[best_for,7] ## contig alignment start			
		rowtoreturn["F_contig_alnend"] = contig_blastf[best_for,8] ## contig alignment stop			
		rowtoreturn["F_TE_alnstart"] = contig_blastf[best_for,9] ## TE alignment start			
		rowtoreturn["F_TE_alnend"] = contig_blastf[best_for,10] ## TE alignment stop			
		rowtoreturn["F_contig_seq"] = seqs_f[longest_f] ## contig sequence			
		if(class(blatted_f) != "try-error") {
			whichblat = which(blatted_f[,1]==names_f[longest_f])
			rowtoreturn["F_contig_ref_start"] = blatted_f[whichblat[1],7]
			rowtoreturn["F_contig_ref_end"] = blatted_f[whichblat[1],8]
		}

		## Fill in reverse
		rowtoreturn["R_contig_length"] = lengths_r[longest_r]
		rowtoreturn["R_contig_TE"] = contig_blastr[best_rev,2]						
		rowtoreturn["R_contig_TE_ambiguity"] = paste(unique(contig_blastr[ambig_rev,2]),collapse="; ")
		rowtoreturn["R_contig_alnstart"] = contig_blastr[best_rev,7] ## contig alignment start			
		rowtoreturn["R_contig_alnend"] = contig_blastr[best_rev,8] ## contig alignment stop			
		rowtoreturn["R_TE_alnstart"] = contig_blastr[best_rev,9] ## TE alignment start			
		rowtoreturn["R_TE_alnend"] = contig_blastr[best_rev,10] ## TE alignment stop			
		rowtoreturn["R_contig_seq"] = seqs_r[longest_r] ## contig sequence			
		if(class(blatted_r) != "try-error") {
			whichblat = which(blatted_r[,1]==names_r[longest_r])
			rowtoreturn["R_contig_ref_start"] = blatted_r[whichblat[1],7]
			rowtoreturn["R_contig_ref_end"] = blatted_r[whichblat[1],8]
		}

	
	
	}

	return(rowtoreturn)
}



## Derive breakpoint, target-site-duplication (TSD) and insertion-motif fields for one candidate.
##
## Args:
##   row: one candidate, indexable by name (a one-row data frame or a named vector), with
##        fields "chr", "clippedpos_f", "clippedpos_r", "F_contig_seq", "R_contig_seq"
##        and optionally "inverted".
##
## Uses the global `refdir` (directory prefix of the per-chromosome chr<chr>.fa files),
## the external `samtools faidx` command, and rev.comp(), which is not defined in this file.
##
## Returns:
##   A vector of 6 elements, in this order:
##     1 final_bkpt   breakpoint position
##     2 TSD_length   clippedpos_f - clippedpos_r + 1 (only when both clip positions exist)
##     3 TSD_seq      reference sequence from clippedpos_r to clippedpos_f
##     4 F_TSD_pos    regexpr() position of TSD_seq (or its reverse complement) in F_contig_seq
##     5 R_TSD_pos    same for R_contig_seq
##     6 insertmotif  reference sequence in a window around the breakpoint
##   Elements that cannot be determined stay NA. When neither clip position is known the
##   result is all NA and samtools is not called.
addTSDinfo = function ( row ) {
	toreturn = rep(NA,6)
	chr = row["chr"]
	missing_f = is.na(row["clippedpos_f"])
	missing_r = is.na(row["clippedpos_r"])

	## Render a coordinate for the command line; paste() alone would turn e.g. 100000 into "1e+05"
	coord = function ( x ) format(x,scientific=FALSE,trim=TRUE)
	## Fetch chr<chr>:<from>-<to> from the reference; the result is samtools' output lines
	## (header line first, sequence second when the lookup succeeds)
	faidx = function ( from, to )
		system(paste("samtools faidx ",refdir,"chr",chr,".fa chr",chr,":",from,"-",to,sep=""),intern=T)

	## Neither side has a clipped position: nothing can be looked up
	if( missing_f && missing_r ) {
		toreturn[1] = NA_real_
		return(toreturn)
	}

	## Only one side has a clipped position: it is the breakpoint, no TSD can be derived
	if( missing_f || missing_r ) {
		bkpt = as.numeric(row[if( missing_f ) "clippedpos_r" else "clippedpos_f"])
		toreturn[1] = bkpt
		motif = faidx(coord(bkpt-6),coord(bkpt+2))
		if(length(motif)>1) toreturn[6] = motif[2]
		return(toreturn)
	}

	## Both sides clipped. Without orientation information the breakpoint is the smaller
	## position; otherwise the forward position for inverted candidates and the reverse
	## position for the rest.
	pos_f = as.numeric(row["clippedpos_f"])
	pos_r = as.numeric(row["clippedpos_r"])
	inverted = if( "inverted" %in% names(row) ) row["inverted"] else NA
	if( length(inverted) == 0 || is.na(inverted) ) {
		bkpt = min(pos_f,pos_r)
	} else if( inverted == TRUE ) {
		bkpt = pos_f
	} else {
		bkpt = pos_r
	}

	toreturn[1] = bkpt
	toreturn[2] = (pos_f - pos_r) + 1
	tsd = faidx(row["clippedpos_r"],row["clippedpos_f"])
	if(length(tsd)>1) {
		toreturn[3] = tsd[2]
		## Locate the TSD in each contig, trying the reverse complement when the
		## sequence itself is not found (-1) or the contig sequence is NA
		fmatch = regexpr(tsd[2],row["F_contig_seq"],ignore.case=T)
		if(is.na(fmatch) || fmatch==-1)
			fmatch = regexpr(rev.comp(tsd[2]),row["F_contig_seq"],ignore.case=T)
		toreturn[4] = fmatch
		rmatch = regexpr(tsd[2],row["R_contig_seq"],ignore.case=T)
		if(is.na(rmatch) || rmatch==-1)
			rmatch = regexpr(rev.comp(tsd[2]),row["R_contig_seq"],ignore.case=T)
		toreturn[5] = rmatch
	}
	motif = faidx(coord(bkpt-2),coord(bkpt+4))
	if(length(motif)>1) toreturn[6] = motif[2]

	return(toreturn)
}


####################################################################################################
## Main function
####################################################################################################

## Annotate a chunk of the candidate table with read-support, contig and TSD information.
##
## Args:
##   candfile:        tab-delimited candidate file with a header; columns "chr", "start"
##                    and "stop" are used to locate each candidate's assembly directory.
##   outassemblystem: path stem of the assembly directories, which are named
##                    <stem>-<Tumor|Normal>.chr<chr>.region.<start>-<stop>.assembly/
##   outreadsstem:    not used by this function (kept for the existing call signature).
##   startcand, endcand: first and last row of candfile to process. Rows beyond the end
##                    of the table are ignored.
##
## Returns:
##   NA when candfile cannot be read. Otherwise the selected rows with the annotation
##   columns appended, the "Reads_f"/"Reads_r" columns removed when present, and only
##   rows with nTotRegion < 2000 kept.
add_assembly_info = function ( candfile, outassemblystem, outreadsstem, startcand, endcand ) {

	candidates = try( read.delim(candfile,as.is=T,sep="\t"),silent = TRUE)
	if(inherits(candidates,"try-error"))
		return(NA)

	print(nrow(candidates))
	## Restrict to the requested chunk. Indexing past the last row would add all-NA rows
	## that would then be processed and written out, so the range is clamped first.
	lastcand = min(endcand,nrow(candidates))
	chunkrows = if( startcand <= lastcand ) startcand:lastcand else integer(0)
	candidates = candidates[chunkrows,,drop=FALSE]
	print(dim(candidates))
	print("Candidates file read in")

	contigcols = c("F_contig_length","F_contig_ref_start","F_contig_ref_end","F_contig_TE","F_contig_TE_ambiguity","F_contig_alnstart","F_contig_alnend","F_TE_alnstart","F_TE_alnend","F_contig_seq",
		"R_contig_length","R_contig_ref_start","R_contig_ref_end","R_contig_TE","R_contig_TE_ambiguity","R_contig_alnstart","R_contig_alnend","R_TE_alnstart","R_TE_alnend","R_contig_seq")
	tsdcols = c("final_bkpt","TSD_length","TSD_seq","F_TSD_pos","R_TSD_pos","insertmotif")

	## Append the annotation columns. Count columns start at 0, everything else at NA.
	## nbadregion goes last and starts at NA: that is where and how it appears when a row
	## assigns it, but adding it up front means every output file has the same columns.
	newcols = c("nSpanNormal","nSpanClipped","NormalnSpanNormal","NormalnSpanClipped","clippedpos_f","clippedpos_r","nClipped_f","nClipped_r","normalClippedpos_f","normalClippedpos_r","nWeirdsNormal","nWeirdsTumor","nTotRegion",
		contigcols,tsdcols,"nbadregion")
	zerocols = c("nClipped_f","nClipped_r","nWeirdsNormal","nWeirdsTumor","nTotRegion")
	for( col in newcols )
		candidates[[col]] = rep(if( col %in% zerocols ) 0 else NA,nrow(candidates))

	## Numeric value after the ":" in field k of the first nfile line
	nfile_count = function ( nfile, k ) as.numeric(unlist(strsplit(nfile[1,k],split=":"))[2])

	for(tissue in c("Tumor","Normal")) {
		print(tissue)
		for( i in seq_len(nrow(candidates)) )  {
			if(i%%100==0) print(i)
			chr = candidates[i,"chr"]
			startreg = candidates[i,"start"]
			stopreg = candidates[i,"stop"]
			assembleddir = paste(outassemblystem,"-",tissue,".chr",chr,".region.",startreg,"-",stopreg,".assembly/",sep="")

			##
			## Read in nfile with clipped and reads information
			##
			nfilename = paste(assembleddir,"nfile.txt",sep="")
			nfile = try ( read.delim(nfilename,as.is=T,sep="\t",header=F), silent = TRUE )
			if(inherits(nfile,"try-error")) {
				## no nfile: leave the read-support columns at their defaults
			} else if(nfile_count(nfile,3) == 1000) {
				## bad region (field 3 of the first line equals 1000): skip the read-support columns
			} else {

				if(tissue=="Tumor") {
					candidates[i,"nSpanNormal"] = nfile_count(nfile,1)
					candidates[i,"nSpanClipped"] = nfile_count(nfile,3)
					candidates[i,"nbadregion"] = nfile_count(nfile,6)
					candidates[i,"nTotRegion"] = nfile_count(nfile,4)
					candidates[i,"nWeirdsTumor"] = nfile_count(nfile,2)

					## Fill clippedpos columns with mode of clipped reads
					##   Includes some strange statements to account for java file writing differences:
					##   when line 3 is just the reverse-clips label there are no forward clips and
					##   the reverse clips are on line 4, otherwise they are on line 5
					clipped_f = unlist(strsplit(nfile[3,1],split="\\s\\| "))
					if(length(clipped_f) == 1 && clipped_f == "location of reverse clips:") {
						clipped_f = NA
						clipped_r = unlist(strsplit(nfile[4,1],split="\\s\\| "))
						candidates[i,"clippedpos_f"] = NA
					} else {
						clipped_r = unlist(strsplit(nfile[5,1],split="\\s\\| "))
						tab_f = table(clipped_f)
						candidates[i,"clippedpos_f"] = names(tab_f)[which.max(tab_f)]
						candidates[i,"nClipped_f"] = max(tab_f)
					}

					if(length(clipped_r) > 1 || !is.na(clipped_r)) {
						tab_r = table(clipped_r)
						candidates[i,"clippedpos_r"] = names(tab_r)[which.max(tab_r)]
						candidates[i,"nClipped_r"] = max(tab_r)
					}
				}
				if(tissue=="Normal") {
					candidates[i,"nWeirdsNormal"] = nfile_count(nfile,2)
					candidates[i,"NormalnSpanNormal"] = nfile_count(nfile,1)
					candidates[i,"NormalnSpanClipped"] = nfile_count(nfile,3)

					## Same clipped-position parsing as for the tumor sample, written to the
					## normalClippedpos columns (no counts are recorded for the normal sample)
					clipped_f = unlist(strsplit(nfile[3,1],split="\\s\\| "))
					if(length(clipped_f) == 1 && clipped_f == "location of reverse clips:") {
						clipped_f = NA
						clipped_r = unlist(strsplit(nfile[4,1],split="\\s\\| "))
						candidates[i,"normalClippedpos_f"] = NA
					} else {
						clipped_r = unlist(strsplit(nfile[5,1],split="\\s\\| "))
						tab_f = table(clipped_f)
						candidates[i,"normalClippedpos_f"] = names(tab_f)[which.max(tab_f)]
					}
					if(length(clipped_r) > 1 || !is.na(clipped_r)) {
						tab_r = table(clipped_r)
						candidates[i,"normalClippedpos_r"] = names(tab_r)[which.max(tab_r)]
					}

				}
			}


			## Add contig and TSD information (tumor sample only, whether or not an nfile was found)
			if(tissue == "Tumor")  {
				candidates[i,contigcols] = add_contig_info_longest ( assembleddir )
				candidates[i,tsdcols] = addTSDinfo(candidates[i,])
			}
		}
	}

	# Remove columns with read information (selecting by name also works when they are absent)
	candidates = candidates[,!(colnames(candidates) %in% c("Reads_f","Reads_r")),drop=FALSE]

	## Filter out regions with 2000 total reads as bad
	candidates = subset(candidates,nTotRegion < 2000)

	return(candidates)

}

## Annotate this chunk of candidates and write it to its own part file,
## named <assembledcandfile>_PART_<counter>.txt
candidates <- add_assembly_info(candfile, outassemblystem, outreadsstem, startcand, endcand)
write.table(
    candidates,
    file = paste0(assembledcandfile, "_PART_", counter, ".txt"),
    quote = FALSE,
    row.names = FALSE,
    sep = "\t"
)
