# Split each sample's parsed read table into one file per reference region.
#
# For every entry of `samps`, the files in `DIR` whose names match the entry
# and contain "parsed.txt" (but not "used") are read. For each row of the
# chromosome-position reference table, the reads whose `Uniquechr` equals the
# region's chromosome and whose `Uniquestart` lies strictly between the
# region's `start` and `stop` are appended to "<DIR><sample>.chr<region>.txt".
# A file is written (possibly empty) for every region. Each processed input is
# renamed with a ".used" suffix so that it is not picked up again.
#
# Arguments:
#   samps  character vector of sample patterns (matched with grep, i.e. as
#          regular expressions) against the file names in DIR.
#   fold   unused; kept so existing callers keep working.
#   DIR    directory holding the parsed files. It is pasted directly in front
#          of the output file names, so it should end with "/".
#   hgnum  build number; the reference table is read from
#          "reffiles/chrpositions_hg<hgnum>.txt" under the working directory
#          in effect when the function is called.
#
# Returns NULL invisibly. The working directory is switched to DIR while the
# function runs and restored on exit (also on error), so repeated calls keep
# resolving "reffiles/" against the caller's directory.
weirds_to_unique <- function(samps, fold, DIR, hgnum) {
	# Resolve the reference table BEFORE changing the working directory.
	reffilesdir <- paste0(getwd(), "/reffiles/")
	centfile <- paste0(reffilesdir, "chrpositions_hg", hgnum, ".txt")
	cent <- read.delim(centfile, as.is = TRUE, sep = "\t")

	oldwd <- setwd(DIR)
	on.exit(setwd(oldwd), add = TRUE)

	for (s in samps) {
		allfiles <- dir(DIR)
		allfolds <- allfiles[grep(s, allfiles)]
		folds <- allfolds[grep("parsed.txt", allfolds)]
		# Skip inputs that an earlier run already consumed.
		w <- grep("used", folds)
		if (length(w) > 0) folds <- folds[-w]

		for (f in folds) {
			# Sample name = everything before the first "." of the file name.
			samp <- unlist(strsplit(f, "\\."))[1]
			print(f)

			tab <- read.delim(f, as.is = TRUE, header = TRUE)
			# Explicit column extraction: errors if a column is missing and cannot
			# be confused with a local variable of the same name.
			uniq_chr <- tab[, "Uniquechr"]
			uniq_start <- tab[, "Uniquestart"]

			for (j in seq_len(nrow(cent))) {
				region <- cent[j, "chr"]
				# The chromosome is the region label minus its last character.
				chr <- substring(region, 1, nchar(region) - 1)
				# which() drops NA comparisons, as subset() did.
				keep <- which(uniq_chr == chr &
					uniq_start < cent[j, "stop"] &
					uniq_start > cent[j, "start"])
				chrtab <- tab[keep, , drop = FALSE]
				print(nrow(chrtab))
				chrfile <- paste0(DIR, samp, ".chr", region, ".txt")
				write.table(chrtab, file = chrfile, quote = FALSE, row.names = FALSE,
					sep = "\t", col.names = FALSE, append = TRUE)
			}
			file.rename(f, paste0(f, ".used"))
		}
	}
	invisible(NULL)
}


# Annotate a candidate-region table with assembly and read-count information.
#
# Reads the tab-delimited candidates table (needs columns `chr`, `start`,
# `stop`) and appends eight columns: nSpanNormal, nSpanClipped, nWeirdsNormal,
# nWeirdsTumor, contig1_length, contig1_TE, contig2_length, contig2_TE.
# For t in "tumor" then "normal", and for each of the first 5000 candidates,
# it looks in
#   <outassemblystem>-<t>.chr<chr>.region.<start>-<stop>.assembly/
# for blasted.txt, contigs.fa and nfile.txt, and reads
#   <outreadsstem>-<t>.chr<chr>.region.<start>-<stop>.txt
#
# Per candidate and pass:
#   * if blasted.txt or contigs.fa cannot be read, the candidate is skipped
#     for this pass (nothing is updated, including the read counts);
#   * contig1_* / contig2_* come from the first and second distinct query
#     names in column 1 of blasted.txt: the length is the 4th "_"-separated
#     field of the name, the TE is column 2 of that row;
#   * nSpanNormal / nSpanClipped are the numbers after ":" in the first two
#     fields of the first row of nfile.txt (an unreadable nfile.txt is an error);
#   * nWeirdsTumor / nWeirdsNormal is the row count of the reads file divided
#     by two (presumably two rows per read pair -- confirm with the producer).
# Both passes write the same contig*/nSpan* columns, so values from the
# "normal" pass overwrite the "tumor" ones when both assemblies are present.
#
# Arguments:
#   candfile         path to the candidates table.
#   outassemblystem  path prefix of the per-region assembly directories.
#   outreadsstem     path prefix of the per-region reads files.
#
# Returns the candidates data frame with the eight extra columns. A candidates
# file with a header but no rows yields a zero-row result with those columns.
add_assembly_info <- function(candfile, outassemblystem, outreadsstem) {
	# Read a headerless tab-delimited file; NULL when it cannot be read.
	read_or_null <- function(path) {
		tryCatch(
			read.delim(path, as.is = TRUE, sep = "\t", header = FALSE),
			error = function(e) NULL
		)
	}
	# Numeric value after the ":" in a "label:value" field.
	value_after_colon <- function(field) {
		as.numeric(unlist(strsplit(field, split = ":"))[2])
	}

	candidates <- read.delim(candfile, as.is = TRUE, sep = "\t")
	print("Candidates file read in")

	# Add the annotation columns sized to the table, so an empty table works.
	n_cand <- nrow(candidates)
	candidates$nSpanNormal <- rep(NA, n_cand)
	candidates$nSpanClipped <- rep(NA, n_cand)
	candidates$nWeirdsNormal <- rep(0, n_cand)
	candidates$nWeirdsTumor <- rep(0, n_cand)
	candidates$contig1_length <- rep(NA, n_cand)
	candidates$contig1_TE <- rep(NA, n_cand)
	candidates$contig2_length <- rep(NA, n_cand)
	candidates$contig2_TE <- rep(NA, n_cand)

	# Only the first 5000 candidates are annotated; later rows keep defaults.
	max_rows <- 5000

	for (t in c("tumor", "normal")) {
		for (i in seq_len(min(n_cand, max_rows))) {
			if (i %% 100 == 0) print(i)
			chr <- candidates[i, "chr"]
			startreg <- candidates[i, "start"]
			stopreg <- candidates[i, "stop"]
			region_tag <- paste0("-", t, ".chr", chr, ".region.", startreg, "-", stopreg)
			assembleddir <- paste0(outassemblystem, region_tag, ".assembly/")
			readsfile <- paste0(outreadsstem, region_tag, ".txt")

			blasted <- read_or_null(paste0(assembleddir, "blasted.txt"))
			if (is.null(blasted)) next
			# contigs.fa is only used as a gate: it must exist and be readable.
			contigs <- read_or_null(paste0(assembleddir, "contigs.fa"))
			if (is.null(contigs)) next

			# Row index of the first hit of each distinct query name.
			matchstart <- match(unique(blasted[, 1]), blasted[, 1])
			if (length(matchstart) > 0) {
				nodename <- unlist(strsplit(blasted[matchstart[1], 1], split = "_"))
				candidates[i, "contig1_length"] <- as.numeric(nodename[4])
				candidates[i, "contig1_TE"] <- blasted[matchstart[1], 2]
			}
			if (length(matchstart) > 1) {
				nodename <- unlist(strsplit(blasted[matchstart[2], 1], split = "_"))
				candidates[i, "contig2_length"] <- as.numeric(nodename[4])
				candidates[i, "contig2_TE"] <- blasted[matchstart[2], 2]
			}

			# Deliberately unguarded: a missing nfile.txt next to a readable
			# assembly stops the run, as before.
			nfile <- read.delim(paste0(assembleddir, "nfile.txt"),
				as.is = TRUE, sep = "\t", header = FALSE)
			candidates[i, "nSpanNormal"] <- value_after_colon(nfile[1, 1])
			candidates[i, "nSpanClipped"] <- value_after_colon(nfile[1, 2])

			reads <- read_or_null(readsfile)
			if (is.null(reads)) {
				print("no reads file")
				next
			}
			if (t == "tumor") candidates[i, "nWeirdsTumor"] <- nrow(reads) / 2
			if (t == "normal") candidates[i, "nWeirdsNormal"] <- nrow(reads) / 2
		}
	}
	candidates
}

# Compute the sequence length of every record in a FASTA file.
#
# The file is read line by line (first tab-separated column only). Every line
# containing ">" is treated as a record header; the header is split on "|"
# into name, family and element. A record's length is the total number of
# characters on the lines between its header and the next header (or the end
# of the file); the header line itself is not counted.
#
# Arguments:
#   fastafile  path to the FASTA file.
#
# Returns an integer vector of lengths named "<name>|<family>|<element>",
# where the name has every ">" removed and a family or element missing from
# the header appears as "NA". A file without any header line yields an empty
# named integer vector.
get_unique_TEs <- function(fastafile) {
	f <- read.delim(fastafile, as.is = TRUE, header = FALSE)
	fasta_lines <- f[, 1]
	whichTEs <- grep(">", fasta_lines)
	nTE <- length(whichTEs)
	if (nTE == 0) {
		empty <- integer(0)
		names(empty) <- character(0)
		return(empty)
	}

	# Split headers into "|" fields; headers may carry fewer than three, so
	# pick each field per header instead of relying on a rectangular matrix.
	parts <- strsplit(fasta_lines[whichTEs], split = "\\|")
	header_field <- function(k) {
		vapply(parts, function(p) if (length(p) >= k) p[k] else NA_character_,
			character(1))
	}
	te_names <- gsub(">", "", header_field(1))
	families <- header_field(2)
	elements <- header_field(3)

	# Character count per line, with header lines zeroed so that only sequence
	# characters contribute to a record's length.
	line_chars <- nchar(fasta_lines)
	line_chars[whichTEs] <- 0L
	# A record ends on the line before the next header, or on the last line.
	last_line <- c(whichTEs[-1] - 1L, length(fasta_lines))
	lengths <- vapply(seq_len(nTE), function(i) {
		sum(line_chars[whichTEs[i]:last_line[i]])
	}, integer(1))

	names(lengths) <- paste(te_names, families, elements, sep = "|")
	lengths
}


# Complement (and by default reverse) a single DNA sequence.
#
# The input is upper-cased; A<->T and C<->G are swapped and every other
# character becomes "N".
#
# Arguments:
#   x    a single, non-missing string (the sequence). "" is allowed.
#   rev  TRUE (default) to return the reverse complement, FALSE to return the
#        complement in the original orientation.
#
# Returns one string of the same number of characters as `x`.
rev.comp <- function(x, rev = TRUE) {
	if (length(x) != 1 || is.na(x)) {
		stop("rev.comp expects a single non-missing sequence string", call. = FALSE)
	}
	bases <- unlist(strsplit(toupper(x), NULL))
	# chartr swaps the four bases in one vectorised pass; anything that was
	# not A/C/G/T is then masked to "N".
	comp <- chartr("ACGT", "TGCA", bases)
	comp[!bases %in% c("A", "C", "G", "T")] <- "N"
	# base::rev is spelled out because the argument `rev` shadows the name.
	if (rev) comp <- base::rev(comp)
	paste(comp, collapse = "")
}



# Return the complement of a DNA sequence without reversing it.
#
# The input is upper-cased and split into single characters; A<->T and C<->G
# are swapped, any other character becomes "N", and the characters are joined
# back into one string. An empty string gives "".
convertToComplement <- function(x) {
	letters_in <- unlist(strsplit(toupper(x), NULL))

	# Complement of one upper-case character; "N" for anything unrecognised.
	complement_one <- function(base) {
		if (base == "A") return("T")
		if (base == "C") return("G")
		if (base == "G") return("C")
		if (base == "T") return("A")
		"N"
	}

	complemented <- vapply(letters_in, complement_one, character(1),
		USE.NAMES = FALSE)
	paste(complemented, collapse = "")
}

