package org.broadinstitute.cga.tools.seq;

import org.broadinstitute.cga.tools.seq.BamGraspMine;
import org.broadinstitute.cga.tools.seq.BlastAligner;
import org.broadinstitute.cga.tools.seq.SubstringAligner;
import org.broadinstitute.cga.tools.seq.HashAligner;
import org.broadinstitute.cga.tools.seq.BowtieAligner;
import org.broadinstitute.cga.tools.seq.VELVETassembler;
import org.broadinstitute.cga.tools.seq.INCHWORMassembler;
import net.sf.samtools.*;
import java.io.*;
import java.lang.*;
import java.util.*;

/**
 * Command-line step that scans the reads of one BAM region around a candidate
 * breakpoint, writes the reads selected for assembly to two FASTA-like text
 * files (forward / reverse), runs {@link INCHWORMassembler} on each file and
 * finally writes a small summary file with the read tallies.
 *
 * <p>All output goes to the directory
 * {@code <AssembledDir><Sample>-<Type>.chr<Chromosome>.region.<RegionStart>-<RegionStop>.assembly};
 * the directory itself is not created by this class.
 */
public class CandidateAnalysis {

    private static final String usage =
        "Usage: CandidateAnalysis <BAMFilename> <BlacklistFilename> <Tum> <Sample> <Chromosome> "+
                           "<RegionStart> <RegionStop> <BreakStart> <BreakStop> <ForwardReads> <ReverseReads> "+
                           "<ReadsDir> <AssembledDir> <Type>\n"+
        "   <BAMFilename> = name of bam to analyze\n"+
        "   <BlacklistFilename> = file listing blacklisted lanes e.g. '42ABC.1' -- or specify 'none'\n"+
        "   <Tum> = accepted for compatibility (currently unused)\n"+
        "   <Sample> = sample name, used in the output directory name\n"+
        "   <Chromosome> = chromosome to process (1-24)\n"+
        "   <RegionStart> = first position of the region to scan\n"+
        "   <RegionStop> = last position of the region to scan\n"+
        "   <BreakStart> = start of the candidate breakpoint interval\n"+
        "   <BreakStop> = end of the candidate breakpoint interval\n"+
        "   <ForwardReads> = names of the forward-cluster reads, separated by ' | '\n"+
        "   <ReverseReads> = names of the reverse-cluster reads, separated by ' | '\n"+
        "   <ReadsDir> = accepted for compatibility (currently unused)\n"+
        "   <AssembledDir> = directory prefix for assembly output (prepended as-is, so end it with '/')\n"+
        "   <Type> = label used in the output directory name\n";

    // tuning constants

    /** Buffer size handed to every BufferedWriter. */
    private static final int BUFFER_SIZE = 10000;
    /** Padding applied on both sides of the region when querying the BAM. */
    private static final int REGION_PADDING = 100;
    /** Scanning stops once this many reads were accepted; larger regions are likely bad regions. */
    private static final int MAX_READS = 2000;
    /** Slack around the breakpoint interval when deciding whether a read spans it. */
    private static final int MARGIN = 15;
    /** A clipped read is kept only if the inspected base quality is strictly above this value. */
    private static final int MIN_CLIP_PHRED = 5;
    /** End clips whose offset into the read is larger than this are ignored. */
    private static final int MAX_END_CLIP_OFFSET = 100;
    /** Reads for which bam.read.isbad() is below this value are counted as bad. */
    private static final int MIN_QUALITY = 20;

    // parameters and input/output files

    private static String bamname = null;
    private static String blacklistname = null;
    private static String tum = null;
    private static String samp = null;
    private static int startreg = 0;
    private static int stopreg = 0;
    private static int breakstart = 0;
    private static int breakstop = 0;
    private static int chr = 0;
    private static String reads_f = null;   // ' | '-separated read names (forward cluster)
    private static String reads_r = null;   // ' | '-separated read names (reverse cluster)
    private static String assembleddir = null;
    private static String readsdir = null;
    private static String type = null;

    private static BamGraspMine bam = null;    // drives the region iterator
    private static BamGraspMine bam2 = null;   // second handle, used only for mate look-ups
    private static SAMFileReader reader = null;
    private static SAMRecordIterator c = null;

    // info about current read

    private static int mate_chr=0, mate_start=0, this_chr=0, this_start=0;

    /** Mutable tallies gathered while scanning the region. */
    private static final class RegionStats {
        int nSpanNormal = 0;
        int nSpanClipped = 0;
        int nNormalPairs = 0;
        int nWeirdPairs = 0;
        int nbads = 0;
        int nTotReads = 0;
        /** Alignment ends of kept end-clipped reads, each followed by " | ". */
        final StringBuilder clippedreads_f = new StringBuilder();
        /** Alignment starts of kept start-clipped reads, each followed by " | ". */
        final StringBuilder clippedreads_r = new StringBuilder();
    }

    //////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////

    /**
     * Entry point. Parses the 14 command-line arguments, and unless all output
     * files already exist, scans the region, assembles the collected reads and
     * writes the summary file.
     *
     * @param args command-line arguments, see the usage text
     * @throws Exception on invalid arguments or any I/O / BAM failure
     */
    public static void main(String args[]) throws Exception {

        process_args(args);

        String assemblepath = assembleddir + samp + "-" + type + ".chr" + chr + ".region." + startreg + "-" + stopreg + ".assembly";
        String assembledf = assemblepath + "/contigs_f.fa";
        String assembledr = assemblepath + "/contigs_r.fa";
        String nspan = assemblepath + "/nfile.txt";
        // Named differently from the static reads_f / reads_r fields, which hold read-name lists.
        String readsPathF = assemblepath + "/reads_f.txt";
        String readsPathR = assemblepath + "/reads_r.txt";

        File nspanf = new File(nspan);
        File assembledfile_f = new File(assembledf);
        File assembledfile_r = new File(assembledr);
        File readsfile_f = new File(readsPathF);
        File readsfile_r = new File(readsPathR);

        RegionStats stats = new RegionStats();
        try {
            // open input file to create file of all reads
            open_input_file();

            String[] forreads = splitReadNames(reads_f);
            String[] revreads = splitReadNames(reads_r);
            System.out.println(forreads.length);
            System.out.println("reads for assembly are in " + readsPathF + " and " + readsPathR);

            if (readsfile_f.exists() && readsfile_r.exists() && nspanf.exists()
                    && assembledfile_f.exists() && assembledfile_r.exists()) {
                System.out.println("Assembled files exists");
                return;
            }

            collectReads(forreads, revreads, readsPathF, readsPathR, stats);
        } finally {
            // The BAM handles are only needed for the scan; release them exactly once.
            closeInputs();
        }

        // Assemble
        if (assembledfile_f.exists() && assembledfile_r.exists()) {
            System.out.println("Aligned file already exists");
        } else {
            Assembler aa = new INCHWORMassembler(readsPathF, assembledf);
            aa.assemble();
            Assembler bb = new INCHWORMassembler(readsPathR, assembledr);
            bb.assemble();
        }

        writeSummary(nspanf, stats);

    } // end of main()

    /** Splits a " | "-separated list of read names and trims every entry. */
    private static String[] splitReadNames(String joined) {
        String[] names = joined.split("\\s+\\|\\s+");
        for (int i = 0; i < names.length; i++) {
            names[i] = names[i].trim();
        }
        return names;
    }

    /**
     * Scans the region into "<path>.partial" files and renames them to their
     * final names once both writers were closed successfully.
     */
    private static void collectReads(String[] forreads, String[] revreads,
                                     String readsPathF, String readsPathR,
                                     RegionStats stats) throws Exception {
        String readstemp_f = readsPathF + ".partial";
        String readstemp_r = readsPathR + ".partial";

        BufferedWriter readswriter_f = new BufferedWriter(new FileWriter(readstemp_f), BUFFER_SIZE);
        try {
            BufferedWriter readswriter_r = new BufferedWriter(new FileWriter(readstemp_r), BUFFER_SIZE);
            try {
                scanRegion(forreads, revreads, readswriter_f, readswriter_r, stats);
            } finally {
                readswriter_r.close();
            }
        } finally {
            readswriter_f.close();
        }

        moveIntoPlace(readstemp_f, readsPathF);
        moveIntoPlace(readstemp_r, readsPathR);
    }

    /** Iterates over the region's reads, updating the tallies and writing the selected reads. */
    private static void scanRegion(String[] forreads, String[] revreads,
                                   BufferedWriter readswriter_f, BufferedWriter readswriter_r,
                                   RegionStats stats) throws Exception {
        System.out.println("starting to read");
        System.out.println("start less than breakstop+margin: " + (breakstop+MARGIN));
        System.out.println("stop greater than breakstart-margin: " + (breakstart-MARGIN));

        // If region has more than MAX_READS, it will cause java to crash and is likely a bad region
        while (c.hasNext() && stats.nTotReads < MAX_READS) {
            SAMRecord x = c.next();

            if (x.getDuplicateReadFlag()) continue;
            if (x.getNotPrimaryAlignmentFlag()) continue;
            if (!x.getReadPairedFlag()) continue;
            // A read without an RG tag cannot be blacklisted; keep it instead of crashing.
            Object readGroup = x.getAttribute("RG");
            if (readGroup != null && bam.isReadGroupBlacklisted(readGroup.toString())) continue;

            if (stats.nTotReads % 100 == 0) System.out.println(stats.nTotReads);
            stats.nTotReads++;

            // The mate is looked up through the second handle so the region iterator stays valid.
            SAMRecord mate = bam2.reader.queryMate(x);
            bam.parse(x, mate);

            // Clipped reads in region
            if (x.getUnclippedStart() <= breakstop+MARGIN && x.getUnclippedEnd() >= breakstart-MARGIN) {
                recordSpanningRead(x, readswriter_f, readswriter_r, stats);
            }

            // Write to file reads that are discordant and identified in TranspoSeq analysis as having one read align to TE db
            if (bam.read.isweird()) {
                stats.nWeirdPairs++;
                // Reads that are identified in TranspoSeq in forward_cluster / reverse cluster - both pairmates
                writePairIfListed(forreads, x, mate, readswriter_f);
                writePairIfListed(revreads, x, mate, readswriter_r);
            } else {
                // If read pair surrounds breakpoint and is normal, count it
                if ((bam.read.start < breakstart && bam.read.pairmatestart > breakstop) ||
                    (bam.read.pairmatestart < breakstart && bam.read.start > breakstop)) {
                    stats.nNormalPairs++;
                }
            }

            // If quality of read in region is poor, count it
            if (bam.read.isbad() < MIN_QUALITY) {
                stats.nbads++;
            }
        }

        System.out.println("Total number of reads in region: "+stats.nTotReads);
        System.out.println("Total normal reads in region: "+stats.nSpanNormal);
        System.out.println("Total clipped reads in region: "+stats.nSpanClipped);
        System.out.println("Total normal spanning paired reads in region: "+stats.nNormalPairs);
        System.out.println("Total bad reads in region: "+stats.nbads);
    }

    /**
     * Handles one read that overlaps the breakpoint window: clipped reads of
     * sufficient quality are written out and counted, reads that are clipped
     * on neither side are counted as normal.
     */
    private static void recordSpanningRead(SAMRecord x,
                                           BufferedWriter readswriter_f, BufferedWriter readswriter_r,
                                           RegionStats stats) throws IOException {
        byte[] quals = x.getBaseQualities();
        int nQuals = (quals == null) ? 0 : quals.length;
        boolean clippedStart = x.getAlignmentStart() != x.getUnclippedStart();
        boolean clippedEnd = x.getAlignmentEnd() != x.getUnclippedEnd();

        // Clipped beginning: keep the read if the quality of its first base is > MIN_CLIP_PHRED.
        if (clippedStart && nQuals > 0 && quals[0] > MIN_CLIP_PHRED) {
            readswriter_r.write(">"+x.getReadName()+"\t"+bam.read.chr+"\t"+x.getUnclippedStart()+"\t"+x.getAlignmentStart()+"\t"+x.getUnclippedEnd()+"\n");
            readswriter_r.write(x.getReadString() +"\n");
            stats.clippedreads_r.append(x.getAlignmentStart()).append(" | ");
            stats.nSpanClipped++;
        }

        // Clipped end: inspect the quality at the reference-derived offset of the alignment end.
        // NOTE(review): this offset ignores indels and hard clips, so it is only an
        // approximation of the first clipped base - confirm whether a CIGAR-based index is wanted.
        if (clippedEnd) {
            int startclip = x.getAlignmentEnd() - x.getUnclippedStart();
            // Out-of-range offsets only skip the clip handling; the read still takes part
            // in the discordant-pair and quality tallies of the caller.
            boolean inRange = startclip >= 0 && startclip <= MAX_END_CLIP_OFFSET && startclip < nQuals;
            if (inRange && quals[startclip] > MIN_CLIP_PHRED) {
                readswriter_f.write(">"+x.getReadName()+"\t"+bam.read.chr+"\t"+x.getUnclippedStart()+"\t"+x.getAlignmentEnd()+"\t"+x.getUnclippedEnd()+"\n");
                readswriter_f.write(x.getReadString() +"\n");
                stats.clippedreads_f.append(x.getAlignmentEnd()).append(" | ");
                stats.nSpanClipped++;
            }
        }

        // A normal spanning read is one that is clipped on neither side.
        if (!clippedStart && !clippedEnd) {
            stats.nSpanNormal++;
        }
    }

    /**
     * Writes the read and, when available, its mate once for every entry of
     * {@code names} that equals the read's name.
     */
    private static void writePairIfListed(String[] names, SAMRecord x, SAMRecord mate,
                                          BufferedWriter out) throws IOException {
        String readName = x.getReadName();
        for (int r = 0; r < names.length; r++) {
            if (!readName.equals(names[r])) {
                continue;
            }
            out.write(">"+readName+"\t"+bam.read.chr+"\t"+bam.read.start+"\n");
            out.write(x.getReadString()+"\n");
            // The mate look-up may come back empty; emit the mate record only when there is one.
            if (mate != null) {
                out.write(">"+readName+"\t"+bam.read.pairmatechr+"\t"+bam.read.pairmatestart+"\n");
                out.write(mate.getReadString()+"\n");
            }
        }
    }

    /**
     * Renames {@code tempName} to {@code finalName}; if the first attempt fails
     * and a stale target exists, the target is deleted and the rename retried.
     *
     * @throws IOException if the file could not be moved
     */
    private static void moveIntoPlace(String tempName, String finalName) throws IOException {
        File from = new File(tempName);
        File to = new File(finalName);
        if (from.renameTo(to)) {
            return;
        }
        if (to.exists() && to.delete() && from.renameTo(to)) {
            return;
        }
        throw new IOException("could not rename " + tempName + " to " + finalName);
    }

    /** Writes the tallies and the clip locations to the summary file. */
    private static void writeSummary(File nspanf, RegionStats stats) throws IOException {
        // An empty reverse list is written as a single newline (the forward list is written as-is).
        String reverseClips = (stats.clippedreads_r.length() == 0) ? "\n" : stats.clippedreads_r.toString();

        BufferedWriter nspanfile = new BufferedWriter(new FileWriter(nspanf), BUFFER_SIZE);
        try {
            nspanfile.write("number of normal reads:"+stats.nSpanNormal+"\t");
            nspanfile.write("number of discordant reads:"+stats.nWeirdPairs+"\t");
            nspanfile.write("number of clipped reads:"+stats.nSpanClipped+"\t");
            nspanfile.write("number of total reads in region:"+stats.nTotReads+"\t");
            nspanfile.write("number of normal paired reads around bkpt:"+stats.nNormalPairs+"\t");
            nspanfile.write("number of bad reads:"+stats.nbads+"\n");
            nspanfile.write("location of forward clips:\n"+stats.clippedreads_f+"\n");
            nspanfile.write("location of reverse clips:\n"+reverseClips+"\n");
        } finally {
            nspanfile.close();
        }
    }

    /** Closes both BAM handles; the second is closed even if closing the first fails. */
    private static void closeInputs() throws Exception {
        try {
            if (bam != null) bam.close();
        } finally {
            if (bam2 != null) bam2.close();
        }
    }

    //////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////

    /**
     * Copies the 14 positional arguments into the static parameter fields.
     * Prints the usage text and exits with status 1 when the argument count is wrong.
     *
     * @throws Exception if a numeric argument cannot be parsed or the
     *         chromosome is outside 1-24
     */
    private static void process_args(String args[]) throws Exception {
        if (args.length!=14) {
            System.out.print(usage);
            System.exit(1);
        }
        int a = 0;
        bamname = args[a++];
        blacklistname = args[a++];
        tum = args[a++];
        samp = args[a++];
        chr = Integer.parseInt(args[a++]);
        startreg = Integer.parseInt(args[a++]);
        stopreg = Integer.parseInt(args[a++]);
        breakstart = Integer.parseInt(args[a++]);
        breakstop = Integer.parseInt(args[a++]);
        reads_f = args[a++];
        reads_r = args[a++];
        if (chr<1 || chr>24) {
            throw new IllegalArgumentException("chr should be 1-24 (got " + chr + ")");
        }
        readsdir = args[a++];
        assembleddir = args[a++];
        type = args[a++];
    }

    //////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////

    /**
     * Opens the BAM twice (one handle for iterating, one for mate look-ups) and
     * positions the iterator on the reads contained in the padded region.
     */
    private static void open_input_file() throws Exception {
        bam = new BamGraspMine(bamname,blacklistname);
        bam2 = new BamGraspMine(bamname,blacklistname);
        String seqname = bam.getChrName(chr);
        c = bam.reader.queryContained(seqname,(startreg-REGION_PADDING),(stopreg+REGION_PADDING));
    }

}
