package org.broadinstitute.cga.tools.seq;

import org.broadinstitute.cga.tools.seq.Reference;
import net.sf.samtools.*;
import java.io.*;
import java.lang.*;
import java.util.*;

/**
 * Mutable holder for the fields of a single sequencing read, plus helpers that
 * decode its CIGAR string ({@link #parse_cigar()}), encode its bases against a
 * reference ({@link #parse_bases(Reference)}) and classify "weird" read pairs
 * ({@link #check_if_weird()}).
 *
 * <p>All state is exposed as public fields and is filled in by external code;
 * this class performs no I/O of its own apart from a diagnostic line written to
 * {@code System.err} when the reference lookup in {@code parse_bases} fails.
 */
public class Read {

    private static final int MAX_READLEN = 1000;
    private static final int MAX_CIGARUNITS = 100;
    private static final int WEIRDPAIR_THRESHOLD = 1000;
    private static final int WEIRDPAIR_DISTINCTNESS_THRESHOLD = 1000;

    // Offset added to the chromosome offset for minus-strand mates (same units as CHR_OFFSETS).
    private static final long MINUS_STRAND_OFFSET = 3300;

    // Per-chromosome offsets used by css_to_bin, in units of one million positions:
    // entry [chr-1] is the offset for chromosome chr (1..24); entry [24] is the value
    // past the last chromosome.
    // IMPORTANT: this table must be declared before max_weirdbin, whose static
    // initializer calls css_to_bin (static fields are initialized in textual order).
    private static final long[] CHR_OFFSETS = {
        0,251,500,705,902,1088,1264,1428,1580,1724,1865,2005,2143,2263,
        2375,2481,2575,2659,2741,2810,2878,2930,2985,3145,3208 };

    public boolean empty = true;

    public boolean mapped = false;
    public int chr = 0;                                 // 1-based
    public String rgstring = null;
    public boolean isBlacklisted = false;
    public short readgroup = 0;                         // 0-based (according to order given in BAM)
    public String name = null;
    public int namenumber = 0;                          // Java hash of readname
    public byte whichpairmate = 0;                      // 1=first 2=second  -1=unpaired
    public int start = 0;                               // 1-based
    public int end = 0;                                 // 1-based
    public int insertsize = 0;
    public byte strand = 0;                             // 0=plus 1=minus
    public byte mapqual = 0;                            // 0-127 (capped at 127)
    public byte nmismatches = 0;                        // 0-127 (capped at 127)   -1=unmapped read
    public short nonrefsumq = 0;                        // sum of qualities of non-reference bases
                                                        // (saturates at Short.MAX_VALUE)  -1=unmapped read
    public byte ninsertions = 0;                        // number of inserted bases in the read (capped at 127)
                                                        // -1=unmapped read

    public boolean paired = false;
    public boolean pairmatemapped = false;
    public byte pairmatechr = 0;                        // 1=chr1 24=chrY     -1=unpaired  -2=unmapped-pairmate
    public int pairmatestart = 0;                       // 1-based            -1=unpaired  -2=unmapped-pairmate
    public byte pairmatestrand = 0;                     // 0=plus 1=minus     -1=unpaired  -2=unmapped-pairmate
    public byte pairmatequal = 0;
    public String pairmateseq = null;

    public char[] seqstring = null;
    public int seqlength = -1;           // for reads with insertions/deletions, seqlength~=numBases
    public char[] qualstring = null;
    public String cigarstring = null;
    public int numcigarunits = -1;
    public int[] cigarunittype = new int[MAX_CIGARUNITS];   // 0=M/=/X  1=I  2=D  3=S  4=H
    public int[] cigarunitlen = new int[MAX_CIGARUNITS];

    public byte[] base = new byte[MAX_READLEN];           // -100=deletion
                                                          // -1=N   1=A  2=C  3=G  4=T   base=reference
                                                          // 63=N  65=A 66=C 67=G 68=T   base=non-reference
    public byte[] basequal = new byte[MAX_READLEN];       // 0-127 (capped at 127)      -100=deletion

    public int numBases = 0;          // number of bases actually stored in "base"

    public int weirdbin = -1;
    public static final int max_weirdbin = css_to_bin((byte)24,(int)60e6,(byte)1);

    /**
     * Creates an empty read (see {@link #clear()}).
     *
     * @throws Exception declared for backward compatibility with existing callers
     */
    public Read() throws Exception {
        clear();
    }

    /** Marks this read as empty. Only the {@code empty} flag is reset; other fields keep their values. */
    public void clear() {
        empty = true;
    }

    //////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////

    /**
     * Decodes {@code cigarstring} into {@code cigarunittype}/{@code cigarunitlen}
     * and sets {@code numcigarunits}.
     *
     * <p>A cigar of {@code "*"} is first replaced by {@code seqlength + "M"}.
     * Operators are encoded as 0 (M, and also = and X, which consume read and
     * reference exactly like M), 1 (I), 2 (D), 3 (S) and 4 (H).
     *
     * @throws Exception if the cigar is malformed (missing length, missing or
     *         unsupported operator, length not an int) or has more than
     *         {@code MAX_CIGARUNITS} units
     */
    public void parse_cigar() throws Exception {

        // for unmapped reads, replace "*" with "all M"
        if (cigarstring.equals("*")) cigarstring = seqlength+"M";

        final int clen = cigarstring.length();
        int u = 0;
        int pos = 0;
        while (pos < clen) {
            // Check capacity before storing, so a cigar with exactly MAX_CIGARUNITS units is accepted.
            if (u == MAX_CIGARUNITS) throw new Exception("Cigar too complex: " + cigarstring);

            // Scan the run of digits that forms the unit length.
            final int numstartpos = pos;
            while (pos < clen && cigarstring.charAt(pos) >= '0' && cigarstring.charAt(pos) <= '9') pos++;

            // Either no digits at all, or digits with no operator after them.
            if (pos == numstartpos || pos == clen) {
                throw new Exception("Unparseable cigar: " + cigarstring);
            }

            final int unitlen;
            try {
                unitlen = Integer.parseInt(cigarstring.substring(numstartpos, pos));
            } catch (NumberFormatException e) {
                // digit run too large for an int
                throw new Exception("Unparseable cigar: " + cigarstring, e);
            }

            final char op = cigarstring.charAt(pos++);
            final int unittype;
            switch (op) {
                case 'M':
                case '=':
                case 'X': unittype = 0; break;
                case 'I': unittype = 1; break;
                case 'D': unittype = 2; break;
                case 'S': unittype = 3; break;
                case 'H': unittype = 4; break;
                default:  throw new Exception("Unparseable cigar: " + cigarstring);
            }

            cigarunitlen[u] = unitlen;
            cigarunittype[u] = unittype;
            u++;
        }
        numcigarunits = u;
    }

    //////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////

    /**
     * Walks the parsed cigar units and fills {@code base}, {@code basequal} and
     * {@code numBases}; for mapped reads also computes {@code nmismatches},
     * {@code ninsertions} and {@code nonrefsumq}.
     *
     * <p>Requires {@link #parse_cigar()} to have populated the cigar arrays and
     * {@code seqstring} to be set. If {@code qualstring} is null, a quality of 30
     * is assumed for aligned bases and inserted bases contribute nothing to
     * {@code nonrefsumq}.
     *
     * <p>For mapped reads the reference sequence is obtained with
     * {@code ref.get(chr,start,end)} and each aligned base is compared against it;
     * mismatching bases are stored with 64 added to their code. Inserted and
     * deleted bases each count as one mismatch. For unmapped reads the three
     * counters are left at -1 and {@code ref} is not used.
     *
     * @param ref reference used to fetch the sequence for mapped reads
     * @throws Exception if the reference lookup fails (after logging the read to
     *         {@code System.err}) or if the read spans more than
     *         {@code MAX_READLEN} reference positions
     */
    public void parse_bases(Reference ref) throws Exception {
        String refdna = null;
        if (mapped) {
            try {
                refdna = ref.get(chr,start,end);
            } catch (Exception e) {
                System.err.println("Read="+name+"; chr="+chr+"; start="+start+"; end=" +end);
                throw e;
            }
            nmismatches = (byte)0;
            ninsertions = (byte)0;
            nonrefsumq = (short)0;
        } else {
            nmismatches = (byte)-1;
            ninsertions = (byte)-1;
            nonrefsumq = (short)-1;
        }

        int nonrefsum = 0;   // accumulated in an int so the short field cannot wrap around
        int j = 0;           // index into base/basequal (and into refdna)
        int pos = 0;         // index into seqstring/qualstring
        for (int u = 0; u < numcigarunits; u++) {
            final int unitlen = cigarunitlen[u];
            switch (cigarunittype[u]) {
                case 0:   // match/mismatch
                    for (int k = 0; k < unitlen; k++) {
                        require_room(j);
                        final char b = seqstring[pos];
                        final boolean is_mismatch = mapped && (b != refdna.charAt(j));
                        base[j] = encode_base(b);
                        int q = 30;   // assume 30 if qualities are not known
                        if (qualstring != null) q = qualstring[pos]-33;   // convert from ASCII to 0-based Phred score
                        basequal[j] = (q <= 127) ? (byte)q : (byte)127;
                        if (is_mismatch) {
                            if (nmismatches < 127) nmismatches++;
                            nonrefsum += q;
                            base[j] += 64;         // +64 for non-reference ACGT   (non-reference N = 63)
                        }
                        j++;
                        pos++;
                    }
                    break;
                case 1:   // insertion
                    for (int k = 0; k < unitlen; k++) {
                        // Only mapped reads carry counters; unmapped reads keep their -1 sentinels.
                        if (mapped) {
                            if (nmismatches < 127) nmismatches++;
                            if (ninsertions < 127) ninsertions++;
                            if (qualstring != null) nonrefsum += qualstring[pos]-33;
                        }
                        pos++;
                    }
                    break;
                case 2:   // deletion
                    for (int k = 0; k < unitlen; k++) {
                        require_room(j);
                        base[j] = (byte)-100;
                        basequal[j] = (byte)-100;
                        j++;
                        if (mapped && nmismatches < 127) nmismatches++;
                    }
                    break;
                case 3:   // soft-clip
                    pos += unitlen;  // (no mismatch penalty)
                    break;
                default:  // hard clip: nothing to do, hard-clipped bases are not present in the read sequence
                    break;
            }
        }
        if (mapped) {
            nonrefsumq = (short)Math.max(Short.MIN_VALUE, Math.min(Short.MAX_VALUE, nonrefsum));
        }
        numBases = j;
    }

    /** Maps a base character to its reference-matching code: A=1 C=2 G=3 T=4, anything else (N) = -1. */
    private static byte encode_base(char b) {
        switch (b) {
            case 'A': return (byte)1;
            case 'C': return (byte)2;
            case 'G': return (byte)3;
            case 'T': return (byte)4;
            default:  return (byte)-1;
        }
    }

    /** Fails with a descriptive error when index j no longer fits in the base/basequal arrays. */
    private void require_room(int j) throws Exception {
        if (j >= MAX_READLEN) {
            throw new Exception("Read too long (more than " + MAX_READLEN + " aligned positions): " + name);
        }
    }

    //////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////

    /**
     * Sets {@code weirdbin} to the bin of the pairmate's position when this read
     * is part of a weird pair (see {@link #isweird()}) and {@code pairmatechr} is
     * positive; otherwise sets it to -1.
     *
     * @throws Exception declared for backward compatibility with existing callers
     */
    public void check_if_weird() throws Exception {
        if (isweird() && pairmatechr > 0) {
            weirdbin = css_to_bin(pairmatechr,pairmatestart,pairmatestrand);
        } else {
            weirdbin = -1;
        }
    }

    //////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////

    /**
     * Tells whether this read belongs to a "weird" pair: a paired read whose mate
     * is on a different chromosome, or whose start differs from the mate's start
     * by at least {@code WEIRDPAIR_THRESHOLD} in either direction.
     *
     * @return false for unpaired reads; otherwise true if the pair is weird
     */
    public boolean isweird() {
        if (!paired) return false;              // unpaired
        if (pairmatechr != chr) return true;    // also true for the negative "unmapped-pairmate" code
        int dist = start - pairmatestart;
        return (dist >= WEIRDPAIR_THRESHOLD || dist <= -WEIRDPAIR_THRESHOLD);
    }

    /**
     * Returns {@code pairmatequal} unchanged.
     *
     * @return the value of {@code pairmatequal}
     */
    public int isbad() {
        return pairmatequal;
    }

    //////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////

    /**
     * Converts a (chromosome, start, strand) triple into a bin number: the
     * chromosome offset (plus {@code MINUS_STRAND_OFFSET} when {@code strand==1})
     * is scaled by one million, {@code start} is added, and the result is divided
     * by {@code WEIRDPAIR_DISTINCTNESS_THRESHOLD}.
     *
     * <p>{@code chr} is not validated: a value below 1 indexes outside the offset
     * table. The only caller in this class passes {@code pairmatechr > 0}.
     *
     * @param chr    1-based chromosome number
     * @param start  position on the chromosome
     * @param strand 1 selects the minus-strand offset; any other value adds none
     * @return the bin number
     */
    private static int css_to_bin(byte chr, int start, byte strand) {
        final long chroffset;
        if (chr <= 24) {
            chroffset = CHR_OFFSETS[chr-1];
        } else {
            // chrN_random etc.
            // NOTE(review): the original expression was "tmp[24] + (chroffset-24)" with chroffset==0,
            // which is almost certainly a typo for "(chr-24)". As written, every chr>24 gets the same
            // offset (tmp[24]-24), which overlaps the range used by chromosome 24. The arithmetic is
            // deliberately preserved here: the corrected formula would yield minus-strand bins above
            // max_weirdbin, which callers outside this file may rely on as an upper bound. Confirm how
            // max_weirdbin is used before changing this.
            chroffset = CHR_OFFSETS[24] - 24;
        }
        final long stroffset = (strand == 1) ? MINUS_STRAND_OFFSET : 0;
        long pos = ((chroffset + stroffset)*(long)1e6) + (long)start;
        pos /= WEIRDPAIR_DISTINCTNESS_THRESHOLD;
        return (int)pos;
    }

    //////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////
}
