package org.broadinstitute.cga.tools.seq;

import org.broadinstitute.cga.tools.seq.Fasta;
import java.io.*;
import java.lang.*;
import java.util.*;

/**
 * Aligner backed by an in-memory hash of fixed-length subsequences (keys)
 * taken from the sequences of a dictionary FASTA file.
 *
 * Every key is stored together with its reverse complement: the forward key
 * maps to a positive bin value and the reverse complement to the negated value.
 * {@link #align(String)} returns 0 for queries that are not in the hash.
 */
public class HashAligner extends Aligner{

    /** Bases substituted in turn when generating single-mismatch variants of a key. */
    private static final char[] BASES = {'A','C','G','T'};

    /** Key (upper-case subsequence) to signed bin value; negative values mark reverse-complement keys. */
    private final Map<String,Integer> hash = new HashMap<String,Integer>();

    /** Set to true only after the dictionary has been hashed completely. */
    private boolean ready = false;

    /**
     * Builds the hash from a dictionary FASTA file.
     *
     * Example of how values are binned with valResolution = 10:
     * <pre>
     * Seq1                     Seq2      Seq3
     * actagctagctgttacgatcgatc atcatcg   actagctagcta
     * 111111111122222222223333 4444444   555555555566
     * </pre>
     *
     * @param dictionaryFasta  fasta file to load dictionary sequences from
     * @param keyLen           how many bp to make the hash keys; must be at least 1
     * @param valResolution    hash values are binned to this resolution: the value
     *                         advances after every valResolution keys, and always
     *                         advances at the start of a new dictionary sequence
     *                         (enforcing boundaries between dictionary sequences).
     *                         A value of 0 or less never advances the value within
     *                         a sequence.
     * @param allowOneMismatch if 1, additionally hashes every single-base mismatch
     *                         version of each key; any other value disables this
     * @throws Exception if keyLen is invalid, if a sequence is illegal or shorter
     *                   than keyLen, or if reading the FASTA file fails
     */
    public HashAligner(String dictionaryFasta, int keyLen,
                       int valResolution, int allowOneMismatch) throws Exception {
        loadFasta(dictionaryFasta, keyLen, valResolution, allowOneMismatch);
    }

    /** @return true once the dictionary has been hashed completely */
    public boolean isReady() { return(ready); }

    /**
     * Looks up a query in the hash. The query is upper-cased before the lookup.
     *
     * @param key query sequence; only a query of exactly the hashed key length can match
     * @return the bin value stored for the query (negative if the query matched as a
     *         reverse complement), or 0 if the query is not in the hash
     * @throws Exception if the dictionary has not been loaded
     */
    public int align(String key) throws Exception {
        if (!ready) throw new Exception("not ready");
        Integer val = hash.get(key.toUpperCase());
        return (val == null) ? 0 : val.intValue();
    }

    /**
     * Reads every sequence of the dictionary FASTA file and hashes all of its
     * keyLen-long subsequences (plus reverse complements, plus single-mismatch
     * variants when allowOneMismatch is 1).
     *
     * NOTE(review): entries are written with Map.put, so a later insertion
     * replaces an earlier one for the same key. A mismatch variant or a reverse
     * complement can therefore overwrite an exact key hashed earlier, and a key
     * that equals its own reverse complement ends up with the negative value.
     * Confirm this is the intended behavior.
     */
    private void loadFasta(String dictionaryFasta, int keyLen,
                           int valResolution, int allowOneMismatch) throws Exception {

        // Reject a non-positive key length before any file is opened: 0 would
        // silently hash the empty string and a negative length would fail later
        // with an unhelpful StringIndexOutOfBoundsException.
        if (keyLen < 1) {
            throw new IllegalArgumentException("keyLen must be at least 1, got " + keyLen);
        }

        Fasta f = new Fasta(dictionaryFasta);
        int nseqs = 0;
        boolean finished = false;
        try {
            System.out.print("Hashing sequences");
            if (allowOneMismatch==1) System.out.print(" (allowing one mismatch)");
            System.out.print("... ");
            int val = 0;
            int valtick = 0;
            while (f.next()) {
                String h = f.getHeader();
                String s = f.getSeq().toUpperCase();
                if (!isLegalSeq(s)) throw new Exception("Illegal sequence "+h+" = "+s);
                int sl = s.length();
                if (sl<keyLen) throw new Exception(h + " is shorter than keyLen");
                nseqs++;
                // Each sequence starts a fresh bin so that bins never span two sequences.
                val++;
                valtick = 0;
                for (int i=0; i<=sl-keyLen; i++) {
                    String key = s.substring(i,i+keyLen);
                    putSeqAndRc(key,val);
                    if (allowOneMismatch==1) {
                        putOneMismatchVariants(key,val);
                    }
                    // Advance to the next bin after valResolution consecutive keys.
                    valtick++;
                    if (valtick==valResolution) {
                        val++;
                        valtick = 0;
                    }
                }
            }
            finished = true;
        } finally {
            if (!finished) {
                // Failure path: release the reader, but do not let a secondary
                // failure in close() hide the exception that is propagating.
                try {
                    f.close();
                } catch (Exception ignored) {
                    // intentionally ignored; the original exception is more useful
                }
            }
        }
        f.close();
        ready = true;
        System.out.println("hashed "+nseqs+" sequences.");
    }

    /**
     * Hashes every variant of key that differs from it at exactly one position
     * (each position replaced by each of the other three bases), together with
     * the reverse complement of each variant.
     */
    private void putOneMismatchVariants(String key, int value) {
        StringBuilder variant = new StringBuilder(key);
        for (int j=0; j<key.length(); j++) {
            char oldBase = variant.charAt(j);
            for (int k=0; k<BASES.length; k++) {
                char newBase = BASES[k];
                if (oldBase==newBase) continue;
                variant.setCharAt(j,newBase);
                putSeqAndRc(variant.toString(),value);
            }
            // Restore the original base before mutating the next position.
            variant.setCharAt(j,oldBase);
        }
    }

    /** Stores key with the given value and its reverse complement with the negated value. */
    private void putSeqAndRc(String key, int value) {
        hash.put(key, Integer.valueOf(value));
        hash.put(rc(key), Integer.valueOf(-value));
    }

    // The helpers below are disabled local copies. isLegalSeq() and rc() are
    // called above but are not defined in this class; presumably the live
    // versions are inherited from Aligner -- TODO confirm, then delete this block.
    /*
    private boolean isLegalSeq(String seq) {
	for (int i=0;i<seq.length();i++) {
	    char c = seq.charAt(i);
	    if (c!='A' && c!='C' && c!='G' && c!='T') return(false);
	}
	return(true);
    }

    private String rc(String seq) {
	int len = seq.length();
	StringBuffer rcseq = new StringBuffer(len);
	for (int i=len-1; i>=0; i--) {
	    rcseq.append(rc(seq.charAt(i)));
	}
	return rcseq.toString();
    }

    private char rc(char in) {
	if (in=='A') return('T');
	if (in=='C') return('G');
	if (in=='G') return('C');
	if (in=='T') return('A');
	return('N');
    }*/
}
