/* Copyright (C) 2003-2008 Dan Arlow
 * 
 * This file is part of motifADE.
 * 
 * motifADE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * motifADE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with motifADE.  If not, see <http://www.gnu.org/licenses/>.
 */

/* 
 *  gap_kmer_encoder.cpp
 *
 *  Implementation of a class for converting k-mers with gaps into ints
 */


#include <vector>
#include <string>
#include <algorithm>


#include "common.hpp"
#include "sequence.hpp"
#include "kmer_encoder.hpp"
#include "gap_kmer_encoder.hpp"


// GapKmerEncoder constructor -- sets this encoder up for the given gap format.
//
// gap_format is a template string in which each 'x' marks a nucleotide position
// and each '-' marks a gap.  The KmerEncoder base is constructed with the number
// of 'x' characters, fullLength is set to the total length of the template, and
// the offsets of the 'x' characters are recorded in ntPositions.
//
// Throws MotifADEException (raised by parseGapFormat) if gap_format contains a
// character other than 'x' or '-'.  Note that this validation runs in the
// constructor body, i.e. after the base has already been built from the 'x' count.
//
// NOTE(review): countNucleotides is called in the initializer list before the
// base is constructed; it only reads its argument, so it is presumably declared
// static -- confirm in the header.
GapKmerEncoder::GapKmerEncoder( const string& gap_format )
	: KmerEncoder( countNucleotides( gap_format ) )
	, fullLength( gap_format.size() )
{
	// The returned count is not needed here: the same number of 'x' positions
	// was already handed to the base class above.
	this->parseGapFormat( gap_format );
}


// GapKmerEncoder destructor -- empty body: nothing is released explicitly here.
// The data members and the KmerEncoder base are destroyed automatically once
// this body has run.
GapKmerEncoder::~GapKmerEncoder()
{
}


// GapKmerEncoder method -- parses the given gap format to determine how many nucleotides it contains
unsigned int
GapKmerEncoder::countNucleotides( const string& gap_format )
{
	return count( gap_format.begin(), gap_format.end(), 'x' );
}


// GapKmerEncoder method -- parses the given gap format into ntPositions.
//
// ntPositions is rebuilt from scratch: it is cleared, then the index of every
// 'x' in gap_format is appended in increasing order.  '-' characters are gaps
// and are skipped.
//
// Returns the number of positions recorded (the number of 'x' characters).
//
// Throws MotifADEException on the first character that is neither 'x' nor '-'.
// When that happens ntPositions holds only the positions found before the
// offending character.
unsigned int
GapKmerEncoder::parseGapFormat( const string& gap_format )
{
	ntPositions.clear();

	for( unsigned int offset = 0; offset < gap_format.size(); ++offset ) {
		switch( gap_format[ offset ] ) {
			case 'x':
				// nucleotide position: remember where it sits in the template
				ntPositions.push_back( offset );
				break;

			case '-':
				// gap position: nothing to record
				break;

			default:
				throw( MotifADEException( "GapKmerEncoder::parseGapFormat: unrecognized gap format!" ) );
		}
	}

	return ntPositions.size();
}


// GapKmerEncoder method -- encodes the gapped k-mer that starts at pos.
//
// For each of the kmerSize nucleotide positions, the element at
// pos + ntPositions[ n ] is converted with ntToValue and the result of
// times4n( value, n ) is added to the running total.  (times4n is defined
// elsewhere; presumably value * 4^n, which matches kmerValueToSequence decoding
// two bits per nucleotide, lowest bits first.)  Gap positions are never read.
//
// Returns INVALID_VALUE as soon as any nucleotide converts to INVALID_VALUE;
// otherwise returns the accumulated value.
//
// No bounds checking is done: the caller must make sure that every
// pos + ntPositions[ n ] can be dereferenced.
GapKmerEncoder::KmerValue
GapKmerEncoder::computeKmerValue( NtSeqType::const_iterator pos ) const
{
	KmerValue encoded = 0;

	for( unsigned int n = 0; n < kmerSize; ++n ) {
		KmerValue digit = ntToValue( *( pos + ntPositions[ n ] ) );

		// one bad nucleotide invalidates the whole k-mer
		if( digit == INVALID_VALUE )
			return INVALID_VALUE;

		encoded += times4n( digit, n );
	}

	return encoded;
}


// GapKmerEncoder method -- returns the value of the k-mer at pos.
//
// currentValue (the value of the immediately preceding k-mer) is accepted but
// not used by this implementation: the result is always recomputed from scratch
// by computeKmerValue, so it is INVALID_VALUE whenever computeKmerValue( pos )
// is.  (Presumably an incremental update is not practical once the k-mer
// contains gaps -- confirm against the base class.)
GapKmerEncoder::KmerValue
GapKmerEncoder::nextKmerValue( KmerValue currentValue, NtSeqType::const_iterator pos ) const
{
	static_cast<void>( currentValue );	// not used here; avoids an unused-parameter warning

	return this->computeKmerValue( pos );
}


// GapKmerEncoder method -- decodes a KmerValue into its gapped string form.
//
// On return s has exactly fullLength characters: each nucleotide position holds
// the character produced by valueToNt, and each gap position holds 'N'.  Any
// previous contents of s are discarded.
//
// value is decoded as is; there is no check for INVALID_VALUE here.
void
GapKmerEncoder::kmerValueToSequence( KmerValue value, string& s ) const
{
	// Start from an all-'N' string of the full template length; the gap
	// positions are never overwritten below and so keep their 'N'.
	s.assign( fullLength, 'N' );

	// Two bits per nucleotide, lowest bits first: take the low two bits for
	// position n, then shift them out before moving on to the next position.
	for( unsigned int n = 0; n < kmerSize; ++n, value >>= 2 )
		s[ ntPositions[ n ] ] = valueToNt( value & 3 );
}


// GapKmerEncoder method -- returns a const_iterator to the first valid k-mer
// start position in seq, which is always the beginning of seq.
GapKmerEncoder::NtSeqType::const_iterator
GapKmerEncoder::getFirstPos( const NtSeqType& seq ) const
{
	NtSeqType::const_iterator firstPos = seq.begin();

	return firstPos;
}


// GapKmerEncoder method -- returns a const_iterator to the last valid k-mer
// start position in seq: the position fullLength elements before seq.end(),
// i.e. the last start at which a whole gapped k-mer still fits inside seq.
//
// NOTE(review): there is no check that seq holds at least fullLength elements.
// For a shorter seq the result would lie before seq.begin() -- confirm that
// callers never pass such a sequence.
GapKmerEncoder::NtSeqType::const_iterator
GapKmerEncoder::getLastPos( const NtSeqType& seq ) const
{
	NtSeqType::const_iterator lastPos = seq.end() - fullLength;

	return lastPos;
}
