/* Copyright (C) 2003-2008 Dan Arlow
 * 
 * This file is part of motifADE.
 * 
 * motifADE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * motifADE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with motifADE.  If not, see <http://www.gnu.org/licenses/>.
 */

/* 
 *  kmer_encoder.hpp
 *
 *  Interface to a class for converting k-mers into ints
 */


#ifndef KMER_ENCODER_H
#define KMER_ENCODER_H


#include <vector>
#include <string>


#include "common.hpp"
#include "sequence.hpp"


/*
 *  KmerEncoder
 *
 *  Converts between k-mers (short nucleotide sequences) and integer
 *  values of type KmerValue.  Only the interface is declared here; the
 *  exact encoding is defined by the implementation file.
 *
 *  Standard library types are spelled with an explicit std:: prefix so
 *  that this header does not depend on a using-directive being pulled
 *  in by an earlier include.
 */
class KmerEncoder {
public:
	// Integer type used to hold an encoded k-mer (and single encoded nucleotides).
	typedef		unsigned int		KmerValue;
	// Container type of the nucleotide sequences this encoder reads from.
	typedef		Sequence::DataType  NtSeqType;
	
	// Creates an encoder for k-mers of length `kmersize`.
	KmerEncoder( unsigned int kmersize );
	virtual ~KmerEncoder();
	
	// Sentinel with every bit set (~0 converted to KmerValue).
	// NOTE(review): presumably returned when a k-mer cannot be encoded -- confirm in the implementation.
	static const KmerValue		INVALID_VALUE = ~0;
	
	// NOTE(review): presumably reports the same count as numKmers -- confirm in the implementation.
	unsigned int				totalKmers() const;
	
	// Single-nucleotide conversions and per-position access within an encoded k-mer.
	// NOTE(review): which end of the k-mer `pos` counts from is not visible from this header.
	KmerValue					ntToValue( char nt ) const;
	KmerValue					getNtValue( KmerValue value, unsigned int pos ) const;
	KmerValue					setNtValue( KmerValue value, KmerValue newvalue, unsigned int pos ) const;
	char						valueToNt( KmerValue value ) const;
	
	// Strand transformations applied to an encoded k-mer.
	KmerValue					reverse( KmerValue value ) const;
	KmerValue					complement( KmerValue value ) const;
	KmerValue					reverseComplement( KmerValue value ) const;
	
	// NOTE(review): "forward" presumably selects a canonical member of a
	// k-mer / reverse-complement pair -- confirm against the implementation.
	bool						isForward( KmerValue value ) const;
	KmerValue					getForwardVersion( KmerValue value ) const;
	
	// Returns a pointer to a vector of successor values for `ntValue`.
	// NOTE(review): presumably points into the `successors` member, so the
	// encoder would retain ownership -- confirm before storing the pointer.
	const std::vector< KmerValue >*  getSuccessors( KmerValue ntValue ) const;
	
	// Writes the textual form of the encoded k-mer `value` into `s`.
	virtual	void						kmerValueToSequence( KmerValue value, std::string& s ) const;
	
	// Computes the value of the k-mer located at `pos`.
	virtual KmerValue					computeKmerValue( NtSeqType::const_iterator pos ) const;
	virtual KmerValue					nextKmerValue( KmerValue currentValue, NtSeqType::const_iterator pos ) const; // new way to get next KmerValue
	
	// Iterator bounds for walking the k-mers of `seq`.
	// NOTE(review): whether getLastPos() is inclusive or one-past-the-end is not visible here.
	virtual NtSeqType::const_iterator   getFirstPos( const NtSeqType& seq ) const;
	virtual NtSeqType::const_iterator   getLastPos( const NtSeqType& seq ) const;
	
public:
	// Immutable parameters fixed at construction.  Declaration order is the
	// order in which they are initialized, so do not reorder them.
	// NOTE(review): the meaning of umerSize and msbShift is not visible from this header.
	const unsigned int			kmerSize, numKmers, umerSize, msbShift;

protected:
	KmerValue					nextKmerValue( KmerValue currentValue, char nextNt ) const; // deprecated -- caller shouldn't have to dereference iterator
	
protected:
	// Lookup tables; their contents are defined by the implementation.
	std::vector< KmerValue >			masks;
	std::vector< std::vector< KmerValue > > successors;
	
};


#endif // KMER_ENCODER_H
