/* Copyright (C) 2003-2008 Dan Arlow
 * 
 * This file is part of motifADE.
 * 
 * motifADE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * motifADE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with motifADE.  If not, see <http://www.gnu.org/licenses/>.
 */

/* 
 *  pwm_motif.hpp
 *
 *  Class for storing "position weight matrix" motifs.
 */


#ifndef PWM_MOTIF_H
#define PWM_MOTIF_H


#include <string>
#include <vector>
#include <algorithm>
#include <functional>
#include <numeric>
#include <iostream>
#include <iterator>


#include "common.hpp"
#include "id_object.hpp"
#include "buffered_reader.hpp"
#include "sequence.hpp"
#include "tokenizer.hpp"
#include "nucleotide_distribution.hpp"


class PWMMotif {
public:
	PWMMotif();
	PWMMotif( const PWMMotif& rhs );
	PWMMotif( const string& init_name );
	PWMMotif( const string& init_name, const string& init_IUPAC_pattern, double init_threshold );
	
	// operators
	PWMMotif&   operator=( const PWMMotif& rhs );
	bool		operator==( const PWMMotif& rhs ) const;
	bool		operator!=( const PWMMotif& rhs ) const;
	
	// accessors
	const string&					getName() const;
	unsigned int					size() const;
	NucleotideDistribution&			getWeights( unsigned int pos );
	const NucleotideDistribution&   getWeights( unsigned int pos ) const;
	double							getThreshold() const;
	double							getLogThreshold() const;
	double							getEntropy() const;
	double							getInformationContent() const;
	double							getMaxScore() const;
	double							getMinScore() const;
	double							getMaxLogScore() const;
	double							getMinLogScore() const;

	// mutators
	void							set( const PWMMotif& rhs );
	void							setName( const string& newName );
	void							setToIUPACMotif( const string& init_IUPAC_pattern );
	void							resize( unsigned int newSize );
	void							append( const NucleotideDistribution& dist );
	void							setWeights( unsigned int pos, const NucleotideDistribution& dist );
	void							setThreshold( double value );
	void							setLogThreshold( double value );
	void							complement();
	void							reverse();
	void							reverseComplement();

	// matching
	bool							isValidSequence( Sequence::DataType::const_iterator iter ) const;
	bool							isValidSequence( const Sequence& seq, Sequence::DataType::size_type pos ) const;
	double							computeScore( Sequence::DataType::const_iterator iter ) const;
	double							computeLogScore( Sequence::DataType::const_iterator iter ) const;	
	double							computeScore( const Sequence& seq, Sequence::DataType::size_type pos ) const;
	double							computeLogScore( const Sequence& seq, Sequence::DataType::size_type pos ) const;

private:
    string								name;
	double								threshold, logThreshold;
	vector< NucleotideDistribution >	weights;
};


typedef vector< PWMMotif* >	PWMMotifVector;



ostream& operator<<( ostream& os, const PWMMotif& m );

// Extraction operator that takes a BufferedReader and a PWMMotif to fill in,
// and returns the reader by reference.  Declaration only; the expected input
// format is defined in the implementation file.
BufferedReader& operator>>( BufferedReader& is, PWMMotif& m );


#endif // PWM_MOTIF_H
