/* Copyright (C) 2003-2008 Dan Arlow
 * 
 * This file is part of motifADE.
 * 
 * motifADE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * motifADE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with motifADE.  If not, see <http://www.gnu.org/licenses/>.
 */

/* 
 *  nucleotide_distribution.hpp
 *
 *  Class for columns of a "position weight matrix" motif.
 */


#ifndef NUCLEOTIDE_DISTRIBUTION_H
#define NUCLEOTIDE_DISTRIBUTION_H


#include <string>
#include <vector>
#include <algorithm>
#include <functional>
#include <numeric>
#include <iostream>
#include <iterator>


#include "common.hpp"


/**
 * One column of a "position weight matrix" motif: one weight for each of the
 * four nucleotides A, C, G and T, held together with a second set of four
 * values named logA..logT.
 *
 * Only declarations live in this header; the behaviour of every member is
 * defined in the implementation file.  Notes below that go beyond what the
 * signatures show are marked as assumptions to be confirmed there.
 *
 * std::string is spelled with its namespace so that this header does not
 * depend on a using-directive being in effect at the point of inclusion.
 */
class NucleotideDistribution {
public:
	// Predefined distributions, one per IUPAC nucleotide symbol.  Their values
	// are set in the implementation file.  By the IUPAC convention the letters
	// mean: M = A/C, R = A/G, S = C/G, W = A/T, Y = C/T, K = G/T, V = not T,
	// H = not G, D = not C, B = not A, N = any -- TODO confirm the definitions
	// follow that convention.
	static const	NucleotideDistribution  IUPAC_A, IUPAC_C, IUPAC_G, IUPAC_T,
											IUPAC_M, IUPAC_R, IUPAC_S, IUPAC_W, IUPAC_Y, IUPAC_K,
											IUPAC_V, IUPAC_H, IUPAC_D, IUPAC_B,
											IUPAC_N;
	
	// Returns a reference to the predefined distribution for the symbol `nt`.
	// NOTE(review): handling of an unrecognised symbol is not visible here.
	static const	NucleotideDistribution& getIUPACSymbolDistribution( char nt );
	
	// Tests whether `nt` is a nucleotide character.
	// NOTE(review): the accepted character set (case, ambiguity codes) is
	// decided by the implementation -- confirm before relying on it.
	static bool		isNucleotide( char nt );
	
	// Logarithm helper; presumably guards against non-positive input using the
	// TINY_VAL / LOG_TINY_VAL constants below -- TODO confirm.
	static double   safeLog( double value );
	
private:
	// Numeric constants shared by the implementation; values are defined in the
	// implementation file.
	static const double						TINY_VAL, LOG_TINY_VAL, EPSILON, UNITY, ZERO, LOG_UNITY, LOG_ZERO, CONSENSUS_THRESHOLD;

public:
	// Constructors.  Overloads taking `normalize` let the caller choose whether
	// the weights are normalized on construction; what the overloads without
	// that flag do is decided by the implementation.
	NucleotideDistribution();
	NucleotideDistribution( double a, double c, double g, double t );
	NucleotideDistribution( double a, double c, double g, double t, bool normalize );
	// Builds a distribution from a textual description.
	// NOTE(review): the expected string format is defined by the implementation.
	// Intentionally not `explicit`, so existing implicit conversions from a
	// string keep compiling.
	NucleotideDistribution( const std::string& s );
	NucleotideDistribution( const std::string& s, bool normalize );
	NucleotideDistribution( const NucleotideDistribution& dist );
	NucleotideDistribution( const NucleotideDistribution& dist, bool normalize );
	
	// operators
	NucleotideDistribution& operator=( const NucleotideDistribution& rhs );
	// NOTE(review): an EPSILON constant is declared above; whether equality is
	// exact or tolerance-based is decided by the implementation.
	bool					operator==( const NucleotideDistribution& rhs ) const;
	bool					operator!=( const NucleotideDistribution& rhs ) const;
	
	// accessors
	// Per-nucleotide weight and its log-space counterpart.
	double				getA() const;
	double				getC() const;
	double				getG() const;
	double				getT() const;
	double				getLogA() const;
	double				getLogC() const;
	double				getLogG() const;
	double				getLogT() const;
	// The same values addressed by nucleotide number or by nucleotide character.
	// NOTE(review): the number-to-nucleotide mapping and the handling of an
	// invalid `ntNum` / `nt` are not visible in this header.
	double				getWeight( unsigned int ntNum ) const;
	double				getLogWeight( unsigned int ntNum ) const;
	double				getWeight( char nt ) const;
	double				getLogWeight( char nt ) const;
	// Presumably the largest / smallest of the four log weights -- TODO confirm.
	double				getLogMax() const;
	double				getLogMin() const;
	// Summary statistics of the column.
	// NOTE(review): units (bits vs. nats) depend on the implementation.
	double				getEntropy() const;
	double				getInformationContent() const;
	// Single IUPAC character summarising the column; presumably selected using
	// CONSENSUS_THRESHOLD -- TODO confirm.
	char				getConsensusIUPACSymbol() const;
	
	// predicates
	bool				isNormalized() const;

	// mutators
	void				set( const NucleotideDistribution& dist );
	void				set( double a, double c, double g, double t );
	// Sets the distribution from a textual description (format defined by the
	// implementation).
	void				set( const std::string& rawDist );
	void				setA( double value );
	void				setC( double value );
	void				setG( double value );
	void				setT( double value );
	void				setLogA( double value );
	void				setLogC( double value );
	void				setLogG( double value );
	void				setLogT( double value );
	void				setWeight( unsigned int ntNum, double value );
	void				setLogWeight( unsigned int ntNum, double value );
	void				setWeight( char nt, double value );
	void				setLogWeight( char nt, double value );
	void				renormalize();
	// Presumably swaps the weights of complementary bases (A<->T, C<->G) --
	// TODO confirm.
	void				complement();
	
private:
	// Internal helpers.
	// Overload taking the four values by reference; being const, it cannot
	// modify this object's own members.
	void				renormalize( double& a, double& c, double& g, double& t ) const;
	// Presumably recomputes logA..logT from pA..pT -- TODO confirm.
	void				resetLogWeights();
	// Validation hooks.  NOTE(review): how a failed check is reported (assert,
	// exception, ...) is decided by the implementation.
	void				checkWeight( double value ) const;
	void				checkLogWeight( double value ) const;

	void				checkRep() const;
	void				checkNormalization() const;
	
private:
	// Weight of each nucleotide.
	double				pA, pC, pG, pT;
	// Log-space counterparts of the weights above (presumably their logarithms).
	double				logA, logC, logG, logT;
};


ostream& operator<<( ostream& os, const NucleotideDistribution& dist );


#endif // NUCLEOTIDE_DISTRIBUTION_H
