/* Copyright (C) 2003-2008 Dan Arlow
 * 
 * This file is part of motifADE.
 * 
 * motifADE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * motifADE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with motifADE.  If not, see <http://www.gnu.org/licenses/>.
 */

/* 
 *  pwm_motif_incidence_collector.cpp
 *
 *  Class for collecting the incidence of a set of "position weight matrix"
 *  motifs in a set of sequences into a MotifIncidenceMap.
 */


#include <string>
#include <vector>

#include "common.hpp"
#include "sequence.hpp"
#include "pwm_motif.hpp"
#include "motif_incidence_map.hpp"
#include "pwm_motif_incidence_map.hpp"
#include "motif_instance.hpp"
#include "motif_incidence_collector.hpp"
#include "named_motif_incidence_collector.hpp"
#include "pwm_motif_incidence_collector.hpp"


// Builds a new PWMMotifIncidenceMap holding the incidence of every motif in
// motifVector across every sequence in `seqs`.
//
// The motif names are registered on the map first; the sequences are then
// walked in order and, for each sequence, every motif is scanned in order.
//
// Returns the newly allocated map.  Nothing here frees it on success, so the
// caller is presumably responsible for deleting it -- TODO confirm against
// callers.
//
// If registering the names or scanning throws, the partially filled map is
// deleted and the exception is rethrown, so the allocation is not leaked.
MotifIncidenceMap*
PWMMotifIncidenceCollector::collectIncidence( const SequenceVector& seqs )
{
	PWMMotifIncidenceMap* m = new PWMMotifIncidenceMap;
	
	try {
		setMotifNames( motifVector, m );
		
		for( unsigned int seqNum = 0; seqNum < seqs.size(); ++seqNum )
			for( unsigned int motifNum = 0; motifNum < motifVector.size(); ++motifNum )
				collectIncidence( motifNum, seqNum, seqs, m );
	}
	catch( ... ) {
		// The map is still owned by this function at this point; release it
		// before letting the exception propagate unchanged.
		delete m;
		throw;
	}
	
	return m;
}


// Gathers the name of every motif in `motifs`, in positional order, and hands
// the resulting list to NamedMotifIncidenceCollector::setMotifNames together
// with the map `m`.
void
PWMMotifIncidenceCollector::setMotifNames( PWMMotifVector& motifs, PWMMotifIncidenceMap* m )
{
	// One slot per motif; slot k receives the name of motifs[ k ].
	svector names( motifs.size() );
	
	unsigned int idx = 0;
	while( idx < motifs.size() ) {
		names[ idx ] = motifs[ idx ]->getName();
		++idx;
	}
	
	NamedMotifIncidenceCollector::setMotifNames( names, m );
}


// Scans a single sequence for a single motif and records every hit in `m`.
//
//   motifNum  index of the motif within `motifs`; also the key under which
//             hits are added to `m`
//   motifs    the motifs to choose from
//   seqNum    index of the sequence within `seqs`
//   seqs      the sequences to choose from
//   m         map that receives the hits
//
// Every start offset from 0 through (sequence length - motif size) is tried.
// An offset is recorded as MotifInstance( seqNum, offset ) when the motif's
// computeLogScore() at that offset is strictly greater than its log
// threshold.  Unless NUCLEOTIDE_DISTRIBUTION_THROW_ON_BAD_NT is defined,
// offsets for which isValidSequence() is false are skipped without scoring.
void
PWMMotifIncidenceCollector::collectIncidence( MotifIncidenceMap::MotifNumber motifNum, const PWMMotifVector& motifs, unsigned int seqNum, const SequenceVector& seqs, MotifIncidenceMap* m )
{
	const PWMMotif& pwm = *motifs[ motifNum ];
	const Sequence& sequence = *seqs[ seqNum ];
	const double cutoff = pwm.getLogThreshold();
	const Sequence::DataType& residues = sequence.getData();
	
	// A sequence shorter than the motif cannot contain it.  This must be
	// tested explicitly: size_type is unsigned, so the subtraction below would
	// wrap around instead of going negative and the loop bound would not
	// protect us.
	if( residues.length() < pwm.size() )
		return;
	
	const Sequence::DataType::size_type finalStart = residues.length() - pwm.size();
	Sequence::DataType::const_iterator window = residues.begin();
	
	// `window` is advanced in lock-step with `start`.
	for( Sequence::DataType::size_type start = 0; start <= finalStart; ++start, ++window ) {
#ifndef NUCLEOTIDE_DISTRIBUTION_THROW_ON_BAD_NT
		// NOTE: the validity check is compiled in when the macro is NOT
		//       defined (ifndef, not ifdef).
		if( !pwm.isValidSequence( window ) )
			continue;
#endif
		// NOTE: the recorded position is the distance from the LEFT end of
		//       the sequence, not from the right.
		if( pwm.computeLogScore( window ) > cutoff )
			m->addInstance( motifNum, MotifInstance( seqNum, start ) );
	}
}


// Convenience overload: scans sequence `seqNum` of `seqs` for motif
// `motifNum`, taking the motif from this collector's own motifVector, and
// records the hits in `m`.  All work is delegated to the overload that takes
// the motif vector explicitly.
void
PWMMotifIncidenceCollector::collectIncidence( MotifIncidenceMap::MotifNumber motifNum, unsigned int seqNum, const SequenceVector& seqs, MotifIncidenceMap* m )
{
	this->collectIncidence( motifNum, motifVector, seqNum, seqs, m );
}

