/* Copyright (C) 2003-2008 Dan Arlow
 * 
 * This file is part of motifADE.
 * 
 * motifADE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * motifADE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with motifADE.  If not, see <http://www.gnu.org/licenses/>.
 */

/* 
 *  neighborhood_iupac_motif_incidence_collector.cpp
 *
 *  Class for collecting the incidence of a set of IUPAC-degenerate motifs
 *  in a set of sequences into a MotifIncidenceMap, allowing mismatches.
 */


#include <string>
#include <vector>

#include "common.hpp"
#include "sequence.hpp"
#include "iupac_motif.hpp"
#include "motif_incidence_map.hpp"
#include "iupac_motif_incidence_map.hpp"
#include "motif_instance.hpp"
#include "motif_incidence_collector.hpp"
#include "iupac_alphabet.hpp"
#include "iupac_motif_incidence_collector.hpp"
#include "neighborhood_iupac_motif_incidence_collector.hpp"


/*
 * Construct a collector over the given motifs.
 *
 * motif_v  is forwarded unchanged to the IUPACMotifIncidenceCollector base.
 * r        is stored as `radius`; findInSequence() accepts a window as a
 *          match when its mismatch count is <= radius.
 */
NeighborhoodIUPACMotifIncidenceCollector::NeighborhoodIUPACMotifIncidenceCollector(
	IUPACMotifVector& motif_v,
	unsigned int r )
	: IUPACMotifIncidenceCollector( motif_v )
	, radius( r )
{
	// All state is set up in the initializer list.
}


/*
 * Destructor. This class releases nothing itself; the body is
 * intentionally empty.
 */
NeighborhoodIUPACMotifIncidenceCollector::~NeighborhoodIUPACMotifIncidenceCollector()
{
	// Nothing to clean up here.
}


/*
 * Build an incidence map for every (sequence, motif) pair.
 *
 * A fresh IUPACMotifIncidenceMap is allocated with `new`, given the motif
 * names via setMotifNames(), and then filled by calling the per-pair
 * collectIncidence() overload for each sequence index and motif index.
 *
 * Returns the newly allocated map. This function neither stores nor frees
 * the pointer, so releasing it is left to the caller.
 *
 * NOTE(review): the per-pair overload looks motifs up in the member
 * `motifVector`, not in the `motifs` argument; only motifs.size() and the
 * names are taken from `motifs` here -- confirm both refer to the same set.
 */
MotifIncidenceMap*
NeighborhoodIUPACMotifIncidenceCollector::collectIncidence( const SequenceVector& seqs, IUPACMotifVector& motifs )
{
	IUPACMotifIncidenceMap* incidenceMap = new IUPACMotifIncidenceMap;
	setMotifNames( motifs, incidenceMap );
	
	// Outer loop over sequences, inner loop over motifs.
	for( unsigned int seqIdx = 0; seqIdx < seqs.size(); ++seqIdx ) {
		for( unsigned int motifIdx = 0; motifIdx < motifs.size(); ++motifIdx ) {
			collectIncidence( motifIdx, seqIdx, seqs, incidenceMap );
		}
	}
	
	return incidenceMap;
}


/*
 * Record every approximate occurrence of one motif in one sequence.
 *
 * motifNum  index into the member motifVector (also the key passed to
 *           m->addInstance)
 * seqNum    index into seqs
 * seqs      sequences being scanned
 * m         map that receives one MotifInstance per hit
 *
 * Each pattern returned by the motif's getPatterns() is searched
 * independently with findInSequence(); overlapping hits are all reported
 * because the search resumes one position after the previous hit.
 */
void
NeighborhoodIUPACMotifIncidenceCollector::collectIncidence( MotifIncidenceMap::MotifNumber motifNum, unsigned int seqNum, const SequenceVector& seqs, MotifIncidenceMap* m )
{
	const Sequence& targetSeq = *seqs[ seqNum ];
	const IUPACMotif& targetMotif = *motifVector[ motifNum ];
	const svector& motifPatterns = targetMotif.getPatterns();
	
	for( unsigned int idx = 0; idx < motifPatterns.size(); ++idx ) {
		const string& currentPattern = motifPatterns[ idx ];
		
		// Walk from hit to hit; restarting at hit + 1 prevents reporting
		// the same instance again.
		for( Sequence::DataType::size_type hit = findInSequence( currentPattern, targetSeq, 0 );
			 hit != Sequence::DataType::npos;
			 hit = findInSequence( currentPattern, targetSeq, hit + 1 ) ) {
			// The stored position is the offset from the left end of the
			// sequence, NOT the distance from the right end.
			m->addInstance( motifNum, MotifInstance( seqNum, hit ) );
		}
	}
}


unsigned int
NeighborhoodIUPACMotifIncidenceCollector::hammingDistance( const string& motif, Sequence::DataType::const_iterator textIter ) const
{
	unsigned int numMismatches = 0;
	
	string::const_iterator i = motif.begin();
	while( i != motif.end() )
		if( !doesIUPACMatchChar( *i++, *textIter++ ) )
			++numMismatches;
	
	return numMismatches;
}


/*
 * Find the first position >= pos at which `motif` occurs in `seq` with at
 * most `radius` mismatches.
 *
 * motif  pattern to look for (compared via hammingDistance())
 * seq    sequence to scan
 * pos    first candidate start position
 *
 * Returns the start position of the first acceptable window, or
 * Sequence::DataType::npos when there is none. npos is also returned when
 * the motif is longer than the sequence, or when pos lies beyond the last
 * position at which the motif could still fit.
 */
Sequence::DataType::size_type
NeighborhoodIUPACMotifIncidenceCollector::findInSequence( const string& motif, const Sequence& seq, Sequence::DataType::size_type pos ) const
{
	const Sequence::DataType& seqData = seq.getData();
	
	// size() is unsigned: subtracting a larger motif length would wrap
	// around to a huge value and let the scan run off the end of the
	// sequence, so reject that case before computing lastPos.
	if( motif.size() > seqData.size() )
		return Sequence::DataType::npos;
	
	// Last start position at which the whole motif still fits.
	const Sequence::DataType::size_type lastPos = seqData.size() - motif.size();
	
	// Check the range before forming the iterator; begin() + pos is only
	// valid while pos does not exceed the sequence length.
	if( pos > lastPos )
		return Sequence::DataType::npos;
	
	Sequence::DataType::const_iterator seqIter = seqData.begin() + pos;
	for( ; pos <= lastPos; ++pos, ++seqIter )
		if( hammingDistance( motif, seqIter ) <= radius )
			return pos;
	
	return Sequence::DataType::npos;
}
