/* Copyright (C) 2003-2008 Dan Arlow
 * 
 * This file is part of motifADE.
 * 
 * motifADE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * motifADE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with motifADE.  If not, see <http://www.gnu.org/licenses/>.
 */

/* 
 *  iupac_motif_incidence_collector.cpp
 *
 *  Class for collecting the incidence of a set of IUPAC-degenerate motifs
 *  in a set of sequences into a MotifIncidenceMap, using Patrick Varilly's
 *  DFA IUPAC motif matching code.
 */


#include <string>
#include <vector>

#include "common.hpp"
#include "sequence.hpp"
#include "iupac_motif.hpp"
#include "motif_incidence_map.hpp"
#include "iupac_motif_incidence_map.hpp"
#include "motif_instance.hpp"
#include "motif_incidence_collector.hpp"
#include "iupac_motif_incidence_collector.hpp"
#include "dfa_iupac_motif_incidence_collector.hpp"

#include "multi_dfa.hpp"


/*
 *  Collects the incidence of every motif in `motifs` across every sequence
 *  in `seqs` and returns it as a newly allocated IUPACMotifIncidenceMap.
 *
 *  Motifs are processed in batches: for each batch, all patterns of the
 *  motifs in that batch are compiled into a single DFA, and every sequence
 *  is then scanned once with that DFA.  Each match is recorded in the map
 *  as a MotifInstance( sequence index, text position ) under the index of
 *  the motif that owns the matched pattern.
 *
 *  NOTE: the recorded position is the position of the RIGHTMOST character
 *  of the match, not the leftmost (see the comment at the addInstance call).
 *
 *  The returned map is not retained by this function; the caller presumably
 *  takes ownership of it.  If anything throws while the map is being built,
 *  the partially built map is deleted and the exception is rethrown.
 */
MotifIncidenceMap*
DFAIUPACMotifIncidenceCollector::collectIncidence( const SequenceVector& seqs, IUPACMotifVector& motifs )
{
	IUPACMotifIncidenceMap* m = new IUPACMotifIncidenceMap;
	
	// Everything below may throw (allocation, DFA construction, matching);
	// guard the raw allocation so the map is not leaked in that case.
	try
	{
		setMotifNames( motifs, m );
		
		vector< PatternToID > patternList;
		
		// Despite its name, this caps the number of MOTIFS per batch; each
		// motif contributes all of its patterns to the batch's DFA.
		const int max_patterns_per_batch = 10;
		
		for( unsigned int start = 0; start < motifs.size(); start += max_patterns_per_batch )
		{
			patternList.clear();
			
			for( unsigned int i = start; i < motifs.size() && i < (start+max_patterns_per_batch); i++ ) {
				// NOTE(review): setMotifNames() above presumably already sets
				// these names -- confirm whether this call is redundant.
				m->setMotifName( i, motifs[ i ]->getName() );
				
				// Tag every pattern with the index of the motif it belongs to,
				// so a match can be attributed back to that motif.
				const svector& patterns = motifs[ i ]->getPatterns();
				for( svector::const_iterator iter = patterns.begin(); iter != patterns.end(); ++iter ) {
					patternList.push_back( PatternToID( *iter, i ) );
				}
			}
			
			// Spot patterns like an eagle...
			DFA dfa( patternList );
			#ifdef MOTIFADE_DEBUG
			//cout << "Matching DFA has " << dfa.getNumStates() << " states" << endl;
			#endif
			
			// Scan each sequence in turn
			vector< MatchInstance > motifsInSeq;
			for( unsigned int seq = 0; seq < seqs.size(); ++seq ) {
				// Start from an empty buffer so that matches found in an earlier
				// sequence can never be attributed to this one, whether or not
				// findAllMatches() clears its output argument (not visible here).
				motifsInSeq.clear();
				dfa.findAllMatches( seqs[ seq ]->getData(), motifsInSeq );
				
				for( vector< MatchInstance >::const_iterator iter = motifsInSeq.begin(); iter != motifsInSeq.end(); ++iter )
					m->addInstance( iter->patternNumber, MotifInstance( seq, iter->textPosition ) ); // textPosition is the position of the RIGHTMOST character of the motif! how to fix this to make it leftmost? complicated!
			}
		}
	}
	catch( ... )
	{
		delete m;
		throw;
	}
	
	return m;
}


/*
 *  Per-motif, per-sequence incidence collection is not implemented by the
 *  DFA-based collector.  This overload ignores all of its arguments and
 *  always throws a MotifADEException.
 */
void
DFAIUPACMotifIncidenceCollector::collectIncidence( MotifIncidenceMap::MotifNumber motifNum, unsigned int seqNum, const SequenceVector& seqs, MotifIncidenceMap* m )
{
	// None of the arguments are used; mark them so explicitly.
	(void) motifNum;
	(void) seqNum;
	(void) seqs;
	(void) m;
	
	throw MotifADEException( "DFAIUPACMotifIncidenceCollector does not support the collectIncidence method for individual motifs at this time." );
}
