/* Copyright (C) 2003-2008 Dan Arlow
 * 
 * This file is part of motifADE.
 * 
 * motifADE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * motifADE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with motifADE.  If not, see <http://www.gnu.org/licenses/>.
 */

/* 
 *  main.cpp
 *
 *  Implements the motifADE algorithm first described in
 *  "Erra and Gabpa/b specify PGC-1a-dependent oxidative phosphorylation gene
 *  expression that is altered in diabetic muscle" published 4/20/2004
 *  in Proc. Natl. Acad. Sci. USA., and extensions thereto.
 */


#include <iostream>
#include <vector>
#include <string>
#include <exception>
#include <cstdlib>
#include <unistd.h>

#include "common.hpp"
#include "buffered_reader.hpp"

#include "sequence.hpp"
#include "sequence_set.hpp"
#include "sequence_set_reader.hpp"
#include "promoter_set.hpp"
#include "promoter_set_reader.hpp"

#include "orthology_map.hpp"
#include "orthology_map_reader.hpp"
#include "orthology_annotator.hpp"

#include "expression_set.hpp"
#include "expression_set_reader.hpp"
#include "sequence_expression_map.hpp"
#include "sequence_expression_map_reader.hpp"
#include "expression_annotator.hpp"

#include "gene_id_set.hpp"
#include "gene_id_set_reader.hpp"
#include "gene_id_set_marker.hpp"
#include "gene_id_set_collection.hpp"
#include "gene_id_set_collection_reader.hpp"

#include "iupac_motif.hpp"
#include "iupac_motif_vector_reader.hpp"
#include "pwm_motif.hpp"
#include "pwm_motif_vector_reader.hpp"
#include "kmer_encoder.hpp"
#include "gap_kmer_encoder.hpp"

#include "motif_instance.hpp"
#include "motif_incidence_set.hpp"
#include "motif_incidence_map.hpp"
#include "iupac_motif_incidence_map.hpp"
#include "pwm_motif_incidence_map.hpp"
#include "kmer_incidence_map.hpp"
#include "bidirectional_kmer_incidence_map.hpp"

#include "precomputed_motif_incidence_collector.hpp"
#include "iupac_motif_incidence_collector.hpp"
#include "dfa_iupac_motif_incidence_collector.hpp"
#include "neighborhood_iupac_motif_incidence_collector.hpp"
#include "pwm_motif_incidence_collector.hpp"
#include "bidirectional_pwm_motif_incidence_collector.hpp"
#include "kmer_incidence_collector.hpp"
#include "bidirectional_kmer_incidence_collector.hpp"

#include "motif_incidence_computer.hpp"
#include "neighborhood_kmer_incidence_computer.hpp"

#include "motif_scanner.hpp"
#include "conserved_motif_scanner.hpp"

#include "expression_statistics.hpp"
#include "null_statistic_receiver.hpp"
#include "min_p_value.hpp"
#include "print_statistic.hpp"
#include "print_nominally_significant_statistic.hpp"
#include "print_significant_statistic.hpp"
#include "mann_whitney_u.hpp"
#include "length_corrected_mann_whitney_u.hpp"
#include "kolmogorov_smirnov_d.hpp"
#include "student_t.hpp"
#include "binomial_gene_set_motif_enrichment.hpp"
#include "hypergeometric_gene_set_motif_enrichment.hpp"
#include "print_incidence.hpp"
#include "collect_incidence_matrix.hpp"


/*
 * Prints the motifADE command-line synopsis to standard output,
 * one line at a time, flushing after each line.
 */
void usage() {
	// The synopsis is kept as a table so the text can be read at a glance;
	// the rows are emitted verbatim, in order.
	static const char* const synopsis[] = {
		"usage: motifADE [-u num_samples | -d | -t | -s | -n | -f gene_set | -h gene_set]",
		"                [-a adj_sig_level | -p nominal_sig_level]",
		"                [-b] [-r neighborhood_radius]",
		"                [-k kmer_sizes | -g gap_formats | -m motif_file | -w pssm_file | -S incidence_file]",
		"                [-i | -I | -l | -L bp_up,bp_down]",
		"                [-O order_file | -E expression_file] [-A annotation_file] [-M] [-J] [-P] [-v]",
		"                [-C orthology_file -o FASTA_ortholog_file] FASTA_promoter_file"
	};
	const unsigned int rowCount = sizeof( synopsis ) / sizeof( synopsis[ 0 ] );
	
	for( unsigned int row = 0; row < rowCount; ++row )
		cout << synopsis[ row ] << endl;
}


/*
 * Prints the usage synopsis and terminates the process.
 *
 * This is an error path (missing arguments, unknown option), so the process
 * exits with a failure status; callers such as shell scripts can then detect
 * that motifADE did not run.  Does not return.
 */
void usage_error() {
	usage();
	exit( EXIT_FAILURE );
}


/*
 * Prints "error: <msg>" followed by the usage synopsis, then terminates
 * the process with a failure status so that the calling shell or script can
 * tell the invocation was rejected.  Does not return.
 *
 * msg: NUL-terminated description of what was wrong with the command line.
 */
void usage_error( const char* msg ) {
	cout << "error: " << msg << endl;
	usage();
	exit( EXIT_FAILURE );
}


// Signature of an optional callback that consumes collected incidence maps.
// Arguments, in order: the promoter vector, the incidence map collected on the
// promoters themselves, and one incidence map per ortholog species.
// When scanAllMotifs() is given a non-null receiver it calls the receiver
// instead of running the statistical scan.
typedef void ( *IncidenceMapsReceiver )( const PromoterVector&, const MotifIncidenceMap*, vector< const MotifIncidenceMap* >& );


// Collects motif incidence for pv (and its orthologs) with incidenceCollector,
// then either passes the maps to incidenceMapsReceiver (when non-null) or
// scans them with a MotifScanner / ConservedMotifScanner (chosen by
// use_conservation), feeding calculator, statistic and receiver.
void scanAllMotifs(
	const PromoterVector& pv,
	MotifIncidenceCollector* incidenceCollector,
	MotifIncidenceComputer* incidenceComputer,
	bool use_conservation,
	ExpressionStatisticCalculator* calculator,
	Statistic* statistic,
	StatisticReceiver* receiver,
	IncidenceMapsReceiver incidenceMapsReceiver );


// Runs incidenceCollector over the promoters in pv and returns the resulting
// map; additionally resizes orthologIncidenceMaps to numOrthologs and fills
// entry i with the map collected over ortholog i of every promoter.
MotifIncidenceMap* collectIncidence(
	const PromoterVector& pv,
	unsigned int numOrthologs,
	MotifIncidenceCollector* incidenceCollector,
	vector< const MotifIncidenceMap* >& orthologIncidenceMaps );


// IncidenceMapsReceiver that streams the promoter incidence map and each
// ortholog incidence map to cout (selected by the -l option in main()).
void printIncidenceMaps(
	const PromoterVector& pv,
	const MotifIncidenceMap* incidenceMap,
	vector< const MotifIncidenceMap* >& orthologIncidenceMaps );


// IncidenceMapsReceiver that prints a tab-separated table to cout with one
// row per motif instance (selected by the -L option in main()).
void printIncidenceTables(
	const PromoterVector& pv,
	const MotifIncidenceMap* incidenceMap,
	vector< const MotifIncidenceMap* >& orthologIncidenceMaps );


// Flanking window sizes for printIncidenceTables(): main() sets them from the
// "-L bp_up,bp_down" argument and printIncidenceTables() reads them directly.
// They are globals only because the IncidenceMapsReceiver signature has no
// slot for extra parameters (the original author's words: "terrible hack").
unsigned int bpUpstream = 0, bpDownstream = 0; // agh! terrible hack!


/*
 * Program entry point.
 *
 * Steps, in order:
 *   1. parse the command line with getopt() and validate option combinations
 *      (any violation prints a message plus the usage text and exits);
 *   2. load the promoter FASTA file and, when requested, orthologs plus the
 *      orthology map (-C/-o), expression or order data (-E/-O) with an
 *      optional annotation map (-A), and a gene set (-f/-h);
 *   3. build the statistic calculator, statistic and statistic receiver that
 *      correspond to the selected test;
 *   4. load the requested motif sources (-m, -w, -S) and run scanAllMotifs()
 *      once per source, and once per k-mer length / gap format (-k, -g);
 *   5. release the objects allocated in steps 3 and 4.
 *
 * Returns 0 on success.  Unless MOTIFADE_DEBUG is defined, a std::exception
 * escaping steps 2-5 is reported on cout and the program returns EXIT_FAILURE.
 */
int main( int argc, char* const argv[] ) {
	// getopt() returns an int.  Storing it in a plain char makes the "!= -1"
	// end-of-options test fail on platforms where char is unsigned, so the
	// raw result is kept in an int and narrowed only when it is a real option.
	int				c;
	char			test = ' ';		// selected test / mode letter; ' ' means the default test
	double			alpha = 1.0;
	unsigned int	num_samples = 0, radius = 0;
	svector			kmer_lengths, gap_kmer_formats, background_gap_formats;
	int				index;
	
	bool			use_alpha = false,
					use_p = false,
					use_background = false,
					scan_kmers = false,
					scan_gap_kmers = false,
					scan_iupac_motifs = false,
					scan_pssms = false,
					scan_incidence = false,
					use_verbose = false,
					use_bidirectional = false,
					use_composition_correction = false,
					use_order_file = false,
					use_expression_file = false,
					use_annotation_file = false,
					use_conservation = false,
					use_ortholog_file = false,
					use_neighborhood_incidence = false,
					print_mapping_stats = false,
					print_annotated_promoters = false,
					print_annotation_mapping = false,
					print_incidence = false,
					use_gene_set_file = false;
						
	string			motif_file,
					pssm_file,
					incidence_file,
					annotation_file,
					expression_file,
					fasta_promoter_file,
					fasta_ortholog_file,
					orthology_file,
					gene_set_file,
					bp_upstream_str,
					bp_downstream_str;
	
	// stays null unless -l or -L is selected
	IncidenceMapsReceiver incidenceMapsReceiver = 0;
	
	
	DEBUG_BLOCK(
		for( int i = 0; i < argc; ++i )
			cout << argv[ i ] << ' ';
		cout << endl << endl;
	)
	
	
	// parse command line arguments
	if( argc == 1 ) usage_error();
		
	// suppress getopt's own diagnostics; the '?' case below reports them
	opterr = 0;
	
	while( ( c = getopt( argc, argv, "u:dtsiIlL:nf:h:k:g:m:w:S:bc:r:a:p:B:O:A:E:MC:o:PJv" ) ) != -1 )
	{
		switch( c )
		{
			case 'k':
				scan_kmers = true;
				tokenize( back_inserter( kmer_lengths ), optarg );
				break;
				
			case 'b':
				use_bidirectional = true;
				break;
			
			case 'g':
				tokenize( back_inserter( gap_kmer_formats ), optarg );
				scan_gap_kmers = true;
				break;
			
			case 'm':
				scan_iupac_motifs = true;
				motif_file = optarg;
				break;
			
			case 'w':
				scan_pssms = true;
				pssm_file = optarg;
				break;
			
			case 'S':
				scan_incidence = true;
				incidence_file = optarg;
				break;
			
			case 'v':
				use_verbose = true;
				break;
				
			case 'L':
				// argument has the form "bp_up,bp_down"
				split( string( optarg ), bp_upstream_str, bp_downstream_str, ',' );
				bpUpstream = atoi( bp_upstream_str.c_str() );
				bpDownstream = atoi( bp_downstream_str.c_str() );
				test = static_cast< char >( c );
				break;
				
			case 'u':
				num_samples = atoi( optarg );
				// intentional fall through: -u also selects the test letter
			case 'd':
			case 't':
			case 's':
			case 'i':
			case 'I':
			case 'l':
			case 'n':
				test = static_cast< char >( c );
				break;
			
			case 'f':
			case 'h':
				test = static_cast< char >( c );
				gene_set_file = optarg;
				use_gene_set_file = true;
				break;
			
			case 'c':
				use_composition_correction = true;
				num_samples = atoi( optarg );
				break;
			
			
			case 'r':
				use_neighborhood_incidence = true;
				radius = atoi( optarg );
				break;
			
			case 'a':
				alpha = atof( optarg );
				use_alpha = true;
				break;
			
			case 'p':
				alpha = atof( optarg );
				use_p = true;
				break;
				
			case 'B':
				tokenize( back_inserter( background_gap_formats ), optarg );
				use_background = true;
				break;
				
			case 'O':
				use_order_file = true;
				expression_file = optarg;
				break;
				
			case 'A':
				use_annotation_file = true;
				annotation_file = optarg;
				break;
				
			case 'E':
				use_expression_file = true;
				expression_file = optarg;
				break;
			
			case 'M':
				print_mapping_stats = true;
				break;
			
			case 'P':
				print_annotated_promoters = true;
				break;
			
			case 'J':
				print_annotation_mapping = true;
				break;
			
			case 'C':
				use_conservation = true;
				orthology_file = optarg;
				break;
			
			case 'o':
				use_ortholog_file = true;
				fasta_ortholog_file = optarg;
				break;
			
			case '?':
				if( isprint( optopt ) ) cout << "Unknown option `-" << static_cast< char >( optopt ) << "'." << endl;
				else					cout << "Unknown option character `\\x" << hex << optopt << "'." << endl;
				usage_error();
				break;
				
			default:
				usage_error( "unknown error in parsing command line arguments." );
				break;
		}
	}
	
	// the first non-option argument is the promoter FASTA file
	index = optind;
	if( index >= argc )				usage_error( "FASTA_promoter_file not specified." );
	fasta_promoter_file = argv[ index++ ];
			
	// validate the command line arguments
		
	// can't choose both adjusted p-value threshold and nominal p-value threshold
	if( use_alpha && use_p )
		usage_error( "can't use both adjusted p-value threshold and nominal p-value threshold." );
	
	// can't choose a p-value threshold and background p-value estimation
	if( ( use_alpha || use_p ) && use_background )
		usage_error( "can't use both a p-value threshold and background p-value estimation." );	
	
	// background p-value estimation is not available for IUPAC or PSSM motifs
	if( ( scan_iupac_motifs || scan_pssms ) && use_background )
		usage_error( "can't use background p-value estimation with IUPAC motifs or PSSM motifs." );		
	
	// must use the same number of background gap formats as k-mer lengths
	if( use_background && scan_kmers )
		if( background_gap_formats.size() != kmer_lengths.size() )
			usage_error( "must use the same number of background gap formats as k-mer lengths." );

	// must use the same number of background gap formats as gap k-mer formats
	if( use_background && scan_gap_kmers )
		if( background_gap_formats.size() != gap_kmer_formats.size() )
			usage_error( "must use the same number of background gap formats as gap k-mer formats." );
	
	// stepwise regression requires a specified alpha
	if( test == 's' && !use_alpha )
		usage_error( "stepwise regression requires you to specify alpha with -a" );

	// composition correction not currently supported
	if( use_composition_correction )
		usage_error( "composition correction is not supported in this version of motifADE." );
	
	// composition correction is only implemented for the mann-whitney test
//	if( use_composition_correction && test != 'u' )
//		usage_error( "composition correction only implemented for the Mann-Whitney Test." );
	
	// must scan either k-mers *or* iupac-degenerate motifs, but not both
//	if( scan_kmers && scan_iupac_motifs )
//		usage_error( "must choose either -k or -m but not both." );
	
	// NEW: bidirectional scanning is supported for all types of motifs
	// bidirectional scanning is only supported for k-mers and gap k-mers
//	if( use_bidirectional && scan_iupac_motifs )
//		usage_error( "bidirectional scanning is only supported for k-mers and gap k-mers." );
	
	// must use either an order file *or* an expression file, but not both
	if( use_order_file && use_expression_file )
		usage_error( "must choose either -O or -E but not both." );
	
	// must use either an order file or an expression file unless using print incidence or only print mapping stats
	// NOTE(review): -l also only prints incidence but is not exempted here -- confirm whether that is intentional
	if( !( test == 'i' || test == 'I' || test == 'L' || test == 'n' || test == 'f' || test == 'h' ) && !( use_expression_file || use_order_file ) )
		usage_error( "must choose either -O or -E unless using -i or -I or -L or -n." );
	
	// composition correction is only implemented for kmer scanning
	if( use_composition_correction && !scan_kmers )
		usage_error( "composition correction only implemented for scanning k-mers." );
	
	// using conservation requires both an orthology file and a FASTA ortholog file
	if( ( use_conservation && !use_ortholog_file ) || ( !use_conservation && use_ortholog_file ) )
		usage_error( "conservation searching requires both an orthology file and a FASTA ortholog file." );
	
	// neighborhood incidence is not supported for PSSM motifs
	if( use_neighborhood_incidence && scan_pssms )
		usage_error( "cannot use -r with PSSM motifs." );
	
	// in debug builds exceptions are left uncaught so they reach the debugger
	#ifndef MOTIFADE_DEBUG
	try {
	#endif
		
		// ---- load promoters ----
		PromoterSet promoters;
		{
			if( use_verbose ) cout << "loading promoters..." << flush;
			
			BufferedReader* pb = openBufferedReader( fasta_promoter_file.c_str() );
			PromoterSetReader pReader( *pb );
			PromoterSetReader::Stats pReaderStats;
			pReader.load( promoters, pReaderStats );
			delete pb;
			
			if( use_verbose ) cout << "done." << endl << endl;
			
			if( print_mapping_stats ) {
				cout << "promoter stats:" << endl;
				pReaderStats.print( cout );
				cout << endl << endl;
			}
		}
		
		// ---- load orthologs and attach them to the promoters ----
		if( use_conservation ) {
			
			SequenceSet orthologs;
			{
				if( use_verbose ) cout << "loading orthologs..." << flush;
				
				BufferedReader* osb = openBufferedReader( fasta_ortholog_file.c_str() );
				SequenceSetReader osReader( *osb );
				SequenceSetReader::Stats osReaderStats;
				osReader.load( orthologs, osReaderStats );
				delete osb;
				
				if( use_verbose ) cout << "done." << endl << endl;
				
				if( print_mapping_stats ) {
					cout << "ortholog stats:" << endl;
					osReaderStats.print( cout );
					cout << endl << endl;
				}
			}
			
			OrthologyMap om;
			{
				if( use_verbose ) cout << "loading orthology map..." << flush;
				
				BufferedReader* omb = openBufferedReader( orthology_file.c_str() );
				OrthologyMapReader omReader( *omb );
				OrthologyMapReader::Stats omReaderStats;
				omReader.load( om, omReaderStats );
				delete omb;
				
				if( use_verbose ) cout << "done." << endl << endl;
				
				if( print_mapping_stats ) {
					cout << "orthology map stats:" << endl;
					omReaderStats.print( cout );
					cout << endl << endl;
				}
			}
			
			{
				if( use_verbose ) cout << "performing orthology annotation..." << flush;
				
				OrthologyAnnotator oa( promoters );
				OrthologyAnnotator::Stats oaStats;
				oa.annotate( om, orthologs, oaStats );
				
				if( use_verbose ) cout << "done." << endl << endl;
				
				if( print_mapping_stats ) {
					cout << "orthology annotation mapping stats:" << endl;
					oaStats.print( cout );
					cout << endl << endl;
				}
			}
			
		}
		
		// ---- load expression (or order) data and attach it to the promoters ----
		if( use_expression_file || use_order_file ) {
			
			ExpressionSet expr;
			{
				if( use_verbose ) cout << "loading expression..." << flush;
				
				// -O and -E both store their argument in expression_file;
				// use_order_file tells the reader which of the two was given
				BufferedReader* eb = openBufferedReader( expression_file.c_str() );
				ExpressionSetReader eReader( *eb, use_order_file );
				ExpressionSetReader::Stats eReaderStats;
				eReader.load( expr, eReaderStats );
				delete eb;
				
				if( use_verbose ) cout << "done." << endl << endl;
				
				if( print_mapping_stats ) {
					cout << "expression stats:" << endl;
					eReaderStats.print( cout );
					cout << endl << endl;
				}
			}
			
			SequenceExpressionMap sem;
			
			if( use_annotation_file ) {
				
				if( use_verbose ) cout << "loading expression annotation map..." << flush;
				
				BufferedReader* semb = openBufferedReader( annotation_file.c_str() );
				SequenceExpressionMapReader semReader( *semb );
				SequenceExpressionMapReader::Stats semReaderStats;
				semReader.load( sem, semReaderStats );
				delete semb;
				
				if( use_verbose ) cout << "done." << endl << endl;
				
				if( print_mapping_stats ) {
					cout << "annotation mapping stats:" << endl;
					semReaderStats.print( cout );
					cout << endl << endl;
				}
				
			} else { // use sequence IDs
				
				// create identity map of sequence IDs
				for( PromoterSet::iterator i = promoters.begin(); i != promoters.end(); ++i )
					sem.insert( SequenceExpressionMap::value_type( ( *i )->getID(), ( *i )->getID() ) );
				
			}
			
			{
				if( use_verbose ) cout << "performing expression annotation..." << flush;
				
				ExpressionAnnotator ea( promoters );
				ExpressionAnnotator::Stats eaStats;
				ea.annotate( sem, expr, eaStats );
				
				if( use_verbose ) cout << "done." << endl << endl;
				
				if( print_mapping_stats ) {
					cout << "expression annotation mapping stats:" << endl;
					eaStats.print( cout );
					cout << endl << endl;
				}
			}
			
		}
		
				
		// initialize the promoter vector
		PromoterVector pv;
		promoters.asVector( pv );
		
		// later code indexes pv[ 0 ], so an empty vector must be rejected here
		if( pv.size() == 0 )
			throw( MotifADEException( "no promoters retained!" ) );
		
		
		// ---- load the gene set and mark its members among the promoters ----
		MarkSet geneSetMarks( pv.size() );
		if( use_gene_set_file ) {
			GeneIDSet geneSet;
			
			if( use_verbose ) cout << "loading gene set..." << flush;
			
			BufferedReader* gsb = openBufferedReader( gene_set_file.c_str() );
			GeneIDSetReader gsReader( *gsb );
			GeneIDSetReader::Stats gsReaderStats;
			gsReader.load( geneSet, gsReaderStats );
			delete gsb;
			
			if( use_verbose ) cout << "done." << endl << endl;
			
			GeneIDSetMarker geneSetMarker( pv, use_annotation_file );
			unsigned int numGenesMarked = geneSetMarker.markGeneSet( geneSet, geneSetMarks );
			
			if( print_mapping_stats ) {
				cout << "gene set stats:" << endl;
				gsReaderStats.print( cout );
				cout << numGenesMarked << " genes from the gene set in the remaining sequences." << endl;
				cout << endl << endl;
			}
			
//			cout << "geneSet:" << endl;
//			for( GeneIDSet::const_iterator i = geneSet.begin(); i != geneSet.end(); ++i )
//				cout << *i << endl;
//						
//			cout << "geneSetMarks:" << endl;
//			for( unsigned int i = 0; i < pv.size(); ++i )
//				cout << geneSetMarks[ i ] << '\t' << ( use_annotation_file ? pv[ i ]->getExpression().getID() : pv[ i ]->getID() ) << endl;
			
		}
		
				
		// print annotated promoters if requested
		if( print_annotated_promoters ) {
			if( use_verbose ) cout << "promoters:" << endl << endl;
			for( PromoterVector::const_iterator i = pv.begin(); i != pv.end(); ++i )
				cout << **i;
			cout << endl;
		}

		// print annotation mapping if requested
		if( print_annotation_mapping ) {
			cout << "Sequence ID\tExpression ID" << endl;
			for( PromoterVector::const_iterator i = pv.begin(); i != pv.end(); ++i )
				cout << ( *i )->getID() << '\t' << ( *i )->getExpression().getID() << endl;
		}
				
		
		// initialize the calculator and statistic and statistic receiver
		ExpressionStatisticCalculator*	calculator = 0;
		Statistic*						statistic  = 0;
		switch( test ) {
			case ' ':
				// use Mann-Whitney U test -- default
				if( use_composition_correction )
//					calculator = new CompositionCorrectedMannWhitneyUCalculator( pv, num_samples, 10 * alpha, use_alpha );
					usage_error( "composition correction is not supported in this development build of motifADE." );
				else
					calculator = new MannWhitneyUCalculator( pv );
				
				statistic  = new MannWhitneyU;
				break;
			
			case 'u':
				// use Mann-Whitney U test with length correction
				calculator = new LengthCorrectedMannWhitneyUCalculator( pv, num_samples );
				
				statistic  = new MannWhitneyU;
				break;
			
			case 'd':
				// use Kolmogorov-Smirnov d test
				calculator = new KolmogorovSmirnovDCalculator( pv );
				statistic  = new KolmogorovSmirnovD;
				break;

			case 't':
				// use Student's t test
				calculator = new StudentTCalculator( pv );
				statistic  = new StudentT;
				break;
			
			case 's':
				// use stepwise regression with Student's t test
				calculator = new StudentTCalculator( pv, true, alpha );
				statistic  = new StudentT;
				break;
			
			case 'i':
				// print the incidence set given
				calculator = new PrintIncidence( pv, use_annotation_file );
				statistic = new IncidenceStatistic;
				break;

			case 'I':
				// leave the statistic pointer null -- required
				print_incidence = true;
				calculator = new CollectIncidenceMatrix( pv, cout, use_annotation_file );
				break;
				
			case 'l':
				// leave both pointers null -- we aren't going to use them
				print_incidence = true;
				incidenceMapsReceiver = printIncidenceMaps;
				break;
			
			case 'L':
				// leave both pointers null -- we aren't going to use them
				print_incidence = true;
				incidenceMapsReceiver = printIncidenceTables;
				break;
			
			case 'n':
				// do no statistical test and exit
				return 0;
			
			case 'f':
				// compute motif enrichment in a given gene set using a binomial null
				calculator = new BinomialGeneSetEnrichmentCalculator( pv, geneSetMarks );
				statistic = new BinomialGeneSetEnrichment;
				break;
				
			case 'h':
				// compute motif enrichment in a given gene set using a hypergeometric null
				calculator = new HypergeometricGeneSetEnrichmentCalculator( pv, geneSetMarks );
				statistic = new HypergeometricGeneSetEnrichment;					
				break;
			
			default:
				string msg = string( "Strangely, unknown statistical test specified: " ) + test + string( " -- how did this happen?" );
				usage_error( msg.c_str() );
				break;
		}
		
		// choose where computed statistics go
		StatisticReceiver* receiver;
		if( test == 'I' )
			receiver = new NullStatisticReceiver;
		else if( use_alpha )
			receiver = new PrintSignificantStatistic( cout, alpha );
		else if( use_p )
			receiver = new PrintNominallySignificantStatistic( cout, alpha );
		else
			receiver = new PrintStatistic( cout );
				
		
		Statistic::USE_ADJUSTED = !use_p;
		
		
		MotifIncidenceComputer* incidenceComputer;
		
		if( use_neighborhood_incidence && ( scan_kmers || scan_gap_kmers ) )
			incidenceComputer = new NeighborhoodKmerIncidenceComputer( radius );
		else
			incidenceComputer = new MotifIncidenceComputer();
		
		
		
		// re-created for every scan below and deleted right after it
		MotifIncidenceCollector* incidenceCollector;

		// ---- load motif sources ----
		IUPACMotifVector IUPACMotifs;
		
		if( scan_iupac_motifs ) {
			
			if( use_verbose ) cout << "loading motifs..." << flush;
			
			BufferedReader* motifBr = openBufferedReader( motif_file.c_str() );
			IUPACMotifVectorReader motifReader( *motifBr, use_bidirectional );
			IUPACMotifVectorReader::Stats motifStats;
			motifReader.load( IUPACMotifs, motifStats );
			delete motifBr;
			
			if( use_verbose ) cout << "done." << endl << endl;
			
			if( print_mapping_stats ) {
				cout << "motif stats:" << endl;
				motifStats.print( cout );
				cout << endl << endl;
			}
			
			// not sure why this is appropriate...
			//Statistic::USE_ADJUSTED = false;
		}
		
		
		PWMMotifVector PSSMMotifs;
		
		if( scan_pssms ) {
			
			if( use_verbose ) cout << "loading PSSMs..." << flush;
			
			BufferedReader* motifBr = openBufferedReader( pssm_file.c_str() );
			PWMMotifVectorReader motifReader( *motifBr );
			PWMMotifVectorReader::Stats motifStats;
			motifReader.load( PSSMMotifs, motifStats );
			delete motifBr;
			
			if( use_verbose ) cout << "done." << endl << endl;
			
			if( print_mapping_stats ) {
				cout << "motif stats:" << endl;
				motifStats.print( cout );
				cout << endl << endl;
			}
			
//			cout << endl << "PSSMs:" << endl;
//			for( unsigned int i = 0; i < PSSMMotifs.size(); ++i )
//				cout << *PSSMMotifs[ i ] << endl;
//			cout << endl;
			
			// not sure why this is appropriate...
			//Statistic::USE_ADJUSTED = false;
		}
		
		
		GeneIDSetCollection precomputedIncidence;
		
		if( scan_incidence ) {
			
			if( use_verbose ) cout << "loading precomputed incidence sets..." << flush;
			
			BufferedReader* motifBr = openBufferedReader( incidence_file.c_str() );
			GeneIDSetCollectionReader motifReader( *motifBr );
			GeneIDSetCollectionReader::Stats motifStats;
			motifReader.load( precomputedIncidence, motifStats );
			delete motifBr;
			
			if( use_verbose ) cout << "done." << endl << endl;
			
			if( print_mapping_stats ) {
				cout << "precomputed incidence stats:" << endl;
				motifStats.print( cout );
				cout << endl << endl;
			}
			
//			cout << endl << "Precomputed incidence:" << endl;
//			cout << precomputedIncidence << endl;
		}

		
		
		
		// statistic is null in the incidence-printing modes (-I, -l, -L),
		// all of which set print_incidence, so the dereference is guarded
		if( !print_incidence ) {
			cout << "Motif\t";
			statistic->printHeader( cout );
			cout << endl;
		}
		
		
		// ---- scan each requested motif source ----
		if( scan_iupac_motifs ) {
			
			if( use_neighborhood_incidence )
				incidenceCollector = new NeighborhoodIUPACMotifIncidenceCollector( IUPACMotifs, radius );
			else
				incidenceCollector = new DFAIUPACMotifIncidenceCollector( IUPACMotifs );
			
			scanAllMotifs( pv, incidenceCollector, incidenceComputer, use_conservation, calculator, statistic, receiver, incidenceMapsReceiver );
			delete incidenceCollector;
		} 
		
		
		if( scan_pssms ) {
			
			if( use_bidirectional )
				incidenceCollector = new BidirectionalPWMMotifIncidenceCollector( PSSMMotifs );
			else
				incidenceCollector = new PWMMotifIncidenceCollector( PSSMMotifs );
			
			scanAllMotifs( pv, incidenceCollector, incidenceComputer, use_conservation, calculator, statistic, receiver, incidenceMapsReceiver );
			delete incidenceCollector;
		}
		
		
		if( scan_incidence ) {
			
			incidenceCollector = new PrecomputedMotifIncidenceCollector( precomputedIncidence );
			
			scanAllMotifs( pv, incidenceCollector, incidenceComputer, use_conservation, calculator, statistic, receiver, incidenceMapsReceiver );
			delete incidenceCollector;
		}
		
		
		if( scan_kmers ) {
			
			// one full scan per requested k-mer length
			for( unsigned int i = 0; i < kmer_lengths.size(); ++i ) {
				KmerEncoder encoder( atoi( kmer_lengths[ i ].c_str() ) );
								
				if( use_bidirectional )
					incidenceCollector = new BidirectionalKmerIncidenceCollector( encoder );
				else
					incidenceCollector = new KmerIncidenceCollector( encoder );
				
				scanAllMotifs( pv, incidenceCollector, incidenceComputer, use_conservation, calculator, statistic, receiver, incidenceMapsReceiver );
				delete incidenceCollector;
			}
			
		}
		

		if( scan_gap_kmers ) {
			
			// one full scan per requested gap k-mer format
			for( unsigned int i = 0; i < gap_kmer_formats.size(); ++i ) {
				GapKmerEncoder encoder( gap_kmer_formats[ i ] );
								
				if( use_bidirectional )
					incidenceCollector = new BidirectionalKmerIncidenceCollector( encoder );
				else
					incidenceCollector = new KmerIncidenceCollector( encoder );
				
				scanAllMotifs( pv, incidenceCollector, incidenceComputer, use_conservation, calculator, statistic, receiver, incidenceMapsReceiver );
				delete incidenceCollector;
			}
			
		}
		
		// for -I the calculator was created above as a CollectIncidenceMatrix
		if( test == 'I' ) // print incidence matrix
			dynamic_cast< CollectIncidenceMatrix* >( calculator )->printIncidenceMatrix();
		
		delete incidenceComputer;
		
		delete calculator;
		delete statistic;
		delete receiver;
		
	#ifndef MOTIFADE_DEBUG
	} catch( const std::exception& e ) {
		
		cout << "Exception thrown: " << e.what() << endl;
		
		// report the failure to the caller instead of exiting with success
		return EXIT_FAILURE;
		
	}
	#endif
	
	return 0;
}


/*
 * Collects motif incidence over the promoters and over each ortholog species.
 *
 * pv:                    promoters to scan.
 * numOrthologs:          number of ortholog species to scan per promoter.
 * incidenceCollector:    collector that turns a sequence vector into a map.
 * orthologIncidenceMaps: resized to numOrthologs; entry s receives the map
 *                        collected over ortholog s of every promoter.
 *
 * Returns the map collected over the promoters themselves.  All maps are the
 * pointers handed back by incidenceCollector->collectIncidence().
 */
MotifIncidenceMap* collectIncidence(
	const PromoterVector& pv,
	unsigned int numOrthologs,
	MotifIncidenceCollector* incidenceCollector,
	vector< const MotifIncidenceMap* >& orthologIncidenceMaps )
{
	orthologIncidenceMaps.resize( numOrthologs );
	
	// one slot per promoter; first filled with the promoters themselves,
	// then overwritten species by species with their orthologs
	SequenceVector targets( pv.size() );
	for( unsigned int slot = 0; slot < targets.size(); ++slot )
		targets[ slot ] = pv[ slot ];
	
	MotifIncidenceMap* const promoterMap = incidenceCollector->collectIncidence( targets );
	
	for( unsigned int species = 0; species != numOrthologs; ++species ) {
		for( unsigned int slot = 0; slot < targets.size(); ++slot )
			targets[ slot ] = pv[ slot ]->getOrtholog( species );
		
		orthologIncidenceMaps[ species ] = incidenceCollector->collectIncidence( targets );
	}
	
	return promoterMap;
}


void printIncidenceMaps(
	const PromoterVector& pv,
	const MotifIncidenceMap* incidenceMap,
	vector< const MotifIncidenceMap* >& orthologIncidenceMaps )
{
	unsigned int numOrthologs = orthologIncidenceMaps.size();
	cout << endl << "INCIDENCE:" << endl << *incidenceMap << endl << endl;
	for( unsigned int i = 0; i < numOrthologs; ++i )
		cout << "SPECIES " << i + 1 << " INCIDENCE:" << endl << *orthologIncidenceMaps[ i ] << endl << endl;
}


void printIncidenceTables(
	const PromoterVector& pv,
	const MotifIncidenceMap* incidenceMap,
	vector< const MotifIncidenceMap* >& orthologIncidenceMaps )
{
	bool use_expression = pv[ 0 ]->getExpression().size() > 0;
	bool use_orthologs = orthologIncidenceMaps.size() > 0;
	bool use_flanking = bpUpstream > 0 || bpDownstream > 0;
	
	string instance;
	const string instanceInit( bpUpstream + bpDownstream, 'N' );
	const unsigned int instanceLen = instanceInit.size();
	
	cout << "Motif";
	if( use_orthologs )
		cout << "\tSpecies";
	cout << "\tSequence ID";
	if( use_expression )
		cout << "\tExpression ID\tExpression";
	cout << "\tPosition";
	if( use_flanking )
		cout << "\tInstance";
	cout << endl;
	
	vector< const MotifIncidenceMap* > incidenceMaps( orthologIncidenceMaps.size() + 1 );
	incidenceMaps[ 0 ] = incidenceMap;
	for( unsigned int i = 0; i < orthologIncidenceMaps.size(); ++i )
		incidenceMaps[ i + 1 ] = orthologIncidenceMaps[ i ];
	string motifName;
	const MotifIncidenceSet* incidenceSet;
	for( unsigned int i = 0; i < incidenceMaps.size(); ++i ) {
		const MotifIncidenceMap* m = incidenceMaps[ i ];
		for( MotifIncidenceMap::const_iterator motifIter = m->begin(); motifIter != m->end(); ++motifIter ) {
			m->getMotifName( motifIter->first, motifName );
			incidenceSet = &motifIter->second;
			for( MotifIncidenceSet::const_iterator instanceIter = incidenceSet->begin(); instanceIter != incidenceSet->end(); ++instanceIter ) {
				const Promoter* targetPromoter = pv[ instanceIter->getSeqNum() ];
				const Sequence* targetSequence = i == 0 ? targetPromoter : targetPromoter->getOrtholog( i - 1 );
				unsigned int seqPos = instanceIter->getSeqPos();
				
				cout << motifName;
				
				if( use_orthologs )
					cout << '\t' << i;
				
				cout << '\t' << targetSequence->getID();
				
				if( use_expression )
					cout << '\t' << targetPromoter->getExpression().getID()
				         << '\t' << targetPromoter->getExpression( 0 );
				
				cout << '\t' << seqPos;
				
				if( use_flanking ) {
					instance = instanceInit;
					unsigned int insertPos = 0, extractPos = 0, extractLen = instanceLen, seqLen = targetSequence->getLength();
					if( seqPos < bpUpstream ) {
						unsigned int offset = bpUpstream - seqPos;
						extractLen -= offset;
						insertPos += offset;
					} else {
						extractPos = seqPos - bpUpstream;
					}
					if( seqPos + bpDownstream > seqLen ) {
						unsigned int offset = bpDownstream - seqLen + seqPos;
						extractLen -= offset;
					}
					instance.replace( insertPos, extractLen, targetSequence->getData(), extractPos, extractLen );
					cout << '\t' << instance;
				}
				cout << endl;
			}
		}
	}
}


/*
 * Collects motif incidence for pv with incidenceCollector and processes it.
 *
 * pv:                    promoters to scan; must be non-empty (pv[ 0 ] is
 *                        read to obtain the number of ortholog species).
 * incidenceCollector:    produces the incidence maps.
 * incidenceComputer:     passed through to the motif scanner.
 * use_conservation:      selects ConservedMotifScanner (which also receives
 *                        the ortholog maps) instead of MotifScanner.
 * calculator, statistic,
 * receiver:              passed to the scanner's scan().
 * incidenceMapsReceiver: when non-null, the collected maps are handed to this
 *                        callback and no statistical scan is run.
 *
 * On the scan path Statistic::NUMBER_OF_TESTS is set to the number of entries
 * in the promoter incidence map.
 *
 * The incidence maps collected here are deleted before returning on both
 * paths; previously the callback path returned early and leaked them.
 */
void scanAllMotifs(
	const PromoterVector& pv,
	MotifIncidenceCollector* incidenceCollector,
	MotifIncidenceComputer* incidenceComputer,
	bool use_conservation,
	ExpressionStatisticCalculator* calculator,
	Statistic* statistic,
	StatisticReceiver* receiver,
	IncidenceMapsReceiver incidenceMapsReceiver )
{
	// number of ortholog species, as reported by the first promoter
	unsigned int numOrthologs = pv[ 0 ]->numOrthologs();
	
	vector< const MotifIncidenceMap* > orthologIncidenceMaps;
	MotifIncidenceMap* incidenceMap = collectIncidence( pv, numOrthologs, incidenceCollector, orthologIncidenceMaps );
	
	if( incidenceMapsReceiver != 0 ) {
		
		// print-only mode: the receiver consumes the maps, no statistics
		incidenceMapsReceiver( pv, incidenceMap, orthologIncidenceMaps );
		
	} else {
		
		MotifScanner* scanner;
		
		if( use_conservation )
			scanner = new ConservedMotifScanner( incidenceMap, orthologIncidenceMaps, incidenceComputer, pv.size() );
		else
			scanner = new MotifScanner( incidenceMap, incidenceComputer, pv.size() );
		
		Statistic::NUMBER_OF_TESTS = incidenceMap->size();
		
		scanner->scan( calculator, statistic, receiver );
		
		//TODO: fix! (original author's note; the intended fix is not recorded here)
		delete scanner;
		
	}
	
	// the maps are owned by this function on both paths
	delete incidenceMap;
	deleteVectorElements( orthologIncidenceMaps );
}
