/* Copyright (C) 2003-2008 Dan Arlow
 * 
 * This file is part of motifADE.
 * 
 * motifADE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * motifADE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with motifADE.  If not, see <http://www.gnu.org/licenses/>.
 */

/* 
 *  binomial_gene_set_motif_enrichment.cpp
 *
 *  Class for comparing the incidence of motifs in a given gene set to
 *  background using a binomial null model.
 */


#include "common.hpp"
#include "promoter.hpp"
#include "expression_statistics.hpp"
#include "mark_set.hpp"
#include "gene_set_motif_enrichment_base.hpp"
#include "binomial_gene_set_motif_enrichment.hpp"


/*
 *  Builds the enrichment statistic from the counts handed to the base class.
 *
 *  num_inside / inside_size and num_outside / outside_size are forwarded
 *  unchanged to GeneSetEnrichmentBase; the members read below (numInside,
 *  insideSize, numOutside, outsideSize, freq, freqInside, freqOutside) are
 *  presumably filled in by that base constructor -- confirm in
 *  gene_set_motif_enrichment_base.hpp.
 *
 *  The statistic is flagged with error = true, and z / p / p_adj are not
 *  assigned here, when any of the four cells (count and size - count, for
 *  both groups) is smaller than 5, or when a count exceeds its group size.
 *  Otherwise:
 *    z     = ( freqInside - freqOutside )
 *            / sqrt( freq * ( 1 - freq ) * ( 1/insideSize + 1/outsideSize ) )
 *    p     = 2 * normalcdf( -|z| )
 *    p_adj = min( 1, p * Statistic::NUMBER_OF_TESTS )
 *
 *  NOTE(review): on the error path z, p and p_adj keep whatever value the
 *  base class / default initialisation gave them -- verify they are
 *  initialised somewhere, since callers such as computeValue() read z.
 */
BinomialGeneSetEnrichment::BinomialGeneSetEnrichment( unsigned int num_inside, unsigned int inside_size, unsigned int num_outside, unsigned int outside_size )
	: GeneSetEnrichmentBase( num_inside, inside_size, num_outside, outside_size )
{
	// A count larger than its group size is inconsistent input.  It has to be
	// rejected before the subtractions below: with unsigned operands
	// "size - count" would wrap around to a huge value and slip past the
	// "< 5" test, producing a meaningless z instead of an error.
	const bool inconsistent = numInside > insideSize || numOutside > outsideSize;

	// Every cell needs at least 5 observations.  This also keeps freq
	// strictly between 0 and 1, so the denominator of z cannot be zero.
	const bool tooSparse = inconsistent
		|| numInside < 5 || insideSize - numInside < 5
		|| numOutside < 5 || outsideSize - numOutside < 5;

	if( tooSparse ) {
		error = true;
	} else {
		z = ( freqInside - freqOutside ) / sqrt( freq * ( 1.0 - freq ) * ( 1.0 / double( insideSize ) + 1.0 / double( outsideSize ) ) );
		// Two-sided p-value: twice the lower tail at -|z|
		// (assumes normalcdf is the standard normal CDF -- confirm).
		p = 2.0 * normalcdf( z > 0 ? -z : z );
		// Scale by the number of tests and cap at 1.
		p_adj = p * static_cast< double >( Statistic::NUMBER_OF_TESTS );
		if( p_adj > 1.0 ) p_adj = 1.0;
	}
}


/*
 *  Writes this statistic to os as tab-separated fields, without a trailing
 *  newline: freq, freqInside, freqOutside, then z and p, followed by p_adj
 *  when USE_ADJUSTED is set.  If the error flag is set, "Err" is written in
 *  place of the z / p column pair (and once more for the adjusted column).
 */
void
BinomialGeneSetEnrichment::print( ostream& os ) const
{
	// The three frequency columns are always available.
	os << freq << '\t' << freqInside << '\t' << freqOutside;

	if( !error ) {
		os << '\t' << z << '\t' << p;
		if( USE_ADJUSTED ) {
			os << '\t' << p_adj;
		}
		return;
	}

	// No valid statistic: emit placeholders instead.
	os << '\t' << "Err";
	if( USE_ADJUSTED ) {
		os << '\t' << "Err";
	}
}


/*
 *  Writes the tab-separated column titles for this statistic to os, without
 *  a trailing newline.  The "Adjusted P-value" title is appended only when
 *  USE_ADJUSTED is set.
 */
void
BinomialGeneSetEnrichment::printHeader( ostream& os ) const
{
	static const char* const titles[] = {
		"Frequency",
		"Frequency In Set",
		"Frequency In Rest",
		"Z-score",
		"P-value"
	};
	const unsigned int numTitles = sizeof( titles ) / sizeof( titles[ 0 ] );

	// Titles are separated by, not terminated with, a tab.
	for( unsigned int i = 0; i < numTitles; ++i ) {
		if( i > 0 ) os << '\t';
		os << titles[ i ];
	}

	if( USE_ADJUSTED ) {
		os << '\t' << "Adjusted P-value";
	}
}


/*
 *  Creates a calculator for the given promoters and gene set.  Both
 *  arguments are passed straight through to GeneSetEnrichmentBaseCalculator;
 *  this class adds no set-up of its own.
 */
BinomialGeneSetEnrichmentCalculator::BinomialGeneSetEnrichmentCalculator(
	const PromoterVector& promoters,
	const MarkSet& gene_set )
	: GeneSetEnrichmentBaseCalculator( promoters, gene_set )
{}


/*
 *  Copy constructor.  Copies the GeneSetEnrichmentBaseCalculator part of
 *  calc; nothing else is copied here.
 */
BinomialGeneSetEnrichmentCalculator::BinomialGeneSetEnrichmentCalculator(
	const BinomialGeneSetEnrichmentCalculator& calc )
	: GeneSetEnrichmentBaseCalculator( calc )
{}


void
BinomialGeneSetEnrichmentCalculator::compute( const MarkSet& marks, Statistic& statistic ) // fix this to take a BinomialGeneSetEnrichment&
{
    BinomialGeneSetEnrichment* stat = dynamic_cast< BinomialGeneSetEnrichment* >( &statistic );
    if( stat == 0 ) throw( MotifADEException( "BinomialGeneSetEnrichmentCalculator::compute: not passed a BinomialGeneSetEnrichment pointer!" ) );
	
	if( marks.size() != getTotalSize() )
		throw( MotifADEException( "BinomialGeneSetEnrichmentCalculator::compute: marks.size() != getTotalSize()" ) );
	
	unsigned int numInside = 0, numOutside = 0;
	
	countIntersection( marks, numInside, numOutside );
	
	*stat = BinomialGeneSetEnrichment( numInside, geneSetSize, numOutside, restSize );
}


double
BinomialGeneSetEnrichmentCalculator::computeValue( const MarkSet& marks )
{
	BinomialGeneSetEnrichment stat;
	compute( marks, stat );
	return stat.z;
}
