/* Copyright (C) 2003-2008 Dan Arlow
 * 
 * This file is part of motifADE.
 * 
 * motifADE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * motifADE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with motifADE.  If not, see <http://www.gnu.org/licenses/>.
 */

/* 
 *  kolmogorov_smirnov_d.cpp
 *
 *  Class that performs a fast caching K-S test
 *  Inherits much of its rank-ordering functionality from MannWhitneyUCalculator
 */


#include "common.hpp"
#include "promoter.hpp"
#include "expression_statistics.hpp"
#include "univariate_expression_statistic_calculator.hpp"
#include "mark_set.hpp"
#include "mann_whitney_u.hpp"

#include "kolmogorov_smirnov_d.hpp"


// Adapted from "Numerical Recipes in C" online at http://www.library.cornell.edu/nr/bookcpdf.html
//
// Sums the alternating series
//     2 * sum_{k=1..100} (-1)^(k-1) * exp( -2 * alam^2 * k^2 )
// and returns the partial sum as soon as the newest term is negligible, either
// relative to the previous term (EPS1) or relative to the running sum (EPS2).
// The result is used by the KolmogorovSmirnovD constructor as the p-value of the
// K-S d statistic, i.e. it is a tail probability rather than a CDF value.
//
// alam    -- the scaled d statistic
// returns -- the converged sum, or 1.0 if the series has not converged after 100 terms
double
probks( double alam )
{
	const double exponentScale = -2.0 * alam * alam;
	double coefficient = 2.0;         // alternates between +2 and -2
	double partialSum = 0.0;
	double previousMagnitude = 0.0;   // |term| from the previous iteration

	for( int k = 1; k <= 100; ++k ) {
		const double term = coefficient * exp( exponentScale * k * k );
		partialSum += term;

		const double magnitude = fabs( term );
		if( magnitude <= EPS1 * previousMagnitude || magnitude <= EPS2 * partialSum )
			return partialSum;

		coefficient = -coefficient;
		previousMagnitude = magnitude;
	}

	// failed to converge within 100 terms
	return 1.0;
}



// KolmogorovSmirnovD constructor -- stores the d value and frequency, then derives the p-values
//
// d_val    -- the K-S d statistic, stored in value
// alam     -- the scaled statistic handed to probks() to obtain p
// freq_val -- the frequency, stored in freq
//
// p_adj is p multiplied by Statistic::NUMBER_OF_TESTS and capped at 1.0.
// NOTE(review): Statistic( false ) presumably clears the error flag -- confirm against Statistic.
KolmogorovSmirnovD::KolmogorovSmirnovD( double d_val, double alam, double freq_val )
    : Statistic( false ),
      freq( freq_val ),
      value( d_val )
{
    p = probks( alam );

    // scale by the number of tests, never reporting a probability above 1
    const double scaled = p * static_cast< double >( Statistic::NUMBER_OF_TESTS );
    p_adj = ( scaled > 1.0 ) ? 1.0 : scaled;
}


// KolmogorovSmirnovD method -- print the summary of the statistic
//
// Writes tab-separated fields to os with six digits of precision: the frequency,
// then either "Err" (when the error flag is set) or the d value and p-value,
// followed by the adjusted p-value only when USE_ADJUSTED is set.
//
// os -- the stream to write to; its formatting flags and precision are modified
void
KolmogorovSmirnovD::print( ostream& os ) const
{
    #ifndef MOTIFADE_CRAPPY_IOMANIP
        os << showpoint << fixed;
    #endif

    os << setprecision( 6 );

    os << freq << '\t';

    if( error ) {
        os << "Err";
        return;
    }

    os << value << '\t' << p;

    // printHeader() only emits the "Adjusted P-value" column when USE_ADJUSTED is set,
    // so the row must follow the same rule or it ends up one column wider than its header
    if( USE_ADJUSTED )
        os << '\t' << p_adj;
}


// KolmogorovSmirnovD method -- print the header of the fields
//
// Writes the tab-separated column titles to os; the "Adjusted P-value" title is
// appended only when USE_ADJUSTED is set.
void
KolmogorovSmirnovD::printHeader( ostream& os ) const
{
    os << "Frequency";
    os << '\t' << "K-S d value";
    os << '\t' << "P-value";

    if( USE_ADJUSTED ) {
        os << '\t' << "Adjusted P-value";
    }
}



// KolmogorovSmirnovDCalculator constructor -- forwards to the base class and sets up the sample buffers
//
// promoters -- passed through to MannWhitneyUCalculator; its size() is also used to
//              construct data0 and data1
// dimension -- passed through to MannWhitneyUCalculator
//
// data0 and data1 are the buffers buildSortedSamples() fills with the unmarked and
// marked expression values respectively.
// NOTE(review): presumably constructing them with promoters.size() reserves one slot per
// promoter -- confirm against their declared type in the header.
KolmogorovSmirnovDCalculator::KolmogorovSmirnovDCalculator( const PromoterVector& promoters, unsigned int dimension )
    : MannWhitneyUCalculator( promoters, dimension ),
      data0( promoters.size() ),
      data1( promoters.size() )
{
    // nothing further to do: all setup happens in the initializer list
}


// KolmogorovSmirnovDCalculator method -- builds the sorted samples for a given MarkSet
void
KolmogorovSmirnovDCalculator::buildSortedSamples( const MarkSet& marks )
{
    unsigned int i = 0, n = marks.size();
    n0 = 0;
    n1 = 0;
    
    for( i = 0; i < n; ++i ) {
        if( marks[ sortedIndex[ i ] ] )
            data1[ n1++ ] = expression[ sortedIndex[ i ] ];
        else
            data0[ n0++ ] = expression[ sortedIndex[ i ] ];
    }
}


// KolmogorovSmirnovDCalculator method -- compute and store the d statistic for a given MarkSet
void
KolmogorovSmirnovDCalculator::compute( const MarkSet& marks, Statistic& statistic ) // fix this to take a KolmogorovSmirnovD&
{
    KolmogorovSmirnovD* stat = dynamic_cast< KolmogorovSmirnovD* >( &statistic );
    if( stat == 0 ) throw( MotifADEException( "KolmogorovSmirnovDCalculator::compute: not passed a KolmogorovSmirnovD pointer!" ) );
    
    buildSortedSamples( marks );
    
    // Adapted from "Numerical Recipies in C" online at http://www.library.cornell.edu/nr/bookcpdf.html
    unsigned long j0 = 0, j1 = 0, count;
    double d, d0, d1, dt, en0, en1, en, fn0 = 0.0, fn1 = 0.0, alam;
    
    count = marks.countMarked( true );
    // check to make sure than the samples are big enough to satisfy the assumptions of the sampling distribution, otherwise fail
    if( ( count < 5 ) || ( marks.size() - count < 5 ) ) {
        *stat = KolmogorovSmirnovD( marks.frequency() );
        return;
    }
    
    en0 = n0;
    en1 = n1;
    d = 0.0;
    while( j0 < n0 && j1 < n1 ) {
            d0 = data0[ j0 ];
            d1 = data1[ j1 ];
            if( d0 <= d1 ) fn0 = j0++ / en0;
            if( d1 <= d0 ) fn1 = j1++ / en1;
            dt = fabs( fn1 - fn0 );
            if( dt > d ) d = dt;
    }
    en = sqrt( en0 * en1 / ( en0 + en1 ) );
    alam = ( en + 0.12 + 0.11 / en ) * d;
    
    *stat = KolmogorovSmirnovD( d, alam, marks.frequency() );
    return;
}


// terrible!
// KolmogorovSmirnovDCalculator method -- compute and return the value of the d statistic for a given MarkSet
double
KolmogorovSmirnovDCalculator::computeValue( const MarkSet& marks )
{
	KolmogorovSmirnovD stat;
	compute( marks, stat );
	return stat.p;
}

