/* Copyright (C) 2003-2008 Dan Arlow
 * 
 * This file is part of motifADE.
 * 
 * motifADE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * motifADE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with motifADE.  If not, see <http://www.gnu.org/licenses/>.
 */

/* 
 *  length_corrected_mann_whitney_u.cpp
 *
 *  ExpressionStatisticCalculator that performs a variant of the Mann-Whitney
 *  test in which the null distribution takes into account the nonuniform
 *  probabilities with which ranks are drawn at random.
 *  Based on the concept by Xiaohui Xie, 2004.
 */


#include "common.hpp"
#include "promoter.hpp"
#include "expression_statistics.hpp"
#include "univariate_expression_statistic_calculator.hpp"
#include "mark_set.hpp"
#include "mann_whitney_u.hpp"
#include "weighted_sampling_without_replacement.hpp"
#include "length_corrected_mann_whitney_u.hpp"


using namespace std;


// LengthCorrectedMannWhitneyUCalculator constructor -- estimates the null distributions using monte carlo
//
// pv          promoters; one weight per promoter is stored in sequenceWeights
// numsamples  stored in numSamples; the number of monte carlo repetitions run by computeNullDistributions()
// dimension   forwarded unchanged to the MannWhitneyUCalculator base constructor
LengthCorrectedMannWhitneyUCalculator::LengthCorrectedMannWhitneyUCalculator( const PromoterVector& pv, unsigned int numsamples, unsigned int dimension )
	: MannWhitneyUCalculator( pv, dimension ),
	  sequenceWeights( pv.size() ),
	  numSamples( numsamples )
{
	// the weights must be in place first: computeNullDistributions() reads sequenceWeights
	computeSequenceWeights( pv );
	// TIMED and DEBUG_BLOCK are macros defined outside this file; presumably TIMED measures the
	// enclosed statement and provides total_time for the debug output below -- TODO confirm
	TIMED( computeNullDistributions(); )
	DEBUG_BLOCK( cout << "null distribution computation time: " << total_time << endl; )
}


// LengthCorrectedMannWhitneyUCalculator constructor -- copy constructor
//
// Copies the sequence weights and the sample count from calc through its accessors and
// hands calc and copyRanks to the MannWhitneyUCalculator base constructor.
// The null distributions are not re-estimated here (the body is empty).
// NOTE(review): copyRanks presumably tells the base class whether to copy the ranks -- confirm in mann_whitney_u.hpp
LengthCorrectedMannWhitneyUCalculator::LengthCorrectedMannWhitneyUCalculator( const LengthCorrectedMannWhitneyUCalculator& calc, bool copyRanks )
	: MannWhitneyUCalculator( calc, copyRanks ),
	  sequenceWeights( calc.getSequenceWeights() ),
	  numSamples( calc.getNumSamples() )
{
	// nothing to do here
}


// LengthCorrectedMannWhitneyUCalculator method -- computes the weights for the sequences
void
LengthCorrectedMannWhitneyUCalculator::computeSequenceWeights( const PromoterVector& pv )
{
	sequenceWeights.resize( pv.size() );
	for( unsigned int i = 0; i < pv.size(); ++i )
		sequenceWeights[ i ] = computeSequenceWeight( *pv[ i ] );
}


// LengthCorrectedMannWhitneyUCalculator method -- computes the weight of a single promoter
//
// The weight is the promoter's own total sequence length multiplied by the total
// sequence length of each of its orthologs, taken in ortholog index order.
double
LengthCorrectedMannWhitneyUCalculator::computeSequenceWeight( const Promoter& p ) const
{
	// start from the promoter's own length ...
	double weight = p.totalSequenceLength();

	// ... and scale by every ortholog's length in turn
	unsigned int ortholog = 0;
	while( ortholog < p.numOrthologs() ) {
		weight *= p.getOrtholog( ortholog )->totalSequenceLength();
		++ortholog;
	}

	return weight;
}


// LengthCorrectedMannWhitneyUCalculator method -- estimates the null distributions using monte carlo
//
// With n = ranks.size(), fills nullDistributionMean[ i ] and nullDistributionStddev[ i ]
// for i in [0, n] with the estimated mean and (unbiased) standard deviation of the sum of
// the first i ranks drawn from a WeightedSamplingWithoutReplacement sampler. The sampler is
// built over the ranks and the normalized cumulative sequenceWeights, and every one of the
// numSamples repetitions draws all n ranks after a reset().
//
// Entry 0 is always 0 (the empty prefix). numSamples should be at least 2: with fewer
// samples the unbiased variance is undefined and the results are not finite.
// NOTE(review): nullDistributionMean / nullDistributionStddev are assumed to already hold at
// least n + 1 elements (presumably sized by the base class) -- confirm.
// NOTE(review): if the sequence weights sum to zero the normalized cdf is NaN -- confirm
// callers never supply all-zero weights.
void
LengthCorrectedMannWhitneyUCalculator::computeNullDistributions()
{
	unsigned int	n = ranks.size();
	double			prevWeight, totalWeight;
	// subtract in floating point: numSamples is unsigned, so "numSamples - 1" would wrap
	// around to a huge positive value when numSamples == 0
	double			nSamples = numSamples, uSamples = nSamples - 1.0;
	dvector			cdf( n + 1 ), sample( n + 1 );
	
	// zero the null distribution vectors
	for( unsigned int i = 0; i <= n; ++i ) {
		nullDistributionMean[ i ] = 0.0;
		nullDistributionStddev[ i ] = 0.0;
	}
	
	// accumulate the cdf; cdf[ 0 ] is the leading zero, cdf[ i + 1 ] the running weight total
	cdf[ 0 ] = 0.0;
	prevWeight = 0;
	for( unsigned int i = 0; i < n; ++i ) {
		prevWeight += sequenceWeights[ i ];
		cdf[ i + 1 ] = prevWeight;
	}
	totalWeight = prevWeight;
	// normalize the cdf
	for( dvector::iterator iter = cdf.begin(); iter != cdf.end(); ++iter )
		*iter = *iter / totalWeight;
	
	// the sampler is handed the cdf without its leading zero, i.e. one cumulative weight per rank
	WeightedSamplingWithoutReplacement< dvector::iterator, dvector::iterator > wswr( ranks.begin(), ranks.end(), cdf.begin() + 1, cdf.end() );
	
	// repeat sampling numSamples times
	for( unsigned int s = 0; s < numSamples; ++s ) {
		wswr.reset();
		
		// draw all n items from the sampler, one after another
		sample[ 0 ] = 0.0;
		for( unsigned int i = 1; i <= n; ++i ) {
			sample[ i ] = *wswr.sample();
		}
		
		// calculate the cumulative sum of the sample, so sample[ i ] is the sum of the first i draws
		partial_sum( sample.begin(), sample.end(), sample.begin() );
		
		// accumulate the first and second moments; the second moment is divided by nSamples
		// as it is accumulated, the first moment is divided after the loop
		for( unsigned int i = 0; i <= n; ++i ) {
			nullDistributionMean[ i ]   += sample[ i ];
			nullDistributionStddev[ i ] += fast_square( sample[ i ] ) / nSamples;
		}
		
	}
	
	// compute mean and standard deviation from the accumulated sums
	for( unsigned int i = 1; i <= n; ++i ) {
		nullDistributionMean[ i ] /= nSamples;
		
		// unbiased variance: N / ( N - 1 ) * ( E[ X^2 ] - E[ X ]^2 )
		double variance = ( nSamples / uSamples ) * ( nullDistributionStddev[ i ] - fast_square( nullDistributionMean[ i ] ) );
		
		// E[ X^2 ] - E[ X ]^2 can round to a tiny negative number when the true variance is
		// (near) zero -- e.g. at i == n, where every sample sums all ranks -- and sqrt of
		// that would be NaN. Clamp to zero; a NaN variance is deliberately left untouched.
		if( variance < 0.0 )
			variance = 0.0;
		
		nullDistributionStddev[ i ] = sqrt( variance );
	}
	
	DEBUG_BLOCK(
		cout << "nullDistributionMean[ i ] = " << endl;
		copy( nullDistributionMean.begin(), nullDistributionMean.end(), ostream_iterator< double >( cout, "\n" ) );
		cout << endl << endl;
		cout << "nullDistributionStddev[ i ] = " << endl;
		copy( nullDistributionStddev.begin(), nullDistributionStddev.end(), ostream_iterator< double >( cout, "\n" ) );
		cout << endl << endl;
	)
}
