%	Copyright, February 2009, Ayellet Segre, Mark Daly, David Altshuler, Broad Institute, 7 Cambridge Center, Cambridge, MA 02142, USA
%
%	This code is part of the MAGENTA software package vs1.1 written in Matlab version R2009b, that tests for enrichment of multiple modest
%	genetic effects on a given complex disease or trait, in predefined sets of genes or loci.
%	The main code from which all functions are run is called: Run_MAGENTA_vs1_May10_2010.m
%
%	This software accompanies the paper:
%	Ayellet V. Segre, DIAGRAM Consortium, MAGIC investigators, Leif Groop, Vamsi K. Mootha, Mark J. Daly, and David Altshuler (2010). Common Inherited Variation in
%	Mitochondrial Genes is not Enriched for Associations with Type 2 Diabetes or Related Glycemic Traits. PLoS Genetics Aug 12;6(8). pii: e1001058. 
%
%	If your work benefits from the use of the MAGENTA  software package please cite the reference above.
%
%	For questions or comments please contact Ayellet Segre at asegre@broadinstitute.org. You can check for updates at: http://www.broadinstitute.org/mpg/magenta
%
%	Disclaimer: This software is distributed as is. The authors take no responsibility for any use or misuse.
%
%	Last updated: May 10, 2010
%


function [GSEA_RankSum_z_pval]=GSEA_RankSumStat_092409(Corr_score,find_rnd_gene_set,score_signif_direct)
% GSEA_RankSumStat_092409  One-tailed Wilcoxon rank-sum enrichment test of a gene set
% against all remaining genes.
%
% Inputs:
%   Corr_score          - vector of gene scores (one entry per gene). The absolute
%                         value of each score is used in the test.
%   find_rnd_gene_set   - vector of indices (into Corr_score) of the genes that belong
%                         to the input gene set.
%   score_signif_direct - 0 if higher scores are more significant (e.g. z-scores);
%                         any other value (1 by convention - TODO confirm against
%                         callers) if lower scores are more significant (e.g. p-values).
%
% Output:
%   GSEA_RankSum_z_pval - 1x2 row vector:
%       (1,1) z-score, oriented so that a POSITIVE value means the gene set has more
%             significant scores than the rest of the genes, in both modes.
%       (1,2) one-tailed p-value for the alternative "the gene set is more significant
%             than the rest of the genes".
%
% The Matlab function 'ranksum' is a two-tailed test. Since we are only interested in
% gene sets whose scores are more significant than the rest of the genome, the two-tailed
% p-value is converted to a one-tailed p-value: p/2 when the observed effect is in the
% direction of interest, and 1 - p/2 when it is in the opposite direction. (Dividing by
% two unconditionally would give a small p-value to gene sets that are significantly
% LESS significant than the rest of the genome.)

% indices of genes outside the gene set
find_outside_gene_set = setdiff(1:length(Corr_score), find_rnd_gene_set);

% two-tailed rank-sum test using the normal approximation, which also returns the z-statistic
[RankSum_pval, h, RankSum_stat] = ranksum(abs(Corr_score(find_rnd_gene_set)), abs(Corr_score(find_outside_gene_set)), 'method','approximate');

if (score_signif_direct==0)
	% higher scores are more significant: ranksum's z-value already has the desired sign
	enrich_z = RankSum_stat.zval;
else
	% lower scores are more significant: flip the sign so that positive still means
	% "gene set more significant than the rest of the genes"
	enrich_z = -1*RankSum_stat.zval;
end

% one-tailed p-value in the direction of enrichment
one_tailed_pval = RankSum_pval/2;
if (enrich_z < 0)
	% effect is opposite to the tested direction, so the upper tail is the complement
	one_tailed_pval = 1 - one_tailed_pval;
end

GSEA_RankSum_z_pval = [enrich_z, one_tailed_pval];
