#### BUHMBOX power calculation code.
#### Author: Buhm Han
#### Version: 0.1
#### created: 3/24/14
#### updated: 
#### included control sample size. (7/14/15)

## ARGUMENTS #########################################################
## nexp: number of simulation experiments to measure power (e.g 1000)
## N: sample size
## Ncont: control sample size (New: 7/14/15)
## p: number of SNPs
## rafs: risk allele frequency
## ORs: true odds ratios for generative model
## ORs.for.method: odds ratios that the method "think" are true
## phi: proportion of contaminating group
## thres: p-value threshold (can be vector, for examining multiple different thresholds.)

get.power <- function(nexp, N, Ncont, p, rafs, ORs, ORs.for.method, phi, thres, print.stride=NA) {
    if (length(rafs) <= p) {
        ## Repeat as necessary
        rafs <- rep(rafs, length.out=p)
        ORs <- rep(ORs, length.out=p)
        ORs.for.method <- rep(ORs.for.method, length.out=p)
    } else {
        ## Randomly sample
        indices=sample(1:length(rafs), p, replace=T)
        rafs = rafs[indices]
        ORs = ORs[indices]
        ORs.for.method = ORs.for.method[indices]
    }

    ## Contaminating group frequency.
    RAFs <- ORs * rafs / ((ORs-1)*rafs + 1)
    ## Contaminating group size
    Nphi <- floor(N*phi)
    ## Define uncontaminated group genotype unshuffled
    genou <- NULL
    for (i in 1:p) {
        count.AA <- floor((N-Nphi)*rafs[i]*rafs[i])
        count.AB <- floor((N-Nphi)*2*rafs[i]*(1-rafs[i]))
        count.BB <- (N-Nphi)-count.AA-count.AB
        genou <- cbind(genou, c(rep(2, count.AA), rep(1, count.AB), rep(0, count.BB)))
    }
    ## Define contaminated group genotype unshuffled
    genoc <- NULL
    for (i in 1:p) {
        count.AA <- floor(Nphi*RAFs[i]*RAFs[i])
        count.AB <- floor(Nphi*2*RAFs[i]*(1-RAFs[i]))
        count.BB <- Nphi-count.AA-count.AB
        genoc <- cbind(genoc, c(rep(2, count.AA), rep(1, count.AB), rep(0, count.BB)))
    }
    ## Define healty CONTROL group genotype unshuffled
    genoh <- NULL
    for (i in 1:p) {
        count.AA <- floor(Ncont*rafs[i]*rafs[i])
        count.AB <- floor(Ncont*2*rafs[i]*(1-rafs[i]))
        count.BB <- Ncont-count.AA-count.AB
        genoh <- cbind(genoh, c(rep(2, count.AA), rep(1, count.AB), rep(0, count.BB)))
    }
    ## Target null hypothesis
    I <- diag(p)
    ## Get weights of pairs for optimal method.
    W <- matrix(0, p, p)
    for (i in 1:p) {
        for (j in 1:p) {
            if (i < j) {
                p1 <- rafs[i]
                p2 <- rafs[j]
                r1 <- ORs.for.method[i]
                r2 <- ORs.for.method[j]
                DcorDphi <- sqrt((1-p1)*p1*(1-p2)*p2)*(r1-1)*(r2-1) / (1+p1*(r1-1)) / (1+p2*(r2-1))
                W[i,j] <- DcorDphi ## Fill in only Upper triangle
            }
        }
    }
    sqrt.sum.W.W <- sqrt(sum(W*W)) ## For later use

    ## Now simulate experiments "nexp" times!
    t <- length(thres)
    nsignif1 <- rep(0, t) 
    nsignif2 <- rep(0, t) 
    nsignif3 <- rep(0, t) 

    for (exper in 1:nexp) {
        ## Print to screen
        if (!is.na(print.stride)) {
            if (exper %% print.stride == 0) {
                write(exper, stderr())
            }
        }
        if (length(genou)>0) {
            genou <- apply(genou, 2, sample) ## Shuffling within group
        }
        if (length(genoc)>0) {
            genoc <- apply(genoc, 2, sample) ## Shuffling within group
        }
        genoh <- apply(genoh, 2, sample) ## Shuffling within group

        ## Matrix of interest.
        dat <- NULL
        if (length(genou) > 0) {
            dat <- rbind(dat, genou)
        }
        if (length(genoc) > 0) {
            dat <- rbind(dat, genoc)
        }
        
        R <- cor(dat)
        Rcont <- cor(genoh)
        Z <- sqrt(1/(1/N+1/Ncont))*(R-Rcont)
        U <- Z
        U[lower.tri(U)] <- 0 ## Make it upper triangle
        ## Method 1: Nondirectional
        chi2 <- sum(U * U)
        pvalue <- pchisq(chi2, df=p*(p-1)/2, lower.tail=F)
        nsignif1 <- nsignif1 + (pvalue < thres)

        ## Method 2: Directional (Unweighted)
        k <- p*(p-1)/2
        Lin <- sum(U) / k
        Lin.V <- 1 / k
        zscore <- Lin / sqrt(Lin.V)
        pvalue <- pnorm(zscore, lower.tail=F)
        nsignif2 <- nsignif2 + (pvalue < thres)

        ## Method 3: Optimal (Weighted, BUHMBOX)
        zscore <- sum(W*U) / sqrt.sum.W.W
        pvalue <- pnorm(zscore, lower.tail=F)
        nsignif3 <- nsignif3 + (pvalue < thres)
    }

    power1 <- nsignif1 / nexp
    power2 <- nsignif2 / nexp
    power3 <- nsignif3 / nexp

    rst = data.frame(Threshold=thres, PowerNondirectional=power1, PowerUnweighted=power2, PowerBUHMBOX=power3)
    rst$Nexp <- nexp
    rst$N <- N
    rst$Ncont <- Ncont
    rst$NSNP <- p
    rst$Pi <- phi
    rst
}