#### BUHMBOX power calculation code. #### Author: Buhm Han #### Version: 0.1 #### created: 3/24/14 #### updated: #### included control sample size. (7/14/15) ## ARGUMENTS ######################################################### ## nexp: number of simulation experiments to measure power (e.g 1000) ## N: sample size ## Ncont: control sample size (New: 7/14/15) ## p: number of SNPs ## rafs: risk allele frequency ## ORs: true odds ratios for generative model ## ORs.for.method: odds ratios that the method "think" are true ## phi: proportion of contaminating group ## thres: p-value threshold (can be vector, for examining multiple different thresholds.) get.power <- function(nexp, N, Ncont, p, rafs, ORs, ORs.for.method, phi, thres, print.stride=NA) { if (length(rafs) <= p) { ## Repeat as necessary rafs <- rep(rafs, length.out=p) ORs <- rep(ORs, length.out=p) ORs.for.method <- rep(ORs.for.method, length.out=p) } else { ## Randomly sample indices=sample(1:length(rafs), p, replace=T) rafs = rafs[indices] ORs = ORs[indices] ORs.for.method = ORs.for.method[indices] } ## Contaminating group frequency. RAFs <- ORs * rafs / ((ORs-1)*rafs + 1) ## Contaminating group size Nphi <- floor(N*phi) ## Define uncontaminated group genotype unshuffled genou <- NULL for (i in 1:p) { count.AA <- floor((N-Nphi)*rafs[i]*rafs[i]) count.AB <- floor((N-Nphi)*2*rafs[i]*(1-rafs[i])) count.BB <- (N-Nphi)-count.AA-count.AB genou <- cbind(genou, c(rep(2, count.AA), rep(1, count.AB), rep(0, count.BB))) } ## Define contaminated group genotype unshuffled genoc <- NULL for (i in 1:p) { count.AA <- floor(Nphi*RAFs[i]*RAFs[i]) count.AB <- floor(Nphi*2*RAFs[i]*(1-RAFs[i])) count.BB <- Nphi-count.AA-count.AB genoc <- cbind(genoc, c(rep(2, count.AA), rep(1, count.AB), rep(0, count.BB))) } ## Define healty CONTROL group genotype unshuffled genoh <- NULL for (i in 1:p) { count.AA <- floor(Ncont*rafs[i]*rafs[i]) count.AB <- floor(Ncont*2*rafs[i]*(1-rafs[i])) count.BB <- Ncont-count.AA-count.AB genoh <- cbind(genoh, c(rep(2, count.AA), rep(1, count.AB), rep(0, count.BB))) } ## Target null hypothesis I <- diag(p) ## Get weights of pairs for optimal method. W <- matrix(0, p, p) for (i in 1:p) { for (j in 1:p) { if (i < j) { p1 <- rafs[i] p2 <- rafs[j] r1 <- ORs.for.method[i] r2 <- ORs.for.method[j] DcorDphi <- sqrt((1-p1)*p1*(1-p2)*p2)*(r1-1)*(r2-1) / (1+p1*(r1-1)) / (1+p2*(r2-1)) W[i,j] <- DcorDphi ## Fill in only Upper triangle } } } sqrt.sum.W.W <- sqrt(sum(W*W)) ## For later use ## Now simulate experiments "nexp" times! t <- length(thres) nsignif1 <- rep(0, t) nsignif2 <- rep(0, t) nsignif3 <- rep(0, t) for (exper in 1:nexp) { ## Print to screen if (!is.na(print.stride)) { if (exper %% print.stride == 0) { write(exper, stderr()) } } if (length(genou)>0) { genou <- apply(genou, 2, sample) ## Shuffling within group } if (length(genoc)>0) { genoc <- apply(genoc, 2, sample) ## Shuffling within group } genoh <- apply(genoh, 2, sample) ## Shuffling within group ## Matrix of interest. dat <- NULL if (length(genou) > 0) { dat <- rbind(dat, genou) } if (length(genoc) > 0) { dat <- rbind(dat, genoc) } R <- cor(dat) Rcont <- cor(genoh) Z <- sqrt(1/(1/N+1/Ncont))*(R-Rcont) U <- Z U[lower.tri(U)] <- 0 ## Make it upper triangle ## Method 1: Nondirectional chi2 <- sum(U * U) pvalue <- pchisq(chi2, df=p*(p-1)/2, lower.tail=F) nsignif1 <- nsignif1 + (pvalue < thres) ## Method 2: Directional (Unweighted) k <- p*(p-1)/2 Lin <- sum(U) / k Lin.V <- 1 / k zscore <- Lin / sqrt(Lin.V) pvalue <- pnorm(zscore, lower.tail=F) nsignif2 <- nsignif2 + (pvalue < thres) ## Method 3: Optimal (Weighted, BUHMBOX) zscore <- sum(W*U) / sqrt.sum.W.W pvalue <- pnorm(zscore, lower.tail=F) nsignif3 <- nsignif3 + (pvalue < thres) } power1 <- nsignif1 / nexp power2 <- nsignif2 / nexp power3 <- nsignif3 / nexp rst = data.frame(Threshold=thres, PowerNondirectional=power1, PowerUnweighted=power2, PowerBUHMBOX=power3) rst$Nexp <- nexp rst$N <- N rst$Ncont <- Ncont rst$NSNP <- p rst$Pi <- phi rst }