# Generates the contamination plot of allele frequency by percentage of
# bases that match the expected allele vs. the alternative allele
#
# Author: Tim Fennell

# Parse the trailing command-line arguments (those supplied after the script name):
#   1. metricsFile - the file the metrics row and the histogram are read from
#   2. outputFile  - the PDF file the plot is written to
#   3. titlePrefix - text placed at the front of the plot title
# The argument name is spelled out in full (trailingOnly) instead of relying on
# partial argument matching, and TRUE is used because T is an ordinary variable
# that can be reassigned.
args <- commandArgs(trailingOnly = TRUE)
metricsFile <- args[1]
outputFile  <- args[2]
titlePrefix <- args[3]


# Figure out where the metrics and the histogram are in the file and parse them out.
# The file is read one line per element (blank lines are kept) and the first two
# blank lines are located. The number of lines to skip for each table is the index
# of the blank line plus one, so the line directly after each blank line is skipped
# too and read.table() starts on the line that follows it.
startFinder <- scan(metricsFile, what = "character", sep = "\n", quiet = TRUE, blank.lines.skip = FALSE)

# which() drops NA comparisons and copes with an empty vector, so an empty file or a
# line that scan() turns into NA no longer aborts with a cryptic error
blankLines <- which(startFinder == "")
if (length(blankLines) < 2) {
    stop(sprintf("Expected at least two blank lines in '%s' but found %d",
                 metricsFile, length(blankLines)),
         call. = FALSE)
}
firstBlankLine  <- blankLines[1] + 1
secondBlankLine <- blankLines[2] + 1

# The metrics table: a header line followed by a single row of values
metrics <- read.table(metricsFile, header = TRUE, nrows = 1, skip = firstBlankLine)

# The histogram table: a header line followed by every remaining row in the file
histo <- read.table(metricsFile, header = TRUE, skip = secondBlankLine)
# Open the PDF device; everything drawn below is written to outputFile
pdf(outputFile)

# Ensure the plot runs from at least 0.95 up. Missing values are ignored so that a
# single NA in matching_pct does not turn the lower axis limit into NA.
ymin <- min(histo$matching_pct, 0.95, na.rm = TRUE)

# Generate an empty plot area (type="n" sets up axes, labels and title only)
plot(histo$allele_frequency, histo$matching_pct, type = "n",
     xlim = c(0, 1), ylim = c(ymin, 1),
     xlab = "Allele Frequency", ylab = "Percent of Bases Matching Sample's Allele",
     main = sprintf("%s Contamination Detection Plot", titlePrefix))

# Add some grid lines and then the major plotting points:
# rows with used_in_fit > 0 are drawn as filled circles, all others as open circles
grid(lty = "solid", col = "#EEEEEE")
usedInFit <- subset(histo, histo$used_in_fit > 0)
points(usedInFit$allele_frequency, usedInFit$matching_pct, pch = 19, col = "blue")
notUsedInFit <- subset(histo, !(histo$used_in_fit > 0))
points(notUsedInFit$allele_frequency, notUsedInFit$matching_pct, pch = 1, col = "blue")

# Add the fit line: LS_A is used as the intercept and LS_B as the slope
predictedY <- metrics$LS_A + (histo$allele_frequency * metrics$LS_B)
lines(x = histo$allele_frequency, y = predictedY, col = "red")

# Generate legend and inset text (pch 45 is the "-" character, standing in for the fit line)
legend("bottomright", legend = c("Sample Data", "Least Squares Fit"), pch = c(19, 45), col = c("blue", "red"), cex = 0.75)
inset <- sprintf("Observations: %d\nContamination: %.3f\nR^2 of fit: %.3f", metrics$TOTAL_OBSERVATIONS, metrics$PCT_CONTAMINATION, metrics$LS_R2)
# pos=2 places the text to the left of x, one tenth of the way up the y range
text(x = 1.04, y = (ymin + (1 - ymin) * 0.1), inset, pos = 2, cex = 0.75)

# Close the device explicitly so the PDF is flushed and finalised here rather than
# only as a side effect of the R session ending; invisible() keeps the value that
# dev.off() returns from being printed
invisible(dev.off())
