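# Script to generate a chart of k-mer frequency percentages: for each frequency,
# the % of distinct and of total k-mers at that frequency, and at that frequency
# or less, as read from a tab-separated metrics file.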
#
# @author ktibbett@broadinstitute.org

# Parse the arguments.
#
# Expected positional arguments (after the Rscript options):
#   1. metricsFile - path of the tab-separated metrics file to read
#   2. outputFile  - path of the PDF to write
#   3. datasetName - label used in the plot title
#   4. kmer        - label used in the plot title and the legend text
args <- commandArgs(trailingOnly = TRUE)

# Fail early with a usage message instead of letting missing arguments turn
# into NA file names or "NA" legend text further down.
if (length(args) < 4) {
  stop("Expected 4 arguments: <metricsFile> <outputFile> <datasetName> <kmer>",
       call. = FALSE)
}

metricsFile <- args[1]
outputFile  <- args[2]
datasetName <- args[3]
kmer        <- args[4]

# Legend labels, one per plotted series (paste() joins with a single space)
legend1 <- paste("% distinct", kmer, "at this frequency")
legend2 <- paste("% distinct", kmer, "at this frequency or less")
legend3 <- paste("% total", kmer, "at this frequency")
legend4 <- paste("% total", kmer, "at this frequency or less")

# Figure out where the metrics table starts in the file and parse it out.
# The file is read once as raw lines (blank lines kept) so that line numbers
# can be used as offsets for read.table() below.
startFinder <- scan(metricsFile, what = "character", sep = "\n", quiet = TRUE,
                    blank.lines.skip = FALSE)

# Indices of the blank lines. which() copes with an empty file and drops NA
# entries (scan() turns a line that is exactly "NA" into a missing value), both
# of which made the previous index loop fail on its `if` condition.
blankLines <- which(startFinder == "")

# Number of lines to skip: everything up to the first blank line, plus the
# line immediately after it (presumably a section marker - TODO confirm), so
# that the column header is the next line read. Stays 0 when no blank line
# exists, in which case the table is read from the top of the file.
firstBlankLine <- 0
if (length(blankLines) >= 1) {
  firstBlankLine <- blankLines[1] + 1
}

# Same offset for the second blank line; only defined when one exists. It is
# not used by the code visible in this script.
if (length(blankLines) >= 2) {
  secondBlankLine <- blankLines[2] + 1
}

metrics <- read.table(metricsFile, header = TRUE, sep = "\t",
                      skip = firstBlankLine)
# Everything drawn from here until dev.off() goes to the output PDF
pdf(outputFile)

# One colour per series, in the order the series are drawn and listed in the
# legend
COLORS <- c("palegreen3", "royalblue", "darkorange", "red")

# Margins as (bottom, left, top, right); the right-hand one is widened to 4
par(mar = c(5, 4, 4, 4))

# Shared x values for all four series
frequency <- metrics$FREQUENCY

# Set up the axes and draw the first series: % of distinct k-mers at each
# frequency. The metrics columns are multiplied by 100 to plot them on a
# 0-100 percent axis.
plot(
  x = frequency,
  y = metrics$PCT_DISTINCT_KMERS * 100,
  type = "l",
  col = COLORS[1],
  xlim = c(1, max(frequency)),
  ylim = c(0, 100),
  xlab = "Frequency",
  ylab = "%",
  main = paste(c(datasetName, kmer))
)

# Overlay the remaining three series, matching the order of COLORS[2:4]
overlays <- list(
  metrics$PCT_DISTINCT_KMERS_THIS_FREQUENCY_OR_LESS,
  metrics$PCT_TOTAL_KMERS,
  metrics$PCT_TOTAL_KMERS_THIS_FREQUENCY_OR_LESS
)
for (k in seq_along(overlays)) {
  lines(frequency, overlays[[k]] * 100, type = "l", col = COLORS[k + 1])
}

# Filled-square markers (pch 15) key each label to its line colour
legend(
  "topleft",
  legend = c(legend1, legend2, legend3, legend4),
  pch = c(15, 15, 15, 15),
  col = COLORS
)

# Close the device so the PDF is flushed to disk
dev.off()