#!/bin/bash
# The Broad Institute
# SOFTWARE COPYRIGHT NOTICE AGREEMENT
# This software and its documentation are copyright 2009 by the
# Broad Institute/Massachusetts Institute of Technology. All rights are
# reserved.

# This software is supplied without any warranty or guaranteed support
# whatsoever. Neither the Broad Institute nor MIT can be responsible for its
# use, misuse, or functionality.


# Print each command (after expansion) to stderr before it is executed
set -x

# Writes the one-line command synopsis to stderr.
usage() {
    printf '%s\n' "USAGE: returnBamWorkflow.sh <project> <sample> <oldVersion> <bamFile> [source]" >&2
}

# Aborts the workflow after a failed step.
# Intended use is "<command> || fail <message>", so that $? still holds the
# failed command's exit status when this function starts.
# Arguments:
#   $1 - short description of what went wrong
# Globals read: outputDir, project, sample, newVersion
# Side effects: touches finished.txt inside ${outputDir}, writes the failure
#               message to stderr and exits the script with the failing status.
function fail() {
    # Must be the first statement: any other command would overwrite $?
    local code=$?
    local error=$1

    # Never let the failure path report success to the caller of the script
    if [ "${code}" -eq 0 ]
    then
        code=1
    fi

    # finished.txt is touched here as well as at the end of a successful run.
    # Build the path with exactly one separator so the marker lands inside the
    # directory whether or not outputDir carries a trailing slash; an empty
    # outputDir falls back to the current directory.
    touch "${outputDir:+${outputDir%/}/}finished.txt"
    echo "BAM return workflow for ${project}/${sample} v${newVersion} FAILED: ${error}. Error code:  ${code}" >&2
    exit "${code}"
}

# Finds the 1-based position of a named column in a tab-delimited file.
# Arguments:
#   $1 - exact text of the column header to look for
#   $2 - name of the variable that receives the column index
#   $3 - path to the file to search (analysis_files.txt)
# Returns: 0 when the column was found; 1 when it was not (or the file could
#          not be read), in which case the variable is set to the empty string.
function getIndex() {

    local _gi_target=$2
    local _gi_idx

    # Split on tabs so the index agrees with "cut -f", which is how callers
    # consume it; stop at the first row holding the header so that a data row
    # containing the same text cannot contribute a second index.
    _gi_idx=$(awk -F'\t' -v col="$1" '{ for (i = 1; i <= NF; i++) if ($i == col) { print i; exit } }' "$3")

    # Assign through printf -v rather than eval so the value is never re-parsed
    printf -v "${_gi_target}" '%s' "${_gi_idx}"

    # Report "not found" to the caller so that "getIndex ... || fail" can fire
    [ -n "${_gi_idx}" ]
}

# Gets the single value on which every row of one column in analysis_files.txt agrees.
# Arguments:
#   $1 - exact text of the column header
#   $2 - name of the variable that receives the consensus value
#   $3 - path to the tab-delimited analysis_files.txt
# Values that name an existing path are canonicalised (symlinks resolved)
# before comparison, so the same file reached through different links still
# counts as one value.  Other values (for example the literal "null") are
# compared as written.  The header cell and empty cells are ignored.
# Returns: 0 with the variable set to the consensus (empty when the column is
#          missing or has no values); 1, with a message on stderr, when the
#          rows disagree.
function getConsensusValue() {

    local _cv_target=$2
    local _cv_idx
    local _cv_value
    local _cv_consensus=""

    # Tab-delimited lookup so the index matches what "cut -f" will select;
    # stop at the first row that holds the header text.
    _cv_idx=$(awk -F'\t' -v col="$1" '{ for (i = 1; i <= NF; i++) if ($i == col) { print i; exit } }' "$3")

    # A missing column is not an error here: the result is simply empty
    if [ -n "${_cv_idx}" ]
    then
        # Read line by line so that values containing spaces stay intact
        while IFS= read -r _cv_value
        do
            # Skip the header row (checked BEFORE canonicalising, otherwise
            # the header would be turned into a path) and empty cells
            if [ -z "${_cv_value}" ] || [ "${_cv_value}" = "$1" ]
            then
                continue
            fi

            # NOTE(review): the original invoked "readline -fn", which is not
            # a command; the flags are those of readlink, so readlink is
            # assumed to be what was meant.
            if [ -e "${_cv_value}" ]
            then
                _cv_value=$(readlink -fn "${_cv_value}")
            fi

            if [ -z "${_cv_consensus}" ]
            then
                _cv_consensus=${_cv_value}
            elif [ "${_cv_consensus}" != "${_cv_value}" ]
            then
                echo "Multiple values in ${3}: ${_cv_consensus}, ${_cv_value}" >&2
                return 1
            fi
        done < <(cut -f "${_cv_idx}" "$3")
    fi

    # Assign through printf -v rather than eval so the value is never re-parsed
    printf -v "${_cv_target}" '%s' "${_cv_consensus}"
}


# The script takes no real options: -h, or anything getopts does not
# recognise, prints the usage text and stops with status 1.
while getopts "h" options; do
  case "${options}" in
    h | \? | *)
      usage
      exit 1
      ;;
  esac
done

# project, sample, oldVersion and bamFile are mandatory; [source] is optional
if [ "$#" -lt 4 ]; then
  echo "ERROR: Incorrect number of arguments." >&2
  usage
  exit 1
fi

# Settings for test runs; swap these in for the production settings below
# metricsDb=seqtest
# aggregationDir="/seq/dirseq/ktibbett/seq/picard_aggregation"
# analysisDir="/seq/dirseq/ktibbett/seq/picard"
# tempDir="/seq/dirseq/ktibbett"
# bassClient="/prodinfo/qaapps/dmsClient"

# Settings for production runs
metricsDb="seqprod"                           # database= for picardAggregationMetrics.pl
aggregationDir='/seq/picard_aggregation'      # AGGREGATION_PREFIX / AGGREGATION_STARTING_POINT
analysisDir='/seq/picard'                     # PREFIX for CreateAggregationDirectory
tempDir='/seq/picardtemp'                     # TMP_DIR for the Picard tools and home of the scratch file
bassClient='/prodinfo/prodapps/dmsClient'     # directory that holds dmsStore

# Positional arguments, in the order shown by the usage text
project="$1"
sample="$2"   # This MUST be the un-file-safe version as we need it for BASS
oldVersion="$3"
bamFile="$4"
# Optional fifth argument; when it is absent or empty the source is "Picard"
source="${5:-Picard}"

# ---------------------------------------------------------------------------
# Main workflow.  In order: create the next aggregation version directory,
# copy the returned BAM into it, validate it, fix mate information, index it,
# collect metrics, record the BAM and the directory in BASS, and load the
# aggregation metrics.  Every step after the directory exists goes through
# fail() on error.  All expansions are quoted because the sample alias is the
# un-file-safe form and may contain characters the shell would split on.
# ---------------------------------------------------------------------------

# Scratch file that CreateAggregationDirectory writes its result into
tempFile="${tempDir}/${project}.${sample}.output"


# Determine and create the next version directory and save it into outputDir
java -jar /seq/software/picard/current/bin/CreateAggregationDirectory.jar \
    PROJECT="${project}" SAMPLE_ALIAS="${sample}" PREFIX="${analysisDir}" \
    AGGREGATION_PREFIX="${aggregationDir}" PEM_SCRIPT_RESULTS_FILE="${tempFile}" \
    AGGREGATION_DIR_ONLY=true
code=$?
if [ "${code}" != "0" ]
then
    # The new version is not known yet at this point, so it is not part of the message
    echo "BAM return workflow for ${project}/${sample} FAILED: CreateAggregationDirectory failed. Error code:  ${code}" >&2
    exit "${code}"
fi

# The first line of the scratch file is read as "<key>=<directory>"; the
# version is whatever follows the last "/v" of the dirPath entry.
outputDir=$(head -1 "${tempFile}" | cut -f 2 -d "=")
newVersion=$(sed 's/dirPath.*\/v//' "${tempFile}" | sed 's/\///')
rm "${tempFile}"

# Without a directory every later path would be rooted at "/"; fail() cannot
# be used here because it writes into outputDir itself.
if [ -z "${outputDir}" ]
then
    echo "BAM return workflow for ${project}/${sample} FAILED: could not read the output directory from ${tempFile}." >&2
    exit 1
fi

outputPrefix="${outputDir}/${sample}"
newBamFile="${outputDir}/${sample}.bam"

# Record where this BAM came from
echo "${source}" >> "${outputDir}/source.txt" || fail "Could not write source to ${outputDir}/source.txt"

# Copy the new BAM file to the new directory
cp "${bamFile}" "${newBamFile}" || fail "Copy bam file failed"

# Validate the returned BAM; this writes the analysis_files.txt read below
java -jar /seq/software/picard/current/bin/ValidateBamReturn.jar \
    INPUT="${newBamFile}" OUTPUT="${outputDir}/analysis_files.txt" TMP_DIR="${tempDir}" \
    PROJECT="${project}" SAMPLE_ALIAS="${sample}" ORIGINAL_VERSION="${oldVersion}" \
    AGGREGATION_STARTING_POINT="${aggregationDir}" || fail "Validate Bam Return File failed."

getConsensusValue "REFERENCE_SEQUENCE" reference "${outputDir}/analysis_files.txt" || fail "Could not get consensus reference sequence from ${outputDir}/analysis_files.txt."
getConsensusValue "TARGET_INTERVALS" targets "${outputDir}/analysis_files.txt" || fail "Could not get consensus target intervals from ${outputDir}/analysis_files.txt."
getConsensusValue "BAIT_INTERVALS" baits "${outputDir}/analysis_files.txt" || fail "Could not get consensus bait intervals from ${outputDir}/analysis_files.txt."

# Get the initiative; all analyses should agree so just grab the first one (a little different since we have to allow for spaces)
getIndex "INITIATIVE" initiativeIndex "${outputDir}/analysis_files.txt" || fail "Could not get the index of INITIATIVE from ${outputDir}/analysis_files.txt."
# Check the result explicitly as well, in case getIndex reported success without finding the column
[ -n "${initiativeIndex}" ] || fail "Could not get the index of INITIATIVE from ${outputDir}/analysis_files.txt."
# Second line of the file = first data row
initiative=$(head -2 "${outputDir}/analysis_files.txt" | tail -1 | cut -f "${initiativeIndex}") || fail "Could not get the initiative from ${outputDir}/analysis_files.txt."

# Fix mates
java -jar /seq/software/picard/current/bin/FixMateInformation.jar \
    INPUT="${newBamFile}" TMP_DIR="${tempDir}" SORT_ORDER=coordinate || fail "FixMates failed."

# Validate the fixed file
java -jar /seq/software/picard/current/bin/ValidateSamFile.jar \
    INPUT="${newBamFile}" OUTPUT="${outputPrefix}.validation_metrics" TMP_DIR="${tempDir}" || fail "Validate Sam failed."

# Create the index
/seq/software/picard/current/3rd_party/samtools/samtools index "${newBamFile}" "${outputPrefix}.bai" || fail "Create bam index failed"

# Alignment summary metrics
java -jar /seq/software/picard/current/bin/CollectAlignmentSummaryMetrics.jar \
    INPUT="${newBamFile}" OUTPUT="${outputPrefix}.alignment_summary_metrics" \
    REFERENCE_SEQUENCE="${reference}" TMP_DIR="${tempDir}" || fail "Alignment Summary Metrics failed."

# Hybrid selection metrics, only when both interval lists have a real value
# (neither empty nor the literal "null")
if [[ -n "${baits}" && "${baits}" != "null" && -n "${targets}" && "${targets}" != "null" ]]
then
    java -jar /seq/software/picard/current/bin/CalculateHsMetrics.jar \
        INPUT="${newBamFile}" OUTPUT="${outputPrefix}.hybrid_selection_metrics" \
        BAIT_INTERVALS="${baits}" TARGET_INTERVALS="${targets}" TMP_DIR="${tempDir}" || fail "Hybrid Selection Metrics failed."
fi

# Md5 calculation and addition to BASS.  md5sum is run on its own (no pipe)
# so that its exit status, not that of a downstream filter, is what is checked.
md5=$(md5sum "${newBamFile}") || fail "MD5 generation failed"
md5=${md5%% *}    # keep only the digest, dropping the file name

"${bassClient}/dmsStore" -type bam -action record -software_provider BROAD -software PICARD -timeout 30 \
    -project "${project}" -sample "${sample}" -version "${newVersion}" -initiative "${initiative}" \
    -md5 "${md5}" -path "${newBamFile}" || fail "Adding BAM file to BASS failed."

"${bassClient}/dmsStore" -type picard -action record -software_provider BROAD -software PICARD -timeout 30 \
    -project "${project}" -sample "${sample}" -version "${newVersion}" -initiative "${initiative}" \
    -path "${outputDir}" || fail "Adding Picard directory to BASS failed."

# Metrics!
/seq/software/bin/picardAggregationMetrics.pl directory="${outputDir}" project="${project}" sample="${sample}" database="${metricsDb}" version="${newVersion}" || fail "Aggregation metrics failed."

# Joy!  We are done!
touch "${outputDir}/finished.txt"
echo "Bam return workflow SUCCEEDED for ${project}/${sample}, version ${newVersion}"
exit 0
