#!/usr/bin/env python
# The Broad Institute
# SOFTWARE COPYRIGHT NOTICE AGREEMENT
# This software and its documentation are copyright 2008 by the
# Broad Institute/Massachusetts Institute of Technology. All rights are
# reserved.

# This software is supplied without any warranty or guaranteed support
# whatsoever. Neither the Broad Institute nor MIT can be responsible for its
# use, misuse, or functionality.
# $Header$
"""usage %prog [options]"""
from __future__ import division
import extendedoptparse
import sys

def extractSubsequence(fIn, iStartPosition, iLength):
    """Return iLength bases starting at 1-based iStartPosition.

    fIn is an iterable of lines positioned just after the header line of the
    sequence of interest.  Lines are consumed until the requested window is
    complete, the next header line (starting with ">") is reached, or the
    input is exhausted.

    A file object is its own iterator, so a caller that has already consumed
    the header line in its own "for" loop can hand the same file object here
    and iteration simply resumes on the following line.

    Raises Exception if iStartPosition is less than 1, if iLength is
    negative, or if the requested window runs past the end of the sequence.
    """
    if iStartPosition < 1:
        # A zero or negative position would turn into a negative slice index
        # and silently return the wrong bases.
        raise Exception("Start position must be >= 1 (1-based).  Got: " + str(iStartPosition))
    if iLength < 0:
        raise Exception("Length must be >= 0.  Got: " + str(iLength))

    iZeroBasedStartPosition = iStartPosition - 1
    iZeroBasedEndPosition = iZeroBasedStartPosition + iLength
    iBasesSeen = 0
    lstPieces = []
    for strLine in fIn:
        if strLine.startswith(">"):
            # Start of the next record: the sequence ended too early.
            break
        # Strip both "\n" and "\r\n" so Windows line endings do not shift
        # offsets or leak "\r" into the result.
        strBases = strLine.rstrip("\r\n")
        iLineStart = iBasesSeen
        iBasesSeen += len(strBases)
        if iBasesSeen > iZeroBasedStartPosition:
            # Keep only the part of this line that overlaps the requested
            # window instead of accumulating the whole sequence prefix.
            iFrom = max(iZeroBasedStartPosition - iLineStart, 0)
            iTo = iZeroBasedEndPosition - iLineStart
            lstPieces.append(strBases[iFrom:iTo])
        if iBasesSeen >= iZeroBasedEndPosition:
            return "".join(lstPieces)
    raise Exception("Requested position off the end of the sequence.  Length of sequence: " + str(iBasesSeen))

def extractSequenceFromFasta(strFastaFile, strSequenceName, iStartPosition, iLength):
    """Return a subsequence of the named record in a FASTA file.

    strFastaFile    -- path of the FASTA file to read.
    strSequenceName -- text that must follow ">" on the record's header line
                       (the whole header line, minus the line ending).
    iStartPosition  -- 1-based position of the first base to return.
    iLength         -- number of bases to return.

    Raises Exception if no header line matches strSequenceName.  Errors
    raised by extractSubsequence propagate unchanged.
    """
    fIn = open(strFastaFile)
    # try/finally (rather than "with") closes the file on every exit path
    # while remaining valid on old Python 2 interpreters.
    try:
        for strLine in fIn:
            # Strip the line ending explicitly: slicing off the last character
            # would eat part of the name when the header has no trailing
            # newline, and would keep "\r" for CRLF files.
            if strLine.startswith(">") and strLine[1:].rstrip("\r\n") == strSequenceName:
                # The file object is its own iterator, so the helper resumes
                # reading on the line right after this header.
                return extractSubsequence(fIn, iStartPosition, iLength)
    finally:
        fIn.close()
    raise Exception("Sequence " + strSequenceName + " not found in " + strFastaFile)

def main(argv=None):
    """Command-line entry point: print a subsequence of a FASTA record.

    argv -- full argument vector including the program name; defaults to
            sys.argv when None.

    Returns None, so sys.exit(main()) exits with status 0 on success.
    Exceptions from option parsing or extraction propagate to the caller.
    """
    if argv is None:
        argv = sys.argv
    # NOTE(review): required=True is handled by the project's extendedoptparse
    # module, presumably rejecting a command line that omits the option.
    parser = extendedoptparse.OptionParser(usage="Extract subsequence from fasta")
    parser.add_option("-f", "--fasta", required=True)
    parser.add_option("-s", "--sequence", required=True,
                      help="Which sequence in fasta to extract from")
    parser.add_option("-p", "--position", required=True, type="int",
                      help="Where to start extracting (1-based)")
    parser.add_option("-l", "--length", required=True, type="int",
                      help="How many bases to extract")

    # Parse the argv that was passed in (previously sys.argv was always used,
    # which made the argv parameter a no-op).  The program name in argv[0]
    # ends up among the leftover positional arguments, which are ignored.
    dctOptions, lstUnusedArgs = parser.parse_args(argv)
    strSubSequence = extractSequenceFromFasta(dctOptions.fasta,
                                              dctOptions.sequence,
                                              dctOptions.position,
                                              dctOptions.length)
    # Single-argument print(...) emits the same output under the Python 2
    # print statement and the Python 3 print function.
    print(strSubSequence)
    
    

if __name__ == "__main__":
    # Run only when executed as a script; hand main()'s return value to
    # sys.exit() so it becomes the process exit status.
    iExitStatus = main()
    sys.exit(iExitStatus)
    
