/* Copyright (C) 2003-2008 Dan Arlow
 * 
 * This file is part of motifADE.
 * 
 * motifADE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * motifADE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with motifADE.  If not, see <http://www.gnu.org/licenses/>.
 */

/* 
 *  iupac_motif.cpp
 *
 *  Class for storing IUPAC-degenerate motifs.
 */


#include "common.hpp"
#include "sequence.hpp"
#include "iupac_motif.hpp"

#include <string>
#include <algorithm>
#include <iostream>


// Builds a motif from an explicit name and a pattern string.
//   init_name    -- copied into the motif's name.
//   init_pattern -- passed to parseMotifPattern(), which throws
//                   MotifADEException if isValidPattern() rejects it.
IUPACMotif::IUPACMotif( const string& init_name, const string& init_pattern )
	: name( init_name )
{
	parseMotifPattern( init_pattern );
}


// Builds a motif from one raw record laid out as "<name>\t<pattern>".
// The text before the first tab becomes the motif name; everything after
// that tab is passed to parseMotifPattern().
// Throws MotifADEException when the record contains no tab at all.
IUPACMotif::IUPACMotif( const string& raw_motif )
{
	const string::size_type separator = raw_motif.find( '\t' );
	if( separator == string::npos )
		throw( MotifADEException( "IUPACMotif::IUPACMotif: Malformed motif!" ) );
	
	// Name is everything up to (not including) the tab.
	name.assign( raw_motif, 0, separator );
	
	// Pattern field is the remainder of the record after the tab.
	const string pattern_field( raw_motif, separator + 1 );
	parseMotifPattern( pattern_field );
}


bool
IUPACMotif::isValidPattern( const string& motif_pattern, string::size_type pos )
{
	return motif_pattern.find_first_not_of( "acgtmrwsykvhdbnACGTMRWSYKVHDBN|", pos ) == string::npos;
}


bool
IUPACMotif::isValidRaw( const string& raw_pattern )
{
	string::size_type pos = raw_pattern.find_first_of( '\t' );
	if( pos == string::npos )
		return false;
	return isValidPattern( raw_pattern, pos + 1 );
}


// Constructor parsing helper -- checks motif_pattern with isValidPattern(),
// hands it to tokenize() with '|' as the delimiter so the pieces are appended
// to `patterns`, then applies string_toupper to every stored pattern
// (presumably upper-casing each one in place -- confirm against its definition).
// Throws MotifADEException when the pattern fails validation.
void
IUPACMotif::parseMotifPattern( const string& motif_pattern )
{
	const bool well_formed = isValidPattern( motif_pattern );
	if( !well_formed )
		throw( MotifADEException( "IUPACMotif::parseMotifPattern: Malformed motif!" ) );
	
	// Append the '|'-delimited alternatives to the stored pattern list.
	tokenize( back_inserter( patterns ), motif_pattern, '|' );
	
	// Normalise every stored alternative.
	for_each( patterns.begin(), patterns.end(), string_toupper );
}


// IUPACMotif helper method -- returns the complement of a given IUPAC symbol.
//
// ch is matched against the upper-case IUPAC nucleotide codes only.  Each
// ambiguity code maps to the code whose base set is the complement of its own:
//   A <-> T            C <-> G
//   M (A/C) <-> K (G/T)
//   R (A/G) <-> Y (C/T)
//   V (A/C/G) <-> B (C/G/T)
//   H (A/C/T) <-> D (A/G/T)
//   W (A/T), S (C/G) and N are their own complements.
// Any other character is reported on cout and '?' is returned.
char
IUPACMotif::getComplement( char ch ) const
{
	switch( ch ) {
		// unambiguous bases
		case 'A':
			return 'T';
			
		case 'C':
			return 'G';
			
		case 'G':
			return 'C';
	
		case 'T':
			return 'A';
		
		// two-base ambiguity codes
		case 'M':
			// M = A or C, so its complement is T or G, i.e. K (mirrors the 'K' case below).
			return 'K';
		
		case 'K':
			return 'M';
		
		case 'R':
			return 'Y';
		
		case 'Y':
			return 'R';
		
		case 'W':
			return 'W';
			
		case 'S':
			return 'S';
		
		// three-base ambiguity codes
		case 'V':
			return 'B';
		
		case 'B':
			return 'V';
		
		case 'H':
			return 'D';
		
		case 'D':
			return 'H';
		
		// any base
		case 'N':
			return 'N';
		
		default:
			cout << "ERROR in getComplement: UNRECOGNIZED PATTERN NUCLEOTIDE SYMBOL " << ch << endl;
			return '?';
	}
}


// IUPACMotif helper method -- writes the full motif pattern into dst.
// dst is emptied first; the patterns returned by getPattern( 0 .. n-1 ) are
// then appended in order with a single '|' between neighbours.  With no
// patterns, dst is left empty.
void
IUPACMotif::getPatterns( string& dst ) const
{
	dst.clear();
	
	const unsigned int count = numPatterns();
	for( unsigned int idx = 0; idx < count; ++idx ) {
		// separator goes before every pattern except the first
		if( idx != 0 )
			dst += '|';
		dst += getPattern( idx );
	}
}



// IUPACMotif helper method -- copies the complement of a given IUPAC sequence to dst
void
IUPACMotif::getComplement( const string& motif, string& dst ) const
{
	dst.resize( motif.size() );
	string::iterator j = dst.begin();
	for( string::const_iterator i = motif.begin(); i != motif.end(); ++i, ++j )
		*j = getComplement( *i );
}


// IUPACMotif helper method -- copies the reverse of a given IUPAC sequence to dst
void
IUPACMotif::getReverse( const string& motif, string& dst ) const
{
	dst.resize( motif.size() );
	reverse_copy( motif.begin(), motif.end(), dst.begin() );
}


// IUPACMotif helper method -- writes the reverse-complement of an IUPAC
// sequence to dst: the sequence is complemented symbol by symbol into dst,
// and dst is then reversed in place.
void
IUPACMotif::getReverseComplement( const string& motif, string& dst ) const
{
	// The two-argument getComplement() resizes dst to motif's length itself.
	getComplement( motif, dst );
	
	reverse( dst.begin(), dst.end() );
}


// IUPACMotif helper method -- writes the reverse-complement of the full motif
// pattern into dst.  dst is emptied first; each pattern from getPattern() is
// reverse-complemented individually and appended in the original pattern
// order, with a single '|' between neighbours.  With no patterns, dst is
// left empty.
void
IUPACMotif::getReverseComplementPatterns( string& dst ) const
{
	dst.clear();
	
	string flipped;		// scratch buffer reused for every pattern
	const unsigned int count = numPatterns();
	for( unsigned int idx = 0; idx < count; ++idx ) {
		// separator goes before every pattern except the first
		if( idx > 0 )
			dst += '|';
		getReverseComplement( getPattern( idx ), flipped );
		dst += flipped;
	}
}



// Streams a motif as "(<name>, <patterns>)", where <patterns> is the text
// produced by IUPACMotif::getPatterns().  Returns the stream for chaining.
ostream& operator<<( ostream& os, const IUPACMotif& m )
{
	string joined;
	m.getPatterns( joined );
	
	return os << '(' << m.getName() << ", " << joined << ')';
}

