/* Copyright (C) 2003-2008 Dan Arlow
 * 
 * This file is part of motifADE.
 * 
 * motifADE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * motifADE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with motifADE.  If not, see <http://www.gnu.org/licenses/>.
 */

/* 
 *  tokenizer.hpp
 *
 *  string tokenizer functions
 */


#ifndef TOKENIZER_H
#define TOKENIZER_H


#include <string>
#include <iterator>


using namespace std;


// TODO: separate prototypes from implementation... not done yet because it caused runtime linker errors when I tried
// TODO: decide whether these should be templated to operate on generic sequences instead of only on std::string

// STL-style function to tokenize a string; writes the tokens to an output iterator.
//
//   __result  output iterator that accepts std::string values
//   text      the string to split
//   delim     the character that separates tokens (default ',')
//
// Every piece of text between two delimiters is written as one std::string,
// including empty pieces ("a,,b" -> "a", "", "b" and ",a" -> "", "a").
// An empty text produces no tokens, and a delimiter that is the very last
// character does not produce a trailing empty token ("a," -> "a").
//
// Returns the output iterator advanced past the last token written.
template< typename _OutputIter >
_OutputIter
tokenize( _OutputIter __result, const std::string& text, const char delim = ',' )
{
	const std::string::size_type length = text.size();
	std::string::size_type start = 0;

	// Work with positions instead of iterators so that nothing is ever
	// advanced beyond the end of the string.
	while( start < length ) {
		std::string::size_type stop = text.find( delim, start );
		if( stop == std::string::npos ) stop = length;	// last token runs to the end of text

		*__result++ = std::string( text, start, stop - start );

		start = stop + 1;	// skip the delimiter; stop <= length, so this is at most length + 1
	}

	return __result;
}


// STL-style function to tokenize a delimited string whose fields may be
// quoted; writes the tokens to an output iterator.
//
//   __result  output iterator that accepts std::string values
//   text      the line to split
//   delim     the field separator (default ',')
//   quote     the quoting character (default '"')
//
// A delimiter that appears between a pair of quote characters does not end
// the field.  A field that both starts and ends with the quote character has
// that enclosing pair removed; any other field is written unchanged (so a
// field consisting of a single quote character, or one whose opening quote is
// never closed, is passed through verbatim instead of being truncated).
// Quote characters inside a field are not unescaped.
//
// An empty text produces no tokens, and a delimiter that is the very last
// character does not produce a trailing empty token.
//
// Returns the output iterator advanced past the last token written.
template< typename _OutputIter >
_OutputIter
tokenizeCSV( _OutputIter __result, const std::string& text, const char delim = ',', const char quote = '"' )
{
	const std::string::const_iterator end = text.end();
	std::string::const_iterator spos = text.begin();
	bool betweenQuotes = false;

	while( spos != end ) {
		// Find the end of the current field: the next delimiter that is not
		// inside quotes, or the end of the text.
		std::string::const_iterator epos = spos;
		while( epos != end ) {
			if( *epos == delim && !betweenQuotes ) break;
			if( *epos == quote ) betweenQuotes = !betweenQuotes;
			++epos;
		}

		// Only strip quotes when there really is an enclosing pair; otherwise
		// ( spos + 1, epos - 1 ) could be a reversed range.
		if( epos - spos >= 2 && *spos == quote && *( epos - 1 ) == quote )
			*__result++ = std::string( spos + 1, epos - 1 );
		else
			*__result++ = std::string( spos, epos );

		if( epos == end ) break;	// never step past the end of the text
		spos = epos + 1;			// continue after the delimiter
	}

	return __result;
}


// STL-style function to extract selected fields from a delimited string whose
// fields may be quoted; writes the selected fields to an output iterator.
//
//   __first, __last  range of zero-based field numbers to extract; the numbers
//                    must be in ascending order (a number may be repeated, in
//                    which case the field is written once per occurrence)
//   __result         output iterator that accepts std::string values
//   csv              the line to read the fields from
//   delim            the field separator (default ',')
//   quote            the quoting character (default '"')
//
// Fields are recognised as follows: a delimiter between a pair of quote
// characters does not end the field, and a field that both starts and ends
// with the quote character has that enclosing pair removed.  Any other field
// is written unchanged.  Quote characters inside a field are not unescaped.
//
// Exactly one string is written for every requested field number.  A number
// that cannot be satisfied (it lies beyond the last field of csv, or it breaks
// the ascending order) yields an empty string; the scan never reads outside
// csv.  The text after a trailing delimiter counts as an (empty) field.
//
// Returns the output iterator advanced past the last field written.
template< typename _InputIter, typename _OutputIter >
_OutputIter
getCSVFields( _InputIter __first, _InputIter __last, _OutputIter __result, const std::string& csv, const char delim = ',', const char quote = '"' )
{
	const std::string::const_iterator end = csv.end();
	std::string::const_iterator spos = csv.begin();
	bool betweenQuotes = false;
	bool exhausted = false;		// set once the last field of csv has been scanned
	unsigned int fieldnum = 0;

	while( __first != __last && !exhausted ) {
		// Find the end of the current field: the next delimiter that is not
		// inside quotes, or the end of the line.
		std::string::const_iterator epos = spos;
		while( epos != end ) {
			if( *epos == delim && !betweenQuotes ) break;
			if( *epos == quote ) betweenQuotes = !betweenQuotes;
			++epos;
		}

		// Emit the field once for every request that names it.
		while( __first != __last && *__first == fieldnum ) {
			// Only strip quotes when there really is an enclosing pair;
			// otherwise ( spos + 1, epos - 1 ) could be a reversed range.
			if( epos - spos >= 2 && *spos == quote && *( epos - 1 ) == quote )
				*__result++ = std::string( spos + 1, epos - 1 );
			else
				*__result++ = std::string( spos, epos );
			++__first;
		}

		if( epos == end )	exhausted = true;	// never step past the end of the line
		else				spos = epos + 1;	// continue after the delimiter
		++fieldnum;
	}

	// Requests that could not be matched still get an (empty) output so the
	// caller receives one string per requested field number.
	for( ; __first != __last; ++__first )
		*__result++ = std::string();

	return __result;
}

// STL-style functor-like helper that extracts two selected fields from a
// delimited string whose fields may be quoted.
//
// The two zero-based field numbers, the delimiter and the quote character are
// fixed at construction time; extract() can then be applied to any number of
// lines.
struct CSVFieldPairExtractor {
	// first/second tell whether the first/second requested field was found.
	typedef std::pair< bool, bool > ExtractionSuccess;
	
	// firstindex / secondindex: zero-based numbers of the fields to extract.
	// delimC: field separator (default ',').  quoteC: quoting character (default '"').
	CSVFieldPairExtractor( unsigned int firstindex, unsigned int secondindex, const char delimC = ',',  const char quoteC = '"' )
		: firstIndex( firstindex ), secondIndex( secondindex ), delim( delimC ), quote( quoteC ) {}
	
	// Scans csv and copies field number firstIndex into firstResult and field
	// number secondIndex into secondResult.
	//
	// A delimiter between a pair of quote characters does not end a field, and
	// a field that both starts and ends with the quote character has that
	// enclosing pair removed; any other field is copied unchanged.  Quote
	// characters inside a field are not unescaped.
	//
	// A result string is only modified when its field is found.  An empty csv
	// contains no fields, and the text after a trailing delimiter is not
	// treated as a field.  Scanning stops as soon as both fields are assigned.
	//
	// Returns a pair of flags telling which of the two results were assigned.
	ExtractionSuccess extract( std::string& firstResult, std::string& secondResult, const std::string& csv ) const {
		ExtractionSuccess assigned( false, false );
		bool betweenQuotes = false;
		unsigned int fieldnum = 0;
		
		const std::string::const_iterator end = csv.end();
		std::string::const_iterator spos = csv.begin();
		
		while( spos != end ) {
			// Find the end of the current field: the next delimiter that is
			// not inside quotes, or the end of the line.
			std::string::const_iterator epos = spos;
			while( epos != end ) {
				if( *epos == delim && !betweenQuotes ) break;
				if( *epos == quote ) betweenQuotes = !betweenQuotes;
				++epos;
			}
			
			if( !assigned.first && firstIndex == fieldnum ) {
				firstResult = fieldText( spos, epos );
				assigned.first = true;
				if( assigned.second )   break;
			}
			if( !assigned.second && secondIndex == fieldnum ) {
				secondResult = fieldText( spos, epos );
				assigned.second = true;
				if( assigned.first )   break;
			}
			
			if( epos == end ) break;	// never step past the end of the line
			spos = epos + 1;			// continue after the delimiter
			++fieldnum;
		}
		
		return assigned;
	}
	
	const unsigned int firstIndex, secondIndex;
	const char delim, quote;
	
private:
	// Returns the text of the field [spos, epos), without its enclosing quote
	// characters when it has a pair of them.  The length check keeps
	// ( spos + 1, epos - 1 ) from ever being a reversed range.
	std::string fieldText( std::string::const_iterator spos, std::string::const_iterator epos ) const {
		if( epos - spos >= 2 && *spos == quote && *( epos - 1 ) == quote )
			return std::string( spos + 1, epos - 1 );
		return std::string( spos, epos );
	}
};


#endif // TOKENIZER_H
