/* Copyright (C) 2003-2008 Dan Arlow
 * 
 * This file is part of motifADE.
 * 
 * motifADE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * motifADE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with motifADE.  If not, see <http://www.gnu.org/licenses/>.
 */

/* 
 *  column_pair_reader.hpp
 *
 *  Simple class for reading a delimited text file and extracting two columns.
 */


#ifndef COLUMN_PAIR_READER_H
#define COLUMN_PAIR_READER_H


#include <map>
#include <vector>
#include <algorithm>
#include <string>
#include <iostream>


#include "common.hpp"
#include "buffered_reader.hpp"
#include "tokenizer.hpp"
#include "operation_stats.hpp"


/*
 *  ColumnPairReader
 *
 *  Reads a delimited text stream line by line, extracts a key field and a
 *  value field from every non-empty line, and hands each complete
 *  ( key, value ) pair to a receiver object.
 *
 *  PairReceiver is any type with a member callable as
 *  receive( key, value ), where both arguments are std::string lvalues.
 */
template< typename PairReceiver >
class ColumnPairReader {
public:
	/*
	 *  Counters describing a single load() run.
	 */
	struct Stats : public OperationStats {
		// Zero the counters directly instead of going through the virtual
		// clear(), so construction does not rely on virtual dispatch.
		Stats()
			: num_mappings_loaded( 0 ),
			  num_missing_keys( 0 ),
			  num_missing_values( 0 ),
			  num_good_mappings( 0 ) { }
		
		// Resets every counter to zero.
		virtual void clear() {
			num_mappings_loaded = 0;
			num_missing_keys = 0;
			num_missing_values = 0;
			num_good_mappings = 0;
		}
		
		// Writes a three-line, human-readable summary of the counters to os
		// and returns os.
		virtual std::ostream& print( std::ostream& os ) const {
			os << num_mappings_loaded << " mappings loaded." << std::endl;
			os << num_missing_keys << " mappings were missing keys, and " << num_missing_values << " missing values." << std::endl;
			os << num_good_mappings << " mappings with both keys and values." << std::endl;
			return os;
		}
		
		unsigned int	num_mappings_loaded,	// non-empty lines read
						num_missing_keys,		// lines whose key extraction failed
						num_missing_values,		// lines whose value extraction failed
						num_good_mappings;		// lines forwarded to the receiver
	};
	
	/*
	 *  ist        stream to read from; only a reference is stored, so the
	 *             caller must keep it alive while this reader is in use.
	 *  keyCol     column of the key field, passed unchanged to
	 *             CSVFieldPairExtractor (index base is defined there).
	 *  valueCol   column of the value field, passed on the same way.
	 *  delimiter  field separator, tab by default.
	 *  quote      quoting character, double quote by default.
	 */
	ColumnPairReader( BufferedReader& ist, unsigned int keyCol, unsigned int valueCol, char delimiter = '\t', char quote = '"' )
		: is( ist ), pairExtractor( keyCol, valueCol, delimiter, quote ) { }
	
	/*
	 *  Consumes the stream until it reports eof().
	 *
	 *  stats is cleared first.  Empty lines are skipped and not counted.
	 *  Every other line increments num_mappings_loaded; a line is passed to
	 *  receiver.receive( key, value ) only when both fields were extracted,
	 *  otherwise the matching "missing" counter(s) are incremented.
	 *
	 *  Throws MotifADEException if the stream tests false on entry.
	 */
	void	load( PairReceiver& receiver, Stats& stats )
	{
		stats.clear();
		
		if( !is ) throw( MotifADEException( "ColumnPairReader::load: couldn't read from stream!" ) );
		
		std::string key, value, line;
		
		while( !is.eof() ) {
			// Start every read from an empty buffer: if a read yields nothing
			// (for instance the final attempt at end of input), the previous
			// line must not be processed a second time.
			line.clear();
			is.getline( line, '\n' );
			
			if( line.empty() ) continue;
			
			++stats.num_mappings_loaded;
			
			// success.first / success.second are treated as the "key found" /
			// "value found" flags.  key and value are not reset between
			// lines; they are only used when both flags are set.
			CSVFieldPairExtractor::ExtractionSuccess success = pairExtractor.extract( key, value, line );
			
			if( success.first && success.second ) {
				++stats.num_good_mappings;
				receiver.receive( key, value );
			}
			else {
				if( !success.first )
					++stats.num_missing_keys;
				
				if( !success.second )
					++stats.num_missing_values;
			}
		}
	}

private:
	BufferedReader& is;						// source stream, not owned
	CSVFieldPairExtractor pairExtractor;	// pulls the two configured columns out of a line
};


#endif // COLUMN_PAIR_READER_H
