/* Copyright (C) 2003-2008 Dan Arlow
 * 
 * This file is part of motifADE.
 * 
 * motifADE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * motifADE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with motifADE.  If not, see <http://www.gnu.org/licenses/>.
 */

/* 
 *  sequence_expression_map_reader.hpp
 *
 *  Simple class for reading a mapping between sequence IDs and expression IDs from a stream.
 */


#ifndef SEQUENCE_EXPRESSION_MAP_READER_H
#define SEQUENCE_EXPRESSION_MAP_READER_H


#include <map>
#include <set>
#include <string>
#include <algorithm>

#include "common.hpp"
#include "buffered_reader.hpp"
#include "tokenizer.hpp"

#include "sequence.hpp"
#include "expression_vector.hpp"

#include "sequence_expression_map.hpp"
#include "column_map_reader.hpp"
#include "bijective_map_filter.hpp"
#include "map_inverter.hpp"
#include "operation_stats.hpp"


using namespace std;


/*
 *  SequenceExpressionMapReader
 *
 *  Reads a mapping between sequence IDs and expression IDs from a
 *  BufferedReader.  Only the interface is declared here; getColumns() and
 *  load() are defined out of line.
 *
 *  The reader keeps a reference to the BufferedReader it was constructed
 *  with, so that stream must outlive the reader.
 */
class SequenceExpressionMapReader {
private:
	// Multimap from the map's key type to its mapped type.
	typedef multimap< SequenceExpressionMap::key_type, SequenceExpressionMap::mapped_type >  MMapType;
	// The same relation with key and mapped types swapped.
	typedef multimap< SequenceExpressionMap::mapped_type, SequenceExpressionMap::key_type >  InvertedMMapType;
	// A pair of column indices, as returned by getColumns().
	typedef pair< unsigned int, unsigned int >  ColumnPair;

public:
	/*
	 *  Counters describing one load operation.  In the printed report,
	 *  "keys" are labelled as sequence IDs and "values" as expression IDs.
	 */
	struct Stats : OperationStats {
		// Start with every counter at zero.  During construction this call
		// resolves to Stats::clear() itself, never to an override in a
		// more-derived class.
		Stats() { clear(); }
		
		// Reset every counter to zero.
		virtual void clear()
		{
			num_mappings_loaded = 0;
			num_missing_keys = 0;
			num_missing_values = 0;
			num_unique_keys = 0;
			num_unique_values = 0;
			num_repeated_keys = 0;
			num_repeated_values = 0;
			num_key_repeats = 0;
			num_value_repeats = 0;
			num_mappings_retained = 0;
		}
		
		// Write a six-line, human-readable summary of the counters to os
		// and return os.  Each line ends with endl, so the stream is flushed
		// after every line.
		virtual ostream& print( ostream& os ) const {
			os << num_mappings_loaded << " mappings loaded." << endl;
			os << num_missing_keys << " mappings were missing sequence IDs, and " << num_missing_values << " missing expression IDs." << endl;
			os << num_unique_keys << " unique sequence IDs, and " << num_unique_values << " unique expression IDs (after annotations with missing sequence IDs were removed)" << endl;
			os << num_repeated_keys << " repeated sequence IDs, repeated a total of " << num_key_repeats << " times;" << endl;
			os << num_repeated_values << " repeated expression IDs, repeated a total of " << num_value_repeats << " times." << endl;
			os << num_mappings_retained << " mappings retained." << endl;
			return os;
		}
		
		unsigned int	num_mappings_loaded;	// reported as "mappings loaded"
		unsigned int	num_missing_keys;		// reported as mappings missing sequence IDs
		unsigned int	num_missing_values;		// reported as mappings missing expression IDs
		unsigned int	num_unique_keys;		// reported as unique sequence IDs
		unsigned int	num_unique_values;		// reported as unique expression IDs
		unsigned int	num_repeated_keys;		// reported as repeated sequence IDs
		unsigned int	num_repeated_values;	// reported as repeated expression IDs
		unsigned int	num_key_repeats;		// reported as total repeats of sequence IDs
		unsigned int	num_value_repeats;		// reported as total repeats of expression IDs
		unsigned int	num_mappings_retained;	// reported as "mappings retained"
	};

public:
	// Bind the reader to ist.  delimiter is given a definite starting value
	// so that it is never read while indeterminate.
	// NOTE(review): tab is an assumed default; the out-of-line definitions
	// are not visible from this header -- confirm it matches the intended
	// input format.
	SequenceExpressionMapReader( BufferedReader& ist ) : is( ist ), delimiter( '\t' ) {}
	
	// Returns a pair of column indices for the given header line.
	// NOTE(review): presumably the sequence-ID and expression-ID columns;
	// defined out of line -- confirm against the implementation.
	ColumnPair  getColumns( string& header );
		
	// Loads the mapping into sMap and records counters in stats.
	// NOTE(review): defined out of line; exact filtering behaviour is not
	// visible from this header.
	void		load( SequenceExpressionMap& sMap, SequenceExpressionMapReader::Stats& stats );
	
private:
	BufferedReader&				is;			// input source (not owned)
	char						delimiter;	// NOTE(review): presumably the column separator -- confirm
};



#endif // SEQUENCE_EXPRESSION_MAP_READER_H
