/* Copyright (C) 2003-2008 Dan Arlow
 * 
 * This file is part of motifADE.
 * 
 * motifADE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * motifADE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with motifADE.  If not, see <http://www.gnu.org/licenses/>.
 */

/* 
 *  expression_set_reader.hpp
 *
 *  Class that reads an ExpressionSet from a stream.
 */

#ifndef EXPRESSION_SET_READER_H
#define EXPRESSION_SET_READER_H

#include <set>
#include <iostream>
#include <string>
#include <cctype>


#include "common.hpp"


#include "buffered_reader.hpp"
#include "sequence.hpp"
#include "sequence_set.hpp"
#include "operation_stats.hpp"
#include "expression_set.hpp"


// Reads an ExpressionSet from a stream.
//
// The reader is nearly stateless: it keeps only a reference to the
// BufferedReader it was constructed with, plus two immutable option flags.
// Because only a reference is stored, the BufferedReader must outlive the
// ExpressionSetReader.
class ExpressionSetReader {
public:
	// Set of expression-vector IDs (used by Stats::repeatedIDs).
	typedef set< ExpressionVector::IDType > IDSet;

	// Counters and flags describing one load() operation, printable as a
	// short human-readable report.
	struct Stats : OperationStats {
		// A new Stats object starts in the same state clear() produces.
		Stats() {
			clear();
		}

		// Resets every field: empty header, empty ID set, all counters zero,
		// and removeRepeatedIDs back to true.
		virtual void clear() {
			header.clear();
			repeatedIDs.clear();

			num_vectors_loaded =
				num_unique_ids =
				num_repeated_ids =
				num_id_repeats =
				num_vectors_retained = 0;

			removeRepeatedIDs = true;
		}

		// Writes a four-line summary of the counters to `os` and returns `os`.
		// Each line is terminated with endl, so the stream is flushed after
		// every line.
		virtual ostream& print( ostream& os ) const {
			// Inserted before "removed." when repeated IDs were kept.
			const char* const negation = removeRepeatedIDs ? "" : "not ";

			os	<< num_vectors_loaded << " expression records loaded." << endl
				<< num_unique_ids << " unique IDs; "
				<< num_repeated_ids << " repeated IDs, repeated a total of "
				<< num_id_repeats << " times." << endl
				<< "expression records with repeated IDs were " << negation << "removed." << endl
				<< num_vectors_retained << " expression records retained." << endl;

			return os;
		}

		// Not printed by print(); presumably the header line of the input
		// (filled in by load(), which is defined elsewhere -- TODO confirm).
		string			header;

		// Presumably the IDs that occurred more than once -- TODO confirm
		// against load().
		IDSet			repeatedIDs;

		unsigned int	num_vectors_loaded;		// reported as "expression records loaded"
		unsigned int	num_unique_ids;			// reported as "unique IDs"
		unsigned int	num_repeated_ids;		// reported as "repeated IDs"
		unsigned int	num_id_repeats;			// reported as total number of repeats
		unsigned int	num_vectors_retained;	// reported as "expression records retained"

		// Selects between "were removed" and "were not removed" in print().
		bool			removeRepeatedIDs;
	};

	// Binds the reader to `ist` and records the two options.
	//   ist           - input source; kept by reference, not copied.
	//   useOrder      - stored as useRankOrder (default false); its effect on
	//                   loading is decided by load(), defined elsewhere.
	//   removeRepeats - stored as removeRepeatedIDs (default true).
	ExpressionSetReader( BufferedReader& ist, bool useOrder = false, bool removeRepeats = true )
		: is( ist ),
		  useRankOrder( useOrder ),
		  removeRepeatedIDs( removeRepeats )
	{
	}

	// Loads into `expr` and reports through `stats`; both are taken by
	// non-const reference. Implemented outside this header.
	void	load( ExpressionSet& expr, Stats& stats );

private:
	BufferedReader&	is;					// input source supplied at construction
	const bool		useRankOrder;		// fixed at construction
	const bool		removeRepeatedIDs;	// fixed at construction
};


#endif // EXPRESSION_SET_READER_H
