/*

	Manage gene expression data files

*/


#ifndef DATAFILE_H
#define DATAFILE_H 1

#include <string>
#include <vector>
#include <list>
#include <map>
#include <iostream>
using namespace std;

#include "gaussian.h"


/**
 * One entry of the 'roles' data; read_roles() returns a list of these.
 *
 * Plain aggregate with no constructor: 'priority' is NOT initialised by
 * default construction, so every field should be set before use.
 */
struct Role
{
	std::string regulator;	// name of the regulating side (presumably a gene name -- confirm)
	std::string regulatee;	// name of the regulated side (presumably a gene name -- confirm)
	std::string role;	// role label as text; the allowed values are not visible in this header
	int priority;		// integer priority; ordering semantics are defined by the implementation
};

/** Write a Role to an output stream (defined outside this header). */
std::ostream& operator<<(std::ostream &out, const Role &r);


/**
 * Skip over all comment lines, i.e. lines that begin with the 'comment'
 * character.
 * Assumes the stream is positioned at the beginning of a line.
 *
 *	in:       input stream to read from
 *	comment:  character that marks a comment line (defaults to '#')
 */
void skip_comments(istream &in, char comment='#');

/**
 * Read 'roles' data and return the list of all roles.
 *
 *	in / filename:  where to read from -- an input stream, or the name of a file
 *	err:            output stream supplied by the caller
 *	                (presumably receives diagnostics -- confirm against the implementation)
 *	return:         list of all roles read
 */
list<Role> read_roles(istream &in, ostream &err);
list<Role> read_roles(string filename, ostream &err);


/**
 * Read 'conditions' data.
 *
 *	in / filename:  where to read from -- an input stream, or the name of a file
 *	err:            output stream supplied by the caller
 *	                (presumably receives diagnostics -- confirm against the implementation)
 *	return:         list of (name, list-of-values) pairs, one per condition
 */
list<pair<string,list<string> > > read_conditions(istream &in, ostream &err);
list<pair<string,list<string> > > read_conditions(string filename, ostream &err);

/**
 * Read 'states' data.
 *
 *	in / filename:  where to read from -- an input stream, or the name of a file
 *	err:            output stream supplied by the caller
 *	                (presumably receives diagnostics -- confirm against the implementation)
 *	return:         map from name to gaussian-mixture
 *	                (a map, not a list: there is at most one mixture per name)
 */
map<string,GaussianMixture> read_states(istream &in, ostream &err);
map<string,GaussianMixture> read_states(string filename, ostream &err);

/**
 * Read expression data.
 *
 *	in / filename:  where to read from -- an input stream, or the name of a file
 *	err:            output stream supplied by the caller
 *	                (presumably receives diagnostics -- confirm against the implementation)
 *
 * Results are delivered through the reference parameters:
 *	header:      header row (list of gene names)
 *	expression:  data (for each array, (name, vector of expression parallel to header row))
 *
 * NOTE(review): the stream overload returns nothing while the filename
 * overload returns bool -- presumably false when the file cannot be opened;
 * confirm against the implementation.
 */
void read_expression(istream &in, vector<string> &header, vector<pair<string, vector<double> > > &expression, ostream &err);
bool read_expression(string filename, vector<string> &header, vector<pair<string, vector<double> > > &expression, ostream &err);


/**
 * Read conditions data (the per-array values of each condition).
 *
 *	in / filename:  where to read from -- an input stream, or the name of a file
 *	err:            output stream supplied by the caller
 *	                (presumably receives diagnostics -- confirm against the implementation)
 *
 * Results are delivered through the reference parameters:
 *	header:  header row (list of condition names)
 *	values:  data (for each array, (name, vector of values parallel to header row))
 *
 * NOTE(review): the stream overload returns nothing while the filename
 * overload returns bool -- presumably false when the file cannot be opened;
 * confirm against the implementation.
 */
void read_conditions_values(istream &in, vector<string> &header, vector<pair<string, vector<string> > > &values, ostream &err);
bool read_conditions_values(string filename, vector<string> &header, vector<pair<string, vector<string> > > &values, ostream &err);

/**
 * Read all GRN data in this format:
 *
 *	# comments start with '#'
 *	# line 1:        number of arrays (N), conditions (C) and genes (G)
 *	# line 2:        header row: C condition names, then G gene names
 *	# next N lines:  array name, C condition values, G gene expression values
 *	N C G
 *	header row
 *	records
 *
 * Results are delivered through the reference parameters:
 *	N, C, G:     the three counts from line 1
 *	arrays:      array names
 *	conditions:  condition names from the header row
 *	genes:       gene names from the header row
 *	values:      condition values (presumably one row per array -- confirm)
 *	expression:  gene expression values (presumably one row per array -- confirm)
 *
 *	in / filename:  where to read from -- an input stream, or the name of a file
 *	err:            output stream supplied by the caller
 *	                (presumably receives diagnostics -- confirm against the implementation)
 *
 * NOTE(review): the stream overload returns nothing while the filename
 * overload returns bool -- presumably false when the file cannot be opened;
 * confirm against the implementation.
 */
void read_assay_data(istream &in, int &N, int &C, int &G, vector<string> &arrays, vector<string> &conditions, vector<string> &genes, vector<vector<string> > &values, vector<vector<double> > &expression, ostream &err);

bool read_assay_data(string filename, int &N, int &C, int &G, vector<string> &arrays, vector<string> &conditions, vector<string> &genes, vector<vector<string> > &values, vector<vector<double> > &expression, ostream &err);




#endif
