

#include "WSS.h"
#include "datafile.h"
#include "gaussian.h"

#include <stdio.h>
#include <stdlib.h>

#include <fstream>
#include <iostream>
using namespace std;


/**
 * Write a one-line, human-readable description of a Role to 'out'.
 * Returns 'out' so that insertions can be chained.
 */
ostream& operator<<(ostream &out, const Role &role) {
	out << "[ROLE:  ";
	out << role.regulator << " regulates ";
	out << role.regulatee << " as ";
	out << role.role << " with priority ";
	out << role.priority << "]";
	return out;
}



/**
 * Consume every leading line whose first character is 'comment'.
 *
 * The stream is expected to be positioned at the start of a line.
 * Reading stops at the first line that does not begin with 'comment',
 * or as soon as the stream is no longer good.
 */
void skip_comments(istream &in, char comment) {

	for (;;) {
		if (!in.good()) { return; }

		// look at the first character of the line without consuming it
		const char first = in.peek();
		if (first != comment) { return; }

		// Discard the comment line token by token.  The WSS read after each
		// token is relied on to set 'eol' once the line end has been reached
		// (WSS is declared in WSS.h -- presumably a whitespace skipper).
		WSS line_end;
		while (in.good() && !line_end.eol) {
			string token;
			in >> token >> line_end;
		}
	}
}

/**
 * Read 'roles' data from an input stream.
 *
 * Leading comment lines (starting with '#') are skipped.  Each following
 * line supplies one Role as four fields, in this order:
 *	regulator  regulatee  role  priority
 * Anything else on the line is discarded.
 *
 * Reading stops at the first line whose four fields cannot all be read;
 * such a line is NOT added to the result.
 *
 *	in:   stream to read from
 *	err:  receives a diagnostic when a malformed line (other than running
 *	      into the end of the input) stops the read
 * Returns the roles in the order they were read.
 */
list<Role> read_roles(istream &in, ostream &err) { 

	skip_comments(in, '#');

	list<Role> ans;
	WSS wss;
	
	while (in.good()) { 

		// Parse into a scratch object so that a failed read never leaves a
		// half-filled Role in the result.
		Role role = Role();
		if (!(in >> role.regulator >> role.regulatee
		         >> role.role >> role.priority)) {
			if (!in.eof()) {
				err << "malformed roles line, stopped reading roles" << endl;
			}
			break;
		}
		ans.push_back(role);

		// skip whatever else is on this line
		in >> wss;
		while (in.good() && !wss.eol) {
			string dummy;
			in >> dummy >> wss;
		}
	}
	return ans;
}
	

/**
 * Read 'conditions' data from an input stream.
 *
 * Leading comment lines (starting with '#') are skipped.  Each following
 * line is a condition name followed by the values of that condition, all
 * separated by whitespace.
 *
 *	in:   stream to read from
 *	err:  diagnostic stream (currently not written to)
 * Returns a list of (name, list-of-values) in the order they were read.
 * Nothing is appended for a name or value that could not be read.
 */
list<pair<string,list<string> > > read_conditions(istream &in, ostream &err) { 

	list<pair<string,list<string> > > ans;

	skip_comments(in, '#');

	WSS wss;

	while (in.good()) { 
		
		string name;
		// stop instead of recording an empty name when nothing is left
		if (!(in >> name)) { break; }
		in >> wss;
		ans.push_back(pair<string,list<string> >(name, list<string>()));
		list<string> &values = ans.back().second;
		
		// remaining tokens on this line are the values of the condition
		while (in.good() && !wss.eol) { 
			string value;
			if (!(in >> value)) { break; }
			in >> wss;
			values.push_back(value);
		}
	}
	return ans;
}

/**
 * Read 'states' data from an input stream.
 *
 * Leading comment lines (starting with '#') are skipped.  Each record is
 *	name  M
 * followed by M triples
 *	mu  sigma  w
 * one triple per mixture component.
 *
 *	in:   stream to read from
 *	err:  receives a diagnostic when a name is not followed by a readable
 *	      component count
 * Returns a map from name to its GaussianMixture.  A record whose name or
 * component count cannot be read ends the read and is not stored.
 */
map<string,GaussianMixture> read_states(istream &in, ostream &err) {

	skip_comments(in, '#');

	map<string,GaussianMixture> ans;

	WSS wss;

	while (in.good()) { 

		string name;
		int M = 0;

		// Never build a mixture from a count that was not actually read.
		if (!(in >> name >> M)) {
			if (!name.empty()) {
				err << "state " << name
					<< ": cannot read number of components" << endl;
			}
			break;
		}
		in >> wss;

		// look the entry up once and fill it through the reference
		GaussianMixture &mixture = ans[name];
		mixture = GaussianMixture(M);

		for (int m=0; m<M; m++) { 

			in >> mixture[m].mu 
			   >> mixture[m].sigma
			   >> mixture[m].w
			   >> wss;
		}

	}
	return ans;
}


/**
 * Read an expression table from an input stream.
 *
 * After leading '#' comment lines, the first line is the header: one name
 * per column, stored in 'header'.  Every following line is one row: a label
 * followed by one number per header column.  Rows are appended to
 * 'expression' as (label, values).
 *
 * Rows with too few or too many values are reported on 'err'.  A short row
 * keeps 0 in the columns that were not read; the surplus fields of a long
 * row are discarded so that the next row starts on its own line.
 *
 *	in:          stream to read from
 *	header:      cleared, then filled with the column names
 *	expression:  cleared, then filled with one entry per row
 *	err:         receives the row-length diagnostics
 */
void read_expression(istream &in, vector<string> &header, vector<pair<string, vector<double> > > &expression, ostream &err) { 

	header.clear();
	expression.clear();

	WSS wss;

	skip_comments(in, '#');

	// read header row
	do { 
		string gene;
		if (!(in >> gene)) { break; }	// empty input: no header at all
		in >> wss;
		header.push_back(gene);
	} while (in.good() && !wss.eol);

	const size_t ncols = header.size();

	// read data
	while (in.good()) { 

		string array;
		// do not append a row when only whitespace was left
		if (!(in >> array)) { break; }
		in >> wss;

		expression.push_back(pair<string, vector<double> >(array, vector<double>(ncols)));
		vector<double> &row = expression.back().second;

		// NOTE(review): end-of-line is only tested after a value has been
		// read, so a line holding nothing but the label is not detected here.
		for (size_t g=0; g<ncols; g++) { 
			
			in >> row[g] >> wss;
			if (g+1 < ncols && wss.eol) { 
				err << "array " << array << " data row not long enough (" 
					<< g << " < " << ncols << ")." << endl;
				break;
			}
		}

		if (!wss.eol) { 
			err << "array " << array << " data row too long (> " 
				<< ncols << ")" << endl;

			// drop the surplus fields; otherwise they would be taken for
			// the label and values of the next row
			while (in.good() && !wss.eol) {
				string extra;
				in >> extra >> wss;
			}
		}
		
	} // next array

	return;
}


	
/**
 * Read a table of condition values from an input stream.
 *
 * After leading '#' comment lines, the first line is the header: one
 * condition name per column, stored in 'header'.  Every following line is
 * one row: a label followed by one value per header column.  Rows are
 * appended to 'values' as (label, values).
 *
 * Rows with too few or too many values are reported on 'err'.  A short row
 * keeps empty strings in the columns that were not read; the surplus fields
 * of a long row are discarded so that the next row starts on its own line.
 *
 *	in:      stream to read from
 *	header:  cleared, then filled with the condition names
 *	values:  cleared, then filled with one entry per row
 *	err:     receives the row-length diagnostics
 */
void read_conditions_values(istream &in, vector<string> &header, vector<pair<string, vector<string> > > &values, ostream &err) { 

	header.clear();
	values.clear();

	WSS wss;

	skip_comments(in, '#');

	// read header row
	do { 
		string condition;
		if (!(in >> condition)) { break; }	// empty input: no header at all
		in >> wss;
		header.push_back(condition);
	} while (in.good() && !wss.eol);

	const size_t ncols = header.size();

	// read data
	while (in.good()) { 

		string array;
		// do not append a row when only whitespace was left
		if (!(in >> array)) { break; }
		in >> wss;

		values.push_back(pair<string, vector<string> >(array, vector<string>(ncols)));
		vector<string> &row = values.back().second;

		// NOTE(review): end-of-line is only tested after a value has been
		// read, so a line holding nothing but the label is not detected here.
		for (size_t c=0; c<ncols; c++) { 
			
			in >> row[c] >> wss;
			if (c+1 < ncols && wss.eol) { 
				err << "conditions values, array " << array << " data row not long enough (" 
					<< c << " < " << ncols << ")." << endl;
				break;
			}
		}

		if (!wss.eol) { 
			err << "conditions values, array " << array << " data row too long (> " 
				<< ncols << ")" << endl;

			// drop the surplus fields; otherwise they would be taken for
			// the label and values of the next row
			while (in.good() && !wss.eol) {
				string extra;
				in >> extra >> wss;
			}
		}
		
	} // next array

	return;
}


/**
 * Read a complete assay data set from an input stream.
 *
 * Layout, after leading '#' comment lines:
 *	N C G                        number of arrays, conditions, genes
 *	C condition names, then G gene names
 *	N records, each:  array-name, C condition values, G expression values
 *
 *	in:          stream to read from
 *	N, C, G:     set to the counts read from the stream
 *	arrays:      resized to N; array names
 *	conditions:  resized to C; condition names
 *	genes:       resized to G; gene names
 *	values:      N rows of C condition values
 *	expression:  N rows of G expression values
 *	err:         receives a diagnostic when the counts are unusable
 *
 * If the three counts cannot be read, or any of them is negative, a message
 * is written to 'err', the counts are set to 0 and all output containers
 * are left empty.
 */
void read_assay_data(istream &in, int &N, int &C, int &G, vector<string> &arrays, vector<string> &conditions, vector<string> &genes, vector<vector<string> > &values, vector<vector<double> > &expression, ostream &err) { 

	skip_comments(in, '#');

	WSS wss;

	// read numbers; they size every container below, so refuse to go on
	// with counts that were not read or that are negative
	if (!(in >> N >> C >> G) || N < 0 || C < 0 || G < 0) {
		err << "assay data: cannot read valid array/condition/gene counts" << endl;
		N = 0;
		C = 0;
		G = 0;
		arrays.clear();
		conditions.clear();
		genes.clear();
		values.clear();
		expression.clear();
		return;
	}
	in >> wss;

	arrays.resize(N);
	conditions.resize(C);
	genes.resize(G);
	values.resize(N);
	expression.resize(N);

	// read header
	for (int c=0; c<C; c++) { in >> conditions[c] >> wss; }
	for (int g=0; g<G; g++) { in >> genes[g] >> wss; }

	// read data
	for (int aid=0; aid<N; aid++) { 	// for each array

		vector<string> &array_values = values[aid];
		vector<double> &array_expression = expression[aid];

		array_values.resize(C);
		array_expression.resize(G);

		in >> arrays[aid] >> wss;

		for (int c=0; c<C; c++) { in >> array_values[c] >> wss; }
		for (int g=0; g<G; g++) { in >> array_expression[g] >> wss; }
	}

	return;
}

	
		
	





/**
 * Open 'filename' and read its roles with the stream version of read_roles.
 * If the file cannot be opened, a message is written to 'err' and an empty
 * list is returned.
 */
list<Role> read_roles(string filename, ostream &err) { 
	ifstream source(filename.c_str());
	if (source.fail()) {
		err << "cannot open roles file " << filename << endl;
		return list<Role>();
	}

	list<Role> roles = read_roles(source, err);
	source.close();
	return roles;
}

/**
 * Open 'filename' and read its conditions with the stream version of
 * read_conditions.  If the file cannot be opened, a message is written to
 * 'err' (cerr by default) and an empty list is returned.
 */
list<pair<string,list<string> > > read_conditions(string filename, ostream &err=cerr) {

	ifstream source(filename.c_str());
	if (source.fail()) {
		err << "cannot open conditions file " << filename << endl;
		return list<pair<string,list<string> > >();
	}

	list<pair<string,list<string> > > conditions = read_conditions(source, err);
	source.close();
	return conditions;
}

/**
 * Open 'filename' and read its states with the stream version of
 * read_states.  If the file cannot be opened, a message is written to 'err'
 * and an empty map is returned.
 */
map<string,GaussianMixture> read_states(string filename, ostream &err) {

	ifstream source(filename.c_str());
	if (source.fail()) {
		err << "cannot open states file " << filename << endl;
		return map<string,GaussianMixture>();
	}

	map<string,GaussianMixture> states = read_states(source, err);
	source.close();
	return states;
}

/**
 * Open 'filename' and read an expression table from it with the stream
 * version of read_expression.
 * Returns false (after writing a message to 'err') if the file cannot be
 * opened, true otherwise.
 */
bool read_expression(string filename, vector<string> &header, vector<pair<string, vector<double> > > &expression, ostream &err) { 

	ifstream source(filename.c_str());
	if (source.fail()) {
		err << "cannot open expression file " << filename << endl;
		return false;
	}

	read_expression(source, header, expression, err);
	source.close();
	return true;
}

/**
 * Open 'filename' and read a table of condition values from it with the
 * stream version of read_conditions_values.
 * Returns false (after writing a message to 'err') if the file cannot be
 * opened, true otherwise.
 */
bool read_conditions_values(string filename, vector<string> &header, vector<pair<string, vector<string> > > &values, ostream &err) { 

	ifstream source(filename.c_str());
	if (source.fail()) {
		err << "cannot open values file " << filename << endl;
		return false;
	}

	read_conditions_values(source, header, values, err);
	source.close();
	return true;
}

/**
 * Open 'filename' and read a complete assay data set from it with the
 * stream version of read_assay_data.
 * Returns false (after writing a message to 'err') if the file cannot be
 * opened, true otherwise.
 */
bool read_assay_data(string filename, int &N, int &C, int &G, vector<string> &arrays, vector<string> &conditions, vector<string> &genes, vector<vector<string> > &values, vector<vector<double> > &expression, ostream &err) { 

	ifstream source(filename.c_str());
	if (source.fail()) {
		err << "cannot open assay data file " << filename << endl;
		return false;
	}

	read_assay_data(source, N, C, G, arrays, conditions, genes, values, expression, err);
	source.close();
	return true;
}


