/*



*/

#include <string>
#include <list>
#include <sstream>
#include <vector>
#include <map>
#include <iostream>
#include <fstream>
using namespace std;

#include "OptionParser.h"
#include "datafile.h"

/**
 * Remove every key listed in `names` from `index`, then empty `names` so the
 * same list can be reused for the next filtering pass.
 */
static void drop_keys(map<string,int> &index, list<string> &names) {
	for (list<string>::const_iterator n=names.begin(); n!=names.end(); ++n) {
		index.erase(*n);
	}
	names.clear();
}

/**
 * Read the four input data files and write one combined data file to `out`,
 * holding the condition values and the gene expression values of every array.
 *
 * Only the following are written:
 *   - genes named in the roles file that also appear in the expression header,
 *   - conditions named in the conditions file that also appear in the
 *     conditions-values header,
 *   - arrays present in both the conditions-values and the expression files.
 * Everything that is dropped, and every condition value that is not listed as
 * legal for its condition, is reported as a WARNING on cerr; none of these is fatal.
 *
 * Output format: a first line "<#arrays> <#conditions> <#genes>", then a header
 * line of tab-prefixed condition names followed by gene names, then one line per
 * array: array name, its condition values, its expression values (tab separated).
 * Conditions, genes and arrays are each written in map (sorted name) order.
 *
 * @param out                         stream the combined data is written to
 * @param roles_filename              file handed to read_roles()
 * @param conditions_names_filename   file handed to read_conditions()
 * @param conditions_values_filename  file handed to read_conditions_values()
 * @param expression_filename         file handed to read_expression()
 */
void write_data(ostream &out, string roles_filename, string conditions_names_filename, 
                string conditions_values_filename, string expression_filename) {

	typedef list<pair<string,list<string> > > ConditionList;
	typedef map<string,int> IndexMap;
	typedef map<string,bool> ValueSet;

	// load the inputs; each reader is handed cerr for its own diagnostics
	list<Role> roles = read_roles(roles_filename, cerr);

	ConditionList conditions = read_conditions(conditions_names_filename, cerr);

	vector<string> conditions_values_header;
	vector<pair<string, vector<string> > > conditions_values;
	read_conditions_values(conditions_values_filename, conditions_values_header, conditions_values, cerr);

	vector<string> expression_header;
	vector<pair<string, vector<double> > > expression;
	read_expression(expression_filename, expression_header, expression, cerr);

	// names scheduled for removal by the current filtering pass
	list<string> to_erase;

	// limit gene names to those that appear in the roles file; -1 == column not found yet
	IndexMap gmap;
	for (list<Role>::const_iterator r=roles.begin(); r!=roles.end(); ++r) {
		gmap[r->regulatee] = -1;
		gmap[r->regulator] = -1;
	}

	// find the column of each of these genes in the expression header
	for (vector<string>::size_type h=0; h<expression_header.size(); ++h) {
		IndexMap::iterator g = gmap.find(expression_header[h]);
		if (g != gmap.end()) {
			g->second = static_cast<int>(h);
		}
	}

	// drop (with a warning) genes from 'roles' that have no expression column
	for (IndexMap::const_iterator g=gmap.begin(); g!=gmap.end(); ++g) {
		if (g->second == -1) {
			cerr << "WARNING:  Gene " << g->first << " mentioned in " 
				 << roles_filename << " but not " << expression_filename << endl;
			to_erase.push_back(g->first);
		}
	}
	drop_keys(gmap, to_erase);
	// gmap is now a map of all gene names -> column in expression[].second

	// create a map of condition name -> set of legal values
	map<string,ValueSet> legal_conditions;
	for (ConditionList::const_iterator c=conditions.begin(); c!=conditions.end(); ++c) {
		// create the entry even when no values are listed, so a condition declared
		// without values still counts as mentioned in the conditions file
		ValueSet &legal = legal_conditions[c->first];
		for (list<string>::const_iterator v=c->second.begin(); v!=c->second.end(); ++v) {
			legal[*v] = true;
		}
	}

	// create a map of condition name -> column in the conditions-values header
	IndexMap cmap;
	for (ConditionList::const_iterator c=conditions.begin(); c!=conditions.end(); ++c) {
		cmap[c->first] = -1;
	}
	for (vector<string>::size_type h=0; h<conditions_values_header.size(); ++h) {
		IndexMap::iterator c = cmap.find(conditions_values_header[h]);
		if (c != cmap.end()) {
			c->second = static_cast<int>(h);
		}
	}

	// drop (with a warning) conditions that have no column in the values file
	for (IndexMap::const_iterator c=cmap.begin(); c!=cmap.end(); ++c) {
		if (c->second == -1) {
			cerr << "WARNING:  Condition " << c->first << " mentioned in "
				 << conditions_names_filename << " but not " << conditions_values_filename << endl;
			to_erase.push_back(c->first);
		}
	}
	drop_keys(cmap, to_erase);
	// cmap is now a map of all condition names -> column in conditions_values[].second

	// create a map of array name -> row in conditions_values
	IndexMap camap;
	for (vector<pair<string, vector<string> > >::size_type a=0; a<conditions_values.size(); ++a) {
		camap[conditions_values[a].first] = static_cast<int>(a);
	}

	// create a map of array name -> row in expression
	IndexMap xamap;
	for (vector<pair<string, vector<double> > >::size_type a=0; a<expression.size(); ++a) {
		xamap[expression[a].first] = static_cast<int>(a);
	}

	// keep only arrays that are present in both the conditions-values and expression files
	for (IndexMap::const_iterator c=camap.begin(); c!=camap.end(); ++c) {
		if (xamap.find(c->first) == xamap.end()) {
			cerr << "WARNING:  Array " << c->first << " mentioned in " << conditions_values_filename 
			     << " but not " << expression_filename << "." << endl;
			to_erase.push_back(c->first);
		}
	}
	drop_keys(camap, to_erase);

	for (IndexMap::const_iterator x=xamap.begin(); x!=xamap.end(); ++x) {
		if (camap.find(x->first) == camap.end()) {
			cerr << "WARNING:  Array " << x->first << " mentioned in " << expression_filename 
			     << " but not " << conditions_values_filename << "." << endl;
			to_erase.push_back(x->first);
		}
	}
	drop_keys(xamap, to_erase);
	// camap and xamap now hold exactly the same set of array names

	// warn about columns of the values file that are not declared conditions
	for (vector<string>::const_iterator n=conditions_values_header.begin(); n!=conditions_values_header.end(); ++n) {
		if (legal_conditions.find(*n) == legal_conditions.end()) {
			cerr << "WARNING:  Condition " << *n << " not mentioned in " << conditions_names_filename << "." << endl;
		}
	}

	// make sure all conditions' values are legal
	// NOTE(review): rows are indexed by header column without a bounds check; this assumes
	// the readers guarantee one value per header column in every row -- confirm
	for (IndexMap::const_iterator ca=camap.begin(); ca!=camap.end(); ++ca) {
		const vector<string> &row = conditions_values[ca->second].second;

		for (IndexMap::const_iterator cm=cmap.begin(); cm!=cmap.end(); ++cm) {
			const ValueSet &legal = legal_conditions[cm->first];
			const string &value = row[cm->second];
			if (legal.find(value) == legal.end()) {
				cerr << "WARNING:  Illegal value " << value << " for condition "
				     << cm->first << " in array " << ca->first << endl;
			}
		}
	}

	// write number of arrays, conditions, genes
	out << xamap.size() << " "	// number of arrays
		<< cmap.size() << " " 	// number of conditions
		<< gmap.size() << '\n';	// number of genes

	// write header row, each condition then each gene
	for (IndexMap::const_iterator c=cmap.begin(); c!=cmap.end(); ++c) {
		out << "\t" << c->first;
	}
	for (IndexMap::const_iterator g=gmap.begin(); g!=gmap.end(); ++g) {
		out << "\t" << g->first;
	}
	out << '\n';

	// write records: array name, condition values, gene expression values
	for (IndexMap::const_iterator x=xamap.begin(); x!=xamap.end(); ++x) {

		// every array still in xamap is also in camap, so this lookup always succeeds
		IndexMap::const_iterator ca = camap.find(x->first);
		const vector<string> &condition_row = conditions_values[ca->second].second;
		const vector<double> &expression_row = expression[x->second].second;

		out << x->first;
		for (IndexMap::const_iterator c=cmap.begin(); c!=cmap.end(); ++c) {
			out << "\t" << condition_row[c->second];
		}
		for (IndexMap::const_iterator g=gmap.begin(); g!=gmap.end(); ++g) {
			out << "\t" << expression_row[g->second];
		}
		out << '\n';
	}

	// one flush at the end instead of one per line
	out.flush();
}


/**
 * Command-line entry point: parse the options, then write the combined data
 * file to the requested output file, or to standard output when no output
 * file is named (neither with the output option nor as first positional argument).
 *
 * @return 0 on success, 1 if the output file cannot be opened or written
 */
int main(int argc, char **argv) {

	string roles_filename, conditions_names_filename, conditions_values_filename, expression_filename;
	string output_filename;

	OptionParser parser("Create ``Assay'' File combining condition values and gene expression");

	// long help texts are split with adjacent string literals, so the source
	// indentation does not end up inside the help message
	parser.add(Option("roles", 'r', &roles_filename, "",
		"Roles file:  records are regulator-name regulatee-name Activator|Repressor priority", true));
	parser.add(Option("conditions", 'c', &conditions_names_filename, "",
		"Conditions file:  records are condition-name possible-values-separated-by-whitespace", true));
	parser.add(Option("values", 'v', &conditions_values_filename, "", 
		"Conditions values file:  header is conditions names, records are array-name condition values "
		"for each condition in header row", true));
	parser.add(Option("expression", 'x', &expression_filename, "",
		"Expression filename:  header row is gene names, records are array-name expression for each "
		"gene in header row", true));
	parser.add(Option("output", 'o', &output_filename, "", "output file"));

	vector<string> args = parser.parse(argc, argv, "[output filename]");

	// assign output filename if not given as option
	if (output_filename.empty() && !args.empty()) {
		output_filename = args[0];
	}

	// no output file requested: write to standard output
	if (output_filename.empty()) {
		write_data(cout, roles_filename, conditions_names_filename, conditions_values_filename, expression_filename);
		return 0;
	}

	ofstream fout(output_filename.c_str());
	if (!fout) {
		// do not run the whole merge only to throw the result away silently
		cerr << "ERROR:  Cannot open output file " << output_filename << " for writing." << endl;
		return 1;
	}

	write_data(fout, roles_filename, conditions_names_filename, conditions_values_filename, expression_filename);

	// close() sets failbit if the final flush fails, so this also catches write errors
	fout.close();
	if (!fout) {
		cerr << "ERROR:  Failed to write output file " << output_filename << "." << endl;
		return 1;
	}

	return 0;

}

