/*



*/

#include <string>
#include <sstream>
#include <vector>
#include <iostream>
#include <fstream>
using namespace std;

#include "OptionParser.h"
#include "datafile.h"

/*
 * In-memory copy of an assay data file.
 *
 * Every field is filled in by read_assay_data() (declared in datafile.h).
 * As the fields are indexed by write() in this file:
 *   N            number of arrays (rows)
 *   C            number of condition columns
 *   G            number of gene columns
 *   arrays[a]    label of row a
 *   conditions   condition column headers
 *   genes        gene column headers
 *   values[a][c]       condition value of row a, kept as text
 *   expression[a][g]   expression value of row a
 *
 * The counts start at zero so that an object that has not (yet) been read
 * describes an empty data set instead of holding indeterminate values.
 */
struct AssayData { 

	int N,C,G;

	vector<string> arrays; 
	vector<string> conditions; 
	vector<string> genes; 
	
	vector<vector<string> > values;
	vector<vector<double> > expression;

	/* Empty data set. */
	AssayData() : N(0), C(0), G(0) { }
	/* Read from the file named f; diagnostics go to cerr. */
	AssayData(string f) : N(0), C(0), G(0) { read(f); }
	/* Read from an already open stream; diagnostics go to cerr. */
	AssayData(istream &in) : N(0), C(0), G(0) { read(in); }

	/* Both overloads hand all fields to read_assay_data() to be filled in. */
	void read(string f) { read_assay_data(f, N,C,G,arrays,conditions,genes,values,expression,cerr); }
	void read(istream &in) { read_assay_data(in, N,C,G,arrays,conditions,genes,values,expression,cerr); }
};

/*
 * Write the arrays of `assay` selected by `included` to `out`.
 *
 * Output layout:
 *   line 1:  "<selected count> <C> <G>"
 *   line 2:  tab-prefixed condition names followed by tab-prefixed gene names
 *   then one line per selected array: its label, its C condition values and
 *   its G expression values, tab separated.
 *
 * out       destination stream; its state is not checked here.
 * assay     data set to take the rows from.
 * included  predicate callable as included(int) -> bool; it is invoked exactly
 *           once for every array index 0..assay.N-1, in increasing order.
 *
 * Returns the number of arrays selected (the count printed on line 1).
 */
template <typename Functor>
int write(ostream &out, const AssayData &assay, Functor included) { 

	// The selected count has to be printed before any row, so the predicate
	// results are remembered in a mask rather than evaluated a second time.
	// vector<char> is used instead of a map: indices are dense, and this file
	// does not include <map>.
	const int total = assay.N > 0 ? assay.N : 0;
	vector<char> keep(static_cast<vector<char>::size_type>(total), char(0));

	int N = 0;
	for (int a=0; a<total; a++) { 
		if (included(a)) { 
			keep[a] = 1;
			N++;
		}
	}

	// '\n' rather than endl: the bytes are the same, but the stream is not
	// forced to flush after every single row.
	out << N << " " << assay.C << " " << assay.G << '\n';
	for (int c=0; c<assay.C; c++) { out << "\t" << assay.conditions[c]; }
	for (int g=0; g<assay.G; g++) { out << "\t" << assay.genes[g]; }
	out << '\n';

	for (int a=0; a<total; a++) { 
		if (!keep[a]) { continue; }
		out << assay.arrays[a];
		for (int c=0; c<assay.C; c++) { out << "\t" << assay.values[a][c]; }
		for (int g=0; g<assay.G; g++) { out << "\t" << assay.expression[a][g]; }
		out << '\n';
	}

	return N;
}


/*
 * Selection predicate for one fold of a cross-fold validation.
 *
 * Example number e belongs to the fold when e % folds == fold.
 * With train == true the predicate accepts every example outside the fold;
 * with train == false it accepts exactly the examples inside the fold.
 *
 * The number of folds must be non-zero, since it is used as a modulus.
 */
struct CFV { 
  private:
  	int folds;	// total number of folds
	int fold;	// index of the fold this predicate describes
  public:
  	bool train;	// true: select the training set, false: select the test set

	/* N is the number of folds, f the fold index; starts in training mode. */
  	CFV(int N, int f) : folds(N), fold(f), train(true) { }

	/* Returns whether `example` is part of the currently selected set. */
	bool operator()(int example) const { 
		const bool in_fold = (example%folds == fold);
		return train != in_fold; 
	}
};	

int main(int argc, char **argv) {

	string filename;
	string path;
	int N;
	string prefix;
	bool verbose;

	OptionParser parser("Divide assay data file for Cross-Fold-Validation");

	parser.add(Option("file", 'f', &filename, "", "input file"));
	parser.add(Option("N", 'N', &N, 10, "number of folds"));
	parser.add(Option("path", 'p', &path, "/tmp", "output path"));
	parser.add(Option("prefix", 'x', &prefix, "cfv", "output filename prefix (no special characters, please) \
		Default creates files named cfv.fold.trainset, etc."));
	parser.add(Option("verbose", 'v', &verbose, false, "gab gab gab to stderr"));

	vector<string> args = parser.parse(argc, argv, "[input filename]");

	if (filename == "" && args.size() > 0) { filename = args[0]; }

	if (verbose) {
		cerr << N << "-fold cross-validation" << endl;
		cerr << "Input file:  ";
		if (filename == "") { cerr << "(stdin)"; }
		else { cerr << filename; }
		cerr << endl;
	}
	
	AssayData assay;
	if (filename != "") { 
		assay.read(filename);
	} else {
		assay.read(cin);
	}

	for (int fold=0; fold<N; fold++) {

		CFV cvf(N, fold);

		cvf.train = true;
		ostringstream tross;
		tross << path << "/" << prefix << "." << fold << ".trainset";
		if (verbose) { cerr << "fold " << fold << ", write to " << tross.str() << endl; }
		ofstream trout(tross.str().c_str());
		if (!trout) { cerr << "could not open " << tross.str() << " for output." << endl; }
		int trc = write(trout, assay, cvf);
		if (trc==0) { cerr << "WARNING:  " << tross.str() << " has no examples." << endl; }
		trout.close();

		cvf.train = false;
		ostringstream teoss;
		teoss << path << "/" << prefix << "." << fold << ".testset";
		if (verbose) { cerr << "fold " << fold << ", write to " << teoss.str() << endl; }
		ofstream teout(teoss.str().c_str());
		if (!teout) { cerr << "could not open " << teoss.str() << " for output." << endl; }
		int tec = write(teout, assay, cvf);
		if (tec==0) { cerr << "WARNING:  " << teoss.str() << " has no examples." << endl; }
		teout.close();

	}

	return 0;
}
