/*
	Create an N-th order Markov model file in the following format:

	# comment
	AGCT	0.002341289342923

	(i.e. Sequence followed by frequency)

*/

#include <iostream>
#include <fstream>
#include <sstream>
using namespace std;

#include "fasta.h"
#include "markov.h"
#include "logscale.h"
#include "Timer.h"
#include "Option.h"

/*
	Program entry point: build a Markov model from FASTA sequence data and
	write it to stdout.

	Input sequences come from the file named by the first positional
	argument, or from stdin when no positional argument is given.  The model
	starts either zero-filled at the requested order or, when the "append"
	option names a file, from the model stored in that file.  After all
	sequences are added the pseudocount is applied, the model is optionally
	normalized, and the result is written to stdout.

	Returns 0 on success and -1 if an input file cannot be opened or a FASTA
	record fails to read (a diagnostic is printed on stderr first).
*/
int main(int argc, char **argv) {

	// Option targets; the parser is handed their addresses together with
	// the default values below.
	uint order;
	double pseudocount;
	bool normalize;
	string filename;
	bool verbose;

	ostringstream synopsis;
	synopsis << "Create Markov model of DNA distribution from sequence data.  "
	         << "Output format:  Character sequences followed by frequency "
			 << "of occurance, e.g. \"ACCT  0.0125.\"  Lines preceeded with '"
			 << MM_COMMENT
			 << "' are comments.";
	OptionParser parser(synopsis.str());
	parser.add("order", 'o', &order, 0, "Order of the Markov model");
	parser.add("pc", 'p', &pseudocount, 1, "Pseudocount (add this many to each instance)");
	// NOTE(review): "nonorm" is bound to `normalize` with default true;
	// presumably giving the flag clears it -- confirm against OptionParser.
	parser.add("nonorm", 'k', &normalize, true, "Do not normalize counts");
	parser.add("append", 'a', &filename, "", "Append to this file's Markov model");
	parser.add("verbose", 'v', &verbose, false, "Verbose output (stderr)");

	vector<string> args = parser.parse(argc, argv, " < FASTA file");

	Timer timer;

	// Select the sequence source: first positional argument, else stdin.
	istream *in = &cin;
	ifstream fin;

	if (!args.empty()) {
		fin.open(args[0].c_str());
		if (!fin) {
			cerr << args[0] << " ?" << endl;
			// Return rather than exit(): same exit status, but local
			// objects are destroyed on the way out.
			return -1;
		}
		in = &fin;
	}

	// Create the Markov model: load an existing one to append to, or start
	// from a zero-filled model of the requested order.
	MM<logscale> mm;
	if (filename != "") {
		ifstream mfin(filename.c_str());
		if (!mfin) {
			cerr << filename << "?" << endl;
			return -1;
		}
		mm.read(mfin);
		// mfin is closed by its destructor at the end of this scope.
	} else {
		mm.resize(order);
		mm.fill();	// zero everything
	}

	// Add every FASTA record to the model.
	// NOTE(review): the stream state is tested before each read, so the
	// behaviour on empty input or on trailing data after the last record
	// depends on fasta_read -- confirm.
	while (in->good()) {
		string header, sequence;
		uint fcode = fasta_read(*in, header, sequence);
		if (fcode != fasta_success) {
			cerr << "FASTA read error ("
				 << fasta_message[fcode]
				 << ")" << endl;
			return -1;
		}
		mm.add(sequence);
	}

	if (!args.empty()) { fin.close(); }

	mm.pseudocount(pseudocount);
	if (normalize) { mm.normalize(); }
	mm.write(cout);

	if (verbose) { cerr << "(" << timer << ")" << endl; }

	return 0;
}

