/*

	Test the functionality of a CPDTree.

	The idea:  Create REAL data for the parents of the variable
			   Create REAL data for the variable itself,
			   	using some tree CONCEPT
			   Add NOISE to the REAL data to make PROBABILISTIC OBSERVED data
			   Train a tree on OBSERVED data
			   See if the TREE looks like the CONCEPT

*/

#include <vector>
#include <sstream>
#include <string>
#include <functional>
#include <iterator>
#include <iostream>
#include <stdlib.h>
using namespace std;

#include "CPDTree.h"
#include "CPD.h"		// training data
#include "stl.h"
#include "OptionParser.h"

#ifndef rnd
// Fallback used only when no earlier header defined rnd():
// a pseudo-random double in [0, 1] taken from rand().
#define rnd() ((double)rand() / (double)RAND_MAX)
#endif

/** Build a noisy probability distribution over `values` outcomes.

	The entry for `value` starts at 1.0 and every other entry at 0.0;
	each entry then gets an independent random amount, rnd() * noise,
	added to it, and the result is normalized to sum to 1.

	@param values  number of outcomes (length of the returned vector)
	@param value   index of the true outcome; if it matches no index,
	               the distribution is made of noise only
	@param noise   upper bound of the random amount added to each entry
	               (expected to be non-negative; defaults to 0.1)
	@return        the normalized distribution; empty when values is 0.
	               If the entries do not add up to a positive number
	               (e.g. noise is 0 and `value` is out of range) a
	               uniform distribution is returned instead of dividing
	               by zero.
  */
std::vector<double> observe(int values, int value, double noise = 0.1) {

	std::vector<double> ans(values);

	// fill in the noisy signal and accumulate its total in the same pass
	double sum = 0.0;
	for (int v=0; v<values; v++) { 
		ans[v] = (value==v ? 1.0 : 0.0) + (rnd() * noise);
		sum += ans[v];
	}

	if (sum > 0.0) {
		// normalize
		for (int v=0; v<values; v++) { ans[v] /= sum; }
	} else {
		// nothing to scale by: fall back to a uniform distribution
		for (int v=0; v<values; v++) { ans[v] = 1.0 / values; }
	}
	return ans;
}

/** Print a one-line summary of a Variable:
	its name, its ID, how many values it has, and the
	comma-separated list of those values inside braces.
  */
ostream& operator<<(ostream &out, const Variable &variable) { 
	typedef vector<string>::const_iterator ValueIter;
	const ValueIter first = variable.values.begin();
	const ValueIter last  = variable.values.end();

	out << "[VAR " << variable.name;
	out << " (ID=" << variable.ID << "), ";
	out << variable.values.size() << " values:  {";

	// a comma goes in front of every value except the first one
	for (ValueIter it = first; it != last; ++it) { 
		if (it != first) { out << ","; }
		out << *it;
	}

	out << "} ]";
	return out;
}

/** TrainingData backed by an in-memory table of observed distributions.

	The table is indexed as [example][variable index][value].  Variables
	are addressed by ID from the outside, so every lookup first translates
	the ID into a table index through `map`.  Both pointers are borrowed:
	this class never allocates or frees them.
  */
class Observations : public TrainingData { 
  private:
  	vector<vector<vector<double> > > *observed_data;
	vector<int> *map;	// observed data only has so many variables
									// this maps index -> ID
  public:
	/** @param n     forwarded unchanged to the TrainingData constructor
	    @param data  observed table, [example][variable index][value]
	    @param m     m[i] is the ID of the variable stored at index i
	  */
  	Observations(int n, vector<vector<vector<double> > > *data, vector<int> *m) 
		: TrainingData(n), observed_data(data), map(m) { }

	/** Observed entry for `value` of variable `ID` in `example`.

	    If `ID` is not present in the map, a message is written to cerr
	    and 0.0 is returned; indexing the table with -1 would read out
	    of bounds.  `example` and `value` are not range-checked.
	  */
  	double operator()(int ID, int example, int value) { 
		// find index corresponding to ID
		int index = -1;
		for (size_t i=0; i<map->size(); i++) { 
			if ((*map)[i]==ID) { index=static_cast<int>(i); break; } 
		}
		if (index==-1) { 
			cerr << "Observations has no ID " << ID << endl; 
			return 0.0;
		}
		return (*observed_data)[example][index][value];
	}
};

/** ExampleData view of a single, fixed example of an observed table.

	The table is indexed as [example][variable index][value]; the example
	index is chosen at construction time.  Variables are addressed by ID
	from the outside, so every lookup first translates the ID into a table
	index through `map`.  Both pointers are borrowed: this class never
	allocates or frees them.
  */
class FixedExampleObservations : public ExampleData { 
  private:
  	int example;
  	vector<vector<vector<double> > > *observed_data;
	vector<int> *map;	// observed data only has so many variables
									// this maps index -> ID
  public:
	/** @param data  observed table, [example][variable index][value]
	    @param m     m[i] is the ID of the variable stored at index i
	    @param e     index of the example this object exposes
	  */
  	FixedExampleObservations(vector<vector<vector<double> > > *data, 
	                       vector<int> *m, int e) 
		// listed in declaration order, which is the order members are initialized in
		: example(e), observed_data(data), map(m) { }

	/** Observed entry for `value` of variable `ID` in the fixed example.

	    If `ID` is not present in the map, a message is written to cerr
	    and 0.0 is returned; indexing the table with -1 would read out
	    of bounds.  `value` is not range-checked.
	  */
  	double operator()(int ID, int value) { 
		// find index corresponding to ID
		int index = -1;
		for (size_t i=0; i<map->size(); i++) { 
			if ((*map)[i]==ID) { index=static_cast<int>(i); break; } 
		}
		if (index==-1) { 
			cerr << "Observations has no ID " << ID << endl; 
			return 0.0;
		}
		return (*observed_data)[example][index][value];
	}
};

int main(int argc, char **argv) { 

	int seed;
	double ratio;
	bool prune;

	OptionParser parser("test CPDTree");

	parser.add(Option("seed", 's', &seed, 6141976, "PRNG seed"));
	parser.add(Option("ratio", 'r', &ratio, 1.0, "LDL/IDL Max. prune ratio threshold"));
	parser.add(Option("prune", 'p', &prune, false, "prune CPD trees during training"));

	parser.parse(argc, argv);
	
	srand(seed);

	/*
		There are 6 variables (5 potential parents, 1 child)
		A-E are parents, F is child
	*/

	const int NUM_VARS = 6;
	enum VARS       {  A ,  B  , C ,  D ,  E ,  F  };
	int VIDS[] =    {  4 , 15  , 2 , 33 , 42 , 58  };	// unique, but could be anything
	//int VIDS[] =    {  0 ,  1  , 2 ,  3 ,  4 ,  5  };
	string VNMS[] = { "A", "B", "C", "D", "E", "F" };
	int VALS[] =    {  2,   2,   3,   3,   5 ,  5  };
	string VLNM[] = { "v0","v1","v2","v3","v4" };	// max num values is 5

	vector<Variable> variables;
	for (int i=0; i<NUM_VARS; i++) { 
		variables.push_back(Variable(VNMS[i], VIDS[i], VLNM, VLNM+VALS[i]));
	}

	const int N = 369;	// number of examples

	// real data (for each example, each variable)
	vector<vector<int> > real_data(N);	
	for (int x=0; x<N; x++) { 
		for (int v=0; v<variables.size()-1; v++) { 
			// create real value at random
			real_data[x].push_back( rand() % variables[v].values.size() );
		}
		int f_value;
	
	/* CONCEPT:  (make sure VALS[F] is big enough for all these values)
                      C   
                     / \
                 v1/     \~v1                                         
                 /         \                  
                D           A                                           
             v2/ \~v2    v0/ \~v0                                   
              0   1       E   4                                  
                       v3/ \~v3                                  
                        2   3                                    
                                    
	*/
                                                                 
		if (real_data[x][C] == 1) {
			if (real_data[x][D] == 2) {
				real_data[x][F] = 0; 
			} else {
				real_data[x][F] = 1;
			}
		} else {
			if (real_data[x][A] == 0) { 
				if (real_data[x][E] == 3) {
					real_data[x][F] = 2;
				} else {
					real_data[x][F] = 3;
				}
			} else {
				real_data[x][F] = 4;
			}
		}
	
	
	/* SIMPLER CONCEPT:
                      B   
                     / \
                 v0/     \~v0                                         
                 /         \                  
                A           2
             v0/ \~v0   
              0   1      
                       
                        
		if (real_data[x][B] == 0) {
			if (real_data[x][A] == 0) {
				real_data[x][F] = 0; 
			} else {
				real_data[x][F] = 1;
			}
		} else {
			real_data[x][F] = 2;
		}
	*/
		
	}

	// observed data (for each example, each variable, each possible value)
	vector<vector<vector<double> > > observed_data(N);
	for (int x=0; x<N; x++) { 
		observed_data[x].resize(variables.size());
		for (int v=0; v<variables.size(); v++) { 
			observed_data[x][v] = 
				observe(variables[v].values.size(), real_data[x][v]);
		}
	}

	// create training data
	vector<int> index2ID_map;
	for (int i=0; i<NUM_VARS; i++) { 
		index2ID_map.push_back(VIDS[i]);
	}
	Observations observations(N, &observed_data, &index2ID_map);
	
	// create Tree
	CPDTree tree(variables.begin(), variables.end());

	tree.auto_prune = prune;
	tree.ratio_threshold = ratio;

	// train on data
	tree.train(&observations);
	cerr << "trained tree size = " << tree.size() << endl;

	// prune that
	tree.prune();
	cerr << "pruned tree size = " << tree.size() << endl;

	/*
	// observed data (for each example, each variable, each possible value)
	cerr << "observed_data[0]:  " << observed_data[0] << endl;
	*/

	// classify some examples
	FixedExampleObservations feo(&observed_data, &index2ID_map, 0);
	cerr << "classify example 0:  " << endl;
	cerr << "Real example 0 is:  " << real_data[0] << endl;
	cerr << "Classification:  " << tree.classify(&feo) << endl;
	cerr << "value " << real_data[0][F] << " (start counting at 0) should be highest (depending on how much noise)" << endl;

	// convert to a table
	cerr << "Convert to a table..." << endl;
	CPT *table = tree.as_table();
	// cerr << *table << endl;

	// classify using table (see if you get the same answer)
	cerr << "Classification:  " << table->classify(&feo) << endl;

	/*
	// classify a few more examples:
	cerr << "classify a few more examples..." << endl;
	for (int x=1; x<10; x++) { 
		cerr << endl << "Example " << x << ":" << endl;
		FixedExampleObservations feox(&observed_data, &index2ID_map, x);
		cerr << "Tree says:\t" << tree.classify(&feox) << endl;
		cerr << "Table says:\t" << table->classify(&feox) << endl;
	}
	*/
	
	// print tree as dot, ascii
	tree.dot(cout);
	tree.ascii(cerr);

	delete table;
	
	return 0;

}
