
#include "BayesNet.h"
#include <iostream>
using namespace std;

#ifndef TAB
#define TAB "\t"
#endif 

// debugging, error/warning messages
/**
 * Write a one-line, human-readable summary of a node to a stream.
 *
 * The summary lists the variable's name and ID, the node's arity, how many
 * parents and children it has, whether an evidence object is attached, and
 * whether a CPD is attached.
 *
 * @param out   stream to write to
 * @param node  node to describe; node.variable is dereferenced unconditionally
 * @return      the same stream, so insertions can be chained
 */
ostream& operator<<(ostream &out, const Node &node) {
	// Turn the two optional attachments into labels first so the
	// insertion chain below stays readable.
	const char *evidence_label = node.evidence ? "evidence" : "hidden";
	const char *cpd_label      = node.cpd ? "valid CPD" : "NO CPD";

	out << "[NODE " << node.variable->name << ", ID=" << node.variable->ID << ", ";
	out << node.arity() << " values, ";
	out << node.parents.size() << " parents, ";
	out << node.children.size() << " children, ";
	out << evidence_label << ", " << cpd_label << "]";

	return out;
}


/**
 * Register a node with the network.
 *
 * On return, node_map[node->variable->ID] is guaranteed to equal `node`.
 *   - If no node was registered under that ID, `node` is also inserted
 *     into `nodes`, which is kept in ascending ID order.
 *   - If a *different* Node object (judging by the pointer) was registered
 *     under that ID, `node` replaces it in both node_map and `nodes`.
 *     The previous object is not deleted here.
 *   - If this very pointer is already registered, nothing happens.
 */
void BayesNet::add(Node *node) {

	// grow the ID -> node table until this node's ID is a valid index
	while (node_map.size() <= node->variable->ID) { node_map.push_back(NULL); }

	Node *existing = node_map[node->variable->ID];
	if (existing == node) { return; }	// already registered, nothing to do

	node_map[node->variable->ID] = node;

	if (existing != NULL) {
		// another object held this ID: overwrite its slot(s) in the list
		for (list<Node*>::iterator it = nodes.begin(); it != nodes.end(); ++it) {
			if ((*it)->variable->ID == node->variable->ID) { *it = node; }
		}
		return;
	}

	// brand new ID: insert in front of the first entry whose ID is not
	// smaller, which keeps the list sorted by ID
	list<Node*>::iterator pos = nodes.begin();
	while (pos != nodes.end() && (*pos)->variable->ID < node->variable->ID) { ++pos; }
	nodes.insert(pos, node);
}
	

/**
 * Add a directed edge parent -> child.
 *
 * Both endpoints are first registered with the network through add(),
 * then the edge is recorded on both nodes (child in parent->children,
 * parent in child->parents).
 *
 * No check is made for self-loops (parent == child) or for an edge that
 * already exists; calling this twice with the same pair records the edge
 * twice.
 */
void BayesNet::connect(Node *parent, Node *child) {

	// make sure both endpoints are present in nodes / node_map
	add(parent);
	add(child);

	// record the edge on each end
	child->parents.push_back(parent);
	parent->children.push_back(child);
}

void BayesNet::normalize_all() { 

	for (list<Node*>::iterator n = nodes.begin(); n != nodes.end(); n++) { 

		if ((*n)->cpd) { (*n)->cpd->normalize(); }
	}
}

	
/**
 * Get a list of a Node's parents (as variables)
 */
vector<const Variable*> BayesNet::parents(int ID) const { 

	// this is for debugging
	/*
	if (node_map.size() <= ID && node_map[ID]==NULL) {
		cerr << "Error (BayesNet::parents(" << ID << "):  No such node." << endl; 
	}
	*/

	vector<const Variable*> variables;

	Node *node = node_map[ID];

	for (vector<const Node*>::iterator p=node->parents.begin(); p!=node->parents.end(); p++) { 
		variables.push_back((*p)->variable);
	}

	return variables;

}



/**
 * Build a *new* CPT for node ID whose variable list follows the network
 * structure: the node's parents (as returned by parents(ID)) followed by
 * the node's own variable as the last entry.
 *
 * The table is allocated with `new`; this function keeps no reference to
 * it, so releasing it is up to the caller.
 */
CPT* BayesNet::create_table(int ID) const {

	// parents first ...
	vector<const Variable*> scope = parents(ID);

	// ... then the node's own variable, always in last position
	const Variable *self = node_map[ID]->variable;
	scope.push_back(self);

	return new CPT(scope);
}





void BayesNet::dot(ostream &out, string title) const { 

	out << "digraph \"" << title << "\" {" << endl;
	if (title != "") { 
		out << "titlenode [color=\"white\" label=\"" << title << "\"];" << endl;
	}

	for (list<Node*>::const_iterator n=nodes.begin(); n!=nodes.end(); n++) { 

		out << TAB << "node" << (*n)->variable->ID 
		           << " [label=\"" << (*n)->variable->name << "\"];" << endl;

	}

	out << endl;

	for (list<Node*>::const_iterator n=nodes.begin(); n!=nodes.end(); n++) { 
	
		for (vector<const Node*>::const_iterator c=(*n)->children.begin();
		                                   c!=(*n)->children.end(); c++) { 

			out << TAB << "node" << (*n)->variable->ID 
			    << " -> node" << (*c)->variable->ID << ";" << endl;
		}
	}

	out << "}" << endl;

	// also print all cpds
	for (list<Node*>::const_iterator n=nodes.begin(); n!=nodes.end(); n++) { 
		Node *node = *n;

		if (node->cpd) { node->cpd->dot(out); }
	}

}

void BayesNet::delete_all_data() { 

	for (list<Node*>::iterator n=nodes.begin(); n!=nodes.end(); n++) { 

		Node *node = *n;

		if (node) { 
			if (node->cpd) { delete node->cpd;  node->cpd=NULL; }
			if (node->evidence) { delete node->evidence; node->evidence=NULL; }
			if (node->variable) { delete node->variable; node->variable = NULL; }
			delete node;
		}
	}
	nodes.clear();
	node_map.clear();
}

/**
 * Walk the children of node ID and flag the ones that matter for a query.
 *
 * A child that carries evidence is marked and its parents are explored
 * through mark_relevant_parents().  A child without evidence is explored
 * recursively; if that exploration ends up marking the child, node ID is
 * marked as well.
 *
 * @param ID      ID of the node whose children are examined
 * @param marked  one flag per node ID, updated in place; it is indexed by
 *                ID, so it must have at least as many entries as the
 *                largest ID reached
 */
void BayesNet::mark_relevant_children(int ID, vector<bool> &marked) const { 

	// Node ID is relevant iff some descendant is relevant, in which
	//	case its ancestors are also relevant

	const Node *node = node_map[ID];
	for (vector<const Node*>::const_iterator c = node->children.begin(); c!=node->children.end(); c++) { 
		int CID = (*c)->variable->ID;

		// if the child is already marked, skip it (this also keeps the
		//	mutual recursion with mark_relevant_parents from looping)
		if (marked[CID]) { continue; }
		
		// if the child has a CPD that does not mention me, it doesn't
		//	depend on me and is not really a child.  A child without a
		//	CPD is always treated as a child.
		if ((*c)->cpd && !(*c)->cpd->has(ID)) { continue; } 
		
		if ((*c)->evidence) {
			// evidence child:  relevant by itself, and so are its parents
			marked[CID] = true;
			mark_relevant_parents(CID, marked);
		} else {
			// hidden child:  only relevant if something below it is
			mark_relevant_children(CID, marked);
			if (marked[CID]) { marked[ID] = true; }	// if any descendant is evidence, I'm relevant
		}
	}
}

/**
 * Mark the parents of node ID as relevant and, for parents without
 * evidence, continue the search upward and downward from them.
 *
 * When the node has a CPD, the parent set is taken from the CPD's variable
 * list (minus its last entry) rather than from node->parents, so only
 * variables the CPD actually uses are followed.
 *
 * @param ID      ID of the node whose parents are examined
 * @param marked  one flag per node ID, updated in place
 */
void BayesNet::mark_relevant_parents(int ID, vector<bool> &marked) const { 

	// I shall mark parents of ID as relevant.  Also, children of said parents
	//	may also be relevant if they or their descendants are relevant

	const Node *node = node_map[ID];

	// get a list of parents
	vector<const Node*> parents;
	if (node->cpd) {
		// if it's got a CPD only consider variables actually used.
		vector<const Variable*> variables = node->cpd->get_variables();
		// NOTE(review): assumes the list is non-empty and that the node's own
		//	variable is its last entry (create_table builds tables that way)
		//	-- confirm for other CPD types.
		variables.pop_back();	// remove last variable (ID)
		parents.reserve(variables.size());
		for (vector<const Variable*>::const_iterator v=variables.begin(); v!=variables.end(); v++) { 
		
			// Debugging:
			/*
			if (node_map.size() <= ((*v)->ID) || !node_map[(*v)->ID]) { 
				cerr << "BayesNet::mark_relevant_parents:  " 
					 << "variables in CPD for " << node->variable->name << " says it includes "
				     << (*v)->name << ", which is not in the network\n";
			}
			*/

			// translate the variable back into its node (no range/null check)
			parents.push_back(node_map[(*v)->ID]);
		}
	} else {
		parents = node->parents; // copy list from node->parents
	} 
		
	for (vector<const Node*>::const_iterator p=parents.begin(); p!=parents.end(); p++) { 
		int PID = (*p)->variable->ID;
		if (marked[PID]) { continue; }	// already handled
		marked[PID] = true;
		// a parent with evidence is marked but not explored any further
		if (!(*p)->evidence) { 
			mark_relevant_parents(PID, marked);
			mark_relevant_children(PID, marked);
		}
	}
}

/**
 *	Create a list of nodes relevant in a query for node ID.
 *	This is similar to d-connectivity except that decendants
 *	are not relevant unless they are evidence nodes
 */
list<const Node*> BayesNet::relevant_nodes(int ID) const { 

	vector<bool> marked(node_map.size());
	fill(marked.begin(), marked.end(), false);

	marked[ID] = true;
	const Node *node = node_map[ID];

	if (!node->evidence) { 
		mark_relevant_parents(ID, marked);
		mark_relevant_children(ID, marked);
	}

	list<const Node*> ans;
	for (int i=0; i<marked.size(); i++) { 
		if (marked[i]) { ans.push_back(node_map[i]); }
	}

	return ans;
	
}

/**
 *	Create a list of *new* potentials (CPTs) to be used in a query:
 *	one normalized table per node in `pnodes`.
 *
 *	@param pnodes   nodes to build potentials for (typically the result
 *	                of relevant_nodes())
 *	@param example  index of the example; passed to each node's evidence
 *	                object to obtain the value distribution
 *	@return         heap-allocated tables, in the order of `pnodes`.  The
 *	                caller is responsible for deleting them.
 *
 *	Per node:
 *	  - evidence, no CPD:  a single-variable table filled from the evidence
 *	    and normalized.
 *	  - evidence and CPD:  the CPD as a normalized table, multiplied by the
 *	    evidence distribution along the node's own variable.
 *	  - no evidence, CPD:  the CPD as a normalized table.
 *	  - no evidence, no CPD:  nothing can be built.  An error is written to
 *	    cerr and the node is skipped (it contributes no potential), rather
 *	    than dereferencing the missing CPD.
 */
list<CPT*> BayesNet::create_normalized_potentials(const list<const Node*> &pnodes, int example) const {

	list<CPT*> potentials;	// return value

	for (list<const Node*>::const_iterator n = pnodes.begin(); n != pnodes.end(); ++n) {

		const Node *node = *n;

		if (!node->cpd && !node->evidence) {
			// Hidden node without a CPD:  there is no distribution to use.
			cerr << "Error (BayesNet::create_normalized_potentials):  "
			     << "Non-evidence node " << node->variable->name
			     << " has no CPD defined." << endl;
			continue;
		}

		CPT *cpt = NULL;	// potential for the current node

		if (!node->cpd) {
			// Evidence node where no CPD is defined:  use the evidence
			//	distribution as the probability distribution.  With no
			//	CPD the prior probability is not a factor, but evidence
			//	is treated as probabilistic, so there is still a
			//	distribution here.
			vector<const Variable*> single_var(1);
			single_var.front() = node->variable;
			cpt = new CPT(single_var);
			double *data = cpt->begin();
			for (int v = 0; v < node->arity(); v++) {
				*data = (*(node->evidence))(example, v);
				data++;
			}
			cpt->normalize();

		} else {
			// Node with a CPD:  start from its (normalized) table
			cpt = node->cpd->as_table();
			cpt->normalize();

			if (node->evidence) {
				// apply evidence before adding to the pool of potentials
				vector<double> value_distribution(node->arity());
				for (int v = 0; v < node->arity(); v++) {
					value_distribution[v] = (*(node->evidence))(example, v);
				}
				cpt->multiply(node->variable->ID, value_distribution.begin());
			}
		}

		potentials.push_back(cpt);

	} // next node

	return potentials;

} // create_normalized_potentials

/**
 * Ordering used to choose a variable-elimination order.
 *
 * The node whose variable ID equals the ID given at construction compares
 * greater than every other node, so it ends up last after sorting.  All
 * other nodes are ordered by ascending num_children().
 */
struct SortElimOrder {
  private:
	int ID;		// ID of the node that must sort last

  public:
	SortElimOrder(int id) : ID(id) { }

	/** @return true when LHS should be eliminated before RHS */
	bool operator()(const Node *LHS, const Node *RHS) {
		// LHS can only come first if it is not the distinguished node;
		// then it comes first either because RHS is the distinguished
		// node or because LHS has fewer children.
		return LHS->variable->ID != ID
		    && (RHS->variable->ID == ID
		        || LHS->num_children() < RHS->num_children());
	}
};




/**
 * Query a variable in a given example.
 *
 * If the node carries evidence, the evidence distribution for `example` is
 * returned.  Otherwise variable elimination is run over the nodes returned
 * by relevant_nodes(ID): each relevant node contributes one normalized
 * potential, every relevant variable except the query variable is summed
 * out, and the potential that remains over the query variable is the
 * answer.  In both cases the answer is rescaled to sum to one before it is
 * returned.
 *
 * All intermediate potentials are released before returning, including any
 * that end up not involving the query variable.
 *
 * @param ID       ID of the node to query (used to index node_map directly)
 * @param example  index of the example, forwarded to the evidence objects
 * @return         one entry per value of the query variable
 */
distribution BayesNet::query(int ID, int example) const {

	const Node *query_node = node_map[ID];
	distribution ans(query_node->arity());

	if (query_node->evidence) {

		// Query node has evidence, use that.
		for (int v = 0; v < query_node->arity(); v++) {
			ans[v] = (*(query_node->evidence))(example, v);
		}
		// (answer normalized below)

	} else {

		// no evidence.  Need to do variable elimination

		// step 1:  create a potential for each table.
		//	NOTE:  each must be normalized, because multiplication
		//	of UN-NORMALIZED potentials will yield incorrect results
		list<const Node*> relevant = relevant_nodes(ID);	// "relevant" is like "d-connected"
		list<CPT*> potentials = create_normalized_potentials(relevant, example);

		// step 2:  eliminate all nodes except the query node.
		//	Sort for a good (?) elimination order, and so query ID is LAST
		vector<const Node*> elimlist(relevant.begin(), relevant.end());
		std::sort(elimlist.begin(), elimlist.end(), SortElimOrder(ID));

		if (elimlist.empty() || elimlist.back()->variable->ID != ID) {
			cerr << "ERROR (BayesNet::query(" << ID << ", " << example << "):  "
				 << "Variable elimination order does not end with query variable." << endl;
		}

		for (vector<const Node*>::const_iterator enode = elimlist.begin(); enode != elimlist.end(); ++enode) {

			int elim = (*enode)->variable->ID;

			// pull every potential that mentions this variable out of the pool
			list<CPT*> cpts_with_node;
			for (list<CPT*>::iterator p = potentials.begin();
			                          p != potentials.end();) {	// no ++p, see below
				if ( (*p)->has(elim) ) {
					cpts_with_node.push_back(*p);
					p = potentials.erase(p);	// erase returns the position
												//	following the removed one
				} else {
					++p;
				}
			}

			// No potential mentions this variable:  nothing to multiply
			//	or sum out.  (Calling front() on the empty list below
			//	would be undefined.)
			if (cpts_with_node.empty()) { continue; }

			// multiply them all together
			while (cpts_with_node.size() > 1) {

				CPT *A = cpts_with_node.front();
				cpts_with_node.pop_front();
				CPT *B = cpts_with_node.front();
				cpts_with_node.pop_front();

				CPT *product = A->multiply(B);

				delete A;
				delete B;

				cpts_with_node.push_back(product);
			}

			// sum out eliminated variable (unless it's the query node, then we're done)
			CPT *result;
			if (elim != ID) {
				result = cpts_with_node.front() -> marginalize(elim);
				delete cpts_with_node.front();
			} else {
				result = cpts_with_node.front();
			}
			potentials.push_back(result);

		} // next variable to eliminate

		// The answer is the potential that still mentions the query
		//	variable.  The pool may also hold potentials with no variables
		//	left (products that were fully summed out), so the first
		//	element of the pool is not necessarily the right one.
		CPT *query_potential = NULL;
		for (list<CPT*>::iterator p = potentials.begin(); p != potentials.end(); ++p) {
			if ( (*p)->has(ID) ) { query_potential = *p; break; }
		}

		if (query_potential) {
			copy(query_potential->begin(), query_potential->end(), ans.begin());	// (answer normalized below)
		} else {
			cerr << "ERROR (BayesNet::query(" << ID << ", " << example << "):  "
				 << "No potential left for the query variable." << endl;
		}

		// release every potential still in the pool (answer included)
		for (list<CPT*>::iterator p = potentials.begin(); p != potentials.end(); ++p) {
			delete *p;
		}
		potentials.clear();

	} // if evidence

	// normalize answer
	// NOTE(review): if the entries sum to zero this divides by zero;
	//	callers presumably never hit that with valid evidence -- confirm.
	double sum = 0.0;
	for (distribution::iterator i = ans.begin(); i != ans.end(); ++i) { sum += *i; }
	for (distribution::iterator i = ans.begin(); i != ans.end(); ++i) { *i /= sum; }

	return ans;

} // query


		

/**
 * Evidence backed by an in-memory table, used by BayesNet::em() to hold
 * the distributions it infers for nodes that have no real evidence.
 */
struct VirtualEvidence : public Evidence {

	// data[example][value]:  weight of `value` in `example`
	vector<vector<double> > data;

	/** Look up the stored weight; no range checking is done on either index. */
	double operator()(int example, int value) {
		const vector<double> &row = data[example];
		return row[value];
	}
};


void BayesNet::em(int N, int iterations) {

	// E:  for all examples, for all non-evidence nodes, 
	//		create "virtual" evidence by querying.
	// M:  reset CPDs based on current evidence

	// find non-evidence nodes and set up a structure

	// hidden:  list of ( Node*, VirtualEvidence )
	list<pair<Node*, VirtualEvidence> > hidden;
	typedef list<pair<Node*, VirtualEvidence> >::iterator hitr;

	BayesNetTrainingData training_data(this, N);

	// create a list of which nodes are hidden (no evidence for them)
	for (list<Node*>::const_iterator n=nodes.begin(); n!=nodes.end(); n++) { 
	
		if (!(*n)->evidence) { 
			
			// add an entry to the list
			hidden.push_back(pair<Node*,VirtualEvidence>(*n, VirtualEvidence()));

			// resize the data structure to N examples, V values (depending on this particular node)
			hidden.back().second.data.resize(N);
			for (int x=0; x<N; x++) { hidden.back().second.data[x].resize((*n)->arity()); }

		}
	}

	for (int iteration=0; iteration<iterations; iteration++) { 

		// populate virtual evidence for each example, for each hidden node
		for (int example=0; example<N; example++) { 
			
			// for verbose output (mention progress with each experiment)...
			// cerr << "\tem example " << (example+1) << " of " << N << "                         \r";	
		
			// for all hidden nodes, query and set distribution to virtual evidence
			for (hitr h=hidden.begin(); h!=hidden.end(); h++) { 
				
				// get a disribution
				distribution Q = query( h->first->variable->ID, example );
				
				// copy it to the virtual evidence
				copy( Q.begin(), Q.end(), h->second.data[example].begin() );

			}
		
		}


		// set up the hidden nodes' evidence pointers
		for (hitr h=hidden.begin(); h!=hidden.end(); h++) {
			h->first->evidence = &(h->second);
		}

		// reset CPDs for all nodes
		for (list<Node*>::iterator n=nodes.begin(); n!=nodes.end(); n++) { 
			Node *node = *n;
			if (node->cpd) { 
				node->cpd->train(&training_data); 
			}
		}

		// remove hidden nodes' evidence pointers
		for (hitr h=hidden.begin(); h!=hidden.end(); h++) {
			h->first->evidence = NULL;
		}


		// done.  CPDs are now updated
		
	} // next iteration 
	
	return;

} // em

			
