
/*
	noto@biostat.wisc.edu
	11/14/2003
	06/28/2004

*/


#ifndef GAUSSIAN_H
#define GAUSSIAN_H 0

#include <math.h>
#include <stdlib.h>

#include <vector>
#include <iostream>
#include <iterator>
#include <functional>
#include <sstream>
#include <string>
using namespace std;


// Constants and helper macros used by the Gaussian classes below.
// PI and rnd are guarded with #ifndef, so a definition made before this
// header is included takes precedence over the one given here.
#ifndef PI
#define PI 3.141592653589793
#endif     

// Shorthand for a pseudo-random double built on rand().  rand() may return
// RAND_MAX itself, so the value lies in the closed interval [0, 1].
// Note: takes no arguments; seeding is whatever srand() was last given.
#ifndef rnd
#define rnd() ((double)rand() / RAND_MAX)
#endif

// Text of a gnuplot function definition, gaussian(x,mu,sigma), for the
// (unweighted) normal density.  Send it to gnuplot first so that a printed
// Gaussian can be used as a gnuplot command.
// NOTE(review): the printing code (operator<<) is not in this header --
// presumably it emits calls to gaussian(x,mu,sigma); confirm there.
#define GNUPLOT_GAUSSIAN_DEF "gaussian(x,mu,sigma) = (1.0/(sigma*sqrt(2.0*3.141592653589793))) * exp( (-(x-mu) * (x-mu))/(2.0*sigma*sigma));"



/**
	A one-dimensional Gaussian (normal) distribution.
	Parameters are mu (mean) and sigma (standard deviation).
	Also includes a weight w of the Gaussian, which is the
	coefficient that multiplies the probability density.
*/
class Gaussian {

  friend class GaussianMixture;

  private:
	/** Declared only; no definition is visible in this header. */
	template <typename DoubleIterator> double log_prob(DoubleIterator begin,
	                                                   DoubleIterator end,
	                                                   int &count) const;

  public:

	double mu, sigma, w;

	/** Variance of the distribution (sigma squared). */
	inline double variance() const { return sigma*sigma; }

	/** Define a Gaussian from its mean and standard deviation; the weight defaults to 1.0 */
	Gaussian(double mean, double stdev, double weight=1.0) : mu(mean), sigma(stdev), w(weight) { }

	/**
	 * Default Gaussian: mean 0, standard deviation 1, weight 1.
	 * Every member is given a value so that default-constructed objects
	 * (e.g. the elements created by GaussianMixture(int)) never expose
	 * indeterminate values.
	 */
	Gaussian() : mu(0.0), sigma(1.0), w(1.0) { }

	/** Overwrite all three parameters; the weight defaults to 1.0 */
	void reset(double mean, double stdev, double weight=1.0) {
		mu = mean;
		sigma = stdev;
		w = weight;
	}

	/** Operators:  comparable, printable, ... (defined outside this header) */
	bool operator == (const Gaussian &g) const;
	bool operator != (const Gaussian &g) const;
	bool operator <= (const Gaussian &g) const;	// compare by mus for sorting
	bool operator >= (const Gaussian &g) const;
	bool operator <  (const Gaussian &g) const;
	bool operator >  (const Gaussian &g) const;
	friend std::ostream& operator << (std::ostream& s, const Gaussian& g);

	/** Get the probability density of data:
	 *	a single point, or multiple points in linear or log-scale versions.
	 *  The iterator versions pre-compute the constants once, so they are
	 *  more efficient than multiple calls to density(double).
	 */

	/** Weighted density at x:  w / (sigma*sqrt(2*PI)) * exp(-(x-mu)^2 / (2*sigma^2)) */
	inline double density(double x) const {
		return (w / (sigma * sqrt(2.0*PI))) * exp( (-(x-mu)*(x-mu)) / (2.0*sigma*sigma) ); }

	/** Log-scale density summed over all points in [begin, end) */
	template <typename DoubleIterator> double log_density(DoubleIterator begin,
	                                                      DoubleIterator end) const;

	/** One value per point in [begin, end), in the same order */
	template <typename DoubleIterator> std::vector<double> vdensity(DoubleIterator begin,
	                                                                DoubleIterator end) const;

	/**
	 * Generate random values along this distribution.
	 * The implementation lives outside this header; it is described as using
	 * the Box-Muller transform, which generates two points at a time, so it
	 * will save time to use generate(double&, double&) when generating
	 * many points.
	 */
	double generate() const;
	void generate(double &, double &) const; // generate two at a time

};




/** 
 * Gaussian Mixture Model:	A collection of some number of weighted Gaussian distributions.
 */
class GaussianMixture : public vector<Gaussian> {

  private:
  	bool print_sum_of_gaussians;

  public:

  	GaussianMixture() : vector<Gaussian>::vector<Gaussian>() { }
	GaussianMixture(int size) : vector<Gaussian>::vector<Gaussian>(size) { }

	/** There are two ways to print a mixture:  as a SUM of gaussians, or 
		as a list of separate gaussians.  These make it easier for a user
		to specify which way to do it, e.g.  cout << myGMM.list() << ...
		It actually returns a copy, but that's okay.
	*/
	GaussianMixture sum() { print_sum_of_gaussians = true; return *this; }
	GaussianMixture list() { print_sum_of_gaussians = false; return *this; }

	/** generate a point at random */
	double generate() const;

	/** normalize this (ws sum to 1.0) */
	void normalize();

	/** set ws to uninform distribution */
	void flatten();

	/** calculate probability of point given mixture (with or without respect to ws) */
	double density(double x) const;	// wed
	template <typename _Double_Iterator> double log_density(_Double_Iterator begin, 
	                                                        _Double_Iterator end) const;
	template <typename _Double_Iterator> vector<double> vdensity(_Double_Iterator begin, 
	                                                        _Double_Iterator end) const;

	/** generate a probability distribution (over Gaussians in mixture) of given point(s)
		being generated by that mixture */
	vector<double> prob_dist(double x) const;
	template <typename _Double_Iterator> vector<double> prob_dist(_Double_Iterator begin, 
	                                                              _Double_Iterator end) const;

	/** Operators:  comparable, printable, ... */
	bool operator == (const GaussianMixture&) const;
	bool operator != (const GaussianMixture&) const;
	ostream& print_list(ostream &out=cout) const;
	ostream& print_sum(ostream &out=cout) const;
	ostream& operator << (ostream&) const;
	friend ostream& operator << (ostream&, const GaussianMixture&);

}; // class



/**
 * Log-scale density of a data set under this weighted Gaussian: the sum,
 * over every x in [begin, end), of log( w / (sigma*sqrt(2*PI)) * exp(-(x-mu)^2 / (2*sigma^2)) ).
 *
 * Each term is evaluated as log(coeff) - diff^2 / (2*sigma^2) instead of
 * log(coeff * exp(...)): the two are mathematically equal, but the latter
 * underflows exp() to 0 for points far from the mean and turns the whole
 * sum into -infinity.
 *
 * @param begin  first data point (dereferences to a value convertible to double)
 * @param end    one past the last data point
 * @return the summed log density; 0.0 for an empty range
 */
template <typename Ditty>
double Gaussian::log_density(Ditty begin, Ditty end) const { 

	// pre-compute constants
	const double log_coeff = log( w / (sigma * sqrt(2.0*PI)) );
	const double twice_variance = 2.0*sigma*sigma;

	double ans = 0.0;
	for (; begin != end; ++begin) {
		const double diff = *begin - mu;
		ans += log_coeff - (diff*diff) / twice_variance;
	}
	return ans;
}


template <typename Ditty>
vector<double> Gaussian::vdensity(Ditty begin, Ditty end) const { 

	vector<double> ans;
	
	// pre-compute constants
	const double coeff = (w / (sigma * sqrt(2.0*PI)));
	const double twice_sigma_check = 2.0*sigma*sigma;
	
	while (begin != end) {
		
		double diff = *begin - mu;
		ans.push_back( log(coeff * exp( (-diff*diff) / twice_sigma_check)) );
		begin++;
	}
	return ans;
}


template <typename _Double_Iterator>
double GaussianMixture::log_density(_Double_Iterator dbegin, _Double_Iterator dend) const {

	// to speed things up, pre-calculate constants
	vector<double> coeff;
	vector<double> twice_variance;

	for (const_iterator i=begin(); i!=end(); i++) { 
		coeff.push_back(  i->w / (i->sigma * sqrt(2.0*PI))  );
		twice_variance.push_back(  2.0 * i->sigma * i->sigma );
	}

	double ans = 0.0;

	// for each data point, calc. weighted probability
	for (_Double_Iterator ditty=dbegin; ditty!=dend; ditty++) {
	
		double x = *ditty;
		double p = 0.0;
		for (int i=0; i<size(); i++) { 
			p += coeff[i] * exp( (-(x - this->at(i).mu)*(x - this->at(i).mu)) / twice_variance[i] );
		}
		ans += log(p);
	}
	return ans;

}

template <typename _Double_Iterator>
vector<double> GaussianMixture::vdensity(_Double_Iterator begin, _Double_Iterator end) const {

	int count = 0;
	vector<double> ans = front().vdensity(begin, end);
	for (const_iterator i=1; i<size(); i++) { 
		transform(ans.begin(), ans.end(), i->vdensity(begin, end), plus<double>());
	}

	return ans;
}	
	


template <typename _Double_Iterator>
vector<double> GaussianMixture::prob_dist(_Double_Iterator begin, _Double_Iterator end) const { 

	vector<double> lp(size());
	
	for (const_iterator i=begin(); i!=end(); i++) { 
		lp[i] = i->density(begin, end);
	}
	vector<double> ratio(size());
	double sum = 1.0;
	if (!empty()) { ratio[0] = 1.0; }
	for (int i=0; i<size(); i++) { 
		ratio[i] = exp(lp[i] - lp[0]);
		sum += ratio[i];
	}
	for (int i=0; i<size(); i++) { 
		ratio[i] /= sum;
	}
	return ratio;
}



#endif
