#ifndef DISTANCE_H 
#define DISTANCE_H

/** 
 *
 
 * Arbitrary (discrete) probability distribution over a finite range
 *
 * (Edited version of Histogram:  removed offset, added 'bin width', `w')
 *
 * In a binned histogram, each bin is the SUM of the probability of all the
 * values in that bin, e.g.:
 *

                        4
              3         _   3
              _ 2      | |2 _  
            1| |_ 1 1 1| |_| | 
            _| | |_ _ _| | | |
           | | | | | | | | | |
   ----------------------------------------------
            0 1 2 3 4 5 6 7 8

 *   would be binned into:
                        
                         
                        _____  
            _____      |     | 
           |  6  |_____|  9  |
           |     |  3  |     |
   ----------------------------------------------
              0     1     2
  
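 *	So to query the value of, say, x=6, you look in its bin, 6/3 = 2, which
 *	holds 9; only 9/3 = 3 of that is attributed to x=6 (the rest is
 *	attributed to x=7 and x=8).  Binning therefore loses the original
 *	per-value amounts (x=6 was 4 before binning, but reads back as 3).
 *
 *	To add 1 to x=1, you just add 1 to its bin (bin 1/3 = 0).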
 *
 *
 *
 * */

#include <vector>
#include <iostream>
#include <iomanip>
using namespace std;

#include "probability.h"
#include "cvector.h"

// Compile-time diagnostic switches for the Distance class; all are set to 0 here.
// DISTANCE_DEBUG is tested with #if just below: a non-zero value pulls in <assert.h>.
// The other two are not used in this header -- presumably they are tested in
// the implementation file (TODO confirm).
#define DISTANCE_DEBUG 0
#define DISTANCE_BOUNDS_CHECK 0	// report out-of-range values (DEBUG)
#define DISTANCE_WARN_UNNORMALIZED 0	// report apparent violations to normalized assumption (DEBUG)


// <assert.h> is only included when DISTANCE_DEBUG is non-zero.
#if DISTANCE_DEBUG
#include <assert.h>
#endif

// Forward declaration so the stream operator below can name the class
// before its full definition.
class Distance;

/** Free stream-insertion operator for a Distance (`out << d`); only declared here. */
std::ostream& operator<<(std::ostream& out, const Distance& d);

class Distance {

  protected:
	
	cvector<probability> data;
	cvector<probability> cdf;		// Cached probability mass to the left of each value
	unsigned int w;	// bin width
  
  public:

  	Distance() { this->resize(0,1); }
	Distance(unsigned int size, unsigned int binw) { this->resize(size,binw); }

	void resize(unsigned int size, unsigned int binw);

	unsigned int size() const { return w * data.size(); }	// size of virtual array
	unsigned int binw() const { return w; }	// maybe for matching parameters?

	/** add a sampled (weighted) point to the distribution */
	inline void add(unsigned int x, probability sample) { data[x/w] += sample; }

	/** combine two distributions */
	void add(const Distance &addend, const probability &weight);
	bool subtract(const Distance &subtrahend, const probability &weight);	// return true=success, false=negative probabilites

	/** So the data[i] sum to 1 (uniform distribution for zeros) */
	void normalize();
	
	/** Set each data[i] = value, set cdf */
	void fill(const probability &value=0); 	

	/** Add a value to each entry */
	void pseudocount(const probability &count=1);
	
	/** Smooth according to a Kernel (e.g. <1 3 5 3 1>) */
	void ksmooth(const vector<probability> &kernel);

	// most of the following will require the structure be NORMALIZED first and
	// the CDF values cached.  

    probability CDF(unsigned int x) const;		
	probability prob(unsigned int x) const;
	ostream& operator<<(ostream &out) const;
	void dump(ostream&) const;

};	// class Distance



#endif

