#include "Distance.h"

#include <ios>

void Distance::resize(unsigned int size, unsigned int binw) {

	// noto, 2006-07-12, keep old data; truncate or pad with zeros
	//	(old way was to reset all to zero)

	this->w = binw;
	this->data.resize( (unsigned int) ceil((double)size / w) , 0.0 );	// second argument is initialization
																// for any additional elements 
																// (i.e. if resize make data larger)
	

	this->cdf.resize(this->data.size() + 1);	

	// note:  this isn't automatically normalizing data

}

/**
 *	Set every bin to value * w (one `value` for each of the w positions in the bin)
 *	and rebuild cdf as the running total of the bins, starting from cdf[0] = 0.
 */
void Distance::fill(const probability &value) {

	if (cdf.size() != 0) {
		cdf[0] = 0;
	}

	unsigned int bin = 0;
	while (bin < data.size()) {
		// value is re-read on every pass on purpose: it is a reference and may alias a bin
		data[bin] = value * w;
		cdf[bin + 1] = cdf[bin] + data[bin];
		++bin;
	}
}

/**
 *	Add a pseudocount to every bin and rebuild the running total.
 *
 *	Each bin receives count * w (one `count` for each of the w positions in the bin).
 *	cdf is rebuilt from cdf[0] = 0, the same way fill() and normalize() do it, so the
 *	result does not depend on whatever cdf[0] held before the call.
 *
 *	The bins are not renormalized here.
 */
void Distance::pseudocount(const probability &count) { 

	if (cdf.size()) { cdf[0] = 0; }
	for (unsigned int i=0; i<data.size(); i++) { 
		data[i] += count * w;	// 1 count for each of w in this bin
		cdf[i+1] = cdf[i] + data[i];
	}
}


/**
 *	Rescale the bins so that they sum to one, and rebuild cdf as their running total.
 *
 *	If the current total is not greater than zero it cannot be divided out; every bin
 *	is then set to 1 / data.size() instead (a uniform distribution), and a warning is
 *	written to cerr when DISTANCE_DEBUG is enabled.
 */
void Distance::normalize() {

	// normalizing constant: the sum of all bins
	probability Z = 0;
	for (unsigned int b = 0; b < data.size(); ++b) {
		Z += data[b];
	}

	const bool rescalable = (Z > 0);

	if (cdf.size()) { cdf[0] = 0; }

	for (unsigned int b = 0; b < data.size(); ++b) {

		if (rescalable) {
			data[b] /= Z;
		} else {
			// plain assignment; an errant "/=" here was corrected on 2006-06-30
			data[b] = 1.0 / data.size();
		}

		cdf[b + 1] = cdf[b] + data[b];
	}

	#if DISTANCE_DEBUG
	if (!rescalable) {
		cerr << endl << "Distance::normalize():  Warning:  Distance distribution has gone to Z = "
		     << Z << ".  Reset to uniform." << endl << endl;
	}
	#endif

}


/**
 *	Per-position value at x:  the value stored in the bin that contains x
 *	(data[x / w]) divided by the bin width w.
 *
 *	A position whose bin index lies past the last bin yields 0 instead of indexing
 *	beyond the end of data.
 *
 *	Optional diagnostics (written to cerr, they do not change the result):
 *	  - Distance_BOUNDS_CHECK:       report x >= size()
 *	  - Distance_WARN_UNNORMALIZED:  report a final cdf entry that is not exactly 1
 */
probability Distance::prob(unsigned int x) const {

	#if Distance_BOUNDS_CHECK
	// x is unsigned, so only the upper bound can be violated
	if (x >= size()) { cerr << *this << ": prob(" << x << ") out of range." << endl; }
	#endif

	#if Distance_WARN_UNNORMALIZED
	if (cdf[data.size()] != 1) {
		// setprecision() is sticky: remember cerr's precision and put it back afterwards
		const std::streamsize savedPrecision = cerr.precision();
		cerr << "Warning:  Call to Distance::prob(probability) on unnormalized distribution (" << setprecision(999) << cdf[data.size()] << ")" << endl;
		cerr.precision(savedPrecision);
	}
	#endif

	const unsigned int bin = (unsigned int)(x / w);
	if (bin >= data.size()) { return 0; }	// outside every bin

	return data[bin] / w;
}


/**
 *	Cumulative value up to (not including) position x:  the running total at the
 *	start of the bin containing x, plus the fraction (x % w) / w of that bin.
 *
 *	Returns 0 when cdf is empty or when the final cdf entry is not positive.
 *	A position whose bin index lies past the last bin returns the final cdf entry
 *	instead of indexing beyond the end of data.
 *
 *	Optional diagnostics (written to cerr, they do not change the result):
 *	  - Distance_BOUNDS_CHECK:       report x >= size()
 *	  - Distance_WARN_UNNORMALIZED:  report a final cdf entry that is not exactly 1
 */
probability Distance::CDF(unsigned int x) const { 

	#if Distance_BOUNDS_CHECK
	// x is unsigned, so only the upper bound can be violated
	if (x >= size()) { cerr << *this << ": CDF(" << x << ") out of range." << endl; }
	#endif

	if (!cdf.size()) { return 0; }	// no running total to read from

	#if Distance_WARN_UNNORMALIZED
	if (cdf[data.size()] != 1) { cerr << "Warning:  Call to Distance::CDF(probability) on unnormalized distribution (" << cdf[data.size()] << ")" << endl; }
	#endif

	if (cdf[data.size()] <= 0) { return 0; }

	const unsigned int i = (unsigned int)(x/w);

	if (i >= data.size()) { return cdf[data.size()]; }	// at or past the upper end: everything is below x

	return cdf[i] + data[i] * (((double)(x%w)) / w);	// if width > 0, return portion of bin

}


/**
 *	Write a one-line summary to out:  the position range, the starting position of the
 *	bin with the largest value ("peak"), and the starting position of the last bin whose
 *	cdf entry is still below 0.5 ("mid").  Both positions are bin index times w.
 */
ostream& Distance::operator<<(ostream &out) const { 

	probability tallest = 0;
	unsigned int tallestAt = 0;
	unsigned int halfway = 0;

	unsigned int bin = 0;
	while (bin < data.size()) {

		const unsigned int start = bin * w;	// first position covered by this bin

		if (cdf[bin] < 0.5) {
			halfway = start;
		}

		if (data[bin] > tallest) {
			tallest = data[bin];
			tallestAt = start;
		}

		++bin;
	}

	return out << "Distance[0.." << size() << ") peak ~= " << tallestAt << ", mid ~= " << halfway;
}

/** Free-function stream insertion:  forwards to the member Distance::operator<<(ostream&) and returns its result. */
ostream& operator<<(ostream &out, const Distance &distance) { return distance.operator<<(out); }


/**
 *	Write the bins to out, one line per bin:  the bin's starting position (index * w),
 *	a tab, and the bin's value.
 *
 *	The drawbars switch is fixed to false.  When set to true the output additionally
 *	contains an initial "0 <tab> 0" line and, after each bin, two more points at the
 *	bin's end position (the bin value, then 0).
 */
void Distance::dump(ostream &out) const { 

	const string TAB = "\t";	// column delimiter
	const bool drawbars = false;

	if (drawbars) {
		out << 0 << TAB << 0 << endl;
	}

	unsigned int i = 0;
	while (i < data.size()) {

		out << i*w << TAB << data[i] << endl;

		if (drawbars) { 
			out << (i+1)*w << TAB << data[i] << endl;
			out << (i+1)*w << TAB << 0 << endl;
		}

		++i;
	}
}


/** combine two distrib/utions */
void Distance::add(const Distance &addend, const probability &weight) { 

	if (this->w == addend.w && this->data.size()==addend.data.size()) { 
		
		// expected, easy case
		
		for (unsigned int i=0; i<data.size(); i++) { 
			this->data[i] += weight * addend.data[i];
		}

	} else {
		
		for (unsigned int x=0; x<size(); x++) { 	// size() > data.size()
			
			this->add(x, weight*addend.prob(x));
		}
	}

	return;
}


/** subtract another distribution from this one */
bool Distance::subtract(const Distance &subtrahend, const probability &weight) { 

	bool ans = true; 

	if (this->w == subtrahend.w && this->data.size()==subtrahend.data.size()) { 

		// for (unsigned int i=0; i<data.size(); i++) { if (data[i] < weight * subtrahend.data[i]) { return false; } }
		
		for (unsigned int i=0; i<data.size(); i++) { 
			if (this->data[i] < weight * subtrahend.data[i]) { 
				this->data[i] = 0;
				ans = false;
			} else {
				this->data[i] -= weight * subtrahend.data[i];
			}
		}

	} else {

		//for (unsigned int x=0; x<size() && x<subtrahend.size(); x++) { 	if ( data[x/w] < weight * subtrahend.data[x/subtrahend.w]/subtrahend.w ) { return false; } }
		for (unsigned int x=0; x<size() && x<subtrahend.size(); x++) { 
			if (data[x/w] < weight * subtrahend.data[x/subtrahend.w]/subtrahend.w) { 
				data[x/w] = 0;
				ans = false;
			} else {
				data[x/w] -= weight * subtrahend.data[x/subtrahend.w]/subtrahend.w;
			}
		}
	}

	return ans;
}


/** 
 *	Smooth the bins with an arbitrary kernel (e.g. <1 3 5 3 1>), then renormalize.
 *
 *	Bin i contributes kernel[k] * data[i] to bin i + k - H, where H is kernel.size() / 2
 *	(integer division).  A target index that falls off either end of the bin range is
 *	mirrored back into range, so no contribution is dropped:  before the closing
 *	normalize() call the smoothed bins sum to the sum of the kernel entries times the
 *	sum of the current entries (modulo floating point error).
 */
void Distance::ksmooth(const vector<probability> &kernel) {

	const bool REFLECT = true;	// false:  contributions that land outside the bins are skipped instead

	const int nbins = (int)(data.size());
	const int H = ((int)(kernel.size())) / 2;

	vector<probability> smoothed(data.size());
	std::fill(smoothed.begin(), smoothed.end(), 0);

	for (unsigned int i = 0; i < data.size(); ++i) {	// i: source bin

		for (unsigned int k = 0; k < kernel.size(); ++k) { 	// k: kernel index, 0..K-1

			int j = (int)i + (int)k - H;	// j: target bin, possibly out of range

			if (!REFLECT) {

				if (j < 0 || j >= nbins) { continue; }	// out-of-bounds, nothing to add

			} else {

				int wraps = 0;	// how many bin-range lengths j is away from the valid range

				if (j < 0) {
					// mirror below-range indices to the far side; the block below folds them back
					j = nbins - 1 - j;
					wraps = 1;
				}

				if (j >= nbins) {
					wraps += j / nbins;
					j %= nbins;
					if (wraps % 2) {
						// odd number of folds:  direction is reversed
						j = nbins - j - 1;
					}
					// even number of folds:  j is already in place
				}
			}

			smoothed[j] += kernel[k] * data[i];

			#if DISTANCE_DEBUG
			if (isnan(smoothed[j])) { 
				cerr << endl << endl << endl
					<< "data.size() = " << (data.size()) << endl
					<< "kernel.size() = " << (kernel.size()) << endl
					<< "H = " << (H ) << endl
					<< "i = " << (i) << endl
					<< "j = " << (j) << endl
					<< "k = " << (k) << endl
					<< "smoothed[j] = " << (smoothed[j]) << endl
					<< "kernel[k] = " << (kernel[k]) << endl
					<< "data[i] = " << (data[i]) << endl
					<< "cdf[i] = " << (cdf[i]) << endl
					<< "cdf[i+1] = " << (cdf[i+1]) << endl
					<< endl << endl << endl;
			}
			assert(!isnan(smoothed[j]));
			#endif

		}
	}

	std::copy(smoothed.begin(), smoothed.end(), data.begin());	// smoothed values replace the bins
	this->normalize();

}

