00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #include "cosine_tree.h"
00033
00034
00035 double columnNormL2(const Matrix& A, index_t i_col) {
00036 Vector col;
00037 A.MakeColumnVector(i_col, &col);
00038 return la::LengthEuclidean(col);
00039 }
00040
00041
00042 #define eps (1e-16)
00043 bool isZero(double d) {
00044 return (d<eps) && (d>-eps);
00045 }
00046
00047
00048 CosineNode::CosineNode(const Matrix& A) {
00049 this->A_.Alias(A);
00050 origIndices_.Init(A_.n_cols());
00051 norms_.Init(n_cols());
00052 for (int i_col = 0; i_col < origIndices_.size(); i_col++) {
00053 origIndices_[i_col] = i_col;
00054 norms_[i_col] = columnNormL2(A_, i_col);
00055 }
00056 parent_ = NULL;
00057 left_ = NULL;
00058 right_ = NULL;
00059 isLeft_ = false;
00060
00061 CalStats();
00062 }
00063
00064
00065 CosineNode::CosineNode(CosineNode& parent,
00066 const ArrayList<int>& indices, bool isLeft) {
00067 A_.Alias(parent.A_);
00068 origIndices_.Init(indices.size());
00069 norms_.Init(n_cols());
00070 for (int i_col = 0; i_col < origIndices_.size(); i_col++) {
00071 origIndices_[i_col] = parent.origIndices_[indices[i_col]];
00072 norms_[i_col] = parent.norms_[indices[i_col]];
00073 }
00074 parent_ = &parent;
00075 left_ = NULL;
00076 right_ = NULL;
00077 isLeft_ = isLeft;
00078
00079 if (isLeft) parent.left_ = this;
00080 else parent.right_ = this;
00081
00082 CalStats();
00083 }
00084
00085 void CosineNode::CalStats() {
00086
00087 cum_norms_.Init(origIndices_.size());
00088 for (int i_col = 0; i_col < origIndices_.size(); i_col++)
00089 cum_norms_[i_col] = ((i_col > 0) ? cum_norms_[i_col-1]:0)
00090 + math::Sqr(norms_[i_col]);
00091
00092
00093 mean_.Init(A_.n_rows()); mean_.SetZero();
00094 for (index_t i_col = 0; i_col < n_cols(); i_col++) {
00095 Vector col;
00096 GetColumn(i_col, &col);
00097 la::AddTo(col, &mean_);
00098 }
00099 la::Scale(1.0/(double)origIndices_.size(), &mean_);
00100 }
00101
00102 void CosineNode::ChooseCenter(Vector* center) {
00103 double r = (double)rand()/RAND_MAX * cum_norms_[n_cols()-1];
00104 index_t i_col;
00105 for (i_col = 0; i_col < n_cols(); i_col++)
00106 if (cum_norms_[i_col] >= r) break;
00107 GetColumn(i_col, center);
00108 }
00109
00110 void CosineNode::CalCosines(const Vector& center,
00111 ArrayList<double>* cosines) {
00112 cosines->Init(n_cols());
00113 double centerL2 = la::LengthEuclidean(center);
00114 for (index_t i_col = 0; i_col < n_cols(); i_col++)
00115
00116 if (isZero(norms_[i_col]))
00117 (*cosines)[i_col] = 2;
00118 else {
00119 Vector col;
00120 GetColumn(i_col, &col);
00121 double numerator = la::Dot(center, col);
00122 double denominator = centerL2 * norms_[i_col];
00123 (*cosines)[i_col] = numerator/denominator;
00124 }
00125 }
00126
00127 void CosineNode::CreateIndices(ArrayList<int>* indices) {
00128 indices->Init(n_cols());
00129 for (index_t i_col = 0; i_col < n_cols(); i_col++)
00130 (*indices)[i_col] = i_col;
00131 }
00132
00133
00134 index_t qpartition(ArrayList<double>& key, ArrayList<int>& data,
00135 index_t left, index_t right) {
00136 index_t j = left;
00137 double x = key[left];
00138 for (index_t i = left+1; i <= right; i++)
00139 if (key[i] >= x) {
00140 j++;
00141 double tmp_d = key[i]; key[i] = key[j]; key[j] = tmp_d;
00142 index_t tmp_i = data[i]; data[i] = data[j]; data[j] = tmp_i;
00143 }
00144 double tmp_d = key[left]; key[left] = key[j]; key[j] = tmp_d;
00145 index_t tmp_i = data[left]; data[left] = data[j]; data[j] = tmp_i;
00146 return j;
00147 }
00148
00149
00150
00151 void qsort(ArrayList<double>& key, ArrayList<int>& data,
00152 index_t left, index_t right) {
00153 if (left >= right) return;
00154 index_t middle = qpartition(key, data, left, right);
00155 qsort(key, data, left, middle-1);
00156 qsort(key, data, middle+1, right);
00157 }
00158
00159 void Sort(ArrayList<double>& key, ArrayList<int>& data) {
00160 qsort(key, data, 0, key.size()-1);
00161 }
00162
00163
00164
00165 index_t calSplitPoint(const ArrayList<double>& key) {
00166 double leftKey = key[0];
00167 double rightKey = key[key.size()-1];
00168 index_t i = 0;
00169 while (leftKey - key[i] <= key[i] - rightKey && i < key.size()) i++;
00170
00171 return i;
00172 }
00173
00174
00175 void InitSubCopy(const ArrayList<int>& src, index_t pos, index_t size,
00176 ArrayList<int>* dst) {
00177 dst->Init(size);
00178 for (index_t i = 0; i < size; i++)
00179 (*dst)[i] = src[pos+i];
00180 }
00181
00182
00183 void splitIndices(ArrayList<int>& indices, int leftSize,
00184 ArrayList<int>* leftIdx, ArrayList<int>* rightIdx) {
00185 InitSubCopy(indices, 0, leftSize, leftIdx);
00186 InitSubCopy(indices, leftSize, indices.size()-leftSize, rightIdx);
00187 }
00188
00189
00190
00191
00192
00193
00194 void CosineNode::Split() {
00195 if (n_cols() < 2) return;
00196 Vector center;
00197 ArrayList<double> cosines;
00198 ArrayList<int> indices, leftIdx, rightIdx;
00199
00200 ChooseCenter(¢er);
00201
00202
00203 CalCosines(center, &cosines);
00204
00205
00206 CreateIndices(&indices);
00207
00208 Sort(cosines, indices);
00209
00210
00211
00212 index_t leftSize = calSplitPoint(cosines);
00213
00214 if (leftSize == n_cols() || leftSize == 0) return;
00215
00216 splitIndices(indices, leftSize, &leftIdx, &rightIdx);
00217
00218
00219
00220 left_ = new CosineNode(*this, leftIdx, true);
00221 right_ = new CosineNode(*this, rightIdx, false);
00222 }
00223