cosine_tree.cc

00001 /* MLPACK 0.2
00002  *
00003  * Copyright (c) 2008, 2009 Alexander Gray,
00004  *                          Garry Boyer,
00005  *                          Ryan Riegel,
00006  *                          Nikolaos Vasiloglou,
00007  *                          Dongryeol Lee,
00008  *                          Chip Mappus, 
00009  *                          Nishant Mehta,
00010  *                          Hua Ouyang,
00011  *                          Parikshit Ram,
00012  *                          Long Tran,
00013  *                          Wee Chin Wong
00014  *
00015  * Copyright (c) 2008, 2009 Georgia Institute of Technology
00016  *
00017  * This program is free software; you can redistribute it and/or
00018  * modify it under the terms of the GNU General Public License as
00019  * published by the Free Software Foundation; either version 2 of the
00020  * License, or (at your option) any later version.
00021  *
00022  * This program is distributed in the hope that it will be useful, but
00023  * WITHOUT ANY WARRANTY; without even the implied warranty of
00024  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00025  * General Public License for more details.
00026  *
00027  * You should have received a copy of the GNU General Public License
00028  * along with this program; if not, write to the Free Software
00029  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
00030  * 02110-1301, USA.
00031  */
00032 #include "cosine_tree.h"
00033 
00034 // L2 norm of a specific column in a matrix
00035 double columnNormL2(const Matrix& A, index_t i_col) {
00036   Vector col;
00037   A.MakeColumnVector(i_col, &col);
00038   return la::LengthEuclidean(col);
00039 }
00040 
00041 // Zero tolerance
00042 #define eps (1e-16)
00043 bool isZero(double d) {
00044   return (d<eps) && (d>-eps);
00045 }
00046 
00047 // Init a root cosine node from a matrix
00048 CosineNode::CosineNode(const Matrix& A) {
00049   this->A_.Alias(A);
00050   origIndices_.Init(A_.n_cols());
00051   norms_.Init(n_cols());
00052   for (int i_col = 0; i_col < origIndices_.size(); i_col++) {
00053     origIndices_[i_col] = i_col;
00054     norms_[i_col] = columnNormL2(A_, i_col);
00055   }
00056   parent_ = NULL;
00057   left_ = NULL;
00058   right_ = NULL;
00059   isLeft_ = false;
00060 
00061   CalStats();
00062 }
00063 
00064 // Init a child cosine node from its parent and a set of the parent's columns
00065 CosineNode::CosineNode(CosineNode& parent, 
00066                        const ArrayList<int>& indices, bool isLeft) {
00067   A_.Alias(parent.A_);
00068   origIndices_.Init(indices.size());
00069   norms_.Init(n_cols());
00070   for (int i_col = 0; i_col < origIndices_.size(); i_col++) {
00071     origIndices_[i_col] = parent.origIndices_[indices[i_col]];
00072     norms_[i_col] = parent.norms_[indices[i_col]];
00073   }
00074   parent_ = &parent;
00075   left_ = NULL;
00076   right_ = NULL;
00077   isLeft_ = isLeft;
00078   
00079   if (isLeft) parent.left_ = this;
00080   else parent.right_ = this;
00081 
00082   CalStats();
00083 }
00084 
00085 void CosineNode::CalStats() {
00086   // Calculate cummlulative sum square of L2 norms
00087   cum_norms_.Init(origIndices_.size());
00088   for (int i_col = 0; i_col < origIndices_.size(); i_col++)
00089     cum_norms_[i_col] = ((i_col > 0) ? cum_norms_[i_col-1]:0)
00090       + math::Sqr(norms_[i_col]);
00091 
00092   // Calculate mean vector
00093   mean_.Init(A_.n_rows()); mean_.SetZero();
00094   for (index_t i_col = 0; i_col < n_cols(); i_col++) {
00095     Vector col;
00096     GetColumn(i_col, &col);
00097     la::AddTo(col, &mean_);
00098   }
00099   la::Scale(1.0/(double)origIndices_.size(), &mean_);
00100 }
00101 
00102 void CosineNode::ChooseCenter(Vector* center) {
00103   double r = (double)rand()/RAND_MAX * cum_norms_[n_cols()-1];
00104   index_t i_col;
00105   for (i_col = 0; i_col < n_cols(); i_col++)
00106     if (cum_norms_[i_col] >= r) break;
00107   GetColumn(i_col, center);
00108 }
00109 
00110 void CosineNode::CalCosines(const Vector& center, 
00111                             ArrayList<double>* cosines) {
00112   cosines->Init(n_cols());
00113   double centerL2 = la::LengthEuclidean(center);
00114   for (index_t i_col = 0; i_col < n_cols(); i_col++) 
00115     // if col is a zero vector then push it to the left node
00116     if (isZero(norms_[i_col]))
00117       (*cosines)[i_col] = 2;
00118     else {
00119       Vector col;
00120       GetColumn(i_col, &col);
00121       double numerator = la::Dot(center, col);
00122       double denominator = centerL2 * norms_[i_col];
00123       (*cosines)[i_col] = numerator/denominator;
00124     }
00125 }
00126 
00127 void CosineNode::CreateIndices(ArrayList<int>* indices) {
00128   indices->Init(n_cols());
00129   for (index_t i_col = 0; i_col < n_cols(); i_col++)
00130     (*indices)[i_col] = i_col;
00131 }
00132 
00133 // Quicksort partitioning procedure
00134 index_t qpartition(ArrayList<double>& key, ArrayList<int>& data,
00135                 index_t left, index_t right) {
00136   index_t j = left;
00137   double x = key[left];
00138   for (index_t i = left+1; i <= right; i++)
00139     if (key[i] >= x) {
00140       j++;
00141       double tmp_d = key[i]; key[i] = key[j]; key[j] = tmp_d;
00142       index_t tmp_i = data[i]; data[i] = data[j]; data[j] = tmp_i;
00143     }
00144   double tmp_d = key[left]; key[left] = key[j]; key[j] = tmp_d;
00145   index_t tmp_i = data[left]; data[left] = data[j]; data[j] = tmp_i;
00146   return j;
00147 }
00148 
00149 
00150 // Quicksort on the cosine values
00151 void qsort(ArrayList<double>& key, ArrayList<int>& data,
00152            index_t left, index_t right) {
00153   if (left >= right) return;
00154   index_t middle = qpartition(key, data, left, right);
00155   qsort(key, data, left, middle-1);
00156   qsort(key, data, middle+1, right);
00157 }
00158 
00159 void Sort(ArrayList<double>& key, ArrayList<int>& data) {
00160   qsort(key, data, 0, key.size()-1);
00161 }
00162 
00163 // Calculate the split point where the cosines values are closer
00164 // to the minimum cosine value than the maximum cosine value
00165 index_t calSplitPoint(const ArrayList<double>& key) {
00166   double leftKey = key[0];
00167   double rightKey = key[key.size()-1];
00168   index_t i = 0;
00169   while (leftKey - key[i] <= key[i] - rightKey && i < key.size()) i++;
00170   //printf("i = %d\n", i);
00171   return i;
00172 }
00173 
00174 // Init a subcopy of an array list
00175 void InitSubCopy(const ArrayList<int>& src, index_t pos, index_t size,
00176                  ArrayList<int>* dst) {
00177   dst->Init(size);
00178   for (index_t i = 0; i < size; i++)
00179     (*dst)[i] = src[pos+i];
00180 }
00181 
00182 // Split the indices at the split point
00183 void splitIndices(ArrayList<int>& indices, int leftSize,
00184                   ArrayList<int>* leftIdx, ArrayList<int>* rightIdx) {
00185   InitSubCopy(indices, 0, leftSize, leftIdx);
00186   InitSubCopy(indices, leftSize, indices.size()-leftSize, rightIdx);
00187 }
00188 
00189 // Split a cosine tree node by choose a random center, and sort
00190 // the cosine values decreasingly, then choose a split point
00191 // by calling calSplitPoint()
00192 // This procedure won't split a node if either child has the same
00193 // set of columns as the parent
00194 void CosineNode::Split() {
00195   if (n_cols() < 2) return;
00196   Vector center;
00197   ArrayList<double> cosines;
00198   ArrayList<int> indices, leftIdx, rightIdx;
00199 
00200   ChooseCenter(&center);
00201   //ot::Print(center, "center", stdout);
00202 
00203   CalCosines(center, &cosines);
00204   //ot::Print(cosines, "cosines", stdout);
00205 
00206   CreateIndices(&indices);
00207 
00208   Sort(cosines, indices);
00209   //ot::Print(cosines, "cosines", stdout);
00210   //ot::Print(indices, "indices", stdout);
00211 
00212   index_t leftSize = calSplitPoint(cosines);
00213 
00214   if (leftSize == n_cols() || leftSize == 0) return;
00215   
00216   splitIndices(indices, leftSize, &leftIdx, &rightIdx);
00217   //ot::Print(leftIdx, "left indices", stdout);
00218   //ot::Print(rightIdx, "right indices", stdout);
00219   
00220   left_ = new CosineNode(*this, leftIdx, true);
00221   right_ = new CosineNode(*this, rightIdx, false);
00222 }
00223 
Generated on Mon Jan 24 12:04:38 2011 for FASTlib by  doxygen 1.6.3