ortho_range_search.h

Go to the documentation of this file.
00001 /* MLPACK 0.2
00002  *
00003  * Copyright (c) 2008, 2009 Alexander Gray,
00004  *                          Garry Boyer,
00005  *                          Ryan Riegel,
00006  *                          Nikolaos Vasiloglou,
00007  *                          Dongryeol Lee,
00008  *                          Chip Mappus, 
00009  *                          Nishant Mehta,
00010  *                          Hua Ouyang,
00011  *                          Parikshit Ram,
00012  *                          Long Tran,
00013  *                          Wee Chin Wong
00014  *
00015  * Copyright (c) 2008, 2009 Georgia Institute of Technology
00016  *
00017  * This program is free software; you can redistribute it and/or
00018  * modify it under the terms of the GNU General Public License as
00019  * published by the Free Software Foundation; either version 2 of the
00020  * License, or (at your option) any later version.
00021  *
00022  * This program is distributed in the hope that it will be useful, but
00023  * WITHOUT ANY WARRANTY; without even the implied warranty of
00024  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00025  * General Public License for more details.
00026  *
00027  * You should have received a copy of the GNU General Public License
00028  * along with this program; if not, write to the Free Software
00029  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
00030  * 02110-1301, USA.
00031  */
00039 #ifndef ORTHO_RANGE_SEARCH_H
00040 #define ORTHO_RANGE_SEARCH_H
00041 
00042 #include "fastlib/fastlib.h"
00043 #include "contrib/dongryel/proximity_project/gen_kdtree.h"
00044 #include "contrib/dongryel/proximity_project/gen_kdtree_hyper.h"
00045 #include "contrib/dongryel/proximity_project/general_type_bounds.h"
00046 
00060 template<typename T>
00061 class OrthoRangeSearch {
00062 
00063   // This class object cannot be copied!
00064   FORBID_ACCIDENTAL_COPIES(OrthoRangeSearch);
00065 
00066  public:
00067   
00069 
00072   OrthoRangeSearch() {
00073     tree_buffer_ = NULL;
00074     old_from_new_buffer_ = NULL;
00075     new_from_old_buffer_ = NULL;
00076     root_ = NULL;
00077   }
00078 
00081   ~OrthoRangeSearch() {
00082     if(tree_buffer_ != NULL) {
00083       mem::Free(tree_buffer_);
00084       mem::Free(old_from_new_buffer_);
00085       mem::Free(new_from_old_buffer_);
00086     }
00087     else {
00088       delete root_;
00089     }
00090   }
00091 
00093 
00100   void Compute(GenMatrix<T> &set_of_low_coord_limits,
00101                GenMatrix<T> &set_of_high_coord_limits, 
00102                GenMatrix<bool> *candidate_points) {
00103 
00104     // Allocate space for storing candidate points found during
00105     // search.
00106     candidate_points->Init(data_.n_cols(), set_of_low_coord_limits.n_cols());
00107     for(index_t j = 0; j < set_of_low_coord_limits.n_cols(); j++) {
00108       for(index_t i = 0; i < data_.n_cols(); i++) {
00109         (*candidate_points).set(i, j, false);
00110       }
00111     }
00112 
00113     fx_timer_start(NULL, "tree_range_search");
00114 
00115     // First build a tree out of the set of range windows.
00116     ArrayList<index_t> old_from_new_windows;
00117     ArrayList<index_t> new_from_old_windows;
00118 
00119     Tree *tree_of_windows = 
00120       proximity::MakeGenKdTree<T, Tree, proximity::GenKdTreeMedianSplitter>
00121       (set_of_low_coord_limits, set_of_high_coord_limits, 2,
00122        &old_from_new_windows, &new_from_old_windows);
00123 
00124     ortho_range_search(tree_of_windows, set_of_low_coord_limits, 
00125                        set_of_high_coord_limits, old_from_new_windows,
00126                        new_from_old_windows,
00127                        root_, 0, data_.n_rows() - 1, *candidate_points);
00128     
00129     fx_timer_stop(NULL, "tree_range_search");
00130 
00131     // Delete the tree of search windows...
00132     delete tree_of_windows;
00133 
00134     // Reshuffle the search windows.
00135     GenMatrix<T> tmp_matrix;
00136     tmp_matrix.Init(set_of_low_coord_limits.n_rows(),
00137                     set_of_low_coord_limits.n_cols());
00138     for(index_t i = 0; i < tmp_matrix.n_cols(); i++) {
00139       GenVector<T> dest;
00140       GenVector<T> src;
00141       tmp_matrix.MakeColumnVector(old_from_new_windows[i], &dest);
00142       set_of_low_coord_limits.MakeColumnVector(i, &src);
00143       dest.CopyValues(src);
00144     }
00145     set_of_low_coord_limits.CopyValues(tmp_matrix);
00146 
00147     for(index_t i = 0; i < tmp_matrix.n_cols(); i++) {
00148       GenVector<T> dest;
00149       GenVector<T> src;
00150       tmp_matrix.MakeColumnVector(old_from_new_windows[i], &dest);
00151       set_of_high_coord_limits.MakeColumnVector(i, &src);
00152       dest.CopyValues(src);
00153     }
00154     set_of_high_coord_limits.CopyValues(tmp_matrix);
00155   }
00156 
00162   void SaveTree(const char *save_tree_file_name) {
00163 
00164     printf("Serializing the tree data structure...\n");
00165 
00166     FILE *output = fopen(save_tree_file_name, "w+");
00167 
00168     // first serialize the total amount of bytes needed for serializing the
00169     // tree and the tree itself
00170     int tree_size = ot::FrozenSize(*root_);
00171     printf("Tree occupies %d bytes...\n", tree_size);
00172 
00173     fwrite((const void *) &tree_size, sizeof(int), 1, output);
00174     char *tmp_root = (char *) mem::AllocBytes<Tree>(tree_size);
00175     ot::Freeze(tmp_root, *root_);
00176     fwrite((const void *) tmp_root, tree_size, 1, output);
00177     mem::Free(tmp_root);
00178 
00179     // then serialize the permutation of the points due to tree construction
00180     // along with its sizes
00181     int old_from_new_size = ot::FrozenSize(old_from_new_);
00182     int new_from_old_size = ot::FrozenSize(new_from_old_);
00183     char *tmp_array = 
00184       (char *) mem::AllocBytes<ArrayList<index_t> >(old_from_new_size);
00185 
00186     fwrite((const void *) &old_from_new_size, sizeof(int), 1, output);    
00187     ot::Freeze(tmp_array, old_from_new_);
00188     fwrite((const void *) tmp_array, old_from_new_size, 1, output);
00189     
00190     fwrite((const void*) &new_from_old_size, sizeof(int), 1, output);
00191     ot::Freeze(tmp_array, new_from_old_);
00192     fwrite((const void *) tmp_array, new_from_old_size, 1, output);
00193     
00194     mem::Free(tmp_array);
00195 
00196     printf("Tree is serialized...\n");
00197   }
00198 
00209   void Init(GenMatrix<T> &dataset, bool make_copy, 
00210             const char *load_tree_file_name) {
00211 
00212     int leaflen = fx_param_int(NULL, "leaflen", 20);
00213 
00214     // decide whether to make a copy or not.
00215     if(make_copy) {
00216       data_.StaticCopy(dataset);
00217     }
00218     else {
00219       data_.StaticOwn(&dataset);
00220     }
00221 
00222     fx_timer_start(NULL, "tree_d");
00223 
00224     // If the user wants to load the tree from a file,
00225     if(load_tree_file_name != NULL) {
00226       LoadTree(load_tree_file_name);
00227     }
00228 
00229     // Otherwise, construct one from scratch.
00230     else {
00231       root_ = proximity::MakeGenKdTree
00232         <T, Tree, proximity::GenKdTreeMedianSplitter>(data_, leaflen, 
00233                                                       &old_from_new_, 
00234                                                       &new_from_old_);
00235     }
00236     fx_timer_stop(NULL, "tree_d");
00237   }
00238 
00239  private:
00240 
00242   typedef GeneralBinarySpaceTree<GenHrectBound<T, 2>, GenMatrix<T> > Tree;
00243 
00245   enum PruneStatus {SUBSUME, INCONCLUSIVE, EXCLUDE};
00246 
00248 
00250   GenMatrix<T> data_;
00251 
00253   ArrayList<index_t> *old_from_new_buffer_;
00254 
00256   ArrayList<index_t> *new_from_old_buffer_;
00257 
00259   Tree *tree_buffer_;
00260 
00261   ArrayList<index_t> old_from_new_;
00262   
00263   ArrayList<index_t> new_from_old_;
00264 
00266   Tree *root_;
00267 
00269 
00275   void LoadTree(const char *load_tree_file_name) {
00276     
00277     //const char *tfname = fx_param_str(NULL, "load_tree_file", "savedtree");
00278     FILE *input = fopen(load_tree_file_name, "r");
00279     
00280     // read the tree size
00281     int tree_size, old_from_new_size, new_from_old_size;
00282     fread((void *) &tree_size, sizeof(int), 1, input);
00283 
00284     printf("Tree file: %s occupies %d bytes...\n", load_tree_file_name, 
00285            tree_size);
00286     tree_buffer_ = mem::AllocBytes<Tree>(tree_size);
00287     fread((void *) tree_buffer_, 1, tree_size, input);
00288     root_ = ot::SemiThaw<Tree>((char *) tree_buffer_);
00289     
00290     // read old_from_new
00291     fread((void *) &old_from_new_size, sizeof(int), 1, input);
00292     old_from_new_buffer_ = 
00293     mem::AllocBytes<ArrayList<index_t> >(old_from_new_size);
00294     fread((void *) old_from_new_buffer_, old_from_new_size, 1, input);
00295     old_from_new_.InitCopy(*(ot::SemiThaw<ArrayList<index_t> >
00296                              ((char *) old_from_new_buffer_)));
00297 
00298     // read new_from_old
00299     fread((void *) &new_from_old_size, sizeof(int), 1, input);
00300     new_from_old_buffer_ = 
00301     mem::AllocBytes<ArrayList<index_t> >(new_from_old_size);
00302     fread((void *) new_from_old_buffer_, new_from_old_size, 1, input);
00303     new_from_old_.InitCopy(*(ot::SemiThaw<ArrayList<index_t> >
00304                              ((char *) new_from_old_buffer_)));
00305 
00306     printf("Tree has been loaded...\n");
00307 
00308     // apply permutation to the dataset
00309     GenMatrix<T> tmp_data;
00310     tmp_data.Init(data_.n_rows(), data_.n_cols());
00311 
00312     for(index_t i = 0; i < data_.n_cols(); i++) {
00313       GenVector<T> source, dest;
00314       data_.MakeColumnVector(i, &source);
00315       tmp_data.MakeColumnVector(new_from_old_[i], &dest);
00316       dest.CopyValues(source);
00317     }
00318     data_.CopyValues(tmp_data);
00319   }
00320 
00336   void ortho_slow_range_search(Tree *search_window_node,
00337                                GenMatrix<T> &low_coord_limits,
00338                                GenMatrix<T> &high_coord_limits,
00339                                const ArrayList<index_t> &old_from_new_windows,
00340                                const ArrayList<index_t> &new_from_old_windows,
00341                                Tree *reference_node, 
00342                                index_t start_dim, index_t end_dim,
00343                                GenMatrix<bool> &candidate_points) {
00344     PruneStatus prune_flag;
00345 
00346     // Loop over each search window...
00347     for(index_t window = search_window_node->begin();
00348         window < search_window_node->end(); window++) {
00349 
00350       // Loop over each reference point...
00351       for(index_t row = reference_node->begin(); row < reference_node->end(); 
00352           row++) {
00353         prune_flag = SUBSUME;
00354         
00355         // loop over each dimension...
00356         for(index_t d = start_dim; d <= end_dim; d++) {
00357           // determine which one of the two cases we have: EXCLUDE,
00358           // SUBSUME.
00359           
00360           // first the EXCLUDE case: when dist is above the upper
00361           // bound distance of this dimension, or dist is below the
00362           // lower bound distance of this dimension
00363           if(data_.get(d, row) > high_coord_limits.get(d, window) ||
00364              data_.get(d, row) < low_coord_limits.get(d, window)) {
00365             prune_flag = EXCLUDE;
00366             break;
00367           }
00368         } // end of looping over dimensions...
00369 
00370         // Set each point result depending on the flag...
00371         candidate_points.set(old_from_new_[row], old_from_new_windows[window],
00372                              (prune_flag == SUBSUME));
00373 
00374       } // end of iterating over reference points...
00375     } // end of iterating over search window...
00376   }
00377 
00394   void ortho_range_search(Tree *search_window_node, 
00395                           GenMatrix<T> &low_coord_limits, 
00396                           GenMatrix<T> &high_coord_limits,
00397                           const ArrayList<index_t> &old_from_new_windows,
00398                           const ArrayList<index_t> &new_from_old_windows,
00399                           Tree *reference_node, index_t start_dim,
00400                           index_t end_dim, GenMatrix<bool> &candidate_points) {
00401 
00402     PruneStatus prune_flag = SUBSUME;
00403     
00404     // loop over each dimension to determine inclusion/exclusion by
00405     // determining the lower and the upper bound distance per each
00406     // dimension for the given reference node, kn
00407     for(index_t d = start_dim; d <= end_dim; d++) {
00408 
00409       const GenRange<T> &reference_node_dir_range = 
00410         reference_node->bound().get(d);
00411       const GenRange<T> &search_window_node_dir_range =
00412         search_window_node->bound().get(d);
00413       
00414       // determine which one of the three cases we have: EXCLUDE,
00415       // SUBSUME, or INCONCLUSIVE.
00416       
00417       // First the EXCLUDE case: when mindist is above the upper bound
00418       // distance of this dimension, or maxdist is below the lower
00419       // bound distance of this dimension
00420       if(reference_node_dir_range.lo > search_window_node_dir_range.hi ||
00421          reference_node_dir_range.hi < search_window_node_dir_range.lo) {
00422         return;
00423       }
00424       // otherwise, check for SUBSUME case
00425       else if(search_window_node_dir_range.lo <= reference_node_dir_range.lo &&
00426               reference_node_dir_range.hi <= search_window_node_dir_range.hi) {
00427       }
00428       // if any dimension turns out to be inconclusive, then break.
00429       else {
00430         if(search_window_node->count() == 1) {
00431           start_dim = d;
00432         }
00433         prune_flag = INCONCLUSIVE;
00434         break;
00435       }
00436     } // end of iterating over each dimension.
00437 
00438     // In case of subsume, then add all points owned by this node to
00439     // candidates - note that subsume prunes cannot be performed
00440     // always in batch query.
00441     if(search_window_node->count() == 1 && prune_flag == SUBSUME) {
00442       for(index_t j = search_window_node->begin(); 
00443           j < search_window_node->end(); j++) {
00444         for(index_t i = reference_node->begin(); 
00445             i < reference_node->end(); i++) {
00446           candidate_points.set(old_from_new_[i], old_from_new_windows[j],
00447                                true);
00448         }
00449       }
00450       return;
00451     }
00452     else {
00453       if(search_window_node->is_leaf()) {
00454 
00455         // If both the search window and the reference nodes are
00456         // leaves, then compute exhaustively.
00457         if(reference_node->is_leaf()) {
00458           ortho_slow_range_search(search_window_node, low_coord_limits,
00459                                   high_coord_limits, old_from_new_windows,
00460                                   new_from_old_windows, reference_node,
00461                                   start_dim, end_dim, candidate_points);
00462         }
00463         // If the reference node can be expanded, then do so.
00464         else {
00465           ortho_range_search(search_window_node, low_coord_limits,
00466                              high_coord_limits, old_from_new_windows,
00467                              new_from_old_windows, reference_node->left(),
00468                              start_dim, end_dim, candidate_points);
00469           ortho_range_search(search_window_node, low_coord_limits,
00470                              high_coord_limits, old_from_new_windows,
00471                              new_from_old_windows, reference_node->right(),
00472                              start_dim, end_dim, candidate_points);
00473         }
00474       }
00475       else {
00476 
00477         // In this case, expand the query side.
00478         if(reference_node->is_leaf()) {
00479           ortho_range_search(search_window_node->left(), low_coord_limits,
00480                              high_coord_limits, old_from_new_windows,
00481                              new_from_old_windows, reference_node,
00482                              start_dim, end_dim, candidate_points);
00483           ortho_range_search(search_window_node->right(), low_coord_limits,
00484                              high_coord_limits, old_from_new_windows,
00485                              new_from_old_windows, reference_node,
00486                              start_dim, end_dim, candidate_points);
00487         }
00488         // Otherwise, expand both query and the reference sides.
00489         else {
00490           ortho_range_search(search_window_node->left(), low_coord_limits,
00491                              high_coord_limits, old_from_new_windows,
00492                              new_from_old_windows, reference_node->left(),
00493                              start_dim, end_dim, candidate_points);
00494           ortho_range_search(search_window_node->left(), low_coord_limits,
00495                              high_coord_limits, old_from_new_windows,
00496                              new_from_old_windows, reference_node->right(),
00497                              start_dim, end_dim, candidate_points);
00498           ortho_range_search(search_window_node->right(), low_coord_limits,
00499                              high_coord_limits, old_from_new_windows,
00500                              new_from_old_windows, reference_node->left(),
00501                              start_dim, end_dim, candidate_points);
00502           ortho_range_search(search_window_node->right(), low_coord_limits,
00503                              high_coord_limits, old_from_new_windows,
00504                              new_from_old_windows, reference_node->right(),
00505                              start_dim, end_dim, candidate_points);
00506         }
00507       }
00508     }
00509   }
00510 };
00511 
00512 #endif
Generated on Mon Jan 24 12:04:38 2011 for FASTlib by  doxygen 1.6.3