allnn_main.cc

Go to the documentation of this file.
00001 /* MLPACK 0.2
00002  *
00003  * Copyright (c) 2008, 2009 Alexander Gray,
00004  *                          Garry Boyer,
00005  *                          Ryan Riegel,
00006  *                          Nikolaos Vasiloglou,
00007  *                          Dongryeol Lee,
00008  *                          Chip Mappus, 
00009  *                          Nishant Mehta,
00010  *                          Hua Ouyang,
00011  *                          Parikshit Ram,
00012  *                          Long Tran,
00013  *                          Wee Chin Wong
00014  *
00015  * Copyright (c) 2008, 2009 Georgia Institute of Technology
00016  *
00017  * This program is free software; you can redistribute it and/or
00018  * modify it under the terms of the GNU General Public License as
00019  * published by the Free Software Foundation; either version 2 of the
00020  * License, or (at your option) any later version.
00021  *
00022  * This program is distributed in the hope that it will be useful, but
00023  * WITHOUT ANY WARRANTY; without even the implied warranty of
00024  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00025  * General Public License for more details.
00026  *
00027  * You should have received a copy of the GNU General Public License
00028  * along with this program; if not, write to the Free Software
00029  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
00030  * 02110-1301, USA.
00031  */
00055 // To begin, note the "@file" at the top of the previous comment
00056 // block.  This tells Doxygen to parse the file; accordingly, a
00057 // similar line should appear at the top of all of your files.
00058 
00059 // Reusable code should be linked through a librule's header file.
00060 #include "allnn.h"
00061 
00062 // While not strictly required for parameter parsing, etc., fx will
00063 // gently remind us (read: spew warning messages) if we do not
00064 // document our parameters, timers, and results in a structure like
00065 // the following.  Documentation may be accessed via --help.
00066 
00067 const fx_entry_doc allnn_main_entries[] = {      // top-level options, shown by --help
00068   { "r", FX_REQUIRED, FX_STR, NULL,              // reference data file (required)
00069     "  A file containing reference data.\n" },
00070   { "q", FX_PARAM, FX_STR, NULL,                 // query data file (optional)
00071     "  A file containing query data (defaults to r).\n" },
00072   { "do_naive", FX_PARAM, FX_BOOL, NULL,         // switches on the naive run as well
00073     "  Whether to perform naive computation as well.\n" },
00074   { "output_filename", FX_PARAM, FX_STR, NULL,   // where the results get written
00075     "  A file to receive the results of computation.\n" },
00076   FX_ENTRY_DOC_DONE                              // end-of-table marker
00077 };
00078 
00079 const fx_submodule_doc allnn_main_submodules[] = {  // submodules this program documents
00080   { "allnn", &allnn_doc,                            // used for the dual-tree run
00081     "  Responsible for dual-tree computation.\n" },
00082   { "naive", &allnn_naive_doc,                      // used for the naive run
00083     "  Stores results for naive computation.\n" },
00084   FX_SUBMODULE_DOC_DONE                             // end-of-table marker
00085 };
00086 
00087 const fx_module_doc allnn_main_doc = {          // root documentation passed to fx_init
00088   allnn_main_entries, allnn_main_submodules,    // option table, then submodule table
00089   "This is an example program written to demonstrate FASTlib components.\n"
00090   "It performs the all-nearest-neighbors dual-tree computation.\n"  // program summary
00091 };
00092 
00093 // Program entry point.  Loads the reference (and optional query) data,
00094 // runs the dual-tree all-nearest-neighbors computation, optionally
00095 // repeats it naively when "--do_naive" is set, and pretty-prints the
00096 // neighbor list to "--output_filename" ("output.txt" by default).
00097 //
00098 // Returns 0 on success, or 1 when the output file cannot be opened or
00099 // cannot be closed cleanly.
00100 int main(int argc, char* argv[]) {
00101 
00102   // Always initialize FASTexec with main's inputs at the beginning of
00103   // your program.  This reads the command line, among other things.
00104   fx_module *root = fx_init(argc, argv, &allnn_main_doc);
00105 
00106   // The reference data file is a required parameter.
00107   const char* references_file_name = fx_param_str_req(root, "r");
00108 
00109   // The query data file defaults to the references.
00110   const char* queries_file_name =
00111       fx_param_str(root, "q", references_file_name);
00112 
00113   // FASTlib classes only poison data in their default constructors;
00114   // declarations must be followed by Init or an equivalent function.
00115   Matrix references;
00116   Matrix queries;
00117 
00118   // data::Load inits a matrix with the contents of a .csv or .arff.
00119   // NOTE(review): whatever status data::Load reports is not examined
00120   // here -- confirm how a failed load surfaces before relying on it.
00121   data::Load(references_file_name, &references);
00122   data::Load(queries_file_name, &queries);
00123 
00124   AllNN allnn;
00125 
00126   // FASTexec organizes parameters and results into submodules.  Think
00127   // of this as creating a new folder named "allnn" under the root
00128   // module for the AllNN object to work inside.  Here, we initialize
00129   // it with all parameters defined "--allnn/...=...".
00130   struct datanode* allnn_module =
00131       fx_submodule(root, "allnn");
00132 
00133   // The Init function readies our object for action.  In this
00134   // example, we built the AllNN class to expect all of its inputs at
00135   // the time of Init; other designs might do things differently.
00136   allnn.Init(queries, references, allnn_module);
00137 
00138   // ArrayLists are preferred over standard C/C++ arrays.  These grow
00139   // dynamically and check bounds when compiled with "--mode=debug".
00140   // An index_t is an integer with compile-time specified size.
00141   ArrayList<index_t> results;
00142 
00143   // Tell the AllNN object to perform its computation.  We pass it a
00144   // fresh ArrayList by pointer (hence the &) as a mental note that
00145   // this input will be modified, i.e. filled with results.
00146   allnn.ComputeNeighbors(&results);
00147 
00148   /* Compare results with naive if run with "--do_naive=true" */
00149   if (fx_param_bool(root, "do_naive", 0)) {
00150 
00151     // Our design of the AllNN class renders it usable only once;
00152     // different code could clean out intermediate results, but for
00153     // this example, we'll just create another instance.
00154     AllNN naive_allnn;
00155 
00156     // This time we'll use a different Init function so we can avoid
00157     // building the tree.  Also, note that submodules and parameters
00158     // may be created/accessed in-line.  (More on this shortly.)
00159     naive_allnn.InitNaive(queries, references,
00160         fx_submodule(root, "naive"));
00161 
00162     ArrayList<index_t> naive_results;
00163     naive_allnn.ComputeNaive(&naive_results);
00164 
00165     /* Perform a quick sanity check now that we have naive results */
00166 
00167     // We don't want to run the for-loop unless debugging, hence the
00168     // #ifdef.  For debug-only one-liners, use DEBUG_ONLY(expr) or the
00169     // other debugging macros.  See base/debug.h for more details.
00170 #ifdef DEBUG
00171     for (index_t i = 0; i < results.size(); ++i) {
00172       // Prints a message if results are different.  Note the peculiar
00173       // syntax "%" LI "d" used to properly format index_t values; this
00174       // alerts printf that index_t is long if compiled as such.  The
00175       // spaces around LI keep C++11 from reading it as a literal suffix.
00176       DEBUG_WARN_MSG_IF(results[i] != naive_results[i],
00177           "i = %" LI "d, results[i] = %" LI "d, naive_results[i] = %" LI "d",
00178           i, results[i], naive_results[i]);
00179     }
00180 #endif /* DEBUG */
00181 
00182   } /* if do_naive */
00183 
00184   const char* output_filename =
00185       fx_param_str(root, "output_filename", "output.txt");
00186 
00187   // Becomes non-zero if the results cannot be written out.
00188   int exit_status = 0;
00189 
00190   // We encourage you to use C-style file streams and print buffers
00191   // rather than C++'s complicated equivalents.
00192   FILE* output_file = fopen(output_filename, "w");
00193 
00194   if (output_file == NULL) {
00195     // fopen signals failure with NULL (bad path, no permission, ...);
00196     // a NULL stream must never be printed to or closed.
00197     fprintf(stderr, "Could not open \"%s\" for writing.\n",
00198         output_filename);
00199     exit_status = 1;
00200   } else {
00201     // The ot namespace stands for object traversal and provides many
00202     // features.  Here, we pretty-print an ArrayList to file, though we
00203     // could alternately have serialized it for later loading and reuse.
00204     //
00205     // Note that this doesn't write a .csv, but instead a transcript of
00206     // the ArrayList's contents.  The library does not appear to have a
00207     // .csv writer for ArrayLists--this will be addressed.  You can
00208     // write .csv from Matrix objects with data::save.
00209     ot::Print(results, "neighbors", output_file);
00210 
00211     // Don't forget these things!  fclose flushes buffered output, so
00212     // a failure here means the file may be incomplete.
00213     if (fclose(output_file) != 0) {
00214       fprintf(stderr, "Could not finish writing \"%s\".\n",
00215           output_filename);
00216       exit_status = 1;
00217     }
00218   }
00219 
00220   // We must tell FASTexec to wrap up when our code is done.  This
00221   // emits its complete data structure--parameter settings, timers,
00222   // and stored results--to stdout unless you call fx_silence first.
00223   fx_done(root);
00224 
00225   // main returns 0 if the program terminates normally, 1 otherwise.
00226   return exit_status;
00227 
00228 } /* main */
Generated on Mon Jan 24 12:04:37 2011 for FASTlib by  doxygen 1.6.3