/* MLPACK 0.2
 *
 * Copyright (c) 2008, 2009 Alexander Gray,
 *                          Garry Boyer,
 *                          Ryan Riegel,
 *                          Nikolaos Vasiloglou,
 *                          Dongryeol Lee,
 *                          Chip Mappus,
 *                          Nishant Mehta,
 *                          Hua Ouyang,
 *                          Parikshit Ram,
 *                          Long Tran,
 *                          Wee Chin Wong
 *
 * Copyright (c) 2008, 2009 Georgia Institute of Technology
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */

// To begin, note the "@file" at the top of the previous comment
// block.  This tells Doxygen to parse the file; accordingly, a
// similar line should appear at the top of all of your files.

// Reusable code should be linked through a librule's header file.
#include "allnn.h"

// While not strictly required for parameter parsing, etc., fx will
// gently remind us (read: spew warning messages) if we do not
// document our parameters, timers, and results in a structure like
// the following.  Documentation may be accessed via --help.
00066 00067 const fx_entry_doc allnn_main_entries[] = { 00068 {"r", FX_REQUIRED, FX_STR, NULL, 00069 " A file containing reference data.\n"}, 00070 {"q", FX_PARAM, FX_STR, NULL, 00071 " A file containing query data (defaults to r).\n"}, 00072 {"do_naive", FX_PARAM, FX_BOOL, NULL, 00073 " Whether to perform naive computation as well.\n"}, 00074 {"output_filename", FX_PARAM, FX_STR, NULL, 00075 " A file to receive the results of computation.\n"}, 00076 FX_ENTRY_DOC_DONE 00077 }; 00078 00079 const fx_submodule_doc allnn_main_submodules[] = { 00080 {"allnn", &allnn_doc, 00081 " Responsible for dual-tree computation.\n"}, 00082 {"naive", &allnn_naive_doc, 00083 " Stores results for naive computation.\n"}, 00084 FX_SUBMODULE_DOC_DONE 00085 }; 00086 00087 const fx_module_doc allnn_main_doc = { 00088 allnn_main_entries, allnn_main_submodules, 00089 "This is an example program written to demonstrate FASTlib components.\n" 00090 "It performs the all-nearest-neighbors dual-tree computation.\n" 00091 }; 00092 00093 int main(int argc, char* argv[]) { 00094 00095 // Always initialize FASTexec with main's inputs at the beggining of 00096 // your program. This reads the command line, among other things. 00097 fx_module *root = fx_init(argc, argv, &allnn_main_doc); 00098 00100 00101 // The reference data file is a required parameter. 00102 const char* references_file_name = fx_param_str_req(root, "r"); 00103 00104 // The query data file defaults to the references. 00105 const char* queries_file_name = 00106 fx_param_str(root, "q", references_file_name); 00107 00108 // FASTlib classes only poison data in their default constructors; 00109 // declarations must be followed by Init or an equivalent function. 00110 Matrix references; 00111 Matrix queries; 00112 00113 // data::Load inits a matrix with the contents of a .csv or .arff. 
00114 data::Load(references_file_name, &references); 00115 data::Load(queries_file_name, &queries); 00116 00118 00119 AllNN allnn; 00120 00121 // FASTexec organizes parameters and results into submodules. Think 00122 // of this as creating a new folder named "allnn_module" under the 00123 // rood directory (NULL) for the AllNN object to work inside. Here, 00124 // we initialize it with all parameters defined "--allnn/...=...". 00125 struct datanode* allnn_module = 00126 fx_submodule(root, "allnn"); 00127 00128 // The Init function readies our object for action. In this 00129 // example, we built the AllNN class to expect all of its inputs at 00130 // the time of Init; other designs might do things differently. 00131 allnn.Init(queries, references, allnn_module); 00132 00133 // ArrayLists are prefered over standard C/C++ arrays. These grow 00134 // dynamically and check bounds when compiled with "--mode=debug". 00135 // An index_t is an integer with compile-time specified size. 00136 ArrayList<index_t> results; 00137 00138 // Tell the AllNN object to perform its computation. We pass it a 00139 // fresh ArrayList by pointer (hence the &) as a mental note that 00140 // this input will be modified, i.e. filled with results. 00141 allnn.ComputeNeighbors(&results); 00142 00144 00145 /* Compare results with naive if run with "--do_naive=true" */ 00146 if (fx_param_bool(root, "do_naive", 0)) { 00147 00148 // Our design of the AllNN class renders it usable only once; 00149 // different code could clean out intermediate results, but for 00150 // this example, we'll just create another instance. 00151 AllNN naive_allnn; 00152 00153 // This time we'll use a different Init function so we can avoid 00154 // building the tree. Also, note that submodules and parameters 00155 // may be created/accessed in-line. (More on this shortly.) 
00156 naive_allnn.InitNaive(queries, references, 00157 fx_submodule(root, "naive")); 00158 00159 ArrayList<index_t> naive_results; 00160 naive_allnn.ComputeNaive(&naive_results); 00161 00162 /* Perform a quick sanity check now that we have naive results */ 00163 00164 // We don't want to run the for-loop unless debugging, hence the 00165 // #ifdef. For debug-only one-liners, use DEBUG_ONLY(expr) or the 00166 // other debugging macros. See base/debug.h for more details. 00167 #ifdef DEBUG 00168 for (index_t i = 0; i < results.size(); ++i) { 00169 // Prints a message if results are different. Note the peculiar 00170 // syntax "%"LI"d" used to properly format index_t values; this 00171 // alerts printf that index_t is long if compiled as such. 00172 DEBUG_WARN_MSG_IF(results[i] != naive_results[i], 00173 "i = %"LI"d, results[i] = %"LI"d, naive_results[i] = %"LI"d", 00174 i, results[i], naive_results[i]); 00175 } 00176 #endif /* DEBUG */ 00177 00178 } /* if do_naive */ 00179 00181 00182 const char* output_filename = 00183 fx_param_str(root, "output_filename", "output.txt"); 00184 00185 // We encourage you to use C-style file streams and print buffers 00186 // rather than C++'s complicated equivalents. 00187 FILE* output_file = fopen(output_filename, "w"); 00188 00189 // The ot namespace stands for object traversal and provides many 00190 // features. Here, we pretty-print an ArrayList to file, though we 00191 // could alternately have serialized it for later loading and reuse. 00192 // 00193 // Note that this doesn't write a .csv, but instead a transcript of 00194 // the ArrayList's contents. The library does not appear to have a 00195 // .csv writer for ArrayLists--this will be addressed. You can 00196 // write .csv from Matrix objects with data::save. 00197 ot::Print(results, "neighbors", output_file); 00198 00199 // Don't forget these things! 00200 fclose(output_file); 00201 00202 // We must tell FASTexec to wrap up when our code is done. 
This 00203 // emits its complete data structure--parameter settings, timers, 00204 // and stored results--to stdout unless you call fx_silence first. 00205 fx_done(root); 00206 00207 // main should return 0 if the program terminates normally. 00208 return 0; 00209 00210 } /* main */