#ifndef LBFGS_H_
#define LBFGS_H_

#include "fastlib/fastlib.h"
#include <string>

00070 const fx_entry_doc lbfgs_entries[] = {
00071 {"num_of_points", FX_PARAM, FX_INT, NULL,
00072 " The number of points for the optimization variable.\n"},
00073 {"sigma", FX_PARAM, FX_DOUBLE, NULL,
00074 " The initial penalty parameter on the augmented lagrangian.\n"},
00075 {"objective_factor", FX_PARAM, FX_DOUBLE, NULL,
00076 " obsolete.\n"},
00077 {"eta", FX_PARAM, FX_DOUBLE, NULL,
00078 " wolfe parameter.\n"},
00079 {"gamma", FX_PARAM, FX_DOUBLE, NULL,
00080 " sigma increase rate, after inner loop is done sigma is multiplied by gamma.\n"},
00081 {"new_dimension", FX_PARAM, FX_INT, NULL,
00082 " The dimension of the points\n"},
00083 {"desired_feasibility", FX_PARAM, FX_DOUBLE, NULL,
00084 " Since this is used with augmented lagrangian, we need to know "
00085 "when the feasibility is sufficient.\n"},
00086 {"feasibility_tolerance", FX_PARAM, FX_DOUBLE, NULL,
00087 " if the feasibility is not improved by that quantity, then it stops.\n"},
00088 {"wolfe_sigma1", FX_PARAM, FX_DOUBLE, NULL,
00089 " wolfe parameter.\n"},
00090 {"wolfe_sigma2", FX_PARAM, FX_DOUBLE, NULL,
00091 " wolfe parameter.\n"},
00092 {"min_beta", FX_PARAM, FX_DOUBLE, NULL,
00093 " wolfe parameter.\n"},
00094 {"wolfe_beta", FX_PARAM, FX_DOUBLE, NULL,
00095 " wolfe parameter.\n"},
00096 {"step_size", FX_PARAM, FX_DOUBLE, NULL,
00097 " Initial step size for the wolfe search.\n"},
00098 {"silent", FX_PARAM, FX_BOOL, NULL,
00099 " if true then it doesn't emmit updates.\n"},
00100 {"show_warnings", FX_PARAM, FX_BOOL, NULL,
00101 " if true then it does show warnings.\n"},
00102 {"use_default_termination", FX_PARAM, FX_BOOL, NULL,
00103 " let this module decide where to terminate. If false then"
00104 " the objective function decides .\n"},
00105 {"norm_grad_tolerance", FX_PARAM, FX_DOUBLE, NULL,
00106 " If the norm of the gradient doesn't change more than "
00107 "this quantity between two iterations and the use_default_termination "
00108 "is set, the algorithm terminates.\n"},
00109 {"max_iterations", FX_PARAM, FX_INT, NULL,
00110 " maximum number of iterations required.\n"},
00111 {"mem_bfgs", FX_PARAM, FX_INT, NULL,
00112 " the limited memory of BFGS.\n"},
00113 {"log_file", FX_PARAM, FX_STR, NULL,
00114 " file to log the output.\n"},
00115 {"iterations", FX_RESULT, FX_INT, NULL,
00116 " iterations until convergence.\n"},
00117 {"feasibility_error", FX_RESULT, FX_DOUBLE, NULL,
00118 " the fesibility error achived by termination.\n"},
00119 {"final_sigma", FX_RESULT, FX_DOUBLE, NULL,
00120 " the last penalty parameter used\n"},
00121 {"objective", FX_RESULT, FX_DOUBLE, NULL,
00122 " the objective achieved by termination.\n"},
00123 {"wolfe_step", FX_TIMER, FX_CUSTOM, NULL,
00124 " Time spent computing the wolfe step.\n"},
00125 {"bfgs_step", FX_TIMER, FX_CUSTOM, NULL,
00126 " Time spent computing the bfgs step.\n"},
00127 {"update_bfgs", FX_TIMER, FX_CUSTOM, NULL,
00128 " Time spent computing the bfgs updating.\n"},
00129
00130 FX_ENTRY_DOC_DONE
00131 };
// Module-level fx documentation: binds the lbfgs_entries table to a short
// description of the LBFGS optimization module (no submodules, hence NULL).
const fx_module_doc lbfgs_doc = {
  lbfgs_entries, NULL,
  "The LBFGS module for optimization.\n"
};
// Limited-memory BFGS (L-BFGS) optimizer over a set of points, templated on
// the objective.  OptimizedFunction supplies objective/gradient evaluations;
// the optimizer keeps a bounded history of correction pairs (s_bfgs_/y_bfgs_)
// and uses a Wolfe-condition line search for the step length.  Runtime
// options are read from the fx datanode given to Init() — see lbfgs_entries
// for the full parameter list.  Intended for use with an augmented-lagrangian
// outer loop (sigma is the penalty parameter, multiplied by gamma after each
// inner loop).
template<typename OptimizedFunction>
class Lbfgs {
 public:
  // Binds the objective and reads all options from the fx module node.
  void Init(OptimizedFunction *optimized_function, datanode* module);
  // Releases internal state; call before re-Init-ing the same object.
  void Destruct();
  // Runs the main optimization loop until a termination criterion fires
  // (default criteria or the objective's own, per use_default_termination).
  void ComputeLocalOptimumBFGS();
  // Emits a progress report (suppressed when the "silent" option is set).
  void ReportProgress();
  // Same as ReportProgress(), but appends to the named file.
  void ReportProgressFile(std::string file);
  // Copies the current optimization variable into *result.
  void CopyCoordinates(Matrix *result);
  // Resets the optimizer state so a fresh run can be started.
  void Reset();
  void set_coordinates(Matrix &coordinates);
  void set_desired_feasibility(double desired_feasibility);
  void set_feasibility_tolerance(double feasibility_tolerance);
  void set_norm_grad_tolerance(double norm_grad_tolerance);
  void set_max_iterations(index_t max_iterations);
  // Accessor for the current coordinates (owned by this object).
  Matrix *coordinates();
  // Current augmented-lagrangian penalty parameter.
  double sigma();
  void set_sigma(double sigma);

 private:
  // One-time setup performed at the start of an optimization run.
  void InitOptimization_();
  void ComputeWolfeStep_();
  // Updates the lagrange multipliers of the augmented lagrangian.
  void UpdateLagrangeMult_();
  // Line search along `direction` satisfying the Wolfe conditions;
  // writes the accepted step length to *step.
  success_t ComputeWolfeStep_(double *step, Matrix &direction);
  // Computes the L-BFGS search direction from `grad` using up to
  // `memory` stored correction pairs.
  success_t ComputeBFGS_(double *step, Matrix &grad, index_t memory);
  // Appends the latest (s, y) correction pair to the limited memory.
  success_t UpdateBFGS_();
  success_t UpdateBFGS_(index_t index_bfgs);
  // Projects the coordinates back onto the feasible box.
  // NOTE(review): exact constraint semantics live in lbfgs_impl.h — confirm.
  void BoundConstrain();
  // Formats the current iteration statistics as a printable string.
  std::string ComputeProgress_();
  void ReportProgressFile_();

  struct datanode* module_;           // fx options node passed to Init()
  OptimizedFunction *optimized_function_;  // not owned
  index_t num_of_iterations_;
  index_t num_of_points_;             // "num_of_points" option
  index_t new_dimension_;             // "new_dimension" option
  double sigma_;                      // penalty parameter ("sigma")
  double objective_factor_;           // "objective_factor" (obsolete per docs)
  double eta_;                        // wolfe parameter ("eta")
  double gamma_;                      // sigma growth rate ("gamma")
  double step_;
  double desired_feasibility_;        // "desired_feasibility"
  double feasibility_tolerance_;      // "feasibility_tolerance"
  double norm_grad_tolerance_;        // "norm_grad_tolerance"
  double wolfe_sigma1_;               // wolfe parameters for the line search
  double wolfe_sigma2_;
  double wolfe_beta_;
  double min_beta_;
  bool silent_;                       // suppress progress output
  bool show_warnings_;
  bool use_default_termination_;      // this module vs. objective decides
  ArrayList<Matrix> s_bfgs_;          // stored coordinate differences s_k
  ArrayList<Matrix> y_bfgs_;          // stored gradient differences y_k
  Vector ro_bfgs_;                    // 1 / (y_k . s_k) scalars
  index_t index_bfgs_;                // circular index into the bfgs memory
  Matrix coordinates_;                // current optimization variable
  Matrix previous_coordinates_;
  Matrix gradient_;
  Matrix previous_gradient_;
  index_t max_iterations_;            // "max_iterations" option
  double step_size_;                  // initial wolfe step ("step_size")

  index_t mem_bfgs_;                  // history size ("mem_bfgs")
  FILE *fp_log_;                      // log file handle ("log_file")
};

#include "lbfgs_impl.h"
#endif //LBFGS_H_