mog_l2e.h

Go to the documentation of this file.
00001 /* MLPACK 0.2
00002  *
00003  * Copyright (c) 2008, 2009 Alexander Gray,
00004  *                          Garry Boyer,
00005  *                          Ryan Riegel,
00006  *                          Nikolaos Vasiloglou,
00007  *                          Dongryeol Lee,
00008  *                          Chip Mappus, 
00009  *                          Nishant Mehta,
00010  *                          Hua Ouyang,
00011  *                          Parikshit Ram,
00012  *                          Long Tran,
00013  *                          Wee Chin Wong
00014  *
00015  * Copyright (c) 2008, 2009 Georgia Institute of Technology
00016  *
00017  * This program is free software; you can redistribute it and/or
00018  * modify it under the terms of the GNU General Public License as
00019  * published by the Free Software Foundation; either version 2 of the
00020  * License, or (at your option) any later version.
00021  *
00022  * This program is distributed in the hope that it will be useful, but
00023  * WITHOUT ANY WARRANTY; without even the implied warranty of
00024  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00025  * General Public License for more details.
00026  *
00027  * You should have received a copy of the GNU General Public License
00028  * along with this program; if not, write to the Free Software
00029  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
00030  * 02110-1301, USA.
00031  */
00041 #ifndef MOG_L2E_H
00042 #define MOG_L2E_H
00043 
00044 #include <fastlib/fastlib.h>
00045 
00046 const fx_entry_doc mog_l2e_entries[] = {
00047   {"K", FX_PARAM, FX_INT, NULL,
00048    " The number of Gaussians in the mixture model."
00049    " (defaults to 1)\n"},
00050   {"D", FX_RESERVED, FX_INT, NULL,
00051    " The number of dimensions of the data on which the"
00052    " the mixture model is to be fit.\n"},
00053   FX_ENTRY_DOC_DONE
00054 };
00055 
00056 const fx_module_doc mog_l2e_doc = {
00057   mog_l2e_entries, NULL,
00058   " This program defines a Gaussian mixture model"
00059   " and calculates the L2 error for the present"
00060   " parameter setting to be used by an optimizer.\n"
00061 };
00062 
00090 class MoGL2E {
00091 
00092  private:
00093 
00094   // The parameters of the Mixture model
00095   ArrayList<Vector> mu_;
00096   ArrayList<Matrix> sigma_;
00097   Vector omega_;
00098   index_t number_of_gaussians_;
00099   index_t dimension_;
00100   
00101   // The differential for the paramterization
00102   // for optimization
00103   Matrix d_omega_;
00104   ArrayList<ArrayList<Matrix> > d_sigma_;
00105 
00106  public:
00107 
00108   MoGL2E() {
00109     mu_.Init(0);
00110     sigma_.Init(0);
00111     d_sigma_.Init(0);
00112     d_omega_.Init(0, 0);
00113   }
00114 
00115   ~MoGL2E() {
00116   }
00117 
00118   void Init(index_t num_gauss, index_t dimension) {
00119 
00120     // Destruct everything to initialize afresh
00121     mu_.Clear();
00122     sigma_.Clear();
00123     d_sigma_.Clear();
00124     // Initialize the private variables
00125     number_of_gaussians_ = num_gauss;
00126     dimension_ = dimension;
00127 
00128     // Resize the ArrayList of Vectors and Matrices
00129     mu_.Resize(number_of_gaussians_);
00130     sigma_.Resize(number_of_gaussians_);
00131   }
00132 
00133   void Resize_d_sigma_() {
00134     d_sigma_.Resize(number_of_gaussians());
00135     for(index_t i =0; i < number_of_gaussians(); i++) {
00136       d_sigma_[i].Init(dimension()*(dimension()+1)/2);
00137     }
00138   }
00139 
00156   void MakeModel(index_t num_mods, index_t dimension, double* theta) {
00157     double *temp_mu; 
00158     Matrix lower_triangle_matrix, upper_triangle_matrix;
00159     double sum, s_min = 0.01;
00160 
00161     Init(num_mods, dimension);
00162     temp_mu = (double*) malloc (dimension * sizeof(double)) ;
00163     lower_triangle_matrix.Init(dimension, dimension);
00164     upper_triangle_matrix.Init(dimension, dimension);
00165                         
00166     // calculating the omega values
00167     sum = 0;
00168     double *temp_array;
00169     temp_array = (double*) malloc (num_mods * sizeof(double));
00170     for(index_t i = 0; i < num_mods - 1; i++) {
00171       temp_array[i] = exp(theta[i]) ;
00172       sum += temp_array[i] ;
00173     }
00174     temp_array[num_mods - 1] = 1 ; 
00175     ++sum ;
00176     la::Scale(num_mods, (1.0 / sum), temp_array);
00177     set_omega(dimension, temp_array);
00178 
00179     // calculating the mu values
00180     for(index_t k = 0; k < num_mods; k++) {
00181       for(index_t j = 0; j < dimension; j++) {
00182         temp_mu[j] = theta[num_mods + k*dimension + j - 1];
00183       }
00184       set_mu(k, dimension, temp_mu);
00185     }
00186                         
00187     // calculating the sigma values
00188     // using a lower triangular matrix and its transpose
00189     // to obtain a positive definite symmetric matrix
00190     Matrix sigma_temp;
00191     sigma_temp.Init(dimension, dimension);
00192     for(index_t k = 0; k < num_mods; k++) {
00193       lower_triangle_matrix.SetAll(0.0);
00194       for(index_t j = 0; j < dimension; j++) {
00195         for(index_t i = 0; i < j; i++) {
00196           lower_triangle_matrix.set(j, i, 
00197                                     theta[(num_mods - 1) 
00198                                           + num_mods*dimension 
00199                                           + k*(dimension*(dimension + 1) / 2) 
00200                                           + (j*(j + 1) / 2) + i]);
00201         }
00202         lower_triangle_matrix.set(j, j, 
00203                                   theta[(num_mods - 1) 
00204                                         + num_mods*dimension 
00205                                         + k*(dimension*(dimension + 1) / 2) 
00206                                         + (j*(j + 1) / 2) + j] + s_min);
00207                                         
00208       }
00209       la::TransposeOverwrite(lower_triangle_matrix, &upper_triangle_matrix);
00210       la::MulOverwrite(lower_triangle_matrix, upper_triangle_matrix, &sigma_temp);
00211       set_sigma(k, sigma_temp);
00212     }
00213   }                     
00214 
00215   void MakeModel(datanode *mog_l2e_module, double* theta) {
00216     index_t num_gauss = fx_param_int_req(mog_l2e_module, "K");
00217     index_t dimension = fx_param_int_req(mog_l2e_module, "D");
00218     MakeModel(num_gauss, dimension, theta);
00219   }
00220 
00237   void MakeModelWithGradients(index_t num_mods, index_t dimension, double* theta) {
00238     double *temp_mu;
00239     Matrix lower_triangle_matrix, upper_triangle_matrix;
00240     double sum, s_min = 0.01;
00241 
00242     Init(num_mods, dimension);
00243     temp_mu = (double*) malloc (dimension * sizeof(double));
00244     lower_triangle_matrix.Init(dimension, dimension);
00245     upper_triangle_matrix.Init(dimension, dimension);
00246     
00247     // calculating the omega values
00248     sum = 0;
00249     double *temp_array;
00250     temp_array = (double*) malloc (num_mods * sizeof(double));
00251     for(index_t i = 0; i < num_mods - 1; i++) {
00252       temp_array[i] = exp(theta[i]) ;
00253       sum += temp_array[i] ;
00254     }
00255     temp_array[num_mods - 1] = 1 ;  
00256     ++sum ;
00257     la::Scale(num_mods, (1.0 / sum), temp_array);
00258     set_omega(num_mods, temp_array);
00259 
00260     // calculating the d_omega values
00261     Matrix d_omega_temp;
00262     d_omega_temp.Init(num_mods - 1, num_mods);
00263     d_omega_temp.SetAll(0.0);
00264     for(index_t i = 0; i < num_mods - 1; i++) {
00265       for(index_t j = 0; j < i; j++) {
00266         d_omega_temp.set(i,j,-(omega(i)*omega(j)));
00267         d_omega_temp.set(j,i,-(omega(i)*omega(j)));
00268       }
00269       d_omega_temp.set(i,i,omega(i)*(1-omega(i)));
00270     }
00271     for(index_t i = 0; i < num_mods - 1; i++) {
00272       d_omega_temp.set(i, num_mods - 1, -(omega(i)*omega(num_mods - 1)));
00273     }
00274     set_d_omega(d_omega_temp);
00275                         
00276     // calculating the mu values
00277     for(index_t k = 0; k < num_mods; k++) {
00278       for(index_t j = 0; j < dimension; j++) {
00279         temp_mu[j] = theta[num_mods + k*dimension + j - 1];
00280       }
00281       set_mu(k, dimension, temp_mu);
00282     }
00283     // d_mu is not computed because it is implicitly known
00284     // since no parameterization is applied on them
00285                         
00286     // using a lower triangular matrix and its transpose 
00287     // to obtain a positive definite symmetric matrix
00288     
00289     // initializing the d_sigma values
00290 
00291     Matrix d_sigma_temp;
00292     d_sigma_temp.Init(dimension, dimension);
00293     Resize_d_sigma_();
00294 
00295     // calculating the sigma values
00296     Matrix sigma_temp;
00297     sigma_temp.Init(dimension, dimension);
00298     for(index_t k = 0; k < num_mods; k++) {
00299       lower_triangle_matrix.SetAll(0.0);
00300       for(index_t j = 0; j < dimension; j++) {
00301         for(index_t i = 0; i < j; i++) {
00302           lower_triangle_matrix.set( j, i, 
00303                                      theta[(num_mods - 1) 
00304                                            + num_mods*dimension 
00305                                            + k*(dimension*(dimension + 1) / 2) 
00306                                            + (j*(j + 1) / 2) + i]) ;
00307         }
00308         lower_triangle_matrix.set(j, j, 
00309                                   theta[(num_mods - 1) 
00310                                         + num_mods*dimension 
00311                                         + k*(dimension*(dimension + 1) / 2) 
00312                                         + (j*(j + 1) / 2) + j] + s_min);
00313       }
00314       la::TransposeOverwrite(lower_triangle_matrix, &upper_triangle_matrix);
00315       la::MulOverwrite(lower_triangle_matrix, upper_triangle_matrix, &sigma_temp);
00316       set_sigma(k, sigma_temp);
00317                                 
00318       // calculating the d_sigma values
00319       for(index_t i = 0; i < dimension; i++){
00320         for(index_t in = 0; in < i+1; in++){
00321           Matrix d_sigma_d_r,d_sigma_d_r_t,temp_matrix_1,temp_matrix_2;
00322           d_sigma_d_r.Init(dimension, dimension);
00323           d_sigma_d_r_t.Init(dimension, dimension);
00324           d_sigma_d_r.SetAll(0.0);
00325           d_sigma_d_r_t.SetAll(0.0);
00326           d_sigma_d_r.set(i,in,1.0);
00327           d_sigma_d_r_t.set(in,i,1.0);
00328                                                 
00329           la::MulInit(d_sigma_d_r,upper_triangle_matrix,&temp_matrix_1);
00330           la::MulInit(lower_triangle_matrix,d_sigma_d_r_t,&temp_matrix_2);
00331           la::AddOverwrite(temp_matrix_1,temp_matrix_2,&d_sigma_temp);
00332           set_d_sigma(k, (i*(i+1)/2)+in, d_sigma_temp);
00333         }
00334       }
00335     }
00336   }                     
00337   
00339   ArrayList<Vector>& mu() {
00340     return mu_;
00341   }                             
00342                 
00343   ArrayList<Matrix>& sigma() {
00344     return sigma_;
00345   }
00346                 
00347   Vector& omega() {
00348     return omega_;
00349   }
00350                 
00351   index_t number_of_gaussians() {
00352     return number_of_gaussians_;
00353   }                     
00354                 
00355   index_t dimension() {
00356     return dimension_;
00357   }
00358                 
00359   Vector& mu(index_t i) {
00360     return mu_[i] ;
00361   }
00362                 
00363   Matrix& sigma(index_t i) {
00364     return sigma_[i];
00365   }
00366                 
00367   double omega(index_t i) {
00368     return omega_.get(i);
00369   }
00370                 
00371   Matrix& d_omega(){
00372     return d_omega_;
00373   }
00374                 
00375   ArrayList<ArrayList<Matrix> >& d_sigma(){
00376     return d_sigma_;
00377   }
00378                 
00379   ArrayList<Matrix>& d_sigma(index_t i){
00380     return d_sigma_[i];
00381   }
00382 
00384 
00385   void set_mu(index_t i, Vector& mu) {
00386     DEBUG_ASSERT(i < number_of_gaussians());
00387     DEBUG_ASSERT(mu.length() == dimension()); 
00388     mu_[i].Copy(mu);
00389     return;
00390   }
00391 
00392   void set_mu(index_t i, index_t length, const double *mu) {
00393     DEBUG_ASSERT(i < number_of_gaussians());
00394     DEBUG_ASSERT(length == dimension());
00395     mu_[i].Copy(mu, length);
00396     return;
00397   }
00398 
00399   void set_sigma(index_t i, Matrix& sigma) {
00400     DEBUG_ASSERT(i < number_of_gaussians());
00401     DEBUG_ASSERT(sigma.n_rows() == dimension());
00402     DEBUG_ASSERT(sigma.n_cols() == dimension());
00403     sigma_[i].Copy(sigma);
00404     return;
00405   }
00406 
00407   void set_omega(Vector& omega) {
00408     DEBUG_ASSERT(omega.length() == number_of_gaussians());
00409     omega_.Copy(omega);
00410     return;
00411   }
00412 
00413   void set_omega(index_t length, const double *omega) {
00414     DEBUG_ASSERT(length == number_of_gaussians());
00415     omega_.Copy(omega, length);
00416     return;
00417   }
00418 
00419   void set_d_omega(Matrix& d_omega) {
00420     d_omega_.Destruct();
00421     d_omega_.Copy(d_omega);
00422     return;
00423   }
00424 
00425   void set_d_sigma(index_t i, index_t j, Matrix& d_sigma_i_j) {
00426     d_sigma_[i][j].Copy(d_sigma_i_j);
00427     return;
00428   }
00429 
00439   void OutputResults(ArrayList<double> *results) {
00440 
00441     // Initialize the size of the output array
00442     (*results).Init(number_of_gaussians_ * (1 + dimension_*(1 + dimension_)));
00443 
00444     // Copy values to the array from the private variables of the class
00445     for (index_t i = 0; i < number_of_gaussians_; i++) {
00446       (*results)[i] = omega(i);
00447       for (index_t j = 0; j < dimension_; j++) {
00448         (*results)[number_of_gaussians_ + i*dimension_ + j] = mu(i).get(j);
00449         for (index_t k = 0; k < dimension_; k++) {
00450           (*results)[number_of_gaussians_*(1 + dimension_) 
00451                    + i*dimension_*dimension_ + j*dimension_ 
00452                    + k] = sigma(i).get(j, k);
00453         }
00454       }
00455     }
00456   }
00457 
00465   void Display(){
00466 
00467     // Output the model parameters as the omega, mu and sigma                   
00468     printf(" Omega : [ ");
00469     for (index_t i = 0; i < number_of_gaussians_; i++) {
00470       printf("%lf ", omega(i));
00471     }
00472     printf("]\n");
00473     printf(" Mu : \n[");
00474     for (index_t i = 0; i < number_of_gaussians_; i++) {
00475       for (index_t j = 0; j < dimension_ ; j++) {
00476         printf("%lf ", mu(i).get(j));
00477       }
00478       printf(";");
00479       if (i == (number_of_gaussians_ - 1)) {
00480         printf("\b]\n");
00481       }
00482     }
00483     printf("Sigma : ");
00484     for (index_t i = 0; i < number_of_gaussians_; i++) {
00485       printf("\n[");
00486       for (index_t j = 0; j < dimension_ ; j++) {
00487         for(index_t k = 0; k < dimension_ ; k++) {
00488           printf("%lf ",sigma(i).get(j, k));
00489         }
00490         printf(";");
00491       }
00492       printf("\b]");
00493     }
00494     printf("\n");
00495   }
00496 
00497   
00516   long double L2Error(const Matrix&, Vector* = NULL);
00517   
00526   long double RegularizationTerm_(Vector* = NULL);
00527    
00536   long double GoodnessOfFitTerm_(const Matrix&, Vector* = NULL);
00537 
00551   static void MultiplePointsGenerator(double**, index_t, 
00552                                       const Matrix&, index_t);
00553 
00568   static void InitialPointGenerator(double*, const Matrix&, index_t);
00569 
00592   static void KMeans_(const Matrix&, ArrayList<Vector>*,
00593                       ArrayList<Matrix>*, Vector*, index_t);
00594 
00599   static void min_element(Matrix&, index_t*);
00600 
00609   static long double L2ErrorForOpt(Vector& params,
00610                                    const Matrix& data,
00611                                    Vector *gradient) {
00612 
00613     MoGL2E model;
00614     index_t dimension = data.n_rows();
00615     index_t num_gauss;
00616 
00617     num_gauss = (params.length() + 1)*2 / ((dimension+1)*(dimension+2));
00618     // This check added here to see if 
00619     // the gradient is actually demanded here
00620     if (gradient != NULL) {
00621       model.MakeModelWithGradients(num_gauss, dimension, params.ptr());
00622       return model.L2Error(data, gradient);
00623     }
00624     else {
00625       model.MakeModel(num_gauss, dimension, params.ptr());
00626       return model.L2Error(data);
00627     }
00628   }
00629 
00636   static long double L2ErrorForOpt(Vector& params, const Matrix& data) {
00637     return L2ErrorForOpt(params, data, NULL);
00638   }
00639 
00640 };
00641 
00642 #endif