mog_em.h

Go to the documentation of this file.
00001 /* MLPACK 0.2
00002  *
00003  * Copyright (c) 2008, 2009 Alexander Gray,
00004  *                          Garry Boyer,
00005  *                          Ryan Riegel,
00006  *                          Nikolaos Vasiloglou,
00007  *                          Dongryeol Lee,
00008  *                          Chip Mappus, 
00009  *                          Nishant Mehta,
00010  *                          Hua Ouyang,
00011  *                          Parikshit Ram,
00012  *                          Long Tran,
00013  *                          Wee Chin Wong
00014  *
00015  * Copyright (c) 2008, 2009 Georgia Institute of Technology
00016  *
00017  * This program is free software; you can redistribute it and/or
00018  * modify it under the terms of the GNU General Public License as
00019  * published by the Free Software Foundation; either version 2 of the
00020  * License, or (at your option) any later version.
00021  *
00022  * This program is distributed in the hope that it will be useful, but
00023  * WITHOUT ANY WARRANTY; without even the implied warranty of
00024  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00025  * General Public License for more details.
00026  *
00027  * You should have received a copy of the GNU General Public License
00028  * along with this program; if not, write to the Free Software
00029  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
00030  * 02110-1301, USA.
00031  */
00040 #ifndef MOG_EM_H
00041 #define MOG_EM_H
00042 
00043 #include <fastlib/fastlib.h>
00044 
00045 const fx_entry_doc mog_em_entries[] = {
00046   {"K", FX_PARAM, FX_INT, NULL,
00047    " The number of Gaussians in the mixture model."
00048    " (defaults to 1)\n"},
00049   {"D", FX_RESERVED, FX_INT, NULL,
00050    " The number of dimensions of the data on which the"
00051    " the mixture model is to be fit.\n"},
00052   {"model_init", FX_TIMER, FX_CUSTOM, NULL,
00053    " The time required to initialize the mixture model.\n"},
00054   {"EM", FX_TIMER, FX_CUSTOM, NULL,
00055    " The time required for the EM algorithm to obtain"
00056    " the parameter values.\n"},
00057   FX_ENTRY_DOC_DONE
00058 };
00059 
00060 const fx_module_doc mog_em_doc = {
00061   mog_em_entries, NULL,
00062   " This program defines a Gaussian mixture model"
00063   " and estimates the parameters using maximum"
00064   " likelihood via the EM algorithm.\n"
00065 };
00066 
00085 class MoGEM {
00086 
00087  private:
00088 
00089   // The parameters of the mixture model
00090   ArrayList<Vector> mu_;
00091   ArrayList<Matrix> sigma_;
00092   Vector omega_;
00093   index_t number_of_gaussians_;
00094   index_t dimension_;
00095 
00096  public:
00097 
00098   MoGEM() {
00099     mu_.Init(0);
00100     sigma_.Init(0);
00101   }
00102 
00103   ~MoGEM() {
00104   }
00105 
00106   void Init(index_t num_gauss, index_t dimension) {
00107 
00108     // Initialize the private variables
00109     number_of_gaussians_ = num_gauss;
00110     dimension_ = dimension;
00111 
00112     // Resize the ArrayList of Vectors and Matrices
00113     mu_.Resize(number_of_gaussians_);
00114     sigma_.Resize(number_of_gaussians_);
00115   }
00116 
00117   void Init(datanode *mog_em_module) {
00118     
00119     index_t num_gauss = fx_param_int_req(mog_em_module, "K");
00120     index_t dim = fx_param_int_req(mog_em_module, "D");
00121     Init(num_gauss, dim);
00122   }
00123   // The get functions
00124 
00125   ArrayList<Vector>& mu() {
00126     return mu_;
00127   }                             
00128                 
00129   ArrayList<Matrix>& sigma() {
00130     return sigma_;
00131   }
00132                 
00133   Vector& omega() {
00134     return omega_;
00135   }
00136                 
00137   index_t number_of_gaussians() {
00138     return number_of_gaussians_;
00139   }                     
00140                 
00141   index_t dimension() {
00142     return dimension_;
00143   }
00144                 
00145   Vector& mu(index_t i) {
00146     return mu_[i] ;
00147   }
00148                 
00149   Matrix& sigma(index_t i) {
00150     return sigma_[i];
00151   }
00152                 
00153   double omega(index_t i) {
00154     return omega_.get(i);
00155   }
00156 
00157   // The set functions
00158 
00159   void set_mu(index_t i, Vector& mu) {
00160     DEBUG_ASSERT(i < number_of_gaussians());
00161     DEBUG_ASSERT(mu.length() == dimension()); 
00162     mu_[i].Copy(mu);
00163     return;
00164   }
00165 
00166   void set_mu(index_t i, index_t length, const double *mu) {
00167     DEBUG_ASSERT(i < number_of_gaussians());
00168     DEBUG_ASSERT(length == dimension());
00169     mu_[i].Copy(mu, length);
00170     return;
00171   }
00172 
00173   void set_sigma(index_t i, Matrix& sigma) {
00174     DEBUG_ASSERT(i < number_of_gaussians());
00175     DEBUG_ASSERT(sigma.n_rows() == dimension());
00176     DEBUG_ASSERT(sigma.n_cols() == dimension());
00177     sigma_[i].Copy(sigma);
00178     return;
00179   }
00180 
00181   void set_omega(Vector& omega) {
00182     DEBUG_ASSERT(omega.length() == number_of_gaussians());
00183     omega_.Copy(omega);
00184     return;
00185   }
00186 
00187   void set_omega(index_t length, const double *omega) {
00188     DEBUG_ASSERT(length == number_of_gaussians());
00189     omega_.Copy(omega, length);
00190     return;
00191   }
00192 
00193 
00203   void OutputResults(ArrayList<double> *results) {
00204 
00205     // Initialize the size of the output array
00206     (*results).Init(number_of_gaussians_ * (1 + dimension_*(1 + dimension_)));
00207 
00208     // Copy values to the array from the private variables of the class
00209     for (index_t i = 0; i < number_of_gaussians_; i++) {
00210       (*results)[i] = omega(i);
00211       for (index_t j = 0; j < dimension_; j++) {
00212         (*results)[number_of_gaussians_ + i*dimension_ + j] = mu(i).get(j);
00213         for (index_t k = 0; k < dimension_; k++) {
00214           (*results)[number_of_gaussians_*(1 + dimension_) 
00215                    + i*dimension_*dimension_ + j*dimension_ 
00216                    + k] = sigma(i).get(j, k);
00217         }
00218       }
00219     }
00220   }
00221 
00229   void Display(){
00230 
00231     // Output the model parameters as the omega, mu and sigma                   
00232     printf(" Omega : [ ");
00233     for (index_t i = 0; i < number_of_gaussians_; i++) {
00234       printf("%lf ", omega(i));
00235     }
00236     printf("]\n");
00237     printf(" Mu : \n[");
00238     for (index_t i = 0; i < number_of_gaussians_; i++) {
00239       for (index_t j = 0; j < dimension_ ; j++) {
00240         printf("%lf ", mu(i).get(j));
00241       }
00242       printf(";");
00243       if (i == (number_of_gaussians_ - 1)) {
00244         printf("\b]\n");
00245       }
00246     }
00247     printf("Sigma : ");
00248     for (index_t i = 0; i < number_of_gaussians_; i++) {
00249       printf("\n[");
00250       for (index_t j = 0; j < dimension_ ; j++) {
00251         for(index_t k = 0; k < dimension_ ; k++) {
00252           printf("%lf ",sigma(i).get(j, k));
00253         }
00254         printf(";");
00255       }
00256       printf("\b]");
00257     }
00258     printf("\n");
00259   }
00260 
00273   void ExpectationMaximization(Matrix& data_points);
00274 
00281   long double Loglikelihood(Matrix& data_points, ArrayList<Vector>& means,
00282                             ArrayList<Matrix>& covars, Vector& weights);
00283 
00293   void KMeans(Matrix& data, ArrayList<Vector> *means,
00294               ArrayList<Matrix> *covars, Vector *weights, index_t value_of_k);
00295 };
00296 
00297 #endif