mog_em.cc

Go to the documentation of this file.
00001 /* MLPACK 0.2
00002  *
00003  * Copyright (c) 2008, 2009 Alexander Gray,
00004  *                          Garry Boyer,
00005  *                          Ryan Riegel,
00006  *                          Nikolaos Vasiloglou,
00007  *                          Dongryeol Lee,
00008  *                          Chip Mappus, 
00009  *                          Nishant Mehta,
00010  *                          Hua Ouyang,
00011  *                          Parikshit Ram,
00012  *                          Long Tran,
00013  *                          Wee Chin Wong
00014  *
00015  * Copyright (c) 2008, 2009 Georgia Institute of Technology
00016  *
00017  * This program is free software; you can redistribute it and/or
00018  * modify it under the terms of the GNU General Public License as
00019  * published by the Free Software Foundation; either version 2 of the
00020  * License, or (at your option) any later version.
00021  *
00022  * This program is distributed in the hope that it will be useful, but
00023  * WITHOUT ANY WARRANTY; without even the implied warranty of
00024  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00025  * General Public License for more details.
00026  *
00027  * You should have received a copy of the GNU General Public License
00028  * along with this program; if not, write to the Free Software
00029  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
00030  * 02110-1301, USA.
00031  */
00041 #include "mog_em.h"
00042 #include "phi.h"
00043 #include "math_functions.h"
00044 
/**
 * Fits this mixture-of-Gaussians model to 'data_points' (one point per
 * column) by Expectation-Maximization.
 *
 * Procedure: run 5 random restarts; each restart is seeded by KMeans()
 * and then iterated E/M steps until the log-likelihood improvement drops
 * below TINY. The parameters of the best-scoring restart are committed
 * via set_mu()/set_sigma()/set_omega().
 *
 * NOTE(review): INFTY is a finite sentinel (99999), not a true infinity;
 * a first-restart log-likelihood below -99999 would never be accepted and
 * 'mu'/'sigma'/'omega' would then be committed uninitialized — presumably
 * never happens in practice, but worth confirming.
 */
void MoGEM::ExpectationMaximization(Matrix& data_points) {

  // Declaration of the variables */
  index_t num_points;
  index_t dim, num_gauss;
  double sum, tmp;      
  ArrayList<Vector> mu_temp, mu;        // working / best-so-far means
  ArrayList<Matrix> sigma_temp, sigma;  // working / best-so-far covariances
  Vector omega_temp, omega, x;          // working / best weights; scratch point
  Matrix cond_prob;                     // responsibilities: gaussian x point
  long double l, l_old, best_l, INFTY = 99999, TINY = 1.0e-10;

  // Initializing values
  dim = dimension();
  num_gauss = number_of_gaussians();
  num_points = data_points.n_cols();

  // Initializing the number of the vectors and matrices 
  // according to the parameters input
  mu_temp.Init(num_gauss);
  mu.Init(num_gauss);
  sigma_temp.Init(num_gauss);
  sigma.Init(num_gauss);
  omega_temp.Init(num_gauss);
  omega.Init(num_gauss);
  
  // Allocating size to the vectors and matrices 
  // according to the dimensionality of the data
  for(index_t i = 0; i < num_gauss; i++) {
    mu_temp[i].Init(dim);
    mu[i].Init(dim);
    sigma_temp[i].Init(dim, dim);    
    sigma[i].Init(dim, dim);
  }
  x.Init(dim);
  cond_prob.Init(num_gauss, num_points);
  
  best_l = -INFTY;
  index_t restarts = 0;
  // performing 5 restarts and choosing the best from them
  while (restarts < 5) { 

    // assign initial values to 'mu', 'sig' and 'omega' using k-means
    KMeans(data_points, &mu_temp, &sigma_temp, &omega_temp, num_gauss);
    
    l_old = -INFTY;

    // calculates the loglikelihood value
    l = Loglikelihood(data_points, mu_temp, sigma_temp, omega_temp);
    
    // added a check here to see if any 
    // significant change is being made 
    // at every iteration
    while (l - l_old > TINY) {
      // E-step: calculating the conditional probabilities 
      // of choosing a particular gaussian given 
      // the data and the present theta value   
      for (index_t j = 0; j < num_points; j++) {
        x.CopyValues(data_points.GetColumnPtr(j));
        sum = 0;
        for (index_t i = 0; i < num_gauss; i++) {
          // unnormalized responsibility: omega_i * N(x | mu_i, sigma_i)
          tmp = phi(x, mu_temp[i], sigma_temp[i]) * omega_temp.get(i);
          cond_prob.set(i, j, tmp);
          sum += tmp;     
        }
        // normalize the column so responsibilities for point j sum to 1
        // NOTE(review): no guard against sum == 0 — confirm phi() cannot
        // underflow to zero for all components simultaneously
        for (index_t i = 0; i < num_gauss; i++) {
          tmp = cond_prob.get(i, j);
          cond_prob.set(i, j, tmp / sum); 
        }
      }
                        
      // M-step part 1: calculating the new value of the mu 
      // using the updated conditional probabilities
      // (responsibility-weighted average of the data points)
      for (index_t i = 0; i < num_gauss; i++) {
        sum = 0;
        mu_temp[i].SetZero();
        for (index_t j = 0; j < num_points; j++) {
          x.CopyValues(data_points.GetColumnPtr(j));
          la::AddExpert(cond_prob.get(i, j), x, &mu_temp[i]);
          sum += cond_prob.get(i, j); 
        }
        la::Scale((1.0 / sum), &mu_temp[i]);
      }
                                        
      // M-step part 2: calculating the new value of the sig 
      // using the updated conditional probabilities 
      // and the updated mu
      for (index_t i = 0; i < num_gauss; i++) {
        sum = 0;
        sigma_temp[i].SetZero();
        for (index_t j = 0; j < num_points; j++) {
          Matrix co, ro, c;
          c.Init(dim, dim);
          x.CopyValues(data_points.GetColumnPtr(j));
          // x now holds (x_j - mu_i); build the outer product
          // (x - mu)(x - mu)^T via column-vector * row-vector aliases
          la::SubFrom(mu_temp[i] , &x);
          co.AliasColVector(x);
          ro.AliasRowVector(x);
          la::MulOverwrite(co, ro, &c);
          la::AddExpert(cond_prob.get(i, j), c, &sigma_temp[i]);
          sum += cond_prob.get(i, j);     
        }
        la::Scale((1.0 / sum), &sigma_temp[i]);
      }
                        
      // M-step part 3: calculating the new values for omega 
      // using the updated conditional probabilities;
      // multiplying by the all-(1/N) vector averages each row of
      // cond_prob, i.e. omega_i = (1/N) * sum_j cond_prob(i, j)
      Vector identity_vector;
      identity_vector.Init(num_points);
      identity_vector.SetAll(1.0 / num_points);
      la::MulOverwrite(cond_prob, identity_vector, &omega_temp);
      
      l_old = l;
      l = Loglikelihood(data_points, mu_temp, sigma_temp, omega_temp);
    }
    
    // putting a check to see if the best one is chosen
    if(l > best_l){
      best_l = l;
      for (index_t i = 0; i < num_gauss; i++) {
        mu[i].CopyValues(mu_temp[i]);
        sigma[i].CopyValues(sigma_temp[i]);
      }
      omega.CopyValues(omega_temp);
    }
    restarts++;
  }     

  // commit the best restart's parameters to the model
  for (index_t i = 0; i < num_gauss; i++) {
    set_mu(i, mu[i]);
    set_sigma(i, sigma[i]);
  }
  set_omega(omega);
  
  NOTIFY("loglikelihood value of the estimated model: %Lf\n", best_l);
  return;
}
00181 
00182 long double MoGEM::Loglikelihood(Matrix& data_points, ArrayList<Vector>& means,
00183                                ArrayList<Matrix>& covars, Vector& weights) {
00184         
00185   index_t i, j;
00186   Vector x;
00187   long double likelihood, loglikelihood = 0;
00188         
00189   x.Init(data_points.n_rows());
00190         
00191   for (j = 0; j < data_points.n_cols(); j++) {
00192     x.CopyValues(data_points.GetColumnPtr(j));
00193     likelihood = 0;
00194     for(i = 0; i < number_of_gaussians() ; i++){
00195       likelihood += weights.get(i) * phi(x, means[i], covars[i]);
00196     }
00197     loglikelihood += log(likelihood);
00198   }
00199   return loglikelihood;
00200 }
00201 
00202 void MoGEM::KMeans(Matrix& data, ArrayList<Vector> *means,
00203                  ArrayList<Matrix> *covars, Vector *weights, index_t value_of_k){
00204 
00205         
00206   ArrayList<Vector> mu, mu_old;
00207   double* tmpssq;
00208   double* sig;
00209   double* sig_best;
00210   index_t *y;
00211   Vector x, diff;
00212   Matrix ssq;
00213   index_t i, j, k, n, t, dim;
00214   double score, score_old, sum;
00215         
00216   n = data.n_cols();
00217   dim = data.n_rows();
00218   mu.Init(value_of_k);
00219   mu_old.Init(value_of_k);
00220   tmpssq = (double*)malloc(value_of_k * sizeof( double ));
00221   sig = (double*)malloc(value_of_k * sizeof( double ));
00222   sig_best = (double*)malloc(value_of_k * sizeof( double ));
00223   ssq.Init(n, value_of_k);
00224         
00225   for( i = 0; i < value_of_k; i++){
00226     mu[i].Init(dim);
00227     mu_old[i].Init(dim);
00228   }
00229   x.Init(dim);
00230   y = (index_t*)malloc(n * sizeof(index_t));
00231   diff.Init(dim);
00232         
00233   score_old = 999999;
00234         
00235   // putting 5 random restarts to obtain the k-means
00236   for(i = 0; i < 5; i++){
00237     t = -1;
00238     for (k = 0; k < value_of_k; k++){
00239       t = (t + 1 + (rand()%((n - 1 - (value_of_k - k)) - (t + 1))));
00240       mu[k].CopyValues(data.GetColumnPtr(t));
00241       for(j = 0; j < n; j++){
00242         x.CopyValues( data.GetColumnPtr(j));
00243         la::SubOverwrite(mu[k], x, &diff);
00244         ssq.set( j, k, la::Dot(diff, diff));
00245       }         
00246     }
00247     min_element(ssq, y);
00248     
00249     do{
00250       for(k = 0; k < value_of_k; k++){
00251         mu_old[k].CopyValues(mu[k]);
00252       }
00253                         
00254       for(k = 0; k < value_of_k; k++){
00255         index_t p = 0;
00256         mu[k].SetZero();
00257         for(j = 0; j < n; j++){
00258           x.CopyValues(data.GetColumnPtr(j));
00259           if(y[j] == k){
00260             la::AddTo(x, &mu[k]);
00261             p++;
00262           }
00263         }
00264         
00265         if(p == 0){ 
00266         }
00267         else{
00268           double sc = 1 ;
00269           sc = sc / p;
00270           la::Scale(sc , &mu[k]);
00271         }
00272         for(j = 0; j < n; j++){
00273           x.CopyValues(data.GetColumnPtr(j));
00274           la::SubOverwrite(mu[k], x, &diff);
00275           ssq.set(j, k, la::Dot(diff, diff));
00276         }
00277       }      
00278       min_element(ssq, y);
00279       
00280       sum = 0;
00281       for(k = 0; k < value_of_k; k++) {
00282         la::SubOverwrite(mu[k], mu_old[k], &diff);
00283         sum += la::Dot(diff, diff);
00284       }
00285     }while(sum != 0);
00286                 
00287     for(k = 0; k < value_of_k; k++){
00288       index_t p = 0;
00289       tmpssq[k] = 0;
00290       for(j = 0; j < n; j++){
00291         if(y[j] == k){
00292           tmpssq[k] += ssq.get(j, k);
00293           p++;
00294         }
00295       }
00296       sig[k] = sqrt(tmpssq[k] / p);
00297     }   
00298                 
00299     score = 0;
00300     for(k = 0; k < value_of_k; k++){
00301       score += tmpssq[k];
00302     }
00303     score = score / n;
00304     
00305     if (score < score_old) {
00306       score_old = score;
00307       for(k = 0; k < value_of_k; k++){
00308         (*means)[k].CopyValues(mu[k]);
00309         sig_best[k] = sig[k];   
00310       }
00311     }
00312   }
00313         
00314   for(k = 0; k < value_of_k; k++){
00315     x.SetAll(sig_best[k]);
00316     (*covars)[k].SetDiagonal(x);
00317   }
00318   double tmp = 1;
00319   (*weights).SetAll(tmp / value_of_k);
00320   return;
00321 }
Generated on Mon Jan 24 12:04:38 2011 for FASTlib by  doxygen 1.6.3