mog_l2e.cc

/* MLPACK 0.2
 *
 * Copyright (c) 2008, 2009 Alexander Gray,
 *                          Garry Boyer,
 *                          Ryan Riegel,
 *                          Nikolaos Vasiloglou,
 *                          Dongryeol Lee,
 *                          Chip Mappus,
 *                          Nishant Mehta,
 *                          Hua Ouyang,
 *                          Parikshit Ram,
 *                          Long Tran,
 *                          Wee Chin Wong
 *
 * Copyright (c) 2008, 2009 Georgia Institute of Technology
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */
#include "mog_l2e.h"
#include "phi.h"

// Computes the L2 error of the current mixture with respect to the data and,
// if 'gradients' is non-NULL, its gradient with respect to all parameters.
long double MoGL2E::L2Error(const Matrix& data, Vector *gradients) {

  long double reg, fit, l2e;
  index_t number_of_points = data.n_cols();

  if (gradients != NULL) {
    Vector g_reg, g_fit;
    reg = RegularizationTerm_(&g_reg);
    fit = GoodnessOfFitTerm_(data, &g_fit);

    // The gradient vector packs the K - 1 weight parameters, the K means
    // (D values each) and the K lower-triangular covariance factors
    // (D(D+1)/2 values each), for a total of K(D+1)(D+2)/2 - 1 entries.
    DEBUG_ASSERT(gradients->length() == (number_of_gaussians() *
        (dimension() + 1) * (dimension() + 2) / 2 - 1));

    gradients->SetAll(0.0);
    la::AddTo(g_reg, gradients);
    la::AddExpert((-2.0 / number_of_points), g_fit, gradients);
  }
  else {
    reg = RegularizationTerm_();
    fit = GoodnessOfFitTerm_(data);
  }
  l2e = reg - 2 * fit / number_of_points;
  return l2e;
}

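/*
 * Note: assuming the usual L2 estimation (L2E) criterion for Gaussian
 * mixtures, the value returned by L2Error() is
 *
 * \f[
 *   \widehat{L2E}(\theta) = \int \hat{f}_{\theta}(x)^2 \, dx
 *     - \frac{2}{n} \sum_{i=1}^{n} \hat{f}_{\theta}(x_i),
 * \f]
 *
 * where \f$ \hat{f}_{\theta} \f$ is the current mixture density and n the
 * number of data points.  RegularizationTerm_() computes the first
 * (integral) term and GoodnessOfFitTerm_() the sum in the second term,
 * which is exactly the combination reg - 2 * fit / n formed above.
 */
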
// Computes the regularization term, i.e. omega^T Phi omega, where
// Phi(j, k) = phi(mu_j; mu_k, Sigma_j + Sigma_k).  If 'g_reg' is non-NULL,
// the gradient with respect to all parameters is also returned in it.
long double MoGL2E::RegularizationTerm_(Vector *g_reg) {
  Matrix phi_mu, sum_covar;
  Vector x, y;
  long double reg, tmpVal;
  index_t num_gauss, dim;

  Vector df_dw, g_omega;
  ArrayList<Vector> g_mu, g_sigma;
  ArrayList<ArrayList<Vector> > dp_d_mu, dp_d_sigma;

  num_gauss = number_of_gaussians();
  dim = dimension();

  phi_mu.Init(num_gauss, num_gauss);
  sum_covar.Init(dim, dim);
  x.Copy(omega());

  if (g_reg != NULL) {
    // Allocate storage for the partial derivatives of each pairwise
    // phi(mu_j; mu_k, Sigma_j + Sigma_k) term.
    g_mu.Init(num_gauss);
    g_sigma.Init(num_gauss);
    dp_d_mu.Init(num_gauss);
    dp_d_sigma.Init(num_gauss);
    for (index_t k = 0; k < num_gauss; k++) {
      dp_d_mu[k].Init(num_gauss);
      dp_d_sigma[k].Init(num_gauss);
    }
  }
  else {
    // No gradient requested; initialize everything to empty.
    g_mu.Init(0);
    g_sigma.Init(0);
    dp_d_mu.Init(0);
    dp_d_sigma.Init(0);
    df_dw.Init(0);
    g_omega.Init(0);
  }

  // Off-diagonal entries: phi(mu_k; mu_j, Sigma_k + Sigma_j) for all k > j.
  for (index_t k = 1; k < num_gauss; k++) {
    for (index_t j = 0; j < k; j++) {
      la::AddOverwrite(sigma(k), sigma(j), &sum_covar);

      if (g_reg != NULL) {
        ArrayList<Matrix> tmp_d_cov;
        Vector tmp_dp_d_sigma;

        // Stack the covariance derivatives of components k and j so that
        // phi() returns the derivatives with respect to both in one call.
        tmp_d_cov.Init(dim * (dim + 1));
        for (index_t i = 0; i < (dim * (dim + 1) / 2); i++) {
          tmp_d_cov[i].Copy(d_sigma(k)[i]);
          tmp_d_cov[(dim * (dim + 1) / 2) + i].Copy(d_sigma(j)[i]);
        }

        tmpVal = phi(mu(k), mu(j), sum_covar,
                     tmp_d_cov, &dp_d_mu[j][k], &tmp_dp_d_sigma);

        phi_mu.set(j, k, tmpVal);
        phi_mu.set(k, j, tmpVal);

        // The derivative with respect to mu_j is the negative of the
        // derivative with respect to mu_k.
        la::ScaleInit(-1.0, dp_d_mu[j][k], &dp_d_mu[k][j]);

        // Split the stacked covariance derivatives back into the two halves.
        double *tmp_dp, *tmp_dp_1, *tmp_dp_2;
        tmp_dp = tmp_dp_d_sigma.ptr();
        tmp_dp_1 = (double*) malloc((tmp_dp_d_sigma.length() / 2) * sizeof(double));
        tmp_dp_2 = (double*) malloc((tmp_dp_d_sigma.length() / 2) * sizeof(double));
        for (index_t i = 0; i < (tmp_dp_d_sigma.length() / 2); i++) {
          tmp_dp_1[i] = tmp_dp[i];
          tmp_dp_2[i] = tmp_dp[(dim * (dim + 1) / 2) + i];
        }
        dp_d_sigma[j][k].Copy(tmp_dp_1, (dim * (dim + 1) / 2));
        dp_d_sigma[k][j].Copy(tmp_dp_2, (dim * (dim + 1) / 2));
        free(tmp_dp_1);
        free(tmp_dp_2);
      }
      else {
        tmpVal = phi(mu(k), mu(j), sum_covar);
        phi_mu.set(j, k, tmpVal);
        phi_mu.set(k, j, tmpVal);
      }
    }
  }

  // Diagonal entries: phi(mu_k; mu_k, 2 * Sigma_k).
  for (index_t k = 0; k < num_gauss; k++) {
    la::ScaleOverwrite(2, sigma(k), &sum_covar);

    if (g_reg != NULL) {
      Vector junk;
      tmpVal = phi(mu(k), mu(k), sum_covar,
                   d_sigma(k), &junk, &dp_d_sigma[k][k]);
      phi_mu.set(k, k, tmpVal);
      // The derivative with respect to mu_k vanishes on the diagonal.
      dp_d_mu[k][k].Init(dim);
      dp_d_mu[k][k].SetZero();
    }
    else {
      tmpVal = phi(mu(k), mu(k), sum_covar);
      phi_mu.set(k, k, tmpVal);
    }
  }

  // The regularization value is omega^T Phi omega.
  la::MulInit(x, phi_mu, &y);
  reg = la::Dot(x, y);

  if (g_reg != NULL) {
    // Gradient with respect to the weight parameters - a vector of size K - 1.
    la::ScaleInit(2.0, y, &df_dw);
    la::MulInit(d_omega(), df_dw, &g_omega);

    // Gradient with respect to the means - K vectors of size D.
    for (index_t k = 0; k < num_gauss; k++) {
      g_mu[k].Init(dim);
      g_mu[k].SetZero();
      for (index_t j = 0; j < num_gauss; j++)
        la::AddExpert(x.get(j), dp_d_mu[j][k], &g_mu[k]);
      la::Scale((2.0 * x.get(k)), &g_mu[k]);
    }

    // Gradient with respect to the covariances - K vectors of size D(D+1)/2.
    for (index_t k = 0; k < num_gauss; k++) {
      g_sigma[k].Init((dim * (dim + 1)) / 2);
      g_sigma[k].SetZero();
      for (index_t j = 0; j < num_gauss; j++)
        la::AddExpert(x.get(j), dp_d_sigma[j][k], &g_sigma[k]);
      la::Scale((2.0 * x.get(k)), &g_sigma[k]);
    }

    // Pack everything into a single gradient vector of size
    // K(D+1)(D+2)/2 - 1: weights first, then means, then covariances.
    double *tmp_g_reg;
    tmp_g_reg = (double*) malloc(((num_gauss * (dim + 1) * (dim + 2) / 2) - 1)
                                 * sizeof(double));
    index_t j = 0;
    for (index_t k = 0; k < g_omega.length(); k++)
      tmp_g_reg[k] = g_omega.get(k);
    j = g_omega.length();
    for (index_t k = 0; k < num_gauss; k++) {
      for (index_t i = 0; i < dim; i++) {
        tmp_g_reg[j + k * dim + i] = g_mu[k].get(i);
      }
      for (index_t i = 0; i < (dim * (dim + 1) / 2); i++) {
        tmp_g_reg[j + num_gauss * dim
                  + k * (dim * (dim + 1) / 2)
                  + i] = g_sigma[k].get(i);
      }
    }
    g_reg->Copy(tmp_g_reg, ((num_gauss * (dim + 1) * (dim + 2) / 2) - 1));
    free(tmp_g_reg);
  }

  return reg;
}

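/*
 * Note: the value returned by RegularizationTerm_() corresponds to
 *
 * \f[
 *   \int \hat{f}_{\theta}(x)^2 \, dx
 *     = \sum_{j=1}^{K} \sum_{k=1}^{K} \omega_j \, \omega_k \,
 *       \phi(\mu_j;\, \mu_k,\, \Sigma_j + \Sigma_k),
 * \f]
 *
 * i.e. omega^T Phi omega with the symmetric matrix Phi built above.  The
 * packed gradient (K - 1 weight parameters, then K * D mean entries, then
 * K * D(D+1)/2 covariance entries) is assumed to follow the same layout as
 * the parameter vector consumed by the optimizer that calls L2Error().
 */
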
// Computes the goodness-of-fit term, the mixture density summed over all
// data points.  If 'g_fit' is non-NULL, the gradient with respect to all
// parameters is also returned in it.
long double MoGL2E::GoodnessOfFitTerm_(const Matrix& data, Vector *g_fit) {
  long double fit;
  Matrix phi_x;
  Vector weights, x, y, identity_vector;
  index_t num_gauss, num_points, dim;
  long double tmpVal;
  Vector g_omega, tmp_g_omega;
  ArrayList<Vector> g_mu, g_sigma;

  num_gauss = number_of_gaussians();
  num_points = data.n_cols();
  dim = data.n_rows();
  phi_x.Init(num_gauss, num_points);
  weights.Copy(omega());
  x.Init(data.n_rows());
  // Vector of ones, used to sum over the data points.
  identity_vector.Init(num_points);
  identity_vector.SetAll(1);

  if (g_fit != NULL) {
    g_mu.Init(num_gauss);
    g_sigma.Init(num_gauss);
  }
  else {
    // No gradient requested; initialize everything to empty.
    g_mu.Init(0);
    g_sigma.Init(0);
    g_omega.Init(0);
    tmp_g_omega.Init(0);
  }

  for (index_t k = 0; k < num_gauss; k++) {
    if (g_fit != NULL) {
      g_mu[k].Init(dim);
      g_mu[k].SetZero();
      g_sigma[k].Init((dim * (dim + 1) / 2));
      g_sigma[k].SetZero();
    }
    // Evaluate phi(x_i; mu_k, Sigma_k) for every data point, accumulating
    // the mean and covariance derivatives when a gradient is requested.
    for (index_t i = 0; i < num_points; i++) {
      if (g_fit != NULL) {
        Vector tmp_g_mu, tmp_g_sigma;
        x.CopyValues(data.GetColumnPtr(i));
        tmpVal = phi(x, mu(k), sigma(k),
                     d_sigma(k), &tmp_g_mu, &tmp_g_sigma);
        phi_x.set(k, i, tmpVal);
        la::AddTo(tmp_g_mu, &g_mu[k]);
        la::AddTo(tmp_g_sigma, &g_sigma[k]);
      }
      else {
        x.CopyValues(data.GetColumnPtr(i));
        phi_x.set(k, i, phi(x, mu(k), sigma(k)));
      }
    }
    if (g_fit != NULL) {
      la::Scale(weights.get(k), &g_mu[k]);
      la::Scale(weights.get(k), &g_sigma[k]);
    }
  }

  // fit = sum_i sum_k omega_k phi(x_i; mu_k, Sigma_k).
  la::MulInit(weights, phi_x, &y);
  fit = la::Dot(y, identity_vector);

  if (g_fit != NULL) {
    // Gradient with respect to the weight parameters.
    la::MulInit(phi_x, identity_vector, &tmp_g_omega);
    la::MulInit(d_omega(), tmp_g_omega, &g_omega);

    // Pack everything into a single gradient vector of size
    // K(D+1)(D+2)/2 - 1, in the same layout as RegularizationTerm_().
    double *tmp_g_fit;
    tmp_g_fit = (double*) malloc(((num_gauss * (dim + 1) * (dim + 2) / 2) - 1)
                                 * sizeof(double));
    index_t j = 0;
    for (index_t k = 0; k < g_omega.length(); k++)
      tmp_g_fit[k] = g_omega.get(k);
    j = g_omega.length();
    for (index_t k = 0; k < num_gauss; k++) {
      for (index_t i = 0; i < dim; i++) {
        tmp_g_fit[j + k * dim + i] = g_mu[k].get(i);
      }
      for (index_t i = 0; i < (dim * (dim + 1) / 2); i++) {
        tmp_g_fit[j + num_gauss * dim
                  + k * (dim * (dim + 1) / 2)
                  + i] = g_sigma[k].get(i);
      }
    }
    g_fit->Copy(tmp_g_fit, ((num_gauss * (dim + 1) * (dim + 2) / 2) - 1));
    free(tmp_g_fit);
  }
  return fit;
}

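/*
 * Note: the quantity returned by GoodnessOfFitTerm_() is
 *
 * \f[
 *   \sum_{i=1}^{n} \hat{f}_{\theta}(x_i)
 *     = \sum_{i=1}^{n} \sum_{k=1}^{K} \omega_k \,
 *       \phi(x_i;\, \mu_k,\, \Sigma_k),
 * \f]
 *
 * the mixture density summed over the data; L2Error() scales it by -2/n
 * before adding it to the regularization term.
 */
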
// Generates 'number_of_points' random starting points (packed parameter
// vectors) for the optimizer: weight parameters drawn from [-10, 10], means
// copied from randomly chosen data points, and covariance-factor entries
// drawn from [0, 5].
void MoGL2E::MultiplePointsGenerator(double **points,
                                     index_t number_of_points,
                                     const Matrix& d,
                                     index_t number_of_components) {

  index_t dim, n, i, j, x;

  dim = d.n_rows();
  n = d.n_cols();

  // The K - 1 weight parameters.
  for (i = 0; i < number_of_points; i++) {
    for (j = 0; j < number_of_components - 1; j++) {
      points[i][j] = (rand() % 20001) / 1000.0 - 10;
    }
  }

  // The K means, copied from randomly selected data points.
  for (i = 0; i < number_of_points; i++) {
    for (j = 0; j < number_of_components; j++) {
      Vector tmp_mu;
      tmp_mu.Init(dim);
      tmp_mu.CopyValues(d.GetColumnPtr((rand() % n)));
      for (x = 0; x < dim; x++)
        points[i][number_of_components - 1 + j * dim + x] = tmp_mu.get(x);
    }
  }

  // The K lower-triangular covariance factors, D(D+1)/2 entries each.
  for (i = 0; i < number_of_points; i++)
    for (j = 0; j < number_of_components; j++)
      for (x = 0; x < (dim * (dim + 1) / 2); x++)
        points[i][(number_of_components * (dim + 1) - 1)
                  + (j * (dim * (dim + 1) / 2)) + x] = (rand() % 501) / 100.0;

  return;
}

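/*
 * Note: each generated point uses the packed parameter layout assumed
 * throughout this file: K - 1 weight parameters, followed by K * D mean
 * entries, followed by K * D(D+1)/2 covariance-factor entries, for a total
 * of K(D+1)(D+2)/2 - 1 values; the caller is assumed to have allocated each
 * row of 'points' with at least that many doubles.
 */
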
// Generates a single starting point 'theta' from a k-means clustering of the
// data: random weight parameters, the k-means means, and noise-perturbed
// Cholesky factors of the k-means covariances.
void MoGL2E::InitialPointGenerator(double *theta, const Matrix& data,
                                   index_t k_comp) {

  ArrayList<Vector> means;
  ArrayList<Matrix> covars;
  Vector weights;
  double temp, noise;
  index_t dim;

  weights.Init(k_comp);
  means.Init(k_comp);
  covars.Init(k_comp);
  dim = data.n_rows();

  for (index_t i = 0; i < k_comp; i++) {
    means[i].Init(dim);
    covars[i].Init(dim, dim);
  }

  KMeans_(data, &means, &covars, &weights, k_comp);

  // Weight parameters: random values in [-5, 5).
  // (Note: 'temp' is computed here but not used below.)
  for (index_t k = 0; k < k_comp - 1; k++) {
    temp = weights[k] / weights[k_comp - 1];
    noise = (double) (rand() % 10000) / (double) 1000;
    theta[k] = noise - 5;
  }

  // Means from k-means, followed by the lower-triangular Cholesky factor of
  // each k-means covariance, perturbed by noise in [0, 5].
  for (index_t k = 0; k < k_comp; k++) {
    for (index_t j = 0; j < dim; j++)
      theta[k_comp - 1 + k * dim + j] = means[k].get(j);

    Matrix U, U_tran;
    la::CholeskyInit(covars[k], &U);
    la::TransposeInit(U, &U_tran);
    for (index_t j = 0; j < dim; j++) {
      for (index_t i = 0; i < j + 1; i++) {
        noise = (rand() % 501) / 100.0;
        theta[k_comp - 1 + k_comp * dim
              + k * dim * (dim + 1) / 2
              + j * (j + 1) / 2 + i] = U_tran.get(j, i) + noise;
      }
    }
  }
  return;
}

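/*
 * Usage sketch (an illustration, not code from this library): the caller is
 * expected to allocate 'theta' with K(D+1)(D+2)/2 - 1 entries, matching the
 * packed layout noted above.  The matrix 'data' and the component count
 * below are assumed to be supplied by the caller, and the qualified call is
 * an assumption; whether it goes through an object or is static depends on
 * the declaration in mog_l2e.h.
 *
 * @code
 *   index_t k_comp = 3;                                 // illustrative choice
 *   index_t dim = data.n_rows();
 *   index_t theta_len = k_comp * (dim + 1) * (dim + 2) / 2 - 1;
 *   double *theta = (double*) malloc(theta_len * sizeof(double));
 *   MoGL2E::InitialPointGenerator(theta, data, k_comp); // starting point
 *   // ... hand 'theta' to the optimizer as its initial iterate ...
 *   free(theta);
 * @endcode
 */
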
// Simple k-means clustering used to seed the mixture: runs 5 random restarts
// and keeps the best, returning the cluster means, spherical covariances and
// uniform weights.
void MoGL2E::KMeans_(const Matrix& data, ArrayList<Vector> *means,
                     ArrayList<Matrix> *covars, Vector *weights,
                     index_t value_of_k) {

  ArrayList<Vector> mu, mu_old;
  double* tmpssq;
  double* sig;
  double* sig_best;
  index_t *y;
  Vector x, diff;
  Matrix ssq;
  index_t i, j, k, n, t, dim;
  double score, score_old, sum;

  n = data.n_cols();
  dim = data.n_rows();
  mu.Init(value_of_k);
  mu_old.Init(value_of_k);
  tmpssq = (double*) malloc(value_of_k * sizeof(double));
  sig = (double*) malloc(value_of_k * sizeof(double));
  sig_best = (double*) malloc(value_of_k * sizeof(double));
  ssq.Init(n, value_of_k);

  for (i = 0; i < value_of_k; i++) {
    mu[i].Init(dim);
    mu_old[i].Init(dim);
  }
  x.Init(dim);
  y = (index_t*) malloc(n * sizeof(index_t));
  diff.Init(dim);

  score_old = 999999;  // large sentinel; any real score will beat this

  // 5 random restarts of k-means; the best restart is kept.
  for (i = 0; i < 5; i++) {
    // Seed the centers with randomly chosen (strictly increasing) data
    // columns and compute the squared distance of every point to each seed.
    t = -1;
    for (k = 0; k < value_of_k; k++) {
      t = (t + 1 + (rand() % ((n - 1 - (value_of_k - k)) - (t + 1))));
      mu[k].CopyValues(data.GetColumnPtr(t));
      for (j = 0; j < n; j++) {
        x.CopyValues(data.GetColumnPtr(j));
        la::SubOverwrite(mu[k], x, &diff);
        ssq.set(j, k, la::Dot(diff, diff));
      }
    }
    // Assign each point to its nearest seed.
    min_element(ssq, y);

    // Lloyd iterations: recompute means and assignments until the means
    // stop moving.
    do {
      for (k = 0; k < value_of_k; k++) {
        mu_old[k].CopyValues(mu[k]);
      }

      for (k = 0; k < value_of_k; k++) {
        index_t p = 0;
        mu[k].SetZero();
        for (j = 0; j < n; j++) {
          x.CopyValues(data.GetColumnPtr(j));
          if (y[j] == k) {
            la::AddTo(x, &mu[k]);
            p++;
          }
        }

        // Empty clusters keep a zero mean; otherwise average the members.
        if (p != 0) {
          la::Scale(1.0 / p, &mu[k]);
        }
        for (j = 0; j < n; j++) {
          x.CopyValues(data.GetColumnPtr(j));
          la::SubOverwrite(mu[k], x, &diff);
          ssq.set(j, k, la::Dot(diff, diff));
        }
      }
      min_element(ssq, y);

      // Stop when no mean has moved since the previous iteration.
      sum = 0;
      for (k = 0; k < value_of_k; k++) {
        la::SubOverwrite(mu[k], mu_old[k], &diff);
        sum += la::Dot(diff, diff);
      }
    } while (sum != 0);

    // Per-cluster root-mean-square distance (used as the spherical sigma).
    for (k = 0; k < value_of_k; k++) {
      index_t p = 0;
      tmpssq[k] = 0;
      for (j = 0; j < n; j++) {
        if (y[j] == k) {
          tmpssq[k] += ssq.get(j, k);
          p++;
        }
      }
      sig[k] = sqrt(tmpssq[k] / p);
    }

    // Score of this restart: average within-cluster squared distance.
    score = 0;
    for (k = 0; k < value_of_k; k++) {
      score += tmpssq[k];
    }
    score = score / n;

    // Keep the best restart seen so far.
    if (score < score_old) {
      score_old = score;
      for (k = 0; k < value_of_k; k++) {
        (*means)[k].CopyValues(mu[k]);
        sig_best[k] = sig[k];
      }
    }
  }

  // Spherical covariances: zero each matrix first so the off-diagonal
  // entries are well-defined, then put the best per-cluster sigma on the
  // diagonal.  Weights are uniform.
  for (k = 0; k < value_of_k; k++) {
    for (i = 0; i < dim; i++)
      for (j = 0; j < dim; j++)
        (*covars)[k].set(i, j, 0.0);
    x.SetAll(sig_best[k]);
    (*covars)[k].SetDiagonal(x);
  }
  weights->SetAll(1.0 / value_of_k);

  free(tmpssq);
  free(sig);
  free(sig_best);
  free(y);
  return;
}

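/*
 * Note: each restart above is plain Lloyd-style k-means, minimizing
 *
 * \f[
 *   \sum_{k=1}^{K} \sum_{j \,:\, y_j = k} \| x_j - \mu_k \|^2,
 * \f]
 *
 * and the restart with the lowest average within-cluster squared distance
 * is kept.  The returned covariances are spherical (the per-cluster
 * root-mean-square distance on the diagonal) and the weights are uniform,
 * 1/K.
 */
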
// For every row of 'element', stores in 'indices' the column index of the
// smallest entry in that row (used to assign each point to its nearest
// cluster center).
void MoGL2E::min_element(Matrix& element, index_t *indices) {

  index_t last = element.n_cols() - 1;
  index_t first, lowest;
  index_t i;

  for (i = 0; i < element.n_rows(); i++) {
    first = lowest = 0;
    while (++first <= last) {
      if (element.get(i, first) < element.get(i, lowest)) {
        lowest = first;
      }
    }
    indices[i] = lowest;
  }
  return;
}
