00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00041 #ifndef MOG_L2E_H
00042 #define MOG_L2E_H
00043
00044 #include <fastlib/fastlib.h>
00045
00046 const fx_entry_doc mog_l2e_entries[] = {
00047 {"K", FX_PARAM, FX_INT, NULL,
00048 " The number of Gaussians in the mixture model."
00049 " (defaults to 1)\n"},
00050 {"D", FX_RESERVED, FX_INT, NULL,
00051 " The number of dimensions of the data on which the"
00052 " the mixture model is to be fit.\n"},
00053 FX_ENTRY_DOC_DONE
00054 };
00055
00056 const fx_module_doc mog_l2e_doc = {
00057 mog_l2e_entries, NULL,
00058 " This program defines a Gaussian mixture model"
00059 " and calculates the L2 error for the present"
00060 " parameter setting to be used by an optimizer.\n"
00061 };
00062
00090 class MoGL2E {
00091
00092 private:
00093
00094
00095 ArrayList<Vector> mu_;
00096 ArrayList<Matrix> sigma_;
00097 Vector omega_;
00098 index_t number_of_gaussians_;
00099 index_t dimension_;
00100
00101
00102
00103 Matrix d_omega_;
00104 ArrayList<ArrayList<Matrix> > d_sigma_;
00105
00106 public:
00107
00108 MoGL2E() {
00109 mu_.Init(0);
00110 sigma_.Init(0);
00111 d_sigma_.Init(0);
00112 d_omega_.Init(0, 0);
00113 }
00114
00115 ~MoGL2E() {
00116 }
00117
00118 void Init(index_t num_gauss, index_t dimension) {
00119
00120
00121 mu_.Clear();
00122 sigma_.Clear();
00123 d_sigma_.Clear();
00124
00125 number_of_gaussians_ = num_gauss;
00126 dimension_ = dimension;
00127
00128
00129 mu_.Resize(number_of_gaussians_);
00130 sigma_.Resize(number_of_gaussians_);
00131 }
00132
00133 void Resize_d_sigma_() {
00134 d_sigma_.Resize(number_of_gaussians());
00135 for(index_t i =0; i < number_of_gaussians(); i++) {
00136 d_sigma_[i].Init(dimension()*(dimension()+1)/2);
00137 }
00138 }
00139
00156 void MakeModel(index_t num_mods, index_t dimension, double* theta) {
00157 double *temp_mu;
00158 Matrix lower_triangle_matrix, upper_triangle_matrix;
00159 double sum, s_min = 0.01;
00160
00161 Init(num_mods, dimension);
00162 temp_mu = (double*) malloc (dimension * sizeof(double)) ;
00163 lower_triangle_matrix.Init(dimension, dimension);
00164 upper_triangle_matrix.Init(dimension, dimension);
00165
00166
00167 sum = 0;
00168 double *temp_array;
00169 temp_array = (double*) malloc (num_mods * sizeof(double));
00170 for(index_t i = 0; i < num_mods - 1; i++) {
00171 temp_array[i] = exp(theta[i]) ;
00172 sum += temp_array[i] ;
00173 }
00174 temp_array[num_mods - 1] = 1 ;
00175 ++sum ;
00176 la::Scale(num_mods, (1.0 / sum), temp_array);
00177 set_omega(dimension, temp_array);
00178
00179
00180 for(index_t k = 0; k < num_mods; k++) {
00181 for(index_t j = 0; j < dimension; j++) {
00182 temp_mu[j] = theta[num_mods + k*dimension + j - 1];
00183 }
00184 set_mu(k, dimension, temp_mu);
00185 }
00186
00187
00188
00189
00190 Matrix sigma_temp;
00191 sigma_temp.Init(dimension, dimension);
00192 for(index_t k = 0; k < num_mods; k++) {
00193 lower_triangle_matrix.SetAll(0.0);
00194 for(index_t j = 0; j < dimension; j++) {
00195 for(index_t i = 0; i < j; i++) {
00196 lower_triangle_matrix.set(j, i,
00197 theta[(num_mods - 1)
00198 + num_mods*dimension
00199 + k*(dimension*(dimension + 1) / 2)
00200 + (j*(j + 1) / 2) + i]);
00201 }
00202 lower_triangle_matrix.set(j, j,
00203 theta[(num_mods - 1)
00204 + num_mods*dimension
00205 + k*(dimension*(dimension + 1) / 2)
00206 + (j*(j + 1) / 2) + j] + s_min);
00207
00208 }
00209 la::TransposeOverwrite(lower_triangle_matrix, &upper_triangle_matrix);
00210 la::MulOverwrite(lower_triangle_matrix, upper_triangle_matrix, &sigma_temp);
00211 set_sigma(k, sigma_temp);
00212 }
00213 }
00214
00215 void MakeModel(datanode *mog_l2e_module, double* theta) {
00216 index_t num_gauss = fx_param_int_req(mog_l2e_module, "K");
00217 index_t dimension = fx_param_int_req(mog_l2e_module, "D");
00218 MakeModel(num_gauss, dimension, theta);
00219 }
00220
00237 void MakeModelWithGradients(index_t num_mods, index_t dimension, double* theta) {
00238 double *temp_mu;
00239 Matrix lower_triangle_matrix, upper_triangle_matrix;
00240 double sum, s_min = 0.01;
00241
00242 Init(num_mods, dimension);
00243 temp_mu = (double*) malloc (dimension * sizeof(double));
00244 lower_triangle_matrix.Init(dimension, dimension);
00245 upper_triangle_matrix.Init(dimension, dimension);
00246
00247
00248 sum = 0;
00249 double *temp_array;
00250 temp_array = (double*) malloc (num_mods * sizeof(double));
00251 for(index_t i = 0; i < num_mods - 1; i++) {
00252 temp_array[i] = exp(theta[i]) ;
00253 sum += temp_array[i] ;
00254 }
00255 temp_array[num_mods - 1] = 1 ;
00256 ++sum ;
00257 la::Scale(num_mods, (1.0 / sum), temp_array);
00258 set_omega(num_mods, temp_array);
00259
00260
00261 Matrix d_omega_temp;
00262 d_omega_temp.Init(num_mods - 1, num_mods);
00263 d_omega_temp.SetAll(0.0);
00264 for(index_t i = 0; i < num_mods - 1; i++) {
00265 for(index_t j = 0; j < i; j++) {
00266 d_omega_temp.set(i,j,-(omega(i)*omega(j)));
00267 d_omega_temp.set(j,i,-(omega(i)*omega(j)));
00268 }
00269 d_omega_temp.set(i,i,omega(i)*(1-omega(i)));
00270 }
00271 for(index_t i = 0; i < num_mods - 1; i++) {
00272 d_omega_temp.set(i, num_mods - 1, -(omega(i)*omega(num_mods - 1)));
00273 }
00274 set_d_omega(d_omega_temp);
00275
00276
00277 for(index_t k = 0; k < num_mods; k++) {
00278 for(index_t j = 0; j < dimension; j++) {
00279 temp_mu[j] = theta[num_mods + k*dimension + j - 1];
00280 }
00281 set_mu(k, dimension, temp_mu);
00282 }
00283
00284
00285
00286
00287
00288
00289
00290
00291 Matrix d_sigma_temp;
00292 d_sigma_temp.Init(dimension, dimension);
00293 Resize_d_sigma_();
00294
00295
00296 Matrix sigma_temp;
00297 sigma_temp.Init(dimension, dimension);
00298 for(index_t k = 0; k < num_mods; k++) {
00299 lower_triangle_matrix.SetAll(0.0);
00300 for(index_t j = 0; j < dimension; j++) {
00301 for(index_t i = 0; i < j; i++) {
00302 lower_triangle_matrix.set( j, i,
00303 theta[(num_mods - 1)
00304 + num_mods*dimension
00305 + k*(dimension*(dimension + 1) / 2)
00306 + (j*(j + 1) / 2) + i]) ;
00307 }
00308 lower_triangle_matrix.set(j, j,
00309 theta[(num_mods - 1)
00310 + num_mods*dimension
00311 + k*(dimension*(dimension + 1) / 2)
00312 + (j*(j + 1) / 2) + j] + s_min);
00313 }
00314 la::TransposeOverwrite(lower_triangle_matrix, &upper_triangle_matrix);
00315 la::MulOverwrite(lower_triangle_matrix, upper_triangle_matrix, &sigma_temp);
00316 set_sigma(k, sigma_temp);
00317
00318
00319 for(index_t i = 0; i < dimension; i++){
00320 for(index_t in = 0; in < i+1; in++){
00321 Matrix d_sigma_d_r,d_sigma_d_r_t,temp_matrix_1,temp_matrix_2;
00322 d_sigma_d_r.Init(dimension, dimension);
00323 d_sigma_d_r_t.Init(dimension, dimension);
00324 d_sigma_d_r.SetAll(0.0);
00325 d_sigma_d_r_t.SetAll(0.0);
00326 d_sigma_d_r.set(i,in,1.0);
00327 d_sigma_d_r_t.set(in,i,1.0);
00328
00329 la::MulInit(d_sigma_d_r,upper_triangle_matrix,&temp_matrix_1);
00330 la::MulInit(lower_triangle_matrix,d_sigma_d_r_t,&temp_matrix_2);
00331 la::AddOverwrite(temp_matrix_1,temp_matrix_2,&d_sigma_temp);
00332 set_d_sigma(k, (i*(i+1)/2)+in, d_sigma_temp);
00333 }
00334 }
00335 }
00336 }
00337
00339 ArrayList<Vector>& mu() {
00340 return mu_;
00341 }
00342
00343 ArrayList<Matrix>& sigma() {
00344 return sigma_;
00345 }
00346
00347 Vector& omega() {
00348 return omega_;
00349 }
00350
00351 index_t number_of_gaussians() {
00352 return number_of_gaussians_;
00353 }
00354
00355 index_t dimension() {
00356 return dimension_;
00357 }
00358
00359 Vector& mu(index_t i) {
00360 return mu_[i] ;
00361 }
00362
00363 Matrix& sigma(index_t i) {
00364 return sigma_[i];
00365 }
00366
00367 double omega(index_t i) {
00368 return omega_.get(i);
00369 }
00370
00371 Matrix& d_omega(){
00372 return d_omega_;
00373 }
00374
00375 ArrayList<ArrayList<Matrix> >& d_sigma(){
00376 return d_sigma_;
00377 }
00378
00379 ArrayList<Matrix>& d_sigma(index_t i){
00380 return d_sigma_[i];
00381 }
00382
00384
00385 void set_mu(index_t i, Vector& mu) {
00386 DEBUG_ASSERT(i < number_of_gaussians());
00387 DEBUG_ASSERT(mu.length() == dimension());
00388 mu_[i].Copy(mu);
00389 return;
00390 }
00391
00392 void set_mu(index_t i, index_t length, const double *mu) {
00393 DEBUG_ASSERT(i < number_of_gaussians());
00394 DEBUG_ASSERT(length == dimension());
00395 mu_[i].Copy(mu, length);
00396 return;
00397 }
00398
00399 void set_sigma(index_t i, Matrix& sigma) {
00400 DEBUG_ASSERT(i < number_of_gaussians());
00401 DEBUG_ASSERT(sigma.n_rows() == dimension());
00402 DEBUG_ASSERT(sigma.n_cols() == dimension());
00403 sigma_[i].Copy(sigma);
00404 return;
00405 }
00406
00407 void set_omega(Vector& omega) {
00408 DEBUG_ASSERT(omega.length() == number_of_gaussians());
00409 omega_.Copy(omega);
00410 return;
00411 }
00412
00413 void set_omega(index_t length, const double *omega) {
00414 DEBUG_ASSERT(length == number_of_gaussians());
00415 omega_.Copy(omega, length);
00416 return;
00417 }
00418
00419 void set_d_omega(Matrix& d_omega) {
00420 d_omega_.Destruct();
00421 d_omega_.Copy(d_omega);
00422 return;
00423 }
00424
00425 void set_d_sigma(index_t i, index_t j, Matrix& d_sigma_i_j) {
00426 d_sigma_[i][j].Copy(d_sigma_i_j);
00427 return;
00428 }
00429
00439 void OutputResults(ArrayList<double> *results) {
00440
00441
00442 (*results).Init(number_of_gaussians_ * (1 + dimension_*(1 + dimension_)));
00443
00444
00445 for (index_t i = 0; i < number_of_gaussians_; i++) {
00446 (*results)[i] = omega(i);
00447 for (index_t j = 0; j < dimension_; j++) {
00448 (*results)[number_of_gaussians_ + i*dimension_ + j] = mu(i).get(j);
00449 for (index_t k = 0; k < dimension_; k++) {
00450 (*results)[number_of_gaussians_*(1 + dimension_)
00451 + i*dimension_*dimension_ + j*dimension_
00452 + k] = sigma(i).get(j, k);
00453 }
00454 }
00455 }
00456 }
00457
00465 void Display(){
00466
00467
00468 printf(" Omega : [ ");
00469 for (index_t i = 0; i < number_of_gaussians_; i++) {
00470 printf("%lf ", omega(i));
00471 }
00472 printf("]\n");
00473 printf(" Mu : \n[");
00474 for (index_t i = 0; i < number_of_gaussians_; i++) {
00475 for (index_t j = 0; j < dimension_ ; j++) {
00476 printf("%lf ", mu(i).get(j));
00477 }
00478 printf(";");
00479 if (i == (number_of_gaussians_ - 1)) {
00480 printf("\b]\n");
00481 }
00482 }
00483 printf("Sigma : ");
00484 for (index_t i = 0; i < number_of_gaussians_; i++) {
00485 printf("\n[");
00486 for (index_t j = 0; j < dimension_ ; j++) {
00487 for(index_t k = 0; k < dimension_ ; k++) {
00488 printf("%lf ",sigma(i).get(j, k));
00489 }
00490 printf(";");
00491 }
00492 printf("\b]");
00493 }
00494 printf("\n");
00495 }
00496
00497
00516 long double L2Error(const Matrix&, Vector* = NULL);
00517
00526 long double RegularizationTerm_(Vector* = NULL);
00527
00536 long double GoodnessOfFitTerm_(const Matrix&, Vector* = NULL);
00537
00551 static void MultiplePointsGenerator(double**, index_t,
00552 const Matrix&, index_t);
00553
00568 static void InitialPointGenerator(double*, const Matrix&, index_t);
00569
00592 static void KMeans_(const Matrix&, ArrayList<Vector>*,
00593 ArrayList<Matrix>*, Vector*, index_t);
00594
00599 static void min_element(Matrix&, index_t*);
00600
00609 static long double L2ErrorForOpt(Vector& params,
00610 const Matrix& data,
00611 Vector *gradient) {
00612
00613 MoGL2E model;
00614 index_t dimension = data.n_rows();
00615 index_t num_gauss;
00616
00617 num_gauss = (params.length() + 1)*2 / ((dimension+1)*(dimension+2));
00618
00619
00620 if (gradient != NULL) {
00621 model.MakeModelWithGradients(num_gauss, dimension, params.ptr());
00622 return model.L2Error(data, gradient);
00623 }
00624 else {
00625 model.MakeModel(num_gauss, dimension, params.ptr());
00626 return model.L2Error(data);
00627 }
00628 }
00629
00636 static long double L2ErrorForOpt(Vector& params, const Matrix& data) {
00637 return L2ErrorForOpt(params, data, NULL);
00638 }
00639
00640 };
00641
00642 #endif