00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00041 #include "mog_l2e.h"
00042 #include "phi.h"
00043
00044 long double MoGL2E::L2Error(const Matrix& data, Vector *gradients) {
00045
00046 long double reg, fit, l2e;
00047 index_t number_of_points = data.n_cols();
00048
00049
00050 if (gradients != NULL) {
00051 Vector g_reg, g_fit;
00052 reg = RegularizationTerm_(&g_reg);
00053 fit = GoodnessOfFitTerm_(data, &g_fit);
00054 DEBUG_ASSERT(gradients->length() == (number_of_gaussians()*
00055 (dimension()+1)*(dimension()+2)/2 - 1)
00056
00057 );
00058 gradients->SetAll(0.0);
00059 la::AddTo(g_reg, gradients);
00060 la::AddExpert((-2.0/number_of_points), g_fit, gradients);
00061 }
00062 else {
00063 reg = RegularizationTerm_();
00064 fit = GoodnessOfFitTerm_(data);
00065 }
00066 l2e = reg - 2*fit / number_of_points;
00067 return l2e;
00068 }
00069
00070 long double MoGL2E::RegularizationTerm_(Vector *g_reg){
00071 Matrix phi_mu, sum_covar;
00072 Vector x, y;
00073 long double reg, tmpVal;
00074 index_t num_gauss, dim;
00075
00076 Vector df_dw, g_omega;
00077 ArrayList<Vector> g_mu, g_sigma;
00078 ArrayList<ArrayList<Vector> > dp_d_mu, dp_d_sigma;
00079
00080
00081 num_gauss = number_of_gaussians();
00082 dim = dimension();
00083
00084 phi_mu.Init(num_gauss, num_gauss);
00085 sum_covar.Init(dim, dim);
00086 x.Copy(omega());
00087
00088 if (g_reg != NULL) {
00089 g_mu.Init(num_gauss);
00090 g_sigma.Init(num_gauss);
00091 dp_d_mu.Init(num_gauss);
00092 dp_d_sigma.Init(num_gauss);
00093 for(index_t k = 0; k < num_gauss; k++){
00094 dp_d_mu[k].Init(num_gauss);
00095 dp_d_sigma[k].Init(num_gauss);
00096 }
00097 }
00098 else {
00099 g_mu.Init(0);
00100 g_sigma.Init(0);
00101 dp_d_mu.Init(0);
00102 dp_d_sigma.Init(0);
00103 df_dw.Init(0);
00104 g_omega.Init(0);
00105 }
00106
00107 for(index_t k = 1; k < num_gauss; k++) {
00108 for(index_t j = 0; j < k; j++) {
00109 la::AddOverwrite(sigma(k), sigma(j), &sum_covar);
00110
00111 if (g_reg != NULL) {
00112 ArrayList<Matrix> tmp_d_cov;
00113 Vector tmp_dp_d_sigma;
00114
00115 tmp_d_cov.Init(dim*(dim+1));
00116 for(index_t i = 0; i < (dim*(dim + 1) / 2); i++){
00117 tmp_d_cov[i].Copy(d_sigma(k)[i]);
00118 tmp_d_cov[(dim*(dim+1)/2)+i].Copy(d_sigma(j)[i]);
00119 }
00120
00121 tmpVal = phi(mu(k),mu(j),sum_covar,
00122 tmp_d_cov,&dp_d_mu[j][k],&tmp_dp_d_sigma);
00123
00124 phi_mu.set(j, k, tmpVal);
00125 phi_mu.set(k, j, tmpVal);
00126
00127 la::ScaleInit(-1.0, dp_d_mu[j][k], &dp_d_mu[k][j]);
00128
00129 double *tmp_dp, *tmp_dp_1, *tmp_dp_2;
00130 tmp_dp = tmp_dp_d_sigma.ptr();
00131 tmp_dp_1 = (double*)malloc((tmp_dp_d_sigma.length()/2) * sizeof(double));
00132 tmp_dp_2 = (double*)malloc((tmp_dp_d_sigma.length()/2) * sizeof(double));
00133 for(index_t i = 0; i < (tmp_dp_d_sigma.length()/2); i++){
00134 tmp_dp_1[i] = tmp_dp[i];
00135 tmp_dp_2[i] = tmp_dp[(dim*(dim + 1) / 2) + i];
00136 }
00137 dp_d_sigma[j][k].Copy(tmp_dp_1, (dim*(dim + 1) / 2));
00138 dp_d_sigma[k][j].Copy(tmp_dp_2, (dim*(dim + 1) / 2));
00139 }
00140 else {
00141 tmpVal = phi(mu(k), mu(j), sum_covar);
00142 phi_mu.set(j, k, tmpVal);
00143 phi_mu.set(k, j, tmpVal);
00144 }
00145 }
00146 }
00147
00148 for(index_t k = 0; k < num_gauss; k++) {
00149 la::ScaleOverwrite(2, sigma(k), &sum_covar);
00150
00151 if (g_reg != NULL) {
00152 Vector junk;
00153 tmpVal = phi(mu(k), mu(k), sum_covar,
00154 d_sigma(k), &junk, &dp_d_sigma[k][k]);
00155 phi_mu.set(k, k, tmpVal);
00156 dp_d_mu[k][k].Init(dim);
00157 dp_d_mu[k][k].SetZero();
00158 }
00159 else {
00160 tmpVal = phi(mu(k), mu(k), sum_covar);
00161 phi_mu.set(k, k, tmpVal);
00162
00163 }
00164 }
00165
00166
00167 la::MulInit( x, phi_mu, &y );
00168 reg = la::Dot( x, y );
00169
00170 if (g_reg != NULL) {
00171
00172 la::ScaleInit(2.0,y,&df_dw);
00173 la::MulInit(d_omega(),df_dw,&g_omega);
00174
00175
00176 for(index_t k = 0; k < num_gauss; k++){
00177 g_mu[k].Init(dim);
00178 g_mu[k].SetZero();
00179 for(index_t j = 0; j < num_gauss; j++)
00180 la::AddExpert(x.get(j), dp_d_mu[j][k], &g_mu[k]);
00181 la::Scale((2.0 * x.get(k)), &g_mu[k]);
00182 }
00183
00184
00185 for(index_t k = 0; k < num_gauss; k++){
00186 g_sigma[k].Init((dim*(dim + 1)) / 2);
00187 g_sigma[k].SetZero();
00188 for(index_t j = 0; j < num_gauss; j++)
00189 la::AddExpert(x.get(j), dp_d_sigma[j][k], &g_sigma[k]);
00190 la::Scale((2.0 * x.get(k)), &g_sigma[k]);
00191 }
00192
00193
00194 double *tmp_g_reg;
00195 tmp_g_reg = (double*)malloc(((num_gauss*(dim + 1)*(dim + 2) / 2) - 1)
00196 *sizeof(double));
00197 index_t j = 0;
00198 for(index_t k = 0; k < g_omega.length(); k++)
00199 tmp_g_reg[k] = g_omega.get(k);
00200 j = g_omega.length();
00201 for(index_t k = 0; k < num_gauss; k++){
00202 for(index_t i = 0; i < dim; i++){
00203 tmp_g_reg[j + k*(dim) + i] = g_mu[k].get(i);
00204 }
00205 for(index_t i = 0; i < (dim*(dim+1)/2); i++){
00206 tmp_g_reg[j + num_gauss*dim
00207 + k*(dim*(dim+1) / 2)
00208 + i] = g_sigma[k].get(i);
00209 }
00210 }
00211 g_reg->Copy(tmp_g_reg, ((num_gauss*(dim+1)*(dim+2) / 2) - 1));
00212 }
00213
00214 return reg;
00215 }
00216
00217 long double MoGL2E::GoodnessOfFitTerm_(const Matrix& data, Vector *g_fit) {
00218 long double fit;
00219 Matrix phi_x;
00220 Vector weights, x, y, identity_vector;
00221 index_t num_gauss, num_points, dim;
00222 long double tmpVal;
00223 Vector g_omega,tmp_g_omega;
00224 ArrayList<Vector> g_mu, g_sigma;
00225
00226 num_gauss = number_of_gaussians();
00227 num_points = data.n_cols();
00228 dim = data.n_rows();
00229 phi_x.Init(num_gauss, num_points);
00230 weights.Copy(omega());
00231 x.Init(data.n_rows());
00232 identity_vector.Init(num_points);
00233 identity_vector.SetAll(1);
00234
00235 if(g_fit != NULL) {
00236 g_mu.Init(num_gauss);
00237 g_sigma.Init(num_gauss);
00238 }
00239 else {
00240 g_mu.Init(0);
00241 g_sigma.Init(0);
00242 g_omega.Init(0);
00243 tmp_g_omega.Init(0);
00244 }
00245
00246 for(index_t k = 0; k < num_gauss; k++) {
00247 if (g_fit != NULL) {
00248 g_mu[k].Init(dim);
00249 g_mu[k].SetZero();
00250 g_sigma[k].Init((dim * (dim+1) / 2));
00251 g_sigma[k].SetZero();
00252 }
00253 for(index_t i = 0; i < num_points; i++) {
00254 if (g_fit != NULL) {
00255 Vector tmp_g_mu, tmp_g_sigma;
00256 x.CopyValues(data.GetColumnPtr(i));
00257 tmpVal = phi(x, mu(k), sigma(k),
00258 d_sigma(k), &tmp_g_mu, &tmp_g_sigma);
00259 phi_x.set(k, i, tmpVal);
00260 la::AddTo(tmp_g_mu, &g_mu[k]);
00261 la::AddTo(tmp_g_sigma, &g_sigma[k]);
00262 }
00263 else {
00264 x.CopyValues(data.GetColumnPtr(i));
00265 phi_x.set(k, i, phi(x, mu(k), sigma(k)));
00266 }
00267 }
00268 if (g_fit != NULL) {
00269 la::Scale(weights.get(k), &g_mu[k]);
00270 la::Scale(weights.get(k), &g_sigma[k]);
00271 }
00272 }
00273
00274 la::MulInit(weights, phi_x, &y);
00275 fit = la::Dot(y, identity_vector);
00276
00277 if (g_fit != NULL) {
00278
00279 la::MulInit(phi_x, identity_vector, &tmp_g_omega);
00280 la::MulInit(d_omega(), tmp_g_omega, &g_omega);
00281
00282
00283 double *tmp_g_fit;
00284 tmp_g_fit = (double*)malloc(((num_gauss * (dim+1)*(dim+2) / 2) - 1)
00285 *sizeof(double));
00286 index_t j = 0;
00287 for(index_t k = 0; k < g_omega.length(); k++)
00288 tmp_g_fit[k] = g_omega.get(k);
00289 j = g_omega.length();
00290 for(index_t k = 0; k < num_gauss; k++){
00291 for(index_t i = 0; i < dim; i++){
00292 tmp_g_fit[j + k*dim + i] = g_mu[k].get(i);
00293 }
00294 for(index_t i = 0; i < (dim * (dim+1) / 2); i++){
00295 tmp_g_fit[j + num_gauss*dim
00296 + k*(dim * (dim+1) / 2)
00297 + i] = g_sigma[k].get(i);
00298 }
00299 }
00300 g_fit->Copy(tmp_g_fit, ((num_gauss*(dim+1)*(dim+2) / 2) - 1));
00301 }
00302 return fit;
00303 }
00304
00305 void MoGL2E::MultiplePointsGenerator(double **points,
00306 index_t number_of_points,
00307 const Matrix& d,
00308 index_t number_of_components) {
00309
00310 index_t dim, n, i, j, x;
00311
00312 dim = d.n_rows();
00313 n = d.n_cols();
00314
00315 for( i = 0; i < number_of_points; i++) {
00316 for(j = 0; j < number_of_components - 1; j++) {
00317 points[i][j] = (rand() % 20001)/1000 - 10;
00318 }
00319 }
00320
00321 for(i = 0; i < number_of_points; i++){
00322 for(j = 0; j < number_of_components; j++){
00323 Vector tmp_mu;
00324 tmp_mu.Init(dim);
00325 tmp_mu.CopyValues(d.GetColumnPtr((rand() % n)));
00326 for(x = 0; x < dim; x++)
00327 points[i][number_of_components - 1 + j * dim + x] = tmp_mu.get(x);
00328 }
00329 }
00330
00331 for(i = 0; i < number_of_points; i++)
00332 for(j = 0; j < number_of_components; j++)
00333 for(x = 0 ; x < (dim * (dim + 1) / 2); x++)
00334 points[i][(number_of_components * (dim + 1) - 1)
00335 + (j * (dim * (dim + 1) / 2)) + x] = (rand() % 501)/100;
00336
00337 return;
00338 }
00339
00340 void MoGL2E::InitialPointGenerator(double *theta, const Matrix& data,
00341 index_t k_comp) {
00342
00343 ArrayList<Vector> means;
00344 ArrayList<Matrix> covars;
00345 Vector weights;
00346 double temp, noise;
00347 index_t dim;
00348
00349 weights.Init(k_comp);
00350 means.Init(k_comp);
00351 covars.Init(k_comp);
00352 dim = data.n_rows();
00353
00354 for (index_t i = 0; i < k_comp; i++) {
00355 means[i].Init(dim);
00356 covars[i].Init(dim, dim);
00357 }
00358
00359 KMeans_(data, &means, &covars, &weights, k_comp);
00360
00361 for(index_t k = 0; k < k_comp - 1; k++){
00362 temp = weights[k] / weights[k_comp - 1];
00363 noise = (double)(rand() % 10000) / (double)1000;
00364 theta[k] = noise - 5;
00365 }
00366 for(index_t k = 0; k < k_comp; k++){
00367 for(index_t j = 0; j < dim; j++)
00368 theta[k_comp - 1 + k * dim + j] = means[k].get(j);
00369
00370 Matrix U, U_tran;
00371 la::CholeskyInit(covars[k], &U);
00372 la::TransposeInit(U, &U_tran);
00373 for(index_t j = 0; j < dim; j++) {
00374 for(index_t i = 0; i < j + 1; i++) {
00375 noise = (rand() % 501) / 100;
00376 theta[k_comp - 1 + k_comp * dim
00377 + k * dim * (dim + 1) / 2
00378 + j * (j + 1) / 2 + i] = U_tran.get(j, i) + noise;
00379 }
00380 }
00381 }
00382 return;
00383 }
00384
00385 void MoGL2E::KMeans_(const Matrix& data, ArrayList<Vector> *means,
00386 ArrayList<Matrix> *covars, Vector *weights,
00387 index_t value_of_k){
00388
00389 ArrayList<Vector> mu, mu_old;
00390 double* tmpssq;
00391 double* sig;
00392 double* sig_best;
00393 index_t *y;
00394 Vector x, diff;
00395 Matrix ssq;
00396 index_t i, j, k, n, t, dim;
00397 double score, score_old, sum;
00398
00399 n = data.n_cols();
00400 dim = data.n_rows();
00401 mu.Init(value_of_k);
00402 mu_old.Init(value_of_k);
00403 tmpssq = (double*)malloc(value_of_k * sizeof( double ));
00404 sig = (double*)malloc(value_of_k * sizeof( double ));
00405 sig_best = (double*)malloc(value_of_k * sizeof( double ));
00406 ssq.Init(n, value_of_k);
00407
00408 for( i = 0; i < value_of_k; i++){
00409 mu[i].Init(dim);
00410 mu_old[i].Init(dim);
00411 }
00412 x.Init(dim);
00413 y = (index_t*)malloc(n * sizeof(index_t));
00414 diff.Init(dim);
00415
00416 score_old = 999999;
00417
00418
00419 for(i = 0; i < 5; i++){
00420 t = -1;
00421 for (k = 0; k < value_of_k; k++){
00422 t = (t + 1 + (rand()%((n - 1 - (value_of_k - k)) - (t + 1))));
00423 mu[k].CopyValues(data.GetColumnPtr(t));
00424 for(j = 0; j < n; j++){
00425 x.CopyValues( data.GetColumnPtr(j));
00426 la::SubOverwrite(mu[k], x, &diff);
00427 ssq.set( j, k, la::Dot(diff, diff));
00428 }
00429 }
00430 min_element(ssq, y);
00431
00432 do{
00433 for(k = 0; k < value_of_k; k++){
00434 mu_old[k].CopyValues(mu[k]);
00435 }
00436
00437 for(k = 0; k < value_of_k; k++){
00438 index_t p = 0;
00439 mu[k].SetZero();
00440 for(j = 0; j < n; j++){
00441 x.CopyValues(data.GetColumnPtr(j));
00442 if(y[j] == k){
00443 la::AddTo(x, &mu[k]);
00444 p++;
00445 }
00446 }
00447
00448 if(p == 0){
00449 }
00450 else{
00451 double sc = 1 ;
00452 sc = sc / p;
00453 la::Scale(sc , &mu[k]);
00454 }
00455 for(j = 0; j < n; j++){
00456 x.CopyValues(data.GetColumnPtr(j));
00457 la::SubOverwrite(mu[k], x, &diff);
00458 ssq.set(j, k, la::Dot(diff, diff));
00459 }
00460 }
00461 min_element(ssq, y);
00462
00463 sum = 0;
00464 for(k = 0; k < value_of_k; k++) {
00465 la::SubOverwrite(mu[k], mu_old[k], &diff);
00466 sum += la::Dot(diff, diff);
00467 }
00468 }while(sum != 0);
00469
00470 for(k = 0; k < value_of_k; k++){
00471 index_t p = 0;
00472 tmpssq[k] = 0;
00473 for(j = 0; j < n; j++){
00474 if(y[j] == k){
00475 tmpssq[k] += ssq.get(j, k);
00476 p++;
00477 }
00478 }
00479 sig[k] = sqrt(tmpssq[k] / p);
00480 }
00481
00482 score = 0;
00483 for(k = 0; k < value_of_k; k++){
00484 score += tmpssq[k];
00485 }
00486 score = score / n;
00487
00488 if (score < score_old) {
00489 score_old = score;
00490 for(k = 0; k < value_of_k; k++){
00491 (*means)[k].CopyValues(mu[k]);
00492 sig_best[k] = sig[k];
00493 }
00494 }
00495 }
00496
00497 for(k = 0; k < value_of_k; k++){
00498 x.SetAll(sig_best[k]);
00499 (*covars)[k].SetDiagonal(x);
00500 }
00501 double tmp = 1;
00502 weights->SetAll(tmp / value_of_k);
00503 return;
00504 }
00505
00506 void MoGL2E::min_element( Matrix& element, index_t *indices ){
00507
00508 index_t last = element.n_cols() - 1;
00509 index_t first, lowest;
00510 index_t i;
00511
00512 for( i = 0; i < element.n_rows(); i++ ){
00513
00514 first = lowest = 0;
00515 if(first == last){
00516 indices[ i ] = last;
00517 }
00518 while(++first <= last){
00519 if( element.get( i , first ) < element.get( i , lowest ) ){
00520 lowest = first;
00521 }
00522 }
00523 indices[ i ] = lowest;
00524 }
00525 return;
00526 }
00527