00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00053 #ifndef FFT_KDE_H
00054 #define FFT_KDE_H
00055
00056 #include <fastlib/fastlib.h>
00057
00077 class FFTKde {
00078
00079 FORBID_ACCIDENTAL_COPIES(FFTKde);
00080
00081 private:
00082
00084
00086 struct complex {
00087
00089 double real;
00090
00092 double imag;
00093 };
00094
00096
00098 struct datanode *module_;
00099
00101 static const double TAU = 4.0;
00102
00104 Matrix qset_;
00105
00107 Matrix rset_;
00108
00110 GaussianKernel kernel_;
00111
00113 Vector densities_;
00114
00116 int m_;
00117
00119 ArrayList<int> size_;
00120
00122 Vector mincoords_;
00123
00125 ArrayList<int> minindices_;
00126
00128 Vector maxcoords_;
00129
00131 Vector diffcoords_;
00132
00134 Vector gridsizes_;
00135
00137 ArrayList<int> kernelweights_dims_;
00138
00140 int numgridpts_;
00141
00143 double gridbinvolume_;
00144
00146 Vector discretized_;
00147
00148 int nyquistnum_;
00149
00150 Vector d_fnyquist_;
00151
00152 Vector k_fnyquist_;
00153
00154 Vector kernelweights_;
00155
00162 void fftc1(double *f, int N, int skip, int forward) {
00163
00164 int b, index1, index2, trans_size, trans;
00165 double pi2 = 4. * asin(1.);
00166
00167
00168 double pi2n, cospi2n, sinpi2n;
00169
00170
00171
00172 struct complex wb;
00173
00174
00175 struct complex temp1, temp2;
00176
00177
00178 struct complex *c = (struct complex *)f;
00179
00180
00181 for(index1 = 1, index2 = 0; index1 < N; index1++) {
00182
00183
00184
00185 for(b = N / 2; index2 >= b; b /= 2) {
00186 index2 -= b;
00187 }
00188
00189
00190
00191 index2 += b;
00192
00193
00194 if(index2 > index1) {
00195 temp1 = c[index2 * skip];
00196 c[index2 * skip] = c[index1 * skip];
00197 c[index1 * skip] = temp1;
00198 }
00199 }
00200
00201
00202
00203
00204
00205 for(trans_size = 2; trans_size <= N; trans_size *= 2) {
00206
00207
00208 pi2n = forward * pi2 / (double)trans_size;
00209
00210
00211 cospi2n = cos(pi2n);
00212 sinpi2n = sin(pi2n);
00213
00214
00215 wb.real = 1.;
00216 wb.imag = 0.;
00217
00218
00219 for(b = 0; b < trans_size / 2; b++) {
00220
00221
00222 for(trans = 0; trans < N / trans_size; trans++) {
00223
00224
00225 index1 = (trans * trans_size + b) * skip;
00226
00227
00228 index2 = index1 + trans_size / 2 * skip;
00229 temp1 = c[index1];
00230 temp2 = c[index2];
00231
00232
00233 c[index1].real = temp1.real + wb.real * temp2.real -
00234 wb.imag * temp2.imag;
00235 c[index1].imag = temp1.imag + wb.real * temp2.imag +
00236 wb.imag * temp2.real;
00237 c[index2].real = temp1.real - wb.real * temp2.real +
00238 wb.imag * temp2.imag;
00239 c[index2].imag = temp1.imag - wb.real * temp2.imag -
00240 wb.imag * temp2.real;
00241 }
00242 temp1 = wb;
00243
00244
00245 wb.real = cospi2n * temp1.real - sinpi2n * temp1.imag;
00246
00247
00248 wb.imag = cospi2n*temp1.imag + sinpi2n*temp1.real;
00249 }
00250 }
00251
00252
00253 if(forward<0) {
00254 for(index1 = 0; index1 < skip * N; index1 += skip) {
00255 c[index1].real /= N;
00256 c[index1].imag /= N;
00257 }
00258 }
00259 }
00260
00269 void fftcn(double *f, int ndims, int *size, int forward) {
00270
00271
00272
00273 int planesize = 1, skip = 1;
00274
00275
00276 int totalsize = 1;
00277
00278
00279 for(index_t dim = 0; dim < ndims; dim++) {
00280 totalsize *= size[dim];
00281 }
00282
00283
00284 for(index_t dim = ndims - 1; dim >= 0; dim--) {
00285
00286
00287 planesize *= size[dim];
00288
00289
00290 for(index_t i = 0; i < totalsize; i += planesize) {
00291
00292
00293
00294 for(index_t j = 0; j < skip; j++) {
00295
00296
00297
00298 fftc1(f + 2 * (i + j), size[dim], skip, forward);
00299 }
00300 }
00301
00302 skip *= size[dim];
00303 }
00304 }
00305
00312 void fftr1(double *f, int N, int forward) {
00313
00314 int b;
00315
00316
00317 double pi2n = 4. * asin(1.) / N, cospi2n = cos(pi2n), sinpi2n = sin(pi2n);
00318
00319
00320
00321 struct complex wb;
00322
00323
00324 struct complex temp1, temp2;
00325
00326
00327 struct complex *c = (struct complex *)f;
00328
00329
00330 if(forward == 1) {
00331 fftc1(f, N / 2, 1, 1);
00332 }
00333
00334
00335 wb.real = 1.;
00336 wb.imag = 0.;
00337
00338
00339 for(b = 1; b < N / 4; b++) {
00340
00341 temp1 = wb;
00342
00343
00344 wb.real = cospi2n * temp1.real - sinpi2n * temp1.imag;
00345
00346
00347 wb.imag = cospi2n * temp1.imag + sinpi2n * temp1.real;
00348 temp1 = c[b];
00349 temp2 = c[N / 2 - b];
00350 c[b].real = .5 * (temp1.real + temp2.real + forward * wb.real *
00351 (temp1.imag + temp2.imag) + wb.imag *
00352 (temp1.real - temp2.real));
00353 c[b].imag = .5 * (temp1.imag-temp2.imag - forward * wb.real *
00354 (temp1.real - temp2.real) + wb.imag *
00355 (temp1.imag + temp2.imag));
00356 c[N/2-b].real = .5 * (temp1.real + temp2.real - forward * wb.real *
00357 (temp1.imag + temp2.imag) - wb.imag *
00358 (temp1.real - temp2.real));
00359 c[N/2-b].imag = .5 * (-temp1.imag + temp2.imag - forward * wb.real *
00360 (temp1.real - temp2.real) + wb.imag *
00361 (temp1.imag + temp2.imag));
00362 }
00363
00364 temp1 = c[0];
00365
00366
00367 c[0].real = temp1.real+temp1.imag;
00368
00369
00370 c[0].imag = temp1.real-temp1.imag;
00371
00372 if(forward == -1) {
00373 c[0].real *= .5;
00374 c[0].imag *= .5;
00375 fftc1(f, N / 2, 1, -1);
00376 }
00377 }
00378
00387 void fftrn(double *f, double *fnyquist, int ndims, int *size, int forward) {
00388
00389 int i, j, b;
00390
00391
00392
00393
00394 int index,indexneg = 0;
00395 int stepsize;
00396
00397
00398
00399 int N = size[ndims - 1];
00400
00401
00402 double pi2n = 4. * asin(1.) / N, cospi2n = cos(pi2n), sinpi2n = sin(pi2n);
00403
00404
00405
00406 struct complex wb;
00407
00408
00409 struct complex temp1, temp2;
00410
00411
00412 struct complex *c = (struct complex *)f,
00413 *cnyquist = (struct complex *)fnyquist;
00414
00415
00416 int totalsize = 1;
00417
00418
00419 ArrayList<int> indices;
00420 indices.Init(ndims);
00421
00422
00423 size[ndims - 1] /= 2;
00424
00425 for(i = 0; i < ndims; i++) {
00426 totalsize *= size[i];
00427 indices[i] = 0;
00428 }
00429
00430
00431 if(forward == 1) {
00432
00433
00434 fftcn(f, ndims, size, 1);
00435
00436
00437
00438 for(i = 0; i < totalsize / size[ndims - 1]; i++) {
00439
00440
00441 cnyquist[i] = c[i * size[ndims - 1]];
00442 }
00443 }
00444
00445
00446 for(index = 0; index < totalsize; index += size[ndims-1]) {
00447
00448 wb.real = 1.;
00449 wb.imag = 0.;
00450
00451
00452 for(b = 1; b < N / 4; b++) {
00453
00454 temp1 = wb;
00455
00456
00457 wb.real = cospi2n*temp1.real - sinpi2n*temp1.imag;
00458
00459
00460 wb.imag = cospi2n*temp1.imag + sinpi2n*temp1.real;
00461
00462 temp1 = c[index + b];
00463
00464
00465
00466 temp2 = c[indexneg + N / 2 - b];
00467
00468 c[index + b].real = .5 * (temp1.real + temp2.real + forward * wb.real *
00469 (temp1.imag + temp2.imag) + wb.imag *
00470 (temp1.real - temp2.real));
00471 c[index + b].imag = .5 * (temp1.imag - temp2.imag - forward * wb.real *
00472 (temp1.real - temp2.real) + wb.imag *
00473 (temp1.imag + temp2.imag));
00474 c[indexneg + N / 2 - b].real = .5 * (temp1.real + temp2.real -
00475 forward *
00476 wb.real *
00477 (temp1.imag + temp2.imag) -
00478 wb.imag *
00479 (temp1.real - temp2.real));
00480 c[indexneg + N / 2 - b].imag = .5 * (-temp1.imag + temp2.imag -
00481 forward * wb.real *
00482 (temp1.real - temp2.real) +
00483 wb.imag *
00484 (temp1.imag + temp2.imag));
00485 }
00486 temp1 = c[index];
00487
00488
00489
00490 temp2 = cnyquist[indexneg / size[ndims - 1]];
00491
00492
00493 c[index].real = .5 * (temp1.real + temp2.real + forward *
00494 (temp1.imag + temp2.imag));
00495 c[index].imag = .5 * (temp1.imag - temp2.imag - forward *
00496 (temp1.real - temp2.real));
00497
00498
00499 cnyquist[indexneg / size[ndims - 1]].real =
00500 .5 * (temp1.real + temp2.real - forward * (temp1.imag + temp2.imag));
00501 cnyquist[indexneg / size[ndims - 1]].imag =
00502 .5 * (-temp1.imag + temp2.imag - forward * (temp1.real - temp2.real));
00503
00504
00505
00506
00507
00508
00509
00510 stepsize = size[ndims - 1];
00511
00512
00513
00514 for(j = ndims - 2; j >= 0 && indices[j] == size[j] - 1; j--) {
00515 indices[j] = 0;
00516 indexneg -= stepsize;
00517 stepsize *= size[j];
00518 }
00519
00520
00521 if(j >= 0 && indices[j] == 0) {
00522 indexneg += stepsize * (size[j] - 1);
00523 }
00524
00525 else {
00526 indexneg -= stepsize;
00527 }
00528
00529
00530
00531 if(j >= 0) {
00532 indices[j]++;
00533 }
00534 }
00535
00536
00537 if(forward == -1) {
00538 fftcn(f, ndims, size, -1);
00539 }
00540
00541
00542 size[ndims - 1] *= 2;
00543
00544 }
00545
00546 void assign_weights(int reference_pt_num, int level, double volume, int pos,
00547 int skip) {
00548 if(level == -1) {
00549 discretized_[pos] += volume;
00550 }
00551 else {
00552
00553
00554 double coord = rset_.get(level, reference_pt_num);
00555 double leftgridcoord = mincoords_[level] + minindices_[level] *
00556 gridsizes_[level];
00557 double rightgridcoord = leftgridcoord + gridsizes_[level];
00558 double leftvolume = volume * (rightgridcoord - coord);
00559 double rightvolume = volume * (coord - leftgridcoord);
00560 int nextskip = size_[level] * skip;
00561 int nextleftpos = pos + skip * minindices_[level];
00562
00563 if(leftvolume > 0.0) {
00564 assign_weights(reference_pt_num, level - 1, leftvolume, nextleftpos,
00565 nextskip);
00566 }
00567
00568 if(rightvolume > 0.0) {
00569 assign_weights(reference_pt_num, level - 1, rightvolume,
00570 nextleftpos + skip, nextskip);
00571 }
00572 }
00573 }
00574
00575 void retrieve_weights(int query_pt_num, double volume, int level, int pos,
00576 int skip, double divfactor) {
00577
00578 if(level == -1) {
00579 densities_[query_pt_num] += discretized_[pos] * volume / divfactor;
00580 }
00581 else {
00582
00583
00584 double coord = qset_.get(level, query_pt_num);
00585 double leftgridcoord = mincoords_[level] + minindices_[level] *
00586 gridsizes_[level];
00587 double rightgridcoord = leftgridcoord + gridsizes_[level];
00588 double leftvolume = volume * (rightgridcoord - coord);
00589 double rightvolume = volume * (coord - leftgridcoord);
00590 int nextskip = size_[level] * skip;
00591 int nextleftpos = pos + skip * minindices_[level];
00592
00593 if(leftvolume > 0.0) {
00594 retrieve_weights(query_pt_num, leftvolume, level - 1, nextleftpos,
00595 nextskip, divfactor);
00596 }
00597 if(rightvolume > 0.0) {
00598 retrieve_weights(query_pt_num, rightvolume, level - 1,
00599 nextleftpos + skip, nextskip, divfactor);
00600 }
00601 }
00602 }
00603
00607 void RetrieveDensities() {
00608
00609 double normc =
00610 (kernel_.CalcNormConstant(rset_.n_rows()) * rset_.n_cols());
00611
00612 for(index_t r = 0; r < qset_.n_cols(); r++) {
00613 densities_[r] = 0.0;
00614
00615 for(index_t d = 0; d < qset_.n_rows(); d++) {
00616 minindices_[d] = (int) floor((qset_.get(d, r) - mincoords_[d])/
00617 gridsizes_[d]);
00618 }
00619 retrieve_weights(r, 1.0, qset_.n_rows() - 1, 0, 1,
00620 gridbinvolume_ * normc);
00621 }
00622 }
00623
00624 void discretize_dataset() {
00625
00626
00627
00628
00629 numgridpts_ = 1;
00630 gridbinvolume_ = 1.0;
00631
00632 double min, max;
00633
00634
00635
00636 for(index_t d = 0; d < qset_.n_rows(); d++) {
00637 int possiblesample;
00638 min = DBL_MAX;
00639 max = -DBL_MAX;
00640
00641 for(index_t r = 0; r < rset_.n_cols(); r++) {
00642 double coord = rset_.get(d, r);
00643 if(coord > max)
00644 max = coord;
00645 if(coord < min)
00646 min = coord;
00647 }
00648
00649
00650 mincoords_[d] = min;
00651 maxcoords_[d] = max;
00652 diffcoords_[d] = maxcoords_[d] - mincoords_[d];
00653 gridsizes_[d] = diffcoords_[d] / ((double) m_ - 1);
00654 gridbinvolume_ *= gridsizes_[d];
00655
00656
00657
00658 kernelweights_dims_[d] = m_ - 1;
00659 possiblesample = (int) floor(TAU * sqrt(kernel_.bandwidth_sq()) /
00660 gridsizes_[d]);
00661
00662 if(kernelweights_dims_[d] > possiblesample) {
00663 if(possiblesample == 0) {
00664 possiblesample = 1;
00665 }
00666 kernelweights_dims_[d] = possiblesample;
00667 }
00668
00669
00670
00671
00672 size_[d] = (int) ceil(log(m_ + kernelweights_dims_[d]) / log(2));
00673 size_[d] = 1 << size_[d];
00674
00675 numgridpts_ *= size_[d];
00676 }
00677
00678
00679
00680 discretized_.Init(numgridpts_);
00681 discretized_.SetZero();
00682
00683 double inv_gvolume = 1.0 / gridbinvolume_;
00684
00685
00686 for(index_t r = 0; r < rset_.n_cols(); r++) {
00687
00688
00689
00690 for(index_t d = 0; d < rset_.n_rows(); d++) {
00691 minindices_[d] = (int) floor((rset_.get(d, r) - mincoords_[d])/
00692 gridsizes_[d]);
00693 }
00694
00695
00696
00697
00698 assign_weights(r, qset_.n_rows() - 1, inv_gvolume, 0, 1);
00699 }
00700
00701 }
00702
00703 void Gaussify(double acc, double precalc, int level, int pos, int skip) {
00704
00705 if(level == -1) {
00706 kernelweights_[pos] = exp(precalc * acc);
00707 }
00708 else {
00709 int half = kernelweights_dims_[level];
00710 int g;
00711 for(g = 0; g <= half; g++) {
00712 double addThis = g * gridsizes_[level];
00713 double newacc = acc + addThis * addThis;
00714 int newskip = skip * size_[level];
00715
00716 Gaussify(newacc, precalc, level - 1, pos + skip * g, newskip);
00717
00718
00719 if(g != 0) {
00720 Gaussify(newacc, precalc, level - 1,
00721 pos + skip * (size_[level] - g), newskip);
00722 }
00723 }
00724 }
00725 }
00726
00727
00728 public:
00729
00731
00733 FFTKde() {}
00734
00736 ~FFTKde() {}
00737
00739
00745 void get_density_estimates(Vector *results) {
00746 results->Init(densities_.length());
00747
00748 for(index_t i = 0; i < densities_.length(); i++) {
00749 (*results)[i] = densities_[i];
00750 }
00751 }
00752
00760 void Init(Matrix &qset, Matrix &rset, struct datanode *module_in) {
00761
00762
00763 module_ = module_in;
00764
00765 printf("Initializing FFT KDE...\n");
00766 fx_timer_start(module_, "fft_kde_init");
00767
00768
00769 kernel_.Init(fx_param_double_req(module_, "bandwidth"));
00770 m_ = fx_param_int(module_, "num_grid_pts_per_dim", 128);
00771
00772
00773
00774 qset_.Copy(qset);
00775 densities_.Init(qset_.n_cols());
00776 rset_.Copy(rset);
00777
00778
00779 size_.Init(qset_.n_rows());
00780 minindices_.Init(rset_.n_rows());
00781 mincoords_.Init(qset_.n_rows());
00782 maxcoords_.Init(qset_.n_rows());
00783 diffcoords_.Init(qset_.n_rows());
00784 gridsizes_.Init(qset_.n_rows());
00785 kernelweights_dims_.Init(qset_.n_rows());
00786
00787
00788 discretize_dataset();
00789
00790 nyquistnum_ = 2 * numgridpts_ / size_[rset_.n_rows() - 1];
00791
00792 d_fnyquist_.Init(nyquistnum_);
00793 k_fnyquist_.Init(nyquistnum_);
00794 kernelweights_.Init(numgridpts_);
00795
00796 fx_timer_stop(module_, "fft_kde_init");
00797 printf("FFT KDE initialization completed...\n");
00798 }
00799
00802 void Compute() {
00803
00804 printf("Computing FFT KDE...\n");
00805 fx_timer_start(module_, "fft_kde");
00806
00807
00808 d_fnyquist_.SetZero();
00809 k_fnyquist_.SetZero();
00810 kernelweights_.SetZero();
00811 fftrn(discretized_.ptr(), d_fnyquist_.ptr(), rset_.n_rows(),
00812 size_.begin(), 1);
00813
00814
00815
00816 double precalc = -0.5 / kernel_.bandwidth_sq();
00817 Gaussify(0.0, precalc, rset_.n_rows() - 1, 0, 1);
00818
00819
00820 fftrn(kernelweights_.ptr(), k_fnyquist_.ptr(),
00821 rset_.n_rows(), size_.begin(), 1);
00822
00823
00824
00825
00826
00827 for(index_t d = 0; d < numgridpts_; d += 2) {
00828 double real1 = discretized_[d];
00829 double complex1 = discretized_[d + 1];
00830 double real2 = kernelweights_[d];
00831 double complex2 = kernelweights_[d + 1];
00832 discretized_[d] = real1 * real2 - complex1 * complex2;
00833 discretized_[d + 1] = real1 * complex2 + complex1 * real2;
00834 }
00835
00836 for(index_t d = 0; d < nyquistnum_; d += 2) {
00837 double real1 = d_fnyquist_[d];
00838 double complex1 = d_fnyquist_[d + 1];
00839 double real2 = k_fnyquist_[d];
00840 double complex2 = k_fnyquist_[d + 1];
00841 d_fnyquist_[d] = real1 * real2 - complex1 * complex2;
00842 d_fnyquist_[d + 1] = real1 * complex2 + complex1 * real2;
00843 }
00844
00845
00846 fftrn(discretized_.ptr(), d_fnyquist_.ptr(),
00847 rset_.n_rows(), size_.begin(), -1);
00848
00849
00850 RetrieveDensities();
00851
00852 fx_timer_stop(module_, "fft_kde");
00853 printf("FFT KDE completed...\n");
00854 }
00855
00863 void PrintDebug() {
00864
00865 FILE *stream = stdout;
00866 const char *fname = NULL;
00867
00868 if((fname = fx_param_str(module_, "fft_kde_output", NULL)) != NULL) {
00869 stream = fopen(fname, "w+");
00870 }
00871 for(index_t q = 0; q < qset_.n_cols(); q++) {
00872 fprintf(stream, "%g\n", densities_[q]);
00873 }
00874
00875 if(stream != stdout) {
00876 fclose(stream);
00877 }
00878 }
00879
00880 };
00881
00882 #endif