00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00059 #ifndef FGT_KDE_H
00060 #define FGT_KDE_H
00061
00062 #include <fastlib/fastlib.h>
00063 #include "..//series_expansion/mult_series_expansion_aux.h"
00064
00065
00085 class FGTKde {
00086
00087 FORBID_ACCIDENTAL_COPIES(FGTKde);
00088
00089 private:
00090
00092
00094 struct datanode *module_;
00095
00097 Matrix qset_;
00098
00100 Matrix rset_;
00101
00103 GaussianKernel kernel_;
00104
00106 Vector densities_;
00107
00109 double tau_;
00110
00112 MultSeriesExpansionAux msea_;
00113
00115
00130 void FastGaussTransformPreprocess_(double *interaction_radius,
00131 ArrayList<int> &nsides,
00132 Vector &sidelengths, Vector &mincoords,
00133 int *nboxes, int *nterms) {
00134
00135
00136 double bandwidth = sqrt(kernel_.bandwidth_sq());
00137 *interaction_radius = sqrt(-2.0 * kernel_.bandwidth_sq() * log(tau_));
00138
00139 int di, n, num_rows = rset_.n_cols();
00140 int dim = rset_.n_rows();
00141
00142
00143 Vector maxcoords;
00144 maxcoords.Init(dim);
00145 maxcoords.SetAll(-DBL_MAX);
00146 double boxside = -1.0;
00147 *nboxes = 1;
00148
00149 for(di = 0; di < dim; di++) {
00150 mincoords[di] = DBL_MAX;
00151 }
00152
00153 for(n = 0; n < num_rows; n++) {
00154 for(di = 0; di < dim; di++) {
00155 if(mincoords[di] > rset_.get(di, n)) {
00156 mincoords[di] = rset_.get(di, n);
00157 }
00158 if(maxcoords[di] < rset_.get(di, n)) {
00159 maxcoords[di] = rset_.get(di, n);
00160 }
00161 }
00162 }
00163
00164
00165 for(di = 0; di < dim; di++) {
00166
00167 nsides[di] = (int)
00168 ((maxcoords[di] - mincoords[di]) / bandwidth + 1);
00169
00170 (*nboxes) = (*nboxes) * nsides[di];
00171 double tmp = (maxcoords[di] - mincoords[di]) /
00172 (nsides[di] * 2 * bandwidth);
00173
00174 if(tmp > boxside) {
00175 boxside = tmp;
00176 }
00177
00178 sidelengths[di] = (maxcoords[di] - mincoords[di]) /
00179 ((double) nsides[di]);
00180
00181 }
00182
00183 int ip = 0;
00184 double two_r = 2.0 * boxside;
00185 double one_minus_two_r = 1.0 - two_r;
00186 double ret = 1.0 / pow(one_minus_two_r * one_minus_two_r, dim);
00187 double factorialvalue = 1.0;
00188 double r_raised_to_p_alpha = 1.0;
00189 double first_factor, second_factor;
00190 double ret2;
00191
00192
00193 do {
00194 ip++;
00195 factorialvalue *= ip;
00196
00197 r_raised_to_p_alpha *= two_r;
00198 first_factor = 1.0 - r_raised_to_p_alpha;
00199 first_factor *= first_factor;
00200 second_factor = r_raised_to_p_alpha * (2.0 - r_raised_to_p_alpha)
00201 / sqrt(factorialvalue);
00202
00203 ret2 = ret * (pow((first_factor + second_factor), dim) -
00204 pow(first_factor, dim));
00205
00206 } while(ret2 > tau_);
00207
00208 *nterms = ip;
00209 }
00210
00232 int MultiDimIndexInSingleArray_(ArrayList<int> &coords,
00233 ArrayList<int> &n) {
00234
00235 int sum = 0;
00236
00237 for(index_t i = 0; i < coords.size(); i++) {
00238 int prod = 1;
00239 for(index_t j = 0; j < i; j++) {
00240 prod *= n[j];
00241 }
00242 sum += coords[i] * prod;
00243 }
00244 return sum;
00245 }
00246
00255 void SingleDimIndexInMultiArray_(ArrayList<int> &n, int index,
00256 ArrayList<int> &coords) {
00257
00258 for(index_t i = 0; i < coords.size(); i++) {
00259 int this_coord = index % n[i];
00260 index /= n[i];
00261 coords[i] = this_coord;
00262 }
00263 }
00264
00276 int IsOngrid_(ArrayList<int> &old_coords, int delta,
00277 ArrayList<int> &new_coords, ArrayList<int> &nsides) {
00278
00279 int dim = old_coords.size();
00280 for(index_t d = 0; d < dim; d++) {
00281 int new_val = old_coords[d] + delta;
00282
00283 if(new_val < 0 || new_val >= nsides[d]) {
00284 return 0;
00285 }
00286
00287 new_coords[d] = new_val;
00288 }
00289 return 1;
00290 }
00291
00301 void MakeNeighbors_(int ibox, ArrayList<int> &nsides, int kdis,
00302 ArrayList<int> &ret) {
00303
00304
00305 ArrayList<int> coords;
00306 coords.Init(nsides.size());
00307 SingleDimIndexInMultiArray_(nsides, ibox, coords);
00308
00309 ArrayList<int> dummy_n;
00310 dummy_n.Init(coords.size());
00311 ArrayList<int> all_ones;
00312 all_ones.Init(coords.size());
00313 for(index_t i = 0; i < coords.size(); i++) {
00314 dummy_n[i] = 2 * kdis + 1;
00315 all_ones[i] = kdis;
00316 }
00317
00318 int num_neighbors = (2 * kdis + 1);
00319 int i;
00320
00321
00322 num_neighbors = (int) pow(num_neighbors, coords.size());
00323
00324
00325
00326
00327
00328
00329
00330 ArrayList<int> delta, new_coords;
00331 delta.Init(coords.size());
00332 new_coords.Init(coords.size());
00333
00334 for(i = 0; i < num_neighbors; i++) {
00335 SingleDimIndexInMultiArray_(dummy_n, i, delta);
00336
00337 for(index_t j = 0; j < coords.size(); j++) {
00338 new_coords[j] = coords[j] + delta[j];
00339 }
00340 int ongrid = IsOngrid_(new_coords, -kdis, new_coords, nsides);
00341
00342 if(ongrid) {
00343 *(ret.PushBackRaw()) =
00344 MultiDimIndexInSingleArray_(new_coords, nsides);
00345 }
00346
00347 }
00348 }
00349
00366 void Assign_(int nallbx, ArrayList<int> &nsides, Vector &sidelengths,
00367 Vector &mincoords, Matrix ¢er,
00368 ArrayList<ArrayList<int> > &queries_assigned,
00369 ArrayList<ArrayList<int> > &references_assigned) {
00370
00371 int r;
00372 int num_query_rows = qset_.n_cols();
00373 int num_ref_rows = rset_.n_cols();
00374 int dim = qset_.n_rows();
00375 int di;
00376
00377
00378 for(r = 0; r < num_ref_rows; r++) {
00379
00380 int boxnum = 0;
00381
00382 for(di = dim - 1; di >= 0; di--) {
00383 int nside = nsides[di];
00384 double h = sidelengths[di];
00385 int binnum = (int) floor((rset_.get(di, r) - mincoords[di]) / h);
00386
00387 binnum = max(0, min(binnum, nside - 1));
00388 boxnum = boxnum * nside + binnum;
00389 }
00390 *(references_assigned[boxnum].PushBackRaw()) = r;
00391 }
00392
00393
00394 for(r = 0; r < num_query_rows; r++) {
00395
00396 int boxnum = 0;
00397
00398 for(di = dim - 1; di >= 0; di--) {
00399 int nside = nsides[di];
00400 double h = sidelengths[di];
00401 int binnum = (int) floor((qset_.get(di, r) - mincoords[di]) / h);
00402
00403 binnum = max(0, min(binnum, nside - 1));
00404 boxnum = boxnum * nside + binnum;
00405 }
00406 *(queries_assigned[boxnum].PushBackRaw()) = r;
00407 }
00408
00409
00410 for(r = 0; r < nallbx; r++) {
00411 int sf = nallbx;
00412 int ind = r, rem;
00413 Vector box_center;
00414 center.MakeColumnVector(r, &box_center);
00415
00416 for(di = dim - 1; di >= 0; di--) {
00417 int nside = nsides[di];
00418 double h = sidelengths[di];
00419 sf /= nside;
00420 rem = ind % sf;
00421 ind = ind / sf;
00422
00423 box_center[di] = mincoords[di] + (ind + 0.5) * h;
00424
00425 ind = rem;
00426 }
00427 }
00428 }
00429
00446 void TranslateMultipoleToLocal_(int ref_box_num, int query_box_num,
00447 Matrix &mcoeffsb, Matrix &lcoeffsb,
00448 int p_alpha, int totalnumcoeffs,
00449 double bwsqd_2, const double *hrcentroid,
00450 const double *dest_hrcentroid) {
00451
00452 double bandwidth = sqrt(bwsqd_2);
00453 int j, k, l, d, step;
00454
00455 int dim = qset_.n_rows();
00456
00457 Vector lcoeffs;
00458 lcoeffsb.MakeColumnVector(query_box_num, &lcoeffs);
00459 Vector mcoeffs;
00460 mcoeffsb.MakeColumnVector(ref_box_num, &mcoeffs);
00461
00462 {
00463 Vector dest_minus_parent;
00464 dest_minus_parent.Init(dim);
00465
00466 la::SubOverwrite(dim, dest_hrcentroid, hrcentroid,
00467 dest_minus_parent.ptr());
00468
00469 int limit = 2 * p_alpha - 2;
00470 Matrix hermite_map;
00471 hermite_map.Init(dim, limit + 1);
00472 Matrix arrtmp;
00473 arrtmp.Init(dim, totalnumcoeffs);
00474
00475 Vector C_k_neg;
00476 C_k_neg.Alias(msea_.get_neg_inv_multiindex_factorials());
00477
00478 for(j = 0; j < dim; j++) {
00479 double coord_div_band = dest_minus_parent[j] / bandwidth;
00480 double d2 = 2 * coord_div_band;
00481 double facj = exp(-coord_div_band * coord_div_band);
00482
00483 hermite_map.set(j, 0, facj);
00484
00485 if(p_alpha > 1) {
00486 hermite_map.set(j, 1, d2 * facj);
00487
00488 for(k = 1; k < limit; k++) {
00489 int k2 = k * 2;
00490 hermite_map.set(j, k + 1,
00491 d2 * hermite_map.get(j, k) - k2 *
00492 hermite_map.get(j, k - 1));
00493 }
00494 }
00495
00496 for(l = 0; l < totalnumcoeffs; l++) {
00497 arrtmp.set(j, l, 0.0);
00498 }
00499 }
00500
00501 step = totalnumcoeffs / p_alpha;
00502 d = 0;
00503
00504 for(j = 0; j < totalnumcoeffs; j++) {
00505 const ArrayList<int> &mapping = msea_.get_multiindex(j);
00506
00507 for(k = 0, l = j % step; k < p_alpha; k++, l += step) {
00508 arrtmp.set(d, j, arrtmp.get(d, j) + mcoeffs[l] *
00509 hermite_map.get(d, mapping[d] + k));
00510 }
00511 }
00512
00513
00514 if(p_alpha > 1) {
00515 int boundary, boundary2;
00516
00517 for(boundary = totalnumcoeffs / p_alpha, step = step / p_alpha, d = 1;
00518 step >= 1; step /= p_alpha, d++, boundary /= p_alpha) {
00519
00520 boundary2 = 0;
00521
00522 for(j = 0; j < totalnumcoeffs; j++) {
00523 const ArrayList<int> &mapping = msea_.get_multiindex(j);
00524
00525 if(j % boundary == 0) {
00526 boundary2 += boundary;
00527 }
00528
00529 for(k = 0; k < p_alpha; k++) {
00530
00531 int jump = (j + step * k) % boundary2;
00532
00533 if(jump < boundary2 - boundary) {
00534 jump += boundary2 - boundary;
00535 }
00536
00537 const ArrayList<int> &mapping2 = msea_.get_multiindex(jump);
00538
00539 arrtmp.set(d, j,
00540 arrtmp.get(d, j) +
00541 arrtmp.get(d - 1, jump) *
00542 hermite_map.get(d, mapping2[d] + mapping[d]));
00543 }
00544 }
00545 }
00546 }
00547
00548 d = dim - 1;
00549
00550 for(j = 0; j < totalnumcoeffs; j++) {
00551 lcoeffs[j] = lcoeffs[j] + C_k_neg[j] * arrtmp.get(d, j);
00552 }
00553 }
00554 }
00555
00569 void ComputeMultipoleCoeffs_(Matrix &mcoeffs, int dim,
00570 int p_alpha, int totalnumcoeffs,
00571 int ref_box_num, double bwsqd_two,
00572 ArrayList<int> &rows, const double *x_R) {
00573
00574 Vector A_k;
00575 mcoeffs.MakeColumnVector(ref_box_num, &A_k);
00576 double bw_times_sqrt_two = sqrt(bwsqd_two);
00577
00578
00579
00580 if(A_k[0] != 0) {
00581 return;
00582 }
00583
00584 Vector C_k;
00585 C_k.Alias(msea_.get_inv_multiindex_factorials());
00586
00587 Vector tmp;
00588 tmp.Init(totalnumcoeffs);
00589 Vector x_r;
00590 x_r.Init(dim);
00591 int num_rows = rows.size();
00592 int step, boundary;
00593 int r, i, j;
00594
00595 A_k[0] = num_rows;
00596 for(i = 1; i < totalnumcoeffs; i++) {
00597 A_k[i] = 0.0;
00598 }
00599
00600 if(p_alpha > 1) {
00601
00602 for(r = 0; r < num_rows; r++) {
00603
00604 int row_num = rows[r];
00605
00606 for(i = 0; i < dim; i++) {
00607 x_r[i] = (rset_.get(i, row_num) - x_R[i]) / bw_times_sqrt_two;
00608 }
00609
00610 tmp[0] = 1.0;
00611
00612 for(boundary = totalnumcoeffs, step = totalnumcoeffs / p_alpha,
00613 j = 0;
00614 step >= 1; step /= p_alpha, boundary /= p_alpha, j++) {
00615 for(i = 0; i < totalnumcoeffs; ) {
00616 int limit = i + boundary;
00617
00618 i += step;
00619
00620 for( ; i < limit; i += step) {
00621 tmp[i] = tmp[i - step] * x_r[j];
00622 }
00623 }
00624 }
00625
00626
00627 for(i = 1; i < totalnumcoeffs; i++) {
00628 A_k[i] = A_k[i] + tmp[i];
00629 }
00630 }
00631 }
00632
00633 for(r = 1; r < totalnumcoeffs; r++) {
00634 A_k[r] = A_k[r] * C_k[r];
00635 }
00636
00637 return;
00638 }
00639
00653 void DirectLocalAccumulation_(ArrayList<int> &rows, int query_box_num,
00654 Matrix &locexps, double delta,
00655 const double *dest_hrcentroid, int p_alpha,
00656 int totalnumcoeffs) {
00657
00658 int num_rows = rows.size();
00659 int r, d, boundary, step, i, j, k;
00660
00661
00662 int dim = rset_.n_rows();
00663 int limit2 = p_alpha - 1;
00664 Matrix hermite_map;
00665 hermite_map.Init(dim, p_alpha);
00666 Vector arrtmp;
00667 arrtmp.Init(totalnumcoeffs);
00668 Vector x_r_minus_x_Q;
00669 x_r_minus_x_Q.Init(dim);
00670 double bandwidth = sqrt(delta);
00671 Vector neg_inv_multiindex_factorials;
00672 neg_inv_multiindex_factorials.Alias
00673 (msea_.get_neg_inv_multiindex_factorials());
00674
00675 Vector arr;
00676 locexps.MakeColumnVector(query_box_num, &arr);
00677
00678
00679 for(r = 0; r < num_rows; r++) {
00680
00681 int row_num = rows[r];
00682
00683
00684 for(d = 0; d < dim; d++) {
00685 x_r_minus_x_Q[d] = dest_hrcentroid[d] - rset_.get(d, row_num);
00686 }
00687
00688
00689
00690 for(d = 0; d < dim; d++) {
00691
00692 double coord_div_band = x_r_minus_x_Q[d] / bandwidth;
00693 double d2 = 2 * coord_div_band;
00694 double facj = exp(-coord_div_band * coord_div_band);
00695
00696 hermite_map.set(d, 0, facj);
00697
00698 if(p_alpha > 1) {
00699 hermite_map.set(d, 1, d2 * facj);
00700
00701 for(k = 1; k < limit2; k++) {
00702 int k2 = k * 2;
00703 hermite_map.set(d, k + 1,
00704 d2 * hermite_map.get(d, k) - k2 *
00705 hermite_map.get(d, k - 1));
00706 }
00707 }
00708 }
00709
00710
00711 arrtmp[0] = 1.0;
00712
00713 if(p_alpha > 1) {
00714
00715
00716 for(boundary = totalnumcoeffs, step = totalnumcoeffs / p_alpha,
00717 d = 0;
00718 step >= 1; step /= p_alpha, boundary /= p_alpha, d++) {
00719 for(i = 0; i < totalnumcoeffs; ) {
00720 int limit = i + boundary;
00721
00722
00723 int first = i;
00724 i += step;
00725
00726 for(j = 1; i < limit; i += step, j++) {
00727 arrtmp[i] = arrtmp[first] * hermite_map.get(d, j);
00728 }
00729
00730 arrtmp[first] *= hermite_map.get(d, 0);
00731 }
00732 }
00733 }
00734 else {
00735 for(d = 0; d < dim; d++) {
00736 arrtmp[0] *= hermite_map.get(d, 0);
00737 }
00738 }
00739
00740 for(j = 0; j < totalnumcoeffs; j++) {
00741 arr[j] = arr[j] + neg_inv_multiindex_factorials[j] * arrtmp[j];
00742 }
00743 }
00744 }
00745
00760 void EvaluateMultipoleExpansion_(ArrayList<int> &rows, int p_alpha,
00761 int totalnumcoeffs, Matrix &mcoeffsb,
00762 int ref_box_num, double bwsqd_times_2,
00763 const double *ref_hrcentroid) {
00764
00765 double bandwidth = sqrt(bwsqd_times_2);
00766 int num_query_rows = rows.size();
00767 int r, d, boundary, step, i, j, k;
00768
00769
00770 int dim = qset_.n_rows();
00771 Vector x_q_minus_x_R;
00772 x_q_minus_x_R.Init(dim);
00773 Matrix hermite_map;
00774 hermite_map.Init(dim, p_alpha);
00775 Vector arrtmp;
00776 arrtmp.Init(totalnumcoeffs);
00777 int limit2 = p_alpha - 1;
00778 Vector mcoeffs;
00779 mcoeffsb.MakeColumnVector(ref_box_num, &mcoeffs);
00780
00781
00782 for(r = 0; r < num_query_rows; r++) {
00783
00784 double multipolesum = 0.0;
00785 int row_num = rows[r];
00786
00787
00788 for(d = 0; d < dim; d++) {
00789 x_q_minus_x_R[d] = qset_.get(d, row_num) - ref_hrcentroid[d];
00790 }
00791
00792
00793
00794 for(d = 0; d < dim; d++) {
00795 double coord_div_band = x_q_minus_x_R[d] / bandwidth;
00796 double d2 = 2 * coord_div_band;
00797 double facj = exp(-coord_div_band * coord_div_band);
00798
00799 hermite_map.set(d, 0, facj);
00800
00801 if(p_alpha > 1) {
00802 hermite_map.set(d, 1, d2 * facj);
00803
00804 for(k = 1; k < limit2; k++) {
00805 int k2 = k * 2;
00806 hermite_map.set(d, k + 1,
00807 d2 * hermite_map.get(d, k) - k2 *
00808 hermite_map.get(d, k - 1));
00809 }
00810 }
00811 }
00812
00813
00814 arrtmp[0] = 1.0;
00815
00816 if(p_alpha > 1) {
00817
00818 for(boundary = totalnumcoeffs, step = totalnumcoeffs / p_alpha,
00819 d = 0;
00820 step >= 1; step /= p_alpha, boundary /= p_alpha, d++) {
00821 for(i = 0; i < totalnumcoeffs; ) {
00822 int limit = i + boundary;
00823
00824
00825 int first = i;
00826 i += step;
00827
00828 for(j = 1; i < limit; i += step, j++) {
00829 arrtmp[i] = arrtmp[first] * hermite_map.get(d, j);
00830 }
00831
00832 arrtmp[first] *= hermite_map.get(d, 0);
00833 }
00834 }
00835 }
00836 else {
00837 for(d = 0; d < dim; d++) {
00838 arrtmp[0] *= hermite_map.get(d, 0);
00839 }
00840 }
00841
00842 for(j = 0; j < totalnumcoeffs; j++) {
00843 multipolesum += mcoeffs[j] * arrtmp[j];
00844 }
00845
00846 densities_[row_num] += multipolesum;
00847 }
00848 }
00849
00863 double EvaluateLocalExpansion_(int row_q, Vector &x_Q, double h,
00864 int query_box_num, Matrix &lcoeffsb,
00865 int totalnumcoeffs, int p_alpha) {
00866
00867 int dim = qset_.n_rows();
00868 Vector x_Q_to_x_q;
00869 x_Q_to_x_q.Init(dim);
00870 int i, j, boundary, step;
00871 double multipolesum = 0;
00872
00876 for(i = 0; i < dim; i++) {
00877 double x_Q_val = x_Q[i];
00878 double x_q_val = qset_.get(i, row_q);
00879 x_Q_to_x_q[i] = (x_q_val - x_Q_val) / h;
00880 }
00881
00882 {
00883 Vector lcoeffs;
00884 lcoeffsb.MakeColumnVector(query_box_num, &lcoeffs);
00885
00886 Vector tmp;
00887 tmp.Init(totalnumcoeffs);
00888 tmp[0] = 1.0;
00889
00890 if(totalnumcoeffs > 1) {
00891
00892 for(boundary = totalnumcoeffs, step = totalnumcoeffs / p_alpha,
00893 j = 0;
00894 step >= 1; step /= p_alpha, boundary /= p_alpha, j++) {
00895 for(i = 0; i < totalnumcoeffs; ) {
00896 int limit = i + boundary;
00897
00898 i += step;
00899
00900 for( ; i < limit; i += step) {
00901 tmp[i] = tmp[i - step] * x_Q_to_x_q[j];
00902 }
00903 }
00904 }
00905 }
00906
00907 for(i = 0; i < totalnumcoeffs; i++) {
00908 multipolesum += lcoeffs[i] * tmp[i];
00909 }
00910 }
00911 return multipolesum;
00912 }
00913
00936 void EvaluateLocalExpansionForAllQueries_
00937 (double delta, int nterms, int nallbx, ArrayList<int> &nsides,
00938 Matrix &locexp, int nlmax, ArrayList<ArrayList<int> > &queries_assigned,
00939 Matrix ¢er, int totalnumcoeffs) {
00940
00941 int i, j;
00942
00943
00944 for(i = 0; i < nallbx; i++) {
00945 ArrayList<int> &query_rows = queries_assigned[i];
00946 int ninbox = query_rows.size();
00947
00948 if(ninbox <= nlmax) {
00949 continue;
00950 }
00951 else {
00952
00953 for(j = 0; j < ninbox; j++) {
00954 int row_q = query_rows[j];
00955 Vector x_Q;
00956 center.MakeColumnVector(i, &x_Q);
00957
00958 double result = EvaluateLocalExpansion_(row_q, x_Q, sqrt(delta),
00959 i, locexp, totalnumcoeffs,
00960 nterms);
00961 densities_[row_q] += result;
00962 }
00963 }
00964 }
00965 }
00966
00999 void FinalizeSum_(double delta, int nterms, int nallbx,
01000 ArrayList<int> &nsides, Vector &sidelengths,
01001 Vector &mincoords, Matrix &locexp, int nfmax, int nlmax,
01002 int kdis, Matrix ¢er,
01003 ArrayList<ArrayList<int> > &queries_assigned,
01004 ArrayList<ArrayList<int> > &references_assigned,
01005 Matrix &mcoeffs) {
01006
01007 int dim = qset_.n_rows();
01008
01009 int totalnumcoeffs = (int) pow(nterms, dim);
01010
01011
01012 Assign_(nallbx, nsides, sidelengths, mincoords, center,
01013 queries_assigned, references_assigned);
01014
01015
01016 int i, j, k, l;
01017
01018
01019 for(i = 0; i < nallbx; i++) {
01020
01021 ArrayList<int> &reference_rows = references_assigned[i];
01022 int ninbox = reference_rows.size();
01023
01024
01025 if(ninbox <= 0) {
01026 continue;
01027 }
01028
01029
01030 else if(ninbox <= nfmax) {
01031
01032
01033 ArrayList <int> nbors;
01034 nbors.Init();
01035 MakeNeighbors_(i, nsides, kdis, nbors);
01036 int nnbors = nbors.size();
01037
01038 for(j = 0; j < nnbors; j++) {
01039
01040
01041 int query_box_num = nbors[j];
01042 ArrayList<int> &query_rows = queries_assigned[query_box_num];
01043 int ninnbr = query_rows.size();
01044
01045 if(ninnbr <= nlmax) {
01046
01047
01048 for(k = 0; k < ninnbr; k++) {
01049
01050 int query_row = query_rows[k];
01051 const double *query = qset_.GetColumnPtr(query_row);
01052
01053 for(l = 0; l < ninbox; l++) {
01054 int reference_row = reference_rows[l];
01055 const double *reference = rset_.GetColumnPtr(reference_row);
01056
01057 double dsqd =
01058 la::DistanceSqEuclidean(qset_.n_rows(), query, reference);
01059
01060 double pot = exp(-dsqd / delta);
01061
01062
01063 densities_[query_row] += pot;
01064 }
01065 }
01066
01067 }
01068
01069
01070
01071 else {
01072
01073 DirectLocalAccumulation_(reference_rows, query_box_num,
01074 locexp, delta,
01075 center.GetColumnPtr(query_box_num),
01076 nterms, totalnumcoeffs);
01077 }
01078
01079 }
01080 }
01081
01082
01083 else {
01084
01085 ComputeMultipoleCoeffs_(mcoeffs ,dim, nterms,
01086 totalnumcoeffs, i, delta, reference_rows,
01087 center.GetColumnPtr(i));
01088
01089
01090 ArrayList<int> nbors;
01091 nbors.Init();
01092 MakeNeighbors_(i, nsides, kdis, nbors);
01093 int nnbors = nbors.size();
01094
01095 for(j = 0; j < nnbors; j++) {
01096 int query_box_num = nbors[j];
01097 ArrayList<int> &query_rows = queries_assigned[query_box_num];
01098 int ninnbr = query_rows.size();
01099
01100
01101 if(ninnbr <= nlmax) {
01102 EvaluateMultipoleExpansion_(query_rows, nterms,
01103 totalnumcoeffs, mcoeffs,
01104 i, delta, center.GetColumnPtr(i));
01105 }
01106
01107
01108 else {
01109
01110 TranslateMultipoleToLocal_(i, query_box_num,
01111 mcoeffs, locexp,
01112 nterms, totalnumcoeffs, delta,
01113 center.GetColumnPtr(i),
01114 center.GetColumnPtr(query_box_num));
01115
01116 }
01117 }
01118 }
01119 }
01120
01121
01122 EvaluateLocalExpansionForAllQueries_(delta, nterms, nallbx, nsides, locexp,
01123 nlmax, queries_assigned, center,
01124 totalnumcoeffs);
01125 }
01126
01151 void GaussTransform_(double delta, int nterms, int nallbx,
01152 ArrayList<int> &nsides,
01153 Vector &sidelengths, Vector &mincoords,
01154 Matrix &locexp, Matrix ¢er,
01155 ArrayList<ArrayList<int> > &queries_assigned,
01156 ArrayList<ArrayList<int> > &references_assigned,
01157 Matrix &mcoeffs) {
01158
01159 int dim = qset_.n_rows();
01160 int kdis = (int) (sqrt(log(tau_) * -2.0) + 1);
01161
01162
01163
01164 int nfmax = (int) pow(nterms, dim - 1) + 2;
01165 int nlmax = nfmax;
01166
01167
01168
01169 FinalizeSum_(delta, nterms, nallbx, nsides, sidelengths, mincoords,
01170 locexp, nfmax, nlmax, kdis, center, queries_assigned,
01171 references_assigned, mcoeffs);
01172 }
01173
01177 void NormalizeDensities_() {
01178 double norm_const = kernel_.CalcNormConstant(qset_.n_rows()) *
01179 rset_.n_cols();
01180
01181 for(index_t q = 0; q < qset_.n_cols(); q++) {
01182 densities_[q] /= norm_const;
01183 }
01184 }
01185
01186 public:
01187
01189
01191 FGTKde() {}
01192
01194 ~FGTKde() {}
01195
01197
01203 void get_density_estimates(Vector *results) {
01204 results->Init(densities_.length());
01205
01206 for(index_t i = 0; i < densities_.length(); i++) {
01207 (*results)[i] = densities_[i];
01208 }
01209 }
01210
01212
01220 void Init(Matrix &qset, Matrix &rset, struct datanode *module_in) {
01221
01222
01223 module_ = module_in;
01224
01225
01226 kernel_.Init(fx_param_double_req(module_, "bandwidth"));
01227
01228
01229
01230 qset_.Copy(qset);
01231 densities_.Init(qset_.n_cols());
01232 rset_.Copy(rset);
01233
01234
01235 tau_ = fx_param_double(module_, "absolute_error", 0.1);
01236 }
01237
01240 void Compute() {
01241
01242 double interaction_radius;
01243 ArrayList<int> nsides;
01244 Vector sidelengths;
01245 Vector mincoords;
01246 int nboxes, nterms;
01247 int dim = rset_.n_rows();
01248
01249 nsides.Init(rset_.n_rows());
01250 sidelengths.Init(rset_.n_rows());
01251 mincoords.Init(rset_.n_rows());
01252
01253 printf("Computing FGT KDE...\n");
01254
01255
01256 densities_.SetZero();
01257
01258 fx_timer_start(module_, "fgt_kde_init");
01259 FastGaussTransformPreprocess_(&interaction_radius, nsides, sidelengths,
01260 mincoords, &nboxes, &nterms);
01261 fx_timer_stop(module_, "fgt_kde_init");
01262
01263
01264 msea_.Init(nterms - 1, qset_.n_rows());
01265
01266
01267 Matrix center;
01268 center.Init(dim, nboxes);
01269
01270
01271 Matrix locexp;
01272 locexp.Init((int) pow(nterms, dim), nboxes);
01273 locexp.SetZero();
01274
01275
01276 ArrayList<ArrayList<int> > queries_assigned;
01277 queries_assigned.Init(nboxes);
01278
01279 for(index_t i = 0; i < nboxes; i++) {
01280 queries_assigned[i].Init();
01281 }
01282
01283
01284 ArrayList<ArrayList<int> > references_assigned;
01285 references_assigned.Init(nboxes);
01286
01287 for(index_t i = 0; i < nboxes; i++) {
01288 references_assigned[i].Init();
01289 }
01290
01291
01292 Matrix mcoeffs;
01293 mcoeffs.Init((int)pow(nterms, dim), nboxes);
01294 mcoeffs.SetZero();
01295
01296 double delta = 2 * kernel_.bandwidth_sq();
01297
01298 fx_timer_start(module_, "fgt_kde");
01299 GaussTransform_(delta, nterms, nboxes, nsides, sidelengths, mincoords,
01300 locexp, center, queries_assigned, references_assigned,
01301 mcoeffs);
01302
01303
01304 NormalizeDensities_();
01305 fx_timer_stop(module_, "fgt_kde");
01306 printf("FGT KDE completed...\n");
01307 }
01308
01316 void PrintDebug() {
01317
01318 FILE *stream = stdout;
01319 const char *fname = NULL;
01320
01321 if((fname = fx_param_str(module_, "fgt_kde_output", NULL)) != NULL) {
01322 stream = fopen(fname, "w+");
01323 }
01324 for(index_t q = 0; q < qset_.n_cols(); q++) {
01325 fprintf(stream, "%g\n", densities_[q]);
01326 }
01327
01328 if(stream != stdout) {
01329 fclose(stream);
01330 }
01331 }
01332
01333 };
01334
01335 #endif