fgt_kde.h

Go to the documentation of this file.
00001 /* MLPACK 0.2
00002  *
00003  * Copyright (c) 2008, 2009 Alexander Gray,
00004  *                          Garry Boyer,
00005  *                          Ryan Riegel,
00006  *                          Nikolaos Vasiloglou,
00007  *                          Dongryeol Lee,
00008  *                          Chip Mappus, 
00009  *                          Nishant Mehta,
00010  *                          Hua Ouyang,
00011  *                          Parikshit Ram,
00012  *                          Long Tran,
00013  *                          Wee Chin Wong
00014  *
00015  * Copyright (c) 2008, 2009 Georgia Institute of Technology
00016  *
00017  * This program is free software; you can redistribute it and/or
00018  * modify it under the terms of the GNU General Public License as
00019  * published by the Free Software Foundation; either version 2 of the
00020  * License, or (at your option) any later version.
00021  *
00022  * This program is distributed in the hope that it will be useful, but
00023  * WITHOUT ANY WARRANTY; without even the implied warranty of
00024  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00025  * General Public License for more details.
00026  *
00027  * You should have received a copy of the GNU General Public License
00028  * along with this program; if not, write to the Free Software
00029  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
00030  * 02110-1301, USA.
00031  */
00059 #ifndef FGT_KDE_H
00060 #define FGT_KDE_H
00061 
00062 #include <fastlib/fastlib.h>
00063 #include "..//series_expansion/mult_series_expansion_aux.h"
00064 
00065 
00085 class FGTKde {
00086   
00087   FORBID_ACCIDENTAL_COPIES(FGTKde);
00088 
00089  private:
00090 
00092 
00094   struct datanode *module_;
00095 
00097   Matrix qset_;
00098 
00100   Matrix rset_;
00101 
00103   GaussianKernel kernel_;
00104 
00106   Vector densities_;
00107   
00109   double tau_;
00110 
00112   MultSeriesExpansionAux msea_;
00113   
00115 
00130   void FastGaussTransformPreprocess_(double *interaction_radius, 
00131                                      ArrayList<int> &nsides, 
00132                                      Vector &sidelengths, Vector &mincoords, 
00133                                      int *nboxes, int *nterms) {
00134   
00135     // Compute the interaction radius.
00136     double bandwidth = sqrt(kernel_.bandwidth_sq());
00137     *interaction_radius = sqrt(-2.0 * kernel_.bandwidth_sq() * log(tau_));
00138 
00139     int di, n, num_rows = rset_.n_cols();
00140     int dim = rset_.n_rows();
00141 
00142     // Discretize the grid space into boxes.
00143     Vector maxcoords;
00144     maxcoords.Init(dim);
00145     maxcoords.SetAll(-DBL_MAX);
00146     double boxside = -1.0;
00147     *nboxes = 1;
00148 
00149     for(di = 0; di < dim; di++) {
00150       mincoords[di] = DBL_MAX;
00151     }
00152     
00153     for(n = 0; n < num_rows; n++) {
00154       for(di = 0; di < dim; di++) {
00155         if(mincoords[di] > rset_.get(di, n)) {
00156           mincoords[di] = rset_.get(di, n);
00157         }
00158         if(maxcoords[di] < rset_.get(di, n)) {
00159           maxcoords[di] = rset_.get(di, n);
00160         }
00161       }
00162     }
00163 
00164     // Figure out how many boxes lie along each direction.
00165     for(di = 0; di < dim; di++) {
00166 
00167       nsides[di] = (int) 
00168         ((maxcoords[di] - mincoords[di]) / bandwidth + 1);
00169 
00170       (*nboxes) = (*nboxes) * nsides[di];
00171       double tmp = (maxcoords[di] - mincoords[di]) /
00172         (nsides[di] * 2 * bandwidth);
00173     
00174       if(tmp > boxside) {
00175         boxside = tmp;
00176       }
00177 
00178       sidelengths[di] = (maxcoords[di] - mincoords[di]) / 
00179         ((double) nsides[di]);
00180 
00181     }
00182     
00183     int ip = 0;
00184     double two_r = 2.0 * boxside;
00185     double one_minus_two_r = 1.0 - two_r;
00186     double ret = 1.0 / pow(one_minus_two_r * one_minus_two_r, dim);
00187     double factorialvalue = 1.0;
00188     double r_raised_to_p_alpha = 1.0;
00189     double first_factor, second_factor;
00190     double ret2;
00191 
00192     // Determine the truncation order.
00193     do {
00194       ip++;
00195       factorialvalue *= ip;
00196     
00197       r_raised_to_p_alpha *= two_r;
00198       first_factor = 1.0 - r_raised_to_p_alpha;
00199       first_factor *= first_factor;
00200       second_factor = r_raised_to_p_alpha * (2.0 - r_raised_to_p_alpha)
00201         / sqrt(factorialvalue);
00202     
00203       ret2 = ret * (pow((first_factor + second_factor), dim) -
00204                     pow(first_factor, dim));
00205     
00206     } while(ret2 > tau_);
00207 
00208     *nterms = ip;
00209   }
00210 
00232   int MultiDimIndexInSingleArray_(ArrayList<int> &coords, 
00233                                   ArrayList<int> &n) {
00234     
00235     int sum = 0;
00236     
00237     for(index_t i = 0; i < coords.size(); i++) {
00238       int prod = 1;
00239       for(index_t j = 0; j < i; j++) {
00240         prod *= n[j];
00241       }
00242       sum += coords[i] * prod;
00243     }
00244     return sum;
00245   }
00246 
00255   void SingleDimIndexInMultiArray_(ArrayList<int> &n, int index, 
00256                                    ArrayList<int> &coords) {
00257 
00258     for(index_t i = 0; i < coords.size(); i++) {
00259       int this_coord = index % n[i];
00260       index /= n[i];
00261       coords[i] = this_coord;
00262     }
00263   }
00264 
00276   int IsOngrid_(ArrayList<int> &old_coords, int delta, 
00277                 ArrayList<int> &new_coords, ArrayList<int> &nsides) {
00278     
00279     int dim = old_coords.size();
00280     for(index_t d = 0; d < dim; d++) {
00281       int new_val = old_coords[d] + delta;
00282       
00283       if(new_val < 0 || new_val >= nsides[d]) {
00284         return 0;
00285       }
00286       
00287       new_coords[d] = new_val;
00288     }
00289     return 1;
00290   }
00291 
00301   void MakeNeighbors_(int ibox, ArrayList<int> &nsides, int kdis, 
00302                       ArrayList<int> &ret) {
00303 
00304     // Compute actual vector position of a given box.
00305     ArrayList<int> coords;
00306     coords.Init(nsides.size());
00307     SingleDimIndexInMultiArray_(nsides, ibox, coords);
00308 
00309     ArrayList<int> dummy_n;
00310     dummy_n.Init(coords.size());
00311     ArrayList<int> all_ones;
00312     all_ones.Init(coords.size()); 
00313     for(index_t i = 0; i < coords.size(); i++) {
00314       dummy_n[i] = 2 * kdis + 1;
00315       all_ones[i] = kdis;
00316     }
00317 
00318     int num_neighbors = (2 * kdis + 1);
00319     int i;
00320 
00321     // number of neighbors in $D$ dimension is num_neighbors^D
00322     num_neighbors = (int) pow(num_neighbors, coords.size());
00323 
00324     // We need to generate every combination of [-1, 0, 1] (except [0, 0, 0]).
00325     // This is slightly hacky but also pretty crafty. We can easily enumerate 
00326     // these combinations by viewing the space as a grid in d dimensions with 
00327     // 3 cells in each. Therefore, we get coordinates in [0, 1, 2]. We achieve 
00328     // our goal by simply subtracting 1. We filter out values that aren't on 
00329     // the grid.
00330     ArrayList<int> delta, new_coords;
00331     delta.Init(coords.size());
00332     new_coords.Init(coords.size());
00333 
00334     for(i = 0; i < num_neighbors; i++) {
00335       SingleDimIndexInMultiArray_(dummy_n, i, delta);
00336 
00337       for(index_t j = 0; j < coords.size(); j++) {
00338         new_coords[j] = coords[j] + delta[j];
00339       }
00340       int ongrid = IsOngrid_(new_coords, -kdis, new_coords, nsides);
00341       
00342       if(ongrid) {
00343         *(ret.PushBackRaw()) = 
00344           MultiDimIndexInSingleArray_(new_coords, nsides);
00345       }
00346       
00347     }
00348   }
00349 
00366   void Assign_(int nallbx, ArrayList<int> &nsides, Vector &sidelengths, 
00367                Vector &mincoords, Matrix &center, 
00368                ArrayList<ArrayList<int> > &queries_assigned, 
00369                ArrayList<ArrayList<int> > &references_assigned) {
00370 
00371     int r;
00372     int num_query_rows = qset_.n_cols();
00373     int num_ref_rows = rset_.n_cols();
00374     int dim = qset_.n_rows();
00375     int di;
00376   
00377     // Assign the reference points.
00378     for(r = 0; r < num_ref_rows; r++) {
00379 
00380       int boxnum = 0;
00381 
00382       for(di = dim - 1; di >= 0; di--) {
00383         int nside = nsides[di];
00384         double h = sidelengths[di];
00385         int binnum = (int) floor((rset_.get(di, r) - mincoords[di]) / h);
00386       
00387         binnum = max(0, min(binnum, nside - 1));
00388         boxnum = boxnum * nside + binnum;
00389       }
00390       *(references_assigned[boxnum].PushBackRaw()) = r;
00391     }
00392 
00393     // Assign the query points
00394     for(r = 0; r < num_query_rows; r++) {
00395     
00396       int boxnum = 0;
00397 
00398       for(di = dim - 1; di >= 0; di--) {
00399         int nside = nsides[di];
00400         double h = sidelengths[di];
00401         int binnum = (int) floor((qset_.get(di, r) - mincoords[di]) / h);
00402       
00403         binnum = max(0, min(binnum, nside - 1));
00404         boxnum = boxnum * nside + binnum;
00405       }
00406       *(queries_assigned[boxnum].PushBackRaw()) = r;
00407     }
00408   
00409     // Create centers for all boxes.
00410     for(r = 0; r < nallbx; r++) {
00411       int sf = nallbx;
00412       int ind = r, rem;
00413       Vector box_center;    
00414       center.MakeColumnVector(r, &box_center);
00415 
00416       for(di = dim - 1; di >= 0; di--) {
00417         int nside = nsides[di];
00418         double h = sidelengths[di];
00419         sf /= nside;
00420         rem = ind % sf;
00421         ind = ind / sf;
00422 
00423         box_center[di] = mincoords[di] + (ind + 0.5) * h;
00424 
00425         ind = rem;
00426       }
00427     }
00428   }
00429 
00446   void TranslateMultipoleToLocal_(int ref_box_num, int query_box_num,
00447                                   Matrix &mcoeffsb, Matrix &lcoeffsb,
00448                                   int p_alpha, int totalnumcoeffs,
00449                                   double bwsqd_2, const double *hrcentroid,
00450                                   const double *dest_hrcentroid) {
00451 
00452     double bandwidth = sqrt(bwsqd_2);
00453     int j, k, l, d, step;
00454 
00455     int dim = qset_.n_rows();
00456 
00457     Vector lcoeffs;
00458     lcoeffsb.MakeColumnVector(query_box_num, &lcoeffs);
00459     Vector mcoeffs;
00460     mcoeffsb.MakeColumnVector(ref_box_num, &mcoeffs);
00461   
00462     {
00463       Vector dest_minus_parent;
00464       dest_minus_parent.Init(dim);
00465       
00466       la::SubOverwrite(dim, dest_hrcentroid, hrcentroid, 
00467                        dest_minus_parent.ptr());
00468 
00469       int limit = 2 * p_alpha - 2;
00470       Matrix hermite_map;
00471       hermite_map.Init(dim, limit + 1);
00472       Matrix arrtmp;
00473       arrtmp.Init(dim, totalnumcoeffs);
00474 
00475       Vector C_k_neg;
00476       C_k_neg.Alias(msea_.get_neg_inv_multiindex_factorials());
00477     
00478       for(j = 0; j < dim; j++) {
00479         double coord_div_band = dest_minus_parent[j] / bandwidth;
00480         double d2 = 2 * coord_div_band;
00481         double facj = exp(-coord_div_band * coord_div_band);
00482       
00483         hermite_map.set(j, 0, facj);
00484       
00485         if(p_alpha > 1) {
00486           hermite_map.set(j, 1, d2 * facj);
00487         
00488           for(k = 1; k < limit; k++) {
00489             int k2 = k * 2;
00490             hermite_map.set(j, k + 1,
00491                             d2 * hermite_map.get(j, k) - k2 * 
00492                             hermite_map.get(j, k - 1));
00493           }
00494         }
00495         
00496         for(l = 0; l < totalnumcoeffs; l++) {
00497           arrtmp.set(j, l, 0.0);
00498         }
00499       }
00500     
00501       step = totalnumcoeffs / p_alpha;
00502       d = 0;
00503     
00504       for(j = 0; j < totalnumcoeffs; j++) {
00505         const ArrayList<int> &mapping = msea_.get_multiindex(j);
00506         
00507         for(k = 0, l = j % step; k < p_alpha; k++, l += step) {
00508           arrtmp.set(d, j, arrtmp.get(d, j) + mcoeffs[l] * 
00509                      hermite_map.get(d, mapping[d] + k));
00510         }
00511       }
00512     
00513     
00514       if(p_alpha > 1) {
00515         int boundary, boundary2;
00516 
00517         for(boundary = totalnumcoeffs / p_alpha, step = step / p_alpha, d = 1; 
00518             step >= 1; step /= p_alpha, d++, boundary /= p_alpha) {
00519         
00520           boundary2 = 0;
00521         
00522           for(j = 0; j < totalnumcoeffs; j++) {
00523             const ArrayList<int> &mapping = msea_.get_multiindex(j);
00524 
00525             if(j % boundary == 0) {
00526               boundary2 += boundary;
00527             }
00528           
00529             for(k = 0; k < p_alpha; k++) {
00530               
00531               int jump = (j + step * k) % boundary2;
00532             
00533               if(jump < boundary2 - boundary) {
00534                 jump += boundary2 - boundary;
00535               }
00536             
00537               const ArrayList<int> &mapping2 = msea_.get_multiindex(jump);
00538 
00539               arrtmp.set(d, j,
00540                          arrtmp.get(d, j) +
00541                          arrtmp.get(d - 1, jump) * 
00542                          hermite_map.get(d, mapping2[d] + mapping[d]));
00543             }
00544           }
00545         }
00546       }
00547     
00548       d = dim - 1;
00549       
00550       for(j = 0; j < totalnumcoeffs; j++) {
00551         lcoeffs[j] = lcoeffs[j] + C_k_neg[j] * arrtmp.get(d, j);
00552       }
00553     }
00554   }
00555 
00569   void ComputeMultipoleCoeffs_(Matrix &mcoeffs, int dim,
00570                                int p_alpha, int totalnumcoeffs, 
00571                                int ref_box_num, double bwsqd_two, 
00572                                ArrayList<int> &rows, const double *x_R) {
00573     
00574     Vector A_k;
00575     mcoeffs.MakeColumnVector(ref_box_num, &A_k);
00576     double bw_times_sqrt_two = sqrt(bwsqd_two);
00577 
00578     // If the thing has been computed already, return. Otherwise, compute it
00579     // and store it as a cached sufficient statistics.
00580     if(A_k[0] != 0) {
00581       return;
00582     }
00583 
00584     Vector C_k;
00585     C_k.Alias(msea_.get_inv_multiindex_factorials());
00586   
00587     Vector tmp;
00588     tmp.Init(totalnumcoeffs);
00589     Vector x_r;
00590     x_r.Init(dim);
00591     int num_rows = rows.size();
00592     int step, boundary;
00593     int r, i, j;
00594   
00595     A_k[0] = num_rows;
00596     for(i = 1; i < totalnumcoeffs; i++) {
00597       A_k[i] = 0.0;
00598     }
00599   
00600     if(p_alpha > 1) {
00601     
00602       for(r = 0; r < num_rows; r++) {
00603       
00604         int row_num = rows[r];
00605       
00606         for(i = 0; i < dim; i++) {
00607           x_r[i] = (rset_.get(i, row_num) - x_R[i]) / bw_times_sqrt_two;
00608         }
00609       
00610         tmp[0] = 1.0;
00611       
00612         for(boundary = totalnumcoeffs, step = totalnumcoeffs / p_alpha,
00613               j = 0;
00614             step >= 1; step /= p_alpha, boundary /= p_alpha, j++) {
00615           for(i = 0; i < totalnumcoeffs; ) {
00616             int limit = i + boundary;
00617           
00618             i += step;
00619           
00620             for( ; i < limit; i += step) {
00621               tmp[i] = tmp[i - step] * x_r[j];
00622             }
00623           }
00624         }
00625       
00626       
00627         for(i = 1; i < totalnumcoeffs; i++) {
00628           A_k[i] = A_k[i] + tmp[i];
00629         }
00630       }
00631     }
00632   
00633     for(r = 1; r < totalnumcoeffs; r++) {
00634       A_k[r] = A_k[r] * C_k[r];
00635     }
00636 
00637     return;
00638   }
00639 
00653   void DirectLocalAccumulation_(ArrayList<int> &rows, int query_box_num,
00654                                 Matrix &locexps, double delta,
00655                                 const double *dest_hrcentroid, int p_alpha, 
00656                                 int totalnumcoeffs) {
00657 
00658     int num_rows = rows.size();
00659     int r, d, boundary, step, i, j, k;
00660 
00661     // Retrieve the centroid
00662     int dim = rset_.n_rows();
00663     int limit2 = p_alpha - 1;
00664     Matrix hermite_map;
00665     hermite_map.Init(dim, p_alpha);
00666     Vector arrtmp;
00667     arrtmp.Init(totalnumcoeffs);
00668     Vector x_r_minus_x_Q;
00669     x_r_minus_x_Q.Init(dim);
00670     double bandwidth = sqrt(delta);
00671     Vector neg_inv_multiindex_factorials;
00672     neg_inv_multiindex_factorials.Alias
00673       (msea_.get_neg_inv_multiindex_factorials());
00674 
00675     Vector arr;
00676     locexps.MakeColumnVector(query_box_num, &arr);
00677 
00678     // For each reference point.
00679     for(r = 0; r < num_rows; r++) {
00680 
00681       int row_num = rows[r];
00682     
00683       // Calculate (x_r - x_Q)
00684       for(d = 0; d < dim; d++) {
00685         x_r_minus_x_Q[d] = dest_hrcentroid[d] - rset_.get(d, row_num);
00686       }
00687     
00688       // Compute the necessary Hermite precomputed map based on the
00689       // coordinate diference.
00690       for(d = 0; d < dim; d++) {
00691 
00692         double coord_div_band = x_r_minus_x_Q[d] / bandwidth;
00693         double d2 = 2 * coord_div_band;
00694         double facj = exp(-coord_div_band * coord_div_band);
00695       
00696         hermite_map.set(d, 0, facj);
00697       
00698         if(p_alpha > 1) {
00699           hermite_map.set(d, 1, d2 * facj);
00700         
00701           for(k = 1; k < limit2; k++) {
00702             int k2 = k * 2;
00703             hermite_map.set(d, k + 1,
00704                             d2 * hermite_map.get(d, k) - k2 * 
00705                             hermite_map.get(d, k - 1));
00706           }
00707         }
00708       }
00709     
00710       // Seed to start out the coefficients.
00711       arrtmp[0] = 1.0;
00712     
00713       if(p_alpha > 1) {
00714       
00715         // Compute the Taylor coefficients directly...
00716         for(boundary = totalnumcoeffs, step = totalnumcoeffs / p_alpha,
00717               d = 0;
00718             step >= 1; step /= p_alpha, boundary /= p_alpha, d++) {
00719           for(i = 0; i < totalnumcoeffs; ) {
00720             int limit = i + boundary;
00721           
00722             // Skip the first one.
00723             int first = i;
00724             i += step;
00725           
00726             for(j = 1; i < limit; i += step, j++) {
00727               arrtmp[i] = arrtmp[first] * hermite_map.get(d, j);
00728             }
00729           
00730             arrtmp[first] *= hermite_map.get(d, 0);
00731           }
00732         }
00733       }
00734       else {
00735         for(d = 0; d < dim; d++) {
00736           arrtmp[0] *= hermite_map.get(d, 0);
00737         }
00738       }
00739     
00740       for(j = 0; j < totalnumcoeffs; j++) {
00741         arr[j] = arr[j] + neg_inv_multiindex_factorials[j] * arrtmp[j];
00742       }    
00743     }
00744   }
00745 
00760   void EvaluateMultipoleExpansion_(ArrayList<int> &rows, int p_alpha,
00761                                    int totalnumcoeffs, Matrix &mcoeffsb,
00762                                    int ref_box_num, double bwsqd_times_2, 
00763                                    const double *ref_hrcentroid) {
00764     
00765     double bandwidth = sqrt(bwsqd_times_2);
00766     int num_query_rows = rows.size();
00767     int r, d, boundary, step, i, j, k;
00768 
00769     // Retrieve the reference centroid.
00770     int dim = qset_.n_rows();
00771     Vector x_q_minus_x_R;
00772     x_q_minus_x_R.Init(dim);
00773     Matrix hermite_map;
00774     hermite_map.Init(dim, p_alpha);
00775     Vector arrtmp;
00776     arrtmp.Init(totalnumcoeffs);
00777     int limit2 = p_alpha - 1;
00778     Vector mcoeffs;
00779     mcoeffsb.MakeColumnVector(ref_box_num, &mcoeffs);
00780   
00781     // For each data point,
00782     for(r = 0; r < num_query_rows; r++) {
00783 
00784       double multipolesum = 0.0;
00785       int row_num = rows[r];
00786 
00787       // Calculate (x_q - x_R)
00788       for(d = 0; d < dim; d++) {
00789         x_q_minus_x_R[d] = qset_.get(d, row_num) - ref_hrcentroid[d];
00790       }
00791       
00792       // Compute the necessary Hermite precomputed map based on the coordinate
00793       // difference.
00794       for(d = 0; d < dim; d++) {
00795         double coord_div_band = x_q_minus_x_R[d] / bandwidth;
00796         double d2 = 2 * coord_div_band;
00797         double facj = exp(-coord_div_band * coord_div_band);
00798       
00799         hermite_map.set(d, 0, facj);
00800       
00801         if(p_alpha > 1) {
00802           hermite_map.set(d, 1, d2 * facj);
00803         
00804           for(k = 1; k < limit2; k++) {
00805             int k2 = k * 2;
00806             hermite_map.set(d, k + 1,
00807                             d2 * hermite_map.get(d, k) - k2 * 
00808                             hermite_map.get(d, k - 1));
00809           }
00810         }
00811       }
00812     
00813       // Seed to start out the coefficients.
00814       arrtmp[0] = 1.0;
00815     
00816       if(p_alpha > 1) {
00817       
00818         for(boundary = totalnumcoeffs, step = totalnumcoeffs / p_alpha,
00819               d = 0;
00820             step >= 1; step /= p_alpha, boundary /= p_alpha, d++) {
00821           for(i = 0; i < totalnumcoeffs; ) {
00822             int limit = i + boundary;
00823           
00824             // Skip the first one.
00825             int first = i;
00826             i += step;
00827             
00828             for(j = 1; i < limit; i += step, j++) {
00829               arrtmp[i] = arrtmp[first] * hermite_map.get(d, j);
00830             }
00831           
00832             arrtmp[first] *= hermite_map.get(d, 0);
00833           }
00834         }
00835       }
00836       else {
00837         for(d = 0; d < dim; d++) {
00838           arrtmp[0] *= hermite_map.get(d, 0);
00839         }
00840       }
00841     
00842       for(j = 0; j < totalnumcoeffs; j++) {
00843         multipolesum += mcoeffs[j] * arrtmp[j];
00844       }
00845 
00846       densities_[row_num] += multipolesum;
00847     }
00848   }
00849 
00863   double EvaluateLocalExpansion_(int row_q, Vector &x_Q, double h, 
00864                                  int query_box_num, Matrix &lcoeffsb, 
00865                                  int totalnumcoeffs, int p_alpha) {
00866     
00867     int dim = qset_.n_rows();
00868     Vector x_Q_to_x_q;
00869     x_Q_to_x_q.Init(dim);
00870     int i, j, boundary, step;
00871     double multipolesum = 0;
00872 
00876     for(i = 0; i < dim; i++) {
00877       double x_Q_val = x_Q[i];
00878       double x_q_val = qset_.get(i, row_q);
00879       x_Q_to_x_q[i] = (x_q_val - x_Q_val) / h;
00880     }
00881 
00882     {
00883       Vector lcoeffs;
00884       lcoeffsb.MakeColumnVector(query_box_num, &lcoeffs);
00885 
00886       Vector tmp;
00887       tmp.Init(totalnumcoeffs);
00888       tmp[0] = 1.0;
00889     
00890       if(totalnumcoeffs > 1) {
00891         
00892         for(boundary = totalnumcoeffs, step = totalnumcoeffs / p_alpha,
00893               j = 0;
00894             step >= 1; step /= p_alpha, boundary /= p_alpha, j++) {
00895           for(i = 0; i < totalnumcoeffs; ) {
00896             int limit = i + boundary;
00897           
00898             i += step;
00899           
00900             for( ; i < limit; i += step) {
00901               tmp[i] = tmp[i - step] * x_Q_to_x_q[j];
00902             }
00903           }
00904         }    
00905       }
00906     
00907       for(i = 0; i < totalnumcoeffs; i++) {
00908         multipolesum += lcoeffs[i] * tmp[i];
00909       }
00910     }
00911     return multipolesum;
00912   }
00913 
00936   void EvaluateLocalExpansionForAllQueries_
00937     (double delta, int nterms, int nallbx, ArrayList<int> &nsides, 
00938      Matrix &locexp, int nlmax, ArrayList<ArrayList<int> > &queries_assigned, 
00939      Matrix &center, int totalnumcoeffs) {
00940 
00941     int i, j;
00942     
00943     // GO through all query boxes.
00944     for(i = 0; i < nallbx; i++) {
00945       ArrayList<int> &query_rows = queries_assigned[i];
00946       int ninbox = query_rows.size();
00947       
00948       if(ninbox <= nlmax) {
00949         continue;
00950       }
00951       else {
00952         
00953         for(j = 0; j < ninbox; j++) {
00954           int row_q = query_rows[j];
00955           Vector x_Q;
00956           center.MakeColumnVector(i, &x_Q);
00957           
00958           double result = EvaluateLocalExpansion_(row_q, x_Q, sqrt(delta), 
00959                                                   i, locexp, totalnumcoeffs, 
00960                                                   nterms);
00961           densities_[row_q] += result;
00962         }
00963       }
00964     }
00965   }
00966 
00999   void FinalizeSum_(double delta, int nterms, int nallbx, 
01000                     ArrayList<int> &nsides, Vector &sidelengths, 
01001                     Vector &mincoords, Matrix &locexp, int nfmax, int nlmax, 
01002                     int kdis, Matrix &center, 
01003                     ArrayList<ArrayList<int> > &queries_assigned,
01004                     ArrayList<ArrayList<int> > &references_assigned, 
01005                     Matrix &mcoeffs) {
01006 
01007     int dim = qset_.n_rows();
01008 
01009     int totalnumcoeffs = (int) pow(nterms, dim);
01010 
01011     // Step 1: Assign query points and reference points to boxes.
01012     Assign_(nallbx, nsides, sidelengths, mincoords, center, 
01013             queries_assigned, references_assigned);
01014 
01015     // Initialize local expansions to zero.
01016     int i, j, k, l;
01017 
01018     // Process all reference boxes
01019     for(i = 0; i < nallbx; i++) {
01020 
01021       ArrayList<int> &reference_rows = references_assigned[i];
01022       int ninbox = reference_rows.size();
01023 
01024       // If the box contains no reference points, skip it.
01025       if(ninbox <= 0) {
01026         continue;
01027       }
01028 
01029       // In this case, no far-field expansion is created
01030       else if(ninbox <= nfmax) {
01031 
01032         // Get the query boxes that are in the interaction range.
01033         ArrayList <int> nbors;
01034         nbors.Init();
01035         MakeNeighbors_(i, nsides, kdis, nbors);
01036         int nnbors = nbors.size();
01037 
01038         for(j = 0; j < nnbors; j++) {
01039 
01040           // Number of query points in this neighboring box.
01041           int query_box_num = nbors[j];
01042           ArrayList<int> &query_rows = queries_assigned[query_box_num];
01043           int ninnbr = query_rows.size();
01044         
01045           if(ninnbr <= nlmax) {
01046 
01047             // Direct Interaction
01048             for(k = 0; k < ninnbr; k++) {
01049             
01050               int query_row = query_rows[k];
01051               const double *query = qset_.GetColumnPtr(query_row);
01052 
01053               for(l = 0; l < ninbox; l++) {
01054                 int reference_row = reference_rows[l];
01055                 const double *reference = rset_.GetColumnPtr(reference_row);
01056 
01057                 double dsqd = 
01058                   la::DistanceSqEuclidean(qset_.n_rows(), query, reference);
01059 
01060                 double pot = exp(-dsqd / delta);
01061 
01062                 // Here, I hard-coded to do only one bandwidth.
01063                 densities_[query_row] += pot;
01064               }
01065             }
01066           
01067           }
01068 
01069           // In this case, take each reference point and convert into the 
01070           // Taylor series.
01071           else {
01072           
01073             DirectLocalAccumulation_(reference_rows, query_box_num,
01074                                      locexp, delta,
01075                                      center.GetColumnPtr(query_box_num),
01076                                      nterms, totalnumcoeffs);
01077           }
01078         
01079         }
01080       }
01081 
01082       // In this case, create a far field expansion.
01083       else {
01084 
01085         ComputeMultipoleCoeffs_(mcoeffs ,dim, nterms,
01086                                 totalnumcoeffs, i, delta, reference_rows,
01087                                 center.GetColumnPtr(i));
01088 
01089         // Get the query boxes that are in the interaction range.
01090         ArrayList<int> nbors;
01091         nbors.Init();
01092         MakeNeighbors_(i, nsides, kdis, nbors);
01093         int nnbors = nbors.size();
01094 
01095         for(j = 0; j < nnbors; j++) {
01096           int query_box_num = nbors[j];
01097           ArrayList<int> &query_rows = queries_assigned[query_box_num];
01098           int ninnbr = query_rows.size();
01099 
01100           // If this is true, evaluate far field expansion at each query point.
01101           if(ninnbr <= nlmax) {
01102             EvaluateMultipoleExpansion_(query_rows, nterms,
01103                                         totalnumcoeffs, mcoeffs,
01104                                         i, delta, center.GetColumnPtr(i));
01105           }
01106 
01107           // In this case do far-field to local translation.
01108           else {
01109             
01110             TranslateMultipoleToLocal_(i, query_box_num,
01111                                        mcoeffs, locexp,
01112                                        nterms, totalnumcoeffs, delta,
01113                                        center.GetColumnPtr(i),
01114                                        center.GetColumnPtr(query_box_num));
01115           
01116           }
01117         }
01118       }
01119     }
01120 
01121     // Now evaluate the local expansions for all queries.
01122     EvaluateLocalExpansionForAllQueries_(delta, nterms, nallbx, nsides, locexp,
01123                                          nlmax, queries_assigned, center, 
01124                                          totalnumcoeffs);
01125   }
01126 
01151   void GaussTransform_(double delta, int nterms, int nallbx, 
01152                        ArrayList<int> &nsides, 
01153                        Vector &sidelengths, Vector &mincoords,
01154                        Matrix &locexp, Matrix &center,
01155                        ArrayList<ArrayList<int> > &queries_assigned, 
01156                        ArrayList<ArrayList<int> > &references_assigned,
01157                        Matrix &mcoeffs) {
01158 
01159     int dim = qset_.n_rows();
01160     int kdis = (int) (sqrt(log(tau_) * -2.0) + 1);
01161 
01162     // This is a slight modification of Strain's cutoff since he never
01163     // implemented this above 2 dimensions.
01164     int nfmax = (int) pow(nterms, dim - 1) + 2;
01165     int nlmax = nfmax;
01166 
01167     // Call gafexp to create all expansions on grid, evaluate all appropriate
01168     // far-field expansions and evaluate all appropriate direct interactions.
01169     FinalizeSum_(delta, nterms, nallbx, nsides, sidelengths, mincoords,
01170                  locexp, nfmax, nlmax, kdis, center, queries_assigned,
01171                  references_assigned, mcoeffs);
01172   }
01173 
01177   void NormalizeDensities_() {
01178     double norm_const = kernel_.CalcNormConstant(qset_.n_rows()) *
01179       rset_.n_cols();
01180 
01181     for(index_t q = 0; q < qset_.n_cols(); q++) {
01182       densities_[q] /= norm_const;
01183     }
01184   }
01185 
01186  public:
01187 
01189   
01191   FGTKde() {}
01192   
01194   ~FGTKde() {}
01195   
01197 
01203   void get_density_estimates(Vector *results) {
01204     results->Init(densities_.length());
01205 
01206     for(index_t i = 0; i < densities_.length(); i++) {
01207       (*results)[i] = densities_[i];
01208     }
01209   }
01210 
01212 
01220   void Init(Matrix &qset, Matrix &rset, struct datanode *module_in) {
01221 
01222     // initialize with the incoming module holding the paramters
01223     module_ = module_in;
01224 
01225     // initialize the kernel
01226     kernel_.Init(fx_param_double_req(module_, "bandwidth"));
01227 
01228     // set aliases to the query and reference datasets and initialize
01229     // query density sets
01230     qset_.Copy(qset);
01231     densities_.Init(qset_.n_cols());
01232     rset_.Copy(rset);
01233 
01234     // set accuracy
01235     tau_ = fx_param_double(module_, "absolute_error", 0.1);
01236   }
01237 
01240   void Compute() {
01241 
01242     double interaction_radius;
01243     ArrayList<int> nsides;
01244     Vector sidelengths;
01245     Vector mincoords;
01246     int nboxes, nterms;
01247     int dim = rset_.n_rows();
01248     
01249     nsides.Init(rset_.n_rows());
01250     sidelengths.Init(rset_.n_rows());
01251     mincoords.Init(rset_.n_rows());
01252 
01253     printf("Computing FGT KDE...\n");
01254     
01255     // initialize densities to zero
01256     densities_.SetZero();
01257 
01258     fx_timer_start(module_, "fgt_kde_init");
01259     FastGaussTransformPreprocess_(&interaction_radius, nsides, sidelengths,
01260                                   mincoords, &nboxes, &nterms);
01261     fx_timer_stop(module_, "fgt_kde_init");
01262 
01263     // precompute factorials
01264     msea_.Init(nterms - 1, qset_.n_rows());
01265     
01266     // stores the coordinate of each grid box
01267     Matrix center;
01268     center.Init(dim, nboxes);
01269 
01270     // stores the local expansion of each grid box
01271     Matrix locexp;
01272     locexp.Init((int) pow(nterms, dim), nboxes);
01273     locexp.SetZero();
01274 
01275     // stores the ids of query points assigned to each grid box
01276     ArrayList<ArrayList<int> > queries_assigned;
01277     queries_assigned.Init(nboxes);
01278     
01279     for(index_t i = 0; i < nboxes; i++) {
01280       queries_assigned[i].Init();
01281     }
01282 
01283     // stores the ids of reference points assigned to each grid box
01284     ArrayList<ArrayList<int> > references_assigned;
01285     references_assigned.Init(nboxes);
01286 
01287     for(index_t i = 0; i < nboxes; i++) {
01288       references_assigned[i].Init();
01289     }
01290 
01291     // stores the multipole moments of the reference points in each grid box
01292     Matrix mcoeffs;
01293     mcoeffs.Init((int)pow(nterms, dim), nboxes);
01294     mcoeffs.SetZero();
01295     
01296     double delta = 2 * kernel_.bandwidth_sq();
01297 
01298     fx_timer_start(module_, "fgt_kde");
01299     GaussTransform_(delta, nterms, nboxes, nsides, sidelengths, mincoords,
01300                     locexp, center, queries_assigned, references_assigned, 
01301                     mcoeffs);
01302 
01303     // normalize the sum
01304     NormalizeDensities_();
01305     fx_timer_stop(module_, "fgt_kde");
01306     printf("FGT KDE completed...\n");
01307   }
01308 
01316   void PrintDebug() {
01317 
01318     FILE *stream = stdout;
01319     const char *fname = NULL;
01320 
01321     if((fname = fx_param_str(module_, "fgt_kde_output", NULL)) != NULL) {
01322       stream = fopen(fname, "w+");
01323     }
01324     for(index_t q = 0; q < qset_.n_cols(); q++) {
01325       fprintf(stream, "%g\n", densities_[q]);
01326     }
01327     
01328     if(stream != stdout) {
01329       fclose(stream);
01330     }
01331   }
01332 
01333 };
01334 
01335 #endif