fft_kde.h

Go to the documentation of this file.
00001 /* MLPACK 0.2
00002  *
00003  * Copyright (c) 2008, 2009 Alexander Gray,
00004  *                          Garry Boyer,
00005  *                          Ryan Riegel,
00006  *                          Nikolaos Vasiloglou,
00007  *                          Dongryeol Lee,
00008  *                          Chip Mappus, 
00009  *                          Nishant Mehta,
00010  *                          Hua Ouyang,
00011  *                          Parikshit Ram,
00012  *                          Long Tran,
00013  *                          Wee Chin Wong
00014  *
00015  * Copyright (c) 2008, 2009 Georgia Institute of Technology
00016  *
00017  * This program is free software; you can redistribute it and/or
00018  * modify it under the terms of the GNU General Public License as
00019  * published by the Free Software Foundation; either version 2 of the
00020  * License, or (at your option) any later version.
00021  *
00022  * This program is distributed in the hope that it will be useful, but
00023  * WITHOUT ANY WARRANTY; without even the implied warranty of
00024  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00025  * General Public License for more details.
00026  *
00027  * You should have received a copy of the GNU General Public License
00028  * along with this program; if not, write to the Free Software
00029  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
00030  * 02110-1301, USA.
00031  */
00053 #ifndef FFT_KDE_H
00054 #define FFT_KDE_H
00055 
00056 #include <fastlib/fastlib.h>
00057 
00077 class FFTKde {
00078 
00079   FORBID_ACCIDENTAL_COPIES(FFTKde);
00080 
00081  private:
00082 
00084 
00086   struct complex {
00087 
00089     double real;
00090 
00092     double imag;
00093   };
00094 
00096 
00098   struct datanode *module_;
00099 
00101   static const double TAU = 4.0;
00102 
00104   Matrix qset_;
00105 
00107   Matrix rset_;
00108 
00110   GaussianKernel kernel_;
00111 
00113   Vector densities_;
00114 
00116   int m_;
00117 
00119   ArrayList<int> size_;
00120   
00122   Vector mincoords_;
00123 
00125   ArrayList<int> minindices_;
00126 
00128   Vector maxcoords_;
00129   
00131   Vector diffcoords_;
00132 
00134   Vector gridsizes_;
00135 
00137   ArrayList<int>  kernelweights_dims_;
00138 
00140   int numgridpts_;
00141   
00143   double gridbinvolume_;
00144 
00146   Vector discretized_;
00147 
00148   int nyquistnum_;
00149 
00150   Vector d_fnyquist_;
00151   
00152   Vector k_fnyquist_;
00153   
00154   Vector kernelweights_;
00155   
00162   void fftc1(double *f, int N, int skip, int forward) {
00163 
00164     int b, index1, index2, trans_size, trans;
00165     double pi2 = 4. * asin(1.);
00166     
00167     // used in recursive formula for Re(W^b) and Im(W^b)
00168     double pi2n, cospi2n, sinpi2n;
00169     
00170     // wk = W^k = e^(2 pi i b/N) in the Danielson-Lanczos formula for a
00171     // transform of length N
00172     struct complex wb;
00173     
00174     // buffers for implementing recursive formulas
00175     struct complex temp1, temp2;
00176 
00177     // treat f as an array of N complex numbers
00178     struct complex *c = (struct complex *)f;
00179 
00180     // Place the elements of the array c in bit-reversed order
00181     for(index1 = 1, index2 = 0; index1 < N; index1++) {
00182       
00183       // to find the next bit reversed array index subtract leading 1's from
00184       // index2
00185       for(b = N / 2; index2 >= b; b /= 2) {
00186         index2 -= b;
00187       }
00188       
00189       // Next replace the first 0 in index2 with a 1 and this gives the 
00190       // correct next value
00191       index2 += b;
00192       
00193       // swap each pair only the first time it is found
00194       if(index2 > index1) {
00195         temp1 = c[index2 * skip];
00196         c[index2 * skip] = c[index1 * skip];
00197         c[index1 * skip] = temp1;
00198       }
00199     }
00200 
00201     // Next perform successive transforms of length 2,4,...,N using the
00202     // Danielson-Lanczos formula
00203 
00204     // trans_size = size of transform being computed
00205     for(trans_size = 2; trans_size <= N; trans_size *= 2) {
00206       
00207       // +- 2 pi/trans_size
00208       pi2n = forward * pi2 / (double)trans_size;
00209       
00210       // Used to calculate W^k in D-L formula
00211       cospi2n = cos(pi2n);
00212       sinpi2n = sin(pi2n);
00213       
00214       // Initialize W^b for b=0
00215       wb.real = 1.;
00216       wb.imag = 0.;
00217       
00218       // Step over half of the elements in the transform
00219       for(b = 0; b < trans_size / 2; b++) {
00220         
00221         // Iterate over all transforms of size trans_size to be computed
00222         for(trans = 0; trans < N / trans_size; trans++) {
00223 
00224           // Index of element in first half of transform being computed
00225           index1 = (trans * trans_size + b) * skip;
00226 
00227           // Index of element in second half of transform being computed
00228           index2 = index1 + trans_size / 2 * skip;
00229           temp1 = c[index1];
00230           temp2 = c[index2];
00231 
00232           // implement D-L formula
00233           c[index1].real = temp1.real + wb.real * temp2.real - 
00234             wb.imag * temp2.imag;
00235           c[index1].imag = temp1.imag + wb.real * temp2.imag + 
00236             wb.imag * temp2.real;
00237           c[index2].real = temp1.real - wb.real * temp2.real + 
00238             wb.imag * temp2.imag;
00239           c[index2].imag = temp1.imag - wb.real * temp2.imag - 
00240             wb.imag * temp2.real;
00241         }
00242         temp1 = wb;
00243 
00244         // Real part of e^(2 pi i b/trans_size) used in D-L formula
00245         wb.real = cospi2n * temp1.real - sinpi2n * temp1.imag;
00246         
00247         // Imaginary part of e^(2 pi i b/trans_size) used in D-L formula
00248         wb.imag = cospi2n*temp1.imag + sinpi2n*temp1.real;
00249       }
00250     }
00251     
00252     // For an inverse transform divide by the number of grid points
00253     if(forward<0) {
00254       for(index1 = 0; index1 < skip * N; index1 += skip) {
00255         c[index1].real /= N;
00256         c[index1].imag /= N;
00257       }
00258     }
00259   }
00260 
00269   void fftcn(double *f, int ndims, int *size, int forward) {
00270 
00271     // These determine where to begin successive transforms and the skip 
00272     // between their elements (see below)
00273     int planesize = 1, skip = 1;
00274 
00275     // Total size of the ndims dimensional array
00276     int totalsize = 1;
00277     
00278     // determine total size of array
00279     for(index_t dim = 0; dim < ndims; dim++) {
00280       totalsize *= size[dim];
00281     }
00282 
00283     // loop over dimensions
00284     for(index_t dim = ndims - 1; dim >= 0; dim--) {
00285       
00286       // planesize = Product of all sizes up to and including size[dim] 
00287       planesize *= size[dim];
00288 
00289       // Take big steps to begin loops of transforms 
00290       for(index_t i = 0; i < totalsize; i += planesize) {
00291         
00292         // Skip sets the number of transforms in between big steps as well as 
00293         // the skip between elements
00294         for(index_t j = 0; j < skip; j++) {
00295           
00296           // 1-D Fourier transform. (Factor of two converts complex index to 
00297           // double index.)
00298           fftc1(f + 2 * (i + j), size[dim], skip, forward);
00299         }
00300       }
00301       // Skip = Product of all sizes up to (but not including) size[dim]
00302       skip *= size[dim];
00303     }
00304   }
00305 
00312   void fftr1(double *f, int N, int forward) {
00313 
00314     int b;
00315     
00316     // pi2n = 2 Pi/N
00317     double pi2n = 4. * asin(1.) / N, cospi2n = cos(pi2n), sinpi2n = sin(pi2n);
00318     
00319     // wb = W^b = e^(2 pi i b/N) in the Danielson-Lanczos formula for a 
00320     // transform of length N
00321     struct complex wb;
00322     
00323     // Buffers for implementing recursive formulas
00324     struct complex temp1, temp2;
00325     
00326     // Treat f as an array of N/2 complex numbers
00327     struct complex *c = (struct complex *)f;
00328     
00329     // Do a transform of f as if it were N/2 complex points
00330     if(forward == 1) {
00331       fftc1(f, N / 2, 1, 1);
00332     }
00333 
00334     // initialize W^b for b = 0
00335     wb.real = 1.;
00336     wb.imag = 0.;
00337 
00338     // Loop over elements of transform. See documentation for these formulas 
00339     for(b = 1; b < N / 4; b++) {
00340 
00341       temp1 = wb;
00342 
00343       // Real part of e^(2 pi i b/N) used in D-L formula 
00344       wb.real = cospi2n * temp1.real - sinpi2n * temp1.imag;
00345       
00346       // Imaginary part of e^(2 pi i b/N) used in D-L formula
00347       wb.imag = cospi2n * temp1.imag + sinpi2n * temp1.real;
00348       temp1 = c[b];
00349       temp2 = c[N / 2 - b];
00350       c[b].real = .5 * (temp1.real + temp2.real + forward * wb.real * 
00351                         (temp1.imag + temp2.imag) + wb.imag * 
00352                         (temp1.real - temp2.real));
00353       c[b].imag = .5 * (temp1.imag-temp2.imag - forward * wb.real * 
00354                         (temp1.real - temp2.real) + wb.imag * 
00355                         (temp1.imag + temp2.imag));
00356       c[N/2-b].real = .5 * (temp1.real + temp2.real - forward * wb.real * 
00357                             (temp1.imag + temp2.imag) - wb.imag * 
00358                             (temp1.real - temp2.real));
00359       c[N/2-b].imag = .5 * (-temp1.imag + temp2.imag - forward * wb.real * 
00360                             (temp1.real - temp2.real) + wb.imag * 
00361                             (temp1.imag + temp2.imag));
00362     }
00363 
00364     temp1 = c[0];
00365 
00366     // set b = 0 term in transform
00367     c[0].real = temp1.real+temp1.imag;
00368     
00369     // put b = N / 2 term in imaginary part of first term
00370     c[0].imag = temp1.real-temp1.imag;
00371     
00372     if(forward == -1) {
00373       c[0].real *= .5;
00374       c[0].imag *= .5;
00375       fftc1(f, N / 2, 1, -1);
00376     }
00377   }
00378 
00387   void fftrn(double *f, double *fnyquist, int ndims, int *size, int forward) {
00388 
00389     int i, j, b;
00390 
00391     // Positions in the 1-d arrays of points labeled by indices 
00392     // (i0,i1,...,i(ndims-1)); indexneg gives the position in the array of 
00393     // the corresponding negative frequency
00394     int index,indexneg = 0;
00395     int stepsize; // Used in calculating indexneg
00396 
00397     // The size of the last dimension is used often enough to merit its own 
00398     // name.
00399     int N = size[ndims - 1];
00400     
00401     // pi2n = 2Pi / N
00402     double pi2n = 4. * asin(1.) / N, cospi2n = cos(pi2n), sinpi2n = sin(pi2n);
00403 
00404     // wb = W^b = e^(2 pi i b/N) in the Danielson-Lanczos formula for a 
00405     // transform of length N
00406     struct complex wb; 
00407 
00408     // Buffers for implementing recursive formulas
00409     struct complex temp1, temp2;
00410 
00411     // Treat f and fnyquist as arrays of complex numbers
00412     struct complex *c = (struct complex *)f, 
00413       *cnyquist = (struct complex *)fnyquist;
00414 
00415     // Total number of complex points in array
00416     int totalsize = 1;
00417 
00418     // Indices for looping through array
00419     ArrayList<int> indices;
00420     indices.Init(ndims);
00421     
00422     // Set size[] to be the sizes of f viewed as a complex array
00423     size[ndims - 1] /= 2;
00424 
00425     for(i = 0; i < ndims; i++) {
00426       totalsize *= size[i];
00427       indices[i] = 0;
00428     }
00429     
00430     // forward transform
00431     if(forward == 1) {
00432 
00433       // Do a transform of f as if it were N/2 complex points 
00434       fftcn(f, ndims, size, 1);
00435 
00436       // Copy b=0 data into cnyquist so the recursion formulas below for b=0 
00437       // and cnyquist don't overwrite data they later need
00438       for(i = 0; i < totalsize / size[ndims - 1]; i++) {
00439         
00440         // Only copy points where last array index for c is 0
00441         cnyquist[i] = c[i * size[ndims - 1]];
00442       }
00443     }
00444     
00445     // Loop over all but last array index
00446     for(index = 0; index < totalsize; index += size[ndims-1]) {
00447 
00448       wb.real = 1.; /* Initialize W^b for b=0 */
00449       wb.imag = 0.;
00450 
00451       // Loop over elements of transform. See documentation for these formulas
00452       for(b = 1; b < N / 4; b++) {
00453 
00454         temp1 = wb;
00455 
00456         // Real part of e^(2 pi i b/N_real) used in D-L formula
00457         wb.real = cospi2n*temp1.real - sinpi2n*temp1.imag;
00458 
00459         // Imaginary part of e^(2 pi i b/N_real) used in D-L formula
00460         wb.imag = cospi2n*temp1.imag + sinpi2n*temp1.real;
00461 
00462         temp1 = c[index + b];
00463 
00464         // Note that N-b is NOT the negative frequency for b. Only 
00465         // nonnegative b momenta are stored.
00466         temp2 = c[indexneg + N / 2 - b];
00467 
00468         c[index + b].real = .5 * (temp1.real + temp2.real + forward * wb.real * 
00469                                   (temp1.imag + temp2.imag) + wb.imag * 
00470                                   (temp1.real - temp2.real));
00471         c[index + b].imag = .5 * (temp1.imag - temp2.imag - forward * wb.real * 
00472                                   (temp1.real - temp2.real) + wb.imag * 
00473                                   (temp1.imag + temp2.imag));
00474         c[indexneg + N / 2 - b].real = .5 * (temp1.real + temp2.real - 
00475                                              forward * 
00476                                              wb.real * 
00477                                              (temp1.imag + temp2.imag) - 
00478                                              wb.imag * 
00479                                              (temp1.real - temp2.real));
00480         c[indexneg + N / 2 - b].imag = .5 * (-temp1.imag + temp2.imag - 
00481                                              forward * wb.real * 
00482                                              (temp1.real - temp2.real) + 
00483                                              wb.imag * 
00484                                              (temp1.imag + temp2.imag));
00485       }
00486       temp1 = c[index];
00487 
00488       // Index is smaller for cnyquist because it doesn't have the last 
00489       // dimension
00490       temp2 = cnyquist[indexneg / size[ndims - 1]];
00491 
00492       // Set b=0 term in transform
00493       c[index].real = .5 * (temp1.real + temp2.real + forward * 
00494                             (temp1.imag + temp2.imag));
00495       c[index].imag = .5 * (temp1.imag - temp2.imag - forward * 
00496                             (temp1.real - temp2.real));
00497 
00498       // Set b=N/2 transform.
00499       cnyquist[indexneg / size[ndims - 1]].real = 
00500         .5 * (temp1.real + temp2.real - forward * (temp1.imag + temp2.imag));
00501       cnyquist[indexneg / size[ndims - 1]].imag = 
00502         .5 * (-temp1.imag + temp2.imag - forward * (temp1.real - temp2.real));
00503       
00504       // Find indices for positive and single index for negative frequency. 
00505       // In each dimension indexneg[j]=0 if index[j]=0, 
00506       // indexneg[j]=size[j]-index[j] otherwise.
00507 
00508       // amount to increment indexneg by as each individual index is 
00509       // incremented
00510       stepsize = size[ndims - 1];
00511 
00512       // If the rightmost indices are maximal reset them to 0. Indexneg goes 
00513       // from 1 to 0 in these dimensions
00514       for(j = ndims - 2; j >= 0 && indices[j] == size[j] - 1; j--) {
00515         indices[j] = 0;
00516         indexneg -= stepsize;
00517         stepsize *= size[j];
00518       }
00519       
00520       // If index[j] goes from 0 to 1 indexneg[j] goes from 0 to size[j]-1
00521       if(j >= 0 && indices[j] == 0) {
00522         indexneg += stepsize * (size[j] - 1);
00523       }
00524       // Otherwise increasing index[j] decreases indexneg by one unit.
00525       else {
00526         indexneg -= stepsize;
00527       }
00528 
00529       // This avoids writing outside the array bounds on the last pass 
00530       // through the array loop
00531       if(j >= 0) {
00532         indices[j]++;
00533       }
00534     } // End of i loop (over total array)
00535     
00536     // inverse transform
00537     if(forward == -1) {
00538       fftcn(f, ndims, size, -1);
00539     }
00540     
00541     // Give the user back the array size[] in its original condition
00542     size[ndims - 1] *= 2;
00543 
00544   }
00545   
00546   void assign_weights(int reference_pt_num, int level, double volume, int pos,
00547                       int skip) {
00548     if(level == -1) {
00549       discretized_[pos] += volume;
00550     }
00551     else {
00552       
00553       // Recurse in the right direction
00554       double coord = rset_.get(level, reference_pt_num);
00555       double leftgridcoord = mincoords_[level] + minindices_[level] *
00556         gridsizes_[level];
00557       double rightgridcoord = leftgridcoord + gridsizes_[level];
00558       double leftvolume = volume * (rightgridcoord - coord);
00559       double rightvolume = volume * (coord - leftgridcoord);
00560       int nextskip = size_[level] * skip;
00561       int nextleftpos = pos + skip * minindices_[level];
00562       
00563       if(leftvolume > 0.0) {
00564         assign_weights(reference_pt_num, level - 1, leftvolume, nextleftpos, 
00565                        nextskip);
00566       }
00567 
00568       if(rightvolume > 0.0) {
00569         assign_weights(reference_pt_num, level - 1, rightvolume, 
00570                        nextleftpos + skip, nextskip);
00571       }
00572     }
00573   }
00574 
00575   void retrieve_weights(int query_pt_num, double volume, int level, int pos, 
00576                         int skip, double divfactor) {
00577 
00578     if(level == -1) {
00579       densities_[query_pt_num] += discretized_[pos] * volume / divfactor;
00580     }
00581     else {
00582       
00583       // Recurse in the right direction
00584       double coord = qset_.get(level, query_pt_num);
00585       double leftgridcoord = mincoords_[level] + minindices_[level] *
00586         gridsizes_[level];
00587       double rightgridcoord = leftgridcoord + gridsizes_[level];
00588       double leftvolume = volume * (rightgridcoord - coord);
00589       double rightvolume = volume * (coord - leftgridcoord);
00590       int nextskip = size_[level] * skip;
00591       int nextleftpos = pos + skip * minindices_[level];
00592       
00593       if(leftvolume > 0.0) {
00594         retrieve_weights(query_pt_num, leftvolume, level - 1, nextleftpos, 
00595                          nextskip, divfactor);
00596       }
00597       if(rightvolume > 0.0) {
00598         retrieve_weights(query_pt_num, rightvolume, level - 1, 
00599                          nextleftpos + skip, nextskip, divfactor);
00600       }
00601     }
00602   }
00603 
00607   void RetrieveDensities() {
00608 
00609     double normc = 
00610       (kernel_.CalcNormConstant(rset_.n_rows()) * rset_.n_cols());
00611 
00612     for(index_t r = 0; r < qset_.n_cols(); r++) {
00613       densities_[r] = 0.0;
00614       
00615       for(index_t d = 0; d < qset_.n_rows(); d++) {
00616         minindices_[d] = (int) floor((qset_.get(d, r) - mincoords_[d])/
00617                                      gridsizes_[d]);
00618       }
00619       retrieve_weights(r, 1.0, qset_.n_rows() - 1, 0, 1, 
00620                        gridbinvolume_ * normc);
00621     }
00622   }
00623 
00624   void discretize_dataset() {
00625 
00626     // Temporary used to count the number of elements in the enlarged 
00627     // matrices for the kernel weights and bin counts. Also calculate the 
00628     // volume of each grid bin.
00629     numgridpts_ = 1;
00630     gridbinvolume_ = 1.0;
00631         
00632     double min, max;
00633     
00634     // Find the min/max in each coordinate direction, and calculate the grid
00635     // size in each dimension.
00636     for(index_t d = 0; d < qset_.n_rows(); d++) {
00637       int possiblesample;
00638       min = DBL_MAX;
00639       max = -DBL_MAX;
00640       
00641       for(index_t r = 0; r < rset_.n_cols(); r++) {
00642         double coord = rset_.get(d, r);
00643         if(coord > max)
00644           max = coord;
00645         if(coord < min)
00646           min = coord;
00647       }
00648       
00649       // Following Silverman's advice here
00650       mincoords_[d] = min;
00651       maxcoords_[d] = max;
00652       diffcoords_[d] = maxcoords_[d] - mincoords_[d];
00653       gridsizes_[d] = diffcoords_[d] / ((double) m_ - 1);
00654       gridbinvolume_ *= gridsizes_[d];
00655 
00656       // Determine how many kernel weight calculation to do for this 
00657       // dimension.
00658       kernelweights_dims_[d] = m_ - 1;
00659       possiblesample = (int) floor(TAU * sqrt(kernel_.bandwidth_sq()) / 
00660                                    gridsizes_[d]);
00661       
00662       if(kernelweights_dims_[d] > possiblesample) {
00663         if(possiblesample == 0) {
00664           possiblesample = 1;
00665         }
00666         kernelweights_dims_[d] = possiblesample;
00667       }
00668 
00669       // Wand p440: Need to calculate the actual dimension of the matrix
00670       // after the necessary 0 padding of the kernel weight matrix and the
00671       // bin count matrix.
00672       size_[d] = (int) ceil(log(m_ + kernelweights_dims_[d]) / log(2));
00673       size_[d] = 1 << size_[d];
00674 
00675       numgridpts_ *= size_[d];
00676     }
00677 
00678     // Allocate the memory for discretized grid count matrix and initialize 
00679     // it.
00680     discretized_.Init(numgridpts_);
00681     discretized_.SetZero();
00682 
00683     double inv_gvolume = 1.0 / gridbinvolume_;
00684     
00685     // Now loop over each data and calculate the weights at each grid point.
00686     for(index_t r = 0; r < rset_.n_cols(); r++) {
00687 
00688       // First locate the bin the data point falls into and identify it by
00689       // the lower grid coordinates.
00690       for(index_t d = 0; d < rset_.n_rows(); d++) {
00691         minindices_[d] = (int) floor((rset_.get(d, r) - mincoords_[d])/
00692                                      gridsizes_[d]);
00693       }
00694 
00695       // Assign the weights around the neighboring grid points due to this
00696       // data point. This results in 2^num_dims number of recursion per data
00697       // point.
00698       assign_weights(r, qset_.n_rows() - 1, inv_gvolume, 0, 1);
00699     }
00700 
00701   }
00702   
00703   void Gaussify(double acc, double precalc, int level, int pos, int skip) {
00704 
00705     if(level == -1) {
00706       kernelweights_[pos] = exp(precalc * acc);
00707     }
00708     else {
00709       int half = kernelweights_dims_[level];
00710       int g;
00711       for(g = 0; g <= half; g++) {
00712         double addThis = g * gridsizes_[level];
00713         double newacc = acc + addThis * addThis;
00714         int newskip = skip * size_[level];
00715         
00716         Gaussify(newacc, precalc, level - 1, pos + skip * g, newskip);
00717         
00718         // If this is not the 0th frequency, then do the mirror image thingie.
00719         if(g != 0) {
00720           Gaussify(newacc, precalc, level - 1,
00721                    pos + skip * (size_[level] - g), newskip);
00722         }
00723       }
00724     }
00725   }
00726   
00727   
00728  public:
00729 
00731 
00733   FFTKde() {}
00734   
00736   ~FFTKde() {}
00737   
00739 
00745   void get_density_estimates(Vector *results) {
00746     results->Init(densities_.length());
00747 
00748     for(index_t i = 0; i < densities_.length(); i++) {
00749       (*results)[i] = densities_[i];
00750     }
00751   }
00752 
00760   void Init(Matrix &qset, Matrix &rset, struct datanode *module_in) {
00761     
00762     // initialize module to the incoming one
00763     module_ = module_in;
00764 
00765     printf("Initializing FFT KDE...\n");
00766     fx_timer_start(module_, "fft_kde_init");
00767 
00768     // initialize the kernel and read in the number of grid points
00769     kernel_.Init(fx_param_double_req(module_, "bandwidth"));
00770     m_ = fx_param_int(module_, "num_grid_pts_per_dim", 128);
00771 
00772     // set aliases to the query and reference datasets and initialize
00773     // query density sets
00774     qset_.Copy(qset);
00775     densities_.Init(qset_.n_cols());
00776     rset_.Copy(rset);
00777 
00778     // initialize member variables.
00779     size_.Init(qset_.n_rows());
00780     minindices_.Init(rset_.n_rows());
00781     mincoords_.Init(qset_.n_rows());
00782     maxcoords_.Init(qset_.n_rows());
00783     diffcoords_.Init(qset_.n_rows());
00784     gridsizes_.Init(qset_.n_rows());
00785     kernelweights_dims_.Init(qset_.n_rows());
00786 
00787     // set up the discretized grid for the reference dataset
00788     discretize_dataset();
00789 
00790     nyquistnum_ = 2 * numgridpts_ / size_[rset_.n_rows() - 1];
00791 
00792     d_fnyquist_.Init(nyquistnum_);
00793     k_fnyquist_.Init(nyquistnum_);
00794     kernelweights_.Init(numgridpts_);
00795 
00796     fx_timer_stop(module_, "fft_kde_init");
00797     printf("FFT KDE initialization completed...\n");
00798   }
00799 
00802   void Compute() {
00803 
00804     printf("Computing FFT KDE...\n");
00805     fx_timer_start(module_, "fft_kde");
00806 
00807     // FFT the discretized bin count matrix.
00808     d_fnyquist_.SetZero();
00809     k_fnyquist_.SetZero();
00810     kernelweights_.SetZero();
00811     fftrn(discretized_.ptr(), d_fnyquist_.ptr(), rset_.n_rows(), 
00812           size_.begin(), 1);
00813 
00814     // Calculate the required kernel weights at each grid point. This matrix
00815     // will be convolved with fourier transformed data set.
00816     double precalc = -0.5 / kernel_.bandwidth_sq();
00817     Gaussify(0.0, precalc, rset_.n_rows() - 1, 0, 1);
00818 
00819     // FFT the kernel weight matrix.
00820     fftrn(kernelweights_.ptr(), k_fnyquist_.ptr(), 
00821           rset_.n_rows(), size_.begin(), 1);
00822 
00823     // We need to invoke the convolution theorem for FFT here. Take each
00824     // corresponding complex number in kernelweights and discretized and do
00825     // an element-wise multiplication. Later, pass it to inverse fft function,
00826     // and we have our answer!
00827     for(index_t d = 0; d < numgridpts_; d += 2) {
00828       double real1 = discretized_[d];
00829       double complex1 = discretized_[d + 1];
00830       double real2 = kernelweights_[d];
00831       double complex2 = kernelweights_[d + 1];
00832       discretized_[d] = real1 * real2 - complex1 * complex2;
00833       discretized_[d + 1] = real1 * complex2 + complex1 * real2;
00834     }
00835 
00836     for(index_t d = 0; d < nyquistnum_; d += 2) {
00837       double real1 = d_fnyquist_[d];
00838       double complex1 = d_fnyquist_[d + 1];
00839       double real2 = k_fnyquist_[d];
00840       double complex2 = k_fnyquist_[d + 1];
00841       d_fnyquist_[d] = real1 * real2 - complex1 * complex2;
00842       d_fnyquist_[d + 1] = real1 * complex2 + complex1 * real2;
00843     }
00844 
00845     // Inverse FFT the elementwise multiplied matrix.
00846     fftrn(discretized_.ptr(), d_fnyquist_.ptr(), 
00847           rset_.n_rows(), size_.begin(), -1);
00848 
00849     // Retrieve the densities of each data point.
00850     RetrieveDensities();
00851 
00852     fx_timer_stop(module_, "fft_kde");
00853     printf("FFT KDE completed...\n");
00854   }
00855 
00863   void PrintDebug() {
00864 
00865     FILE *stream = stdout;
00866     const char *fname = NULL;
00867 
00868     if((fname = fx_param_str(module_, "fft_kde_output", NULL)) != NULL) {
00869       stream = fopen(fname, "w+");
00870     }
00871     for(index_t q = 0; q < qset_.n_cols(); q++) {
00872       fprintf(stream, "%g\n", densities_[q]);
00873     }
00874     
00875     if(stream != stdout) {
00876       fclose(stream);
00877     }
00878   }
00879 
00880 };
00881 
00882 #endif