uselapack.cc

Go to the documentation of this file.
00001 /* MLPACK 0.2
00002  *
00003  * Copyright (c) 2008, 2009 Alexander Gray,
00004  *                          Garry Boyer,
00005  *                          Ryan Riegel,
00006  *                          Nikolaos Vasiloglou,
00007  *                          Dongryeol Lee,
00008  *                          Chip Mappus, 
00009  *                          Nishant Mehta,
00010  *                          Hua Ouyang,
00011  *                          Parikshit Ram,
00012  *                          Long Tran,
00013  *                          Wee Chin Wong
00014  *
00015  * Copyright (c) 2008, 2009 Georgia Institute of Technology
00016  *
00017  * This program is free software; you can redistribute it and/or
00018  * modify it under the terms of the GNU General Public License as
00019  * published by the Free Software Foundation; either version 2 of the
00020  * License, or (at your option) any later version.
00021  *
00022  * This program is distributed in the hope that it will be useful, but
00023  * WITHOUT ANY WARRANTY; without even the implied warranty of
00024  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00025  * General Public License for more details.
00026  *
00027  * You should have received a copy of the GNU General Public License
00028  * along with this program; if not, write to the Free Software
00029  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
00030  * 02110-1301, USA.
00031  */
00038 #include "fastlib/la/uselapack.h"
00039 #include "fastlib/la/la.h"
00040 //#include "uselapack.h"
00041 //#include "la.h"
00042 
00043 int la::dgetri_block_size;
00044 int la::dgeqrf_block_size;
00045 int la::dorgqr_block_size;
00046 int la::dgeqrf_dorgqr_block_size;
00047 
00048 namespace la {
00049 
00050   struct zzzLapackInit {
00051     zzzLapackInit() {
00052       double fake_matrix[64];
00053       double fake_workspace;
00054       double fake_vector;
00055       f77_integer fake_pivots;
00056       f77_integer fake_info;
00057       
00058       /* TODO: This may want to be ilaenv */
00059       F77_FUNC(dgetri)(1, fake_matrix, 1, &fake_pivots, &fake_workspace,
00060           -1, &fake_info);
00061       la::dgetri_block_size = int(fake_workspace);
00062       
00063       F77_FUNC(dgeqrf)(1, 1, fake_matrix, 1, &fake_vector, &fake_workspace, -1,
00064           &fake_info);
00065       la::dgeqrf_block_size = int(fake_workspace);
00066       
00067       F77_FUNC(dorgqr)(1, 1, 1, fake_matrix, 1, &fake_vector, &fake_workspace, -1,
00068           &fake_info);
00069       la::dorgqr_block_size = int(fake_workspace);
00070       
00071       la::dgeqrf_dorgqr_block_size =
00072           std::max(la::dgeqrf_block_size, la::dorgqr_block_size);
00073     }
00074   };
00075 };
00076 
00077 success_t la::PLUInit(const Matrix &A,
00078     ArrayList<f77_integer> *pivots, Matrix *L, Matrix *U) {
00079   index_t m = A.n_rows();
00080   index_t n = A.n_cols();
00081   success_t success;
00082 
00083   if (m > n) {
00084     pivots->Init(n);
00085     L->Copy(A);
00086     U->Init(n, n);
00087     success = PLUExpert(pivots->begin(), L);
00088 
00089     if (!PASSED(success)) {
00090       return success;
00091     }
00092 
00093     for (index_t j = 0; j < n; j++) {
00094       double *lcol = L->GetColumnPtr(j);
00095       double *ucol = U->GetColumnPtr(j);
00096 
00097       mem::Copy(ucol, lcol, j + 1);
00098       mem::Zero(ucol + j + 1, n - j - 1);
00099       mem::Zero(lcol, j);
00100       lcol[j] = 1.0;
00101     }
00102   } else {
00103     pivots->Init(m);
00104     L->Init(m, m);
00105     U->Copy(A);
00106     success = PLUExpert(pivots->begin(), U);
00107 
00108     if (!PASSED(success)) {
00109       return success;
00110     }
00111 
00112     for (index_t j = 0; j < m; j++) {
00113       double *lcol = L->GetColumnPtr(j);
00114       double *ucol = U->GetColumnPtr(j);
00115 
00116       mem::Zero(lcol, j);
00117       lcol[j] = 1.0;
00118       mem::Copy(lcol + j + 1, ucol + j + 1, m - j - 1);
00119       mem::Zero(ucol + j + 1, m - j - 1);
00120     }
00121   }
00122 
00123   return success;
00124 }
00125 
00126 success_t la::Inverse(Matrix *A) {
00127   f77_integer pivots[A->n_rows()];
00128 
00129   success_t success = PLUExpert(pivots, A);
00130 
00131   if (!PASSED(success)) {
00132     return success;
00133   }
00134 
00135   return InverseExpert(pivots, A);
00136 }
00137   
00138 
00139 success_t la::InverseOverwrite(const Matrix &A, Matrix *B) {
00140   f77_integer pivots[A.n_rows()];
00141 
00142   if (likely(A.ptr() != B->ptr())) {
00143     B->CopyValues(A);
00144   }
00145   success_t success = PLUExpert(pivots, B);
00146 
00147   if (!PASSED(success)) {
00148     return success;
00149   }
00150 
00151   return InverseExpert(pivots, B);
00152 }
00153 
00154 long double la::Determinant(const Matrix &A) {
00155   DEBUG_MATSQUARE(A);
00156   int n = A.n_rows();
00157   f77_integer pivots[n];
00158   Matrix LU;
00159 
00160   LU.Copy(A);
00161   PLUExpert(pivots, &LU);
00162 
00163   long double det = 1.0;
00164 
00165   for (index_t i = 0; i < n; i++) {
00166     if (pivots[i] != i+1) {
00167       // pivoting occured (note FORTRAN has 1-based indexing)
00168       det = -det;
00169     }
00170     det *= LU.get(i, i);
00171   }
00172 
00173   return det;
00174 }
00175 
00176 double la::DeterminantLog(const Matrix &A, int *sign_out) {
00177   DEBUG_MATSQUARE(A);
00178   int n = A.n_rows();
00179   f77_integer pivots[n];
00180   Matrix LU;
00181 
00182   LU.Copy(A);
00183   PLUExpert(pivots, &LU);
00184 
00185   double log_det = 0.0;
00186   int sign_det = 1;
00187 
00188   for (index_t i = 0; i < n; i++) {
00189     if (pivots[i] != i+1) {
00190       // pivoting occured (note FORTRAN has one-based indexing)
00191       sign_det = -sign_det;
00192     }
00193 
00194     double value = LU.get(i, i);
00195 
00196     if (value < 0) {
00197       sign_det = -sign_det;
00198       value = -value;
00199     } else if (!(value > 0)) {
00200       sign_det = 0;
00201       log_det = DBL_NAN;
00202       break;
00203     }
00204 
00205     log_det += log(value);
00206   }
00207 
00208   if (sign_out) {
00209     *sign_out = sign_det;
00210   }
00211 
00212   return log_det;
00213 }
00214 
00215 /*
00216 Replaced this with a non-querying version that uses cached block sizes.
00217 
00218 success_t la::QRExpert(Matrix *A_in_Q_out, Matrix *R) {
00219   f77_integer info;
00220   f77_integer m = A_in_Q_out->n_rows();
00221   f77_integer n = A_in_Q_out->n_cols();
00222   f77_integer k = std::min(m, n);
00223   double d; // for querying optimal work size
00224   double tau[k];
00225 
00226   // Obtain both Q and R in A_in_Q_out
00227   F77_FUNC(dgeqrf)(m, n, A_in_Q_out->ptr(), m,
00228       tau, &d, -1, &info);
00229   {
00230     f77_integer lwork = (f77_integer)d;
00231     double work[lwork];
00232 
00233     F77_FUNC(dgeqrf)(m, n, A_in_Q_out->ptr(), m,
00234         tau, work, lwork, &info);
00235   }
00236   
00237   R->SetZero();
00238   if (info != 0) {
00239     return SUCCESS_FROM_LAPACK(info);
00240   }
00241 
00242   // Extract R
00243   for (index_t j = 0; j < n; j++) {
00244     mem::Copy(R->GetColumnPtr(j), A_in_Q_out->GetColumnPtr(j),
00245         std::min(j + 1, k));
00246   }
00247   
00248   // Fix Q
00249   F77_FUNC(dorgqr)(m, k, k, A_in_Q_out->ptr(), m,
00250       tau, &d, -1, &info);
00251   {
00252     f77_integer lwork = (f77_integer)d;
00253     double work[lwork];
00254 
00255     F77_FUNC(dorgqr)(m, k, k, A_in_Q_out->ptr(), m,
00256         tau, work, lwork, &info);
00257   }
00258 
00259   return SUCCESS_FROM_LAPACK(info);
00260 }
00261 */
00262 
00263 success_t la::QRExpert(Matrix *A_in_Q_out, Matrix *R) {
00264   f77_integer info;
00265   f77_integer m = A_in_Q_out->n_rows();
00266   f77_integer n = A_in_Q_out->n_cols();
00267   f77_integer k = std::min(m, n);
00268   f77_integer lwork = n * dgeqrf_dorgqr_block_size;
00269   double tau[k + lwork];
00270   double *work = tau + k;
00271 
00272   // Obtain both Q and R in A_in_Q_out
00273   F77_FUNC(dgeqrf)(m, n, A_in_Q_out->ptr(), m,
00274      tau, work, lwork, &info);
00275 
00276   if (info != 0) {
00277     return SUCCESS_FROM_LAPACK(info);
00278   }
00279 
00280   // Extract R
00281   for (index_t j = 0; j < n; j++) {
00282     double *r_col = R->GetColumnPtr(j);
00283     double *q_col = A_in_Q_out->GetColumnPtr(j);
00284     int i = std::min(j + 1, index_t(k));
00285     mem::Copy(r_col, q_col, i);
00286     mem::Zero(r_col + i, k - i);
00287   }
00288 
00289   // Fix Q
00290   F77_FUNC(dorgqr)(m, k, k, A_in_Q_out->ptr(), m,
00291       tau, work, lwork, &info);
00292 
00293   return SUCCESS_FROM_LAPACK(info);
00294 }
00295 
00296 success_t la::QRInit(const Matrix &A, Matrix *Q, Matrix *R) {
00297   index_t k = std::min(A.n_rows(), A.n_cols());
00298   Q->Copy(A);
00299   R->Init(k, A.n_cols());
00300   success_t success = QRExpert(Q, R);
00301   Q->ResizeNoalias(k);
00302 
00303   return success;
00304 }
00305 
00306 success_t la::SchurExpert(Matrix *A_in_T_out,
00307     double *w_real, double *w_imag, double *Z) {
00308   DEBUG_MATSQUARE(*A_in_T_out);
00309   f77_integer info;
00310   f77_integer n = A_in_T_out->n_rows();
00311   f77_integer sdim;
00312   const char *job = Z ? "V" : "N";
00313   double d; // for querying optimal work size
00314 
00315   F77_FUNC(dgees)(job, "N", NULL,
00316       n, A_in_T_out->ptr(), n, &sdim, w_real, w_imag,
00317       Z, n, &d, -1, NULL, &info);
00318   {
00319     f77_integer lwork = (f77_integer)d;
00320     double work[lwork];
00321 
00322     F77_FUNC(dgees)(job, "N", NULL,
00323         n, A_in_T_out->ptr(), n, &sdim, w_real, w_imag,
00324         Z, n, work, lwork, NULL, &info);
00325   }
00326 
00327   return SUCCESS_FROM_LAPACK(info);
00328 }
00329 
00330 success_t la::EigenExpert(Matrix *A_garbage,
00331     double *w_real, double *w_imag, double *V_raw) {
00332   DEBUG_MATSQUARE(*A_garbage);
00333   f77_integer info;
00334   f77_integer n = A_garbage->n_rows();
00335   const char *job = V_raw ? "V" : "N";
00336   double d; // for querying optimal work size
00337 
00338   F77_FUNC(dgeev)("N", job, n, A_garbage->ptr(), n,
00339       w_real, w_imag, NULL, 1, V_raw, n, &d, -1, &info);
00340   {
00341     f77_integer lwork = (f77_integer)d;
00342     double work[lwork];
00343 
00344     F77_FUNC(dgeev)("N", job, n, A_garbage->ptr(), n,
00345         w_real, w_imag, NULL, 1, V_raw, n, work, lwork, &info);
00346   }
00347 
00348   return SUCCESS_FROM_LAPACK(info);
00349 }
00350 
00351 success_t la::EigenvaluesInit(const Matrix &A, Vector *w) {
00352   DEBUG_MATSQUARE(A);
00353   int n = A.n_rows();
00354   w->Init(n);
00355   double w_imag[n];
00356 
00357   Matrix tmp;
00358   tmp.Copy(A);
00359   success_t success = SchurExpert(&tmp, w->ptr(), w_imag, NULL);
00360 
00361   if (!PASSED(success)) {
00362     return success;
00363   }
00364 
00365   for (index_t j = 0; j < n; j++) {
00366     if (unlikely(w_imag[j] != 0.0)) {
00367       (*w)[j] = DBL_NAN;
00368     }
00369   }
00370 
00371   return success;
00372 }
00373 
00374 success_t la::EigenvectorsInit(const Matrix &A,
00375     Vector *w_real, Vector *w_imag, Matrix *V_real, Matrix *V_imag) {
00376   DEBUG_MATSQUARE(A);
00377   index_t n = A.n_rows();
00378   w_real->Init(n);
00379   w_imag->Init(n);
00380   V_real->Init(n, n);
00381   V_imag->Init(n, n);
00382 
00383   Matrix tmp;
00384   tmp.Copy(A);
00385   success_t success = EigenExpert(&tmp,
00386       w_real->ptr(), w_imag->ptr(), V_real->ptr());
00387 
00388   if (!PASSED(success)) {
00389     return success;
00390   }
00391 
00392   V_imag->SetZero();
00393   for (index_t j = 0; j < n; j++) {
00394     if (unlikely(w_imag->get(j) != 0.0)) {
00395       double *r_cur = V_real->GetColumnPtr(j);
00396       double *r_next = V_real->GetColumnPtr(j+1);
00397       double *i_cur = V_imag->GetColumnPtr(j);
00398       double *i_next = V_imag->GetColumnPtr(j+1);
00399 
00400       for (index_t i = 0; i < n; i++) {
00401         i_next[i] = -(i_cur[i] = r_next[i]);
00402         r_next[i] = r_cur[i];
00403       }
00404 
00405       j++; // skip paired column
00406     }
00407   }
00408 
00409   return success;
00410 }
00411 
00412 success_t la::EigenvectorsInit(const Matrix &A, Vector *w, Matrix *V) {
00413   DEBUG_MATSQUARE(A);
00414   index_t n = A.n_rows();
00415   w->Init(n);
00416   double w_imag[n];
00417   V->Init(n, n);
00418 
00419   Matrix tmp;
00420   tmp.Copy(A);
00421   success_t success = EigenExpert(&tmp, w->ptr(), w_imag, V->ptr());
00422 
00423   if (!PASSED(success)) {
00424     return success;
00425   }
00426 
00427   for (index_t j = 0; j < n; j++) {
00428     if (unlikely(w_imag[j] != 0.0)) {
00429       (*w)[j] = DBL_NAN;
00430     }
00431   }
00432 
00433   return success;
00434 }
00435 
00436 success_t la::GenEigenSymmetric(int itype, Matrix *A_eigenvec, Matrix *B_chol, double *w) {
00437   DEBUG_MATSQUARE(*A_eigenvec);
00438   DEBUG_MATSQUARE(*B_chol);
00439   DEBUG_ASSERT(A_eigenvec->n_rows()==B_chol->n_rows());
00440   f77_integer itype_f77 = itype;
00441   f77_integer info;
00442   f77_integer n = A_eigenvec->n_rows();
00443   const char *job = "V"; // Compute eigenvalues and eigenvectors.
00444   double d; // for querying optimal work size
00445 
00446   F77_FUNC(dsygv)(&itype_f77, job, "U", n, A_eigenvec->ptr(), n, B_chol->ptr(), n, w,
00447       &d, -1, &info);
00448   {
00449     f77_integer lwork = (f77_integer)d;
00450     double work[lwork];
00451 
00452     F77_FUNC(dsygv)(&itype_f77, job, "U", n, A_eigenvec->ptr(), n, B_chol->ptr(), n, w,
00453       work, lwork, &info);
00454   }
00455 
00456   return SUCCESS_FROM_LAPACK(info);
00457 }
00458 
00459 success_t la::GenEigenNonSymmetric(Matrix *A_garbage, Matrix *B_garbage,
00460     double *alpha_real, double *alpha_imag, double *beta, double *V_raw) {
00461   DEBUG_MATSQUARE(*A_garbage);
00462   DEBUG_MATSQUARE(*B_garbage);
00463   DEBUG_ASSERT(A_garbage->n_rows()==B_garbage->n_rows());
00464   f77_integer info;
00465   f77_integer n = A_garbage->n_rows();
00466   const char *job = V_raw ? "V" : "N";
00467   double d; // for querying optimal work size
00468 
00469   F77_FUNC(dgegv)("N", job, n, A_garbage->ptr(), n, B_garbage->ptr(), n, 
00470       alpha_real, alpha_imag, beta, NULL, 1, V_raw, n, &d, -1, &info);
00471   {
00472     f77_integer lwork = (f77_integer)d;
00473     double work[lwork];
00474 
00475     F77_FUNC(dgegv)("N", job, n, A_garbage->ptr(), n, B_garbage->ptr(), n, 
00476       alpha_real, alpha_imag, beta, NULL, 1, V_raw, n, work, lwork, &info);
00477   }
00478 
00479   return SUCCESS_FROM_LAPACK(info);
00480 }
00481 
00482 /*
00483 DGESDD is supposed to be faster, although I haven't actually found this
00484 to be the case.
00485 
00486 success_t la::SVDExpert(Matrix* A_garbage, double *s, double *U, double *VT) {
00487   f77_integer info;
00488   f77_integer m = A_garbage->n_rows();
00489   f77_integer n = A_garbage->n_cols();
00490   f77_integer k = std::min(m, n);
00491   const char *job_u = U ? (U == A_garbage->ptr ? "O" : "S") : "N";
00492   const char *job_v = VT ? "S" : "N";
00493   double d; // for querying optimal work size
00494 
00495   F77_FUNC(dgesvd)(job_u, job_v, m, n, A_garbage->ptr(), m,
00496       s, U, m, VT, k, &d, -1, &info);
00497   {
00498     f77_integer lwork = (f77_integer)lwork_dbl;
00499     double work[lwork];
00500 
00501     F77_FUNC(dgesvd)(job_u, job_v, m, n, A_garbage->ptr(), m,
00502         s, U, m, VT, k, work, lwork, &info);
00503   }
00504 
00505   return SUCCESS_FROM_LAPACK(info);
00506 }
00507 */
00508 
00509 success_t la::SVDExpert(Matrix* A_garbage, double *s, double *U, double *VT) {
00510   DEBUG_ASSERT_MSG((U == NULL) == (VT == NULL),
00511                    "You must fill both U and VT or neither.");
00512   f77_integer info;
00513   f77_integer m = A_garbage->n_rows();
00514   f77_integer n = A_garbage->n_cols();
00515   f77_integer k = std::min(m, n);
00516   f77_integer iwork[8 * k];
00517   const char *job = U ? "S" : "N";
00518   double d; // for querying optimal work size
00519 
00520   F77_FUNC(dgesdd)(job, m, n, A_garbage->ptr(), m,
00521       s, U, m, VT, k, &d, -1, iwork, &info);
00522   {
00523     f77_integer lwork = (f77_integer)d;
00524     // work for DGESDD can be large, we really do need to malloc it
00525     double *work = mem::Alloc<double>(lwork);
00526 
00527     F77_FUNC(dgesdd)(job, m, n, A_garbage->ptr(), m,
00528         s, U, m, VT, k, work, lwork, iwork, &info);
00529 
00530     mem::Free(work);
00531   }
00532 
00533   return SUCCESS_FROM_LAPACK(info);
00534 }
00535 
00536 success_t la::Cholesky(Matrix *A_in_U_out) {
00537   DEBUG_MATSQUARE(*A_in_U_out);
00538   f77_integer info;
00539   f77_integer n = A_in_U_out->n_rows();
00540 
00541   F77_FUNC(dpotrf)("U", n, A_in_U_out->ptr(), n, &info);
00542 
00543   /* set the garbage part of the matrix to 0. */
00544   for (f77_integer j = 0; j < n; j++) {
00545     mem::Zero(A_in_U_out->GetColumnPtr(j) + j + 1, n - j - 1);
00546   }
00547 
00548   return SUCCESS_FROM_LAPACK(info);
00549 }
00550 
00551 static la::zzzLapackInit lapack_initializer;