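/**
 * SMO (Sequential Minimal Optimization) solver for SVM classification and
 * regression, templatized on the kernel type. The solver follows the
 * LIBSVM-style design: maximal-violating-pair or second-order working set
 * selection, an analytic two-variable update, and periodic shrinking of
 * the active set.
 */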
#ifndef U_SVM_SMO_H
#define U_SVM_SMO_H

#include "fastlib/fastlib.h"

#include <algorithm> /* std::swap, used by the shrinking routine */

/* Tolerance on the maximal violation of the optimality conditions,
   used as the stopping criterion */
const double SMO_OPT_TOLERANCE = 1.0e-4;
/* Hard cap on the number of SMO iterations */
const index_t MAX_NUM_ITER = 10000;
/* Number of iterations between two shrinking passes */
const index_t NUM_FOR_SHRINKING = 1000;
/* Looser tolerance under which wrongly shrunk variables are brought back */
const double SMO_UNSHRINKING_TOLERANCE = 10 * SMO_OPT_TOLERANCE;
/* Alphas below this threshold are treated as zero (not support vectors) */
const double SMO_ALPHA_ZERO = 1.0e-4;
/* Small positive constant guarding divisions against non-positive curvature */
const double TAU = 1e-12;

/* Status codes for each alpha: at its lower bound 0, at its upper bound C,
   or strictly in between (free) */
const double ID_LOWER_BOUNDED = -1;
const double ID_UPPER_BOUNDED = 1;
const double ID_FREE = 0;
template<typename TKernel>
class SMO {
  FORBID_ACCIDENTAL_COPIES(SMO);

 public:
  typedef TKernel Kernel;

 private:
  int learner_typeid_;          // 0: classification (SVM_C), 1: regression (SVM_R)
  index_t ct_iter_;             // iteration counter
  index_t ct_shrinking_;        // countdown to the next shrinking pass

  Kernel kernel_;
  const Dataset *dataset_;
  index_t n_data_;              // number of data samples
  index_t n_features_;          // number of features (excludes the label row)
  Matrix datamatrix_;           // alias of the data matrix; last row holds labels

  Vector alpha_;                // dual variables
  Vector alpha_status_;         // ID_LOWER_BOUNDED / ID_UPPER_BOUNDED / ID_FREE

  index_t n_alpha_;             // number of dual variables (2 * n_data_ for SVM_R)
  index_t n_active_;            // size of the active (unshrunk) set
  ArrayList<index_t> active_set_; // maps active positions to original indices
  bool unshrinked_;             // whether unshrinking has been performed
  index_t i_cache_, j_cache_;   // bookkeeping for a kernel cache (currently unused)
  double cached_kernel_value_;  // most recently computed kernel value

  ArrayList<int> y_;            // labels, +1 / -1

  double bias_;                 // bias term b of the decision function
  index_t n_sv_;                // number of support vectors

  Vector grad_;                 // gradient of the dual objective
  Vector grad_bar_;             // gradient contribution of upper-bounded alphas

  int budget_;                  // training budget, capped at n_data_
  double Cp_;                   // regularization parameter C for positive samples
  double Cn_;                   // regularization parameter C for negative samples
  double epsilon_;              // width of the epsilon-insensitive tube (SVM_R)
  int wss_;                     // working set selection scheme: 1 or 2

 public:
  SMO() {}
  ~SMO() {}

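  /**
   * Initialize the parameters passed in from the caller; param_[0] is the
   * budget, followed by the learner-specific parameters listed below.
   */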
  void InitPara(int learner_typeid, ArrayList<double> &param_) {
    budget_ = (int)param_[0];
    if (learner_typeid == 0) { // SVM_C: [budget, Cp, Cn, wss]
      Cp_ = param_[1];
      Cn_ = param_[2];
      wss_ = (int) param_[3];
    }
    else if (learner_typeid == 1) { // SVM_R: [budget, C, epsilon, wss]
      Cp_ = param_[1];
      Cn_ = Cp_;
      epsilon_ = param_[2];
      wss_ = (int) param_[3];
    }
  }

  /* Train the chosen learner (0: classification, 1: regression) on the dataset */
  void Train(int learner_typeid, const Dataset* dataset_in);

  Kernel& kernel() {
    return kernel_;
  }

  double Bias() const {
    return bias_;
  }

  /* Export the support vector coefficients and indicators after training */
  void GetSVM(ArrayList<index_t> &dataset_index, ArrayList<double> &coef, ArrayList<bool> &sv_indicator);

 private:
  void LearnersInit_(int learner_typeid);

  int TrainIteration_();

  void ReconstructGradient_(int learner_typeid);

  void Shrinking_();

  bool WorkingSetSelection_(index_t &i, index_t &j);

  void UpdatingGradientAlpha_(index_t i, index_t j);

  void CalcBias_();

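  /**
   * Return the regularization parameter C of sample i: Cp_ for positive
   * labels, Cn_ for negative ones.
   */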
  double GetC_(index_t i) {
    return (y_[i] > 0 ? Cp_ : Cn_);
  }

  /* Reclassify alpha_[i] as upper bounded (== C), lower bounded (== 0) or free */
  void UpdateAlphaStatus_(index_t i) {
    if (alpha_[i] >= GetC_(i)) {
      alpha_status_[i] = ID_UPPER_BOUNDED;
    }
    else if (alpha_[i] <= 0) {
      alpha_status_[i] = ID_LOWER_BOUNDED;
    }
    else {
      alpha_status_[i] = ID_FREE;
    }
  }

  bool IsUpperBounded(index_t i) {
    return alpha_status_[i] == ID_UPPER_BOUNDED;
  }
  bool IsLowerBounded(index_t i) {
    return alpha_status_[i] == ID_LOWER_BOUNDED;
  }

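  /**
   * Evaluate the kernel between the samples at active positions i and j.
   * Positions are first mapped to original indices; for regression, the
   * two alphas of a sample both map back to that sample's column.
   */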
  double CalcKernelValue_(index_t i, index_t j) {
    // map active-set positions to original sample indices
    i = active_set_[i];
    j = active_set_[j];

    // for regression, fold the doubled index range back onto the samples
    if (learner_typeid_ == 1) {
      i = i >= n_data_ ? (i-n_data_) : i;
      j = j >= n_data_ ? (j-n_data_) : j;
    }

    double *v_i, *v_j;
    v_i = datamatrix_.GetColumnPtr(i);
    v_j = datamatrix_.GetColumnPtr(j);

    cached_kernel_value_ = kernel_.Eval(v_i, v_j, n_features_);
    return cached_kernel_value_;
  }
};

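/**
 * Learner-specific initialization: dual variables, gradient and labels.
 * Classification uses one alpha per sample; regression uses a pair of
 * alphas per sample, stored as the two halves of the alpha vector.
 */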
template<typename TKernel>
void SMO<TKernel>::LearnersInit_(int learner_typeid) {
  index_t i;
  learner_typeid_ = learner_typeid;

  if (learner_typeid_ == 0) { // SVM_C
    n_alpha_ = n_data_;

    alpha_.Init(n_alpha_);
    alpha_.SetZero();

    // with all alphas zero, the gradient reduces to the linear term
    grad_.Init(n_alpha_);
    for (i=0; i<n_alpha_; i++) {
      grad_[i] = 1;
    }

    // labels live in the last row of the data matrix
    y_.Init(n_alpha_);
    for (i = 0; i < n_alpha_; i++) {
      y_[i] = datamatrix_.get(datamatrix_.n_rows()-1, i) > 0 ? 1 : -1;
    }
  }
  else if (learner_typeid_ == 1) { // SVM_R
    n_alpha_ = 2 * n_data_;

    alpha_.Init(n_alpha_); // n_alpha_ already counts both halves
    alpha_.SetZero();

    grad_.Init(n_alpha_);
    y_.Init(n_alpha_);
    for (i = 0; i < n_data_; i++) {
      y_[i] = 1;
      y_[i + n_data_] = -1;
      grad_[i] = epsilon_ - datamatrix_.get(datamatrix_.n_rows()-1, i);
      grad_[i + n_data_] = epsilon_ + datamatrix_.get(datamatrix_.n_rows()-1, i);
    }
  }
  else if (learner_typeid_ == 2) {
    // not implemented
  }

  // initially every variable is active
  n_active_ = n_alpha_;
  active_set_.Init(n_active_);
  for (i=0; i<n_active_; i++) {
    active_set_[i] = i;
  }
}

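/**
 * Reconstruct the gradient of the shrunk (inactive) variables from
 * grad_bar_, the linear term of the dual, and the contributions of the
 * free alphas. Called before unshrinking and before the final optimality
 * check on the full variable set.
 */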
template<typename TKernel>
void SMO<TKernel>::ReconstructGradient_(int learner_typeid) {
  index_t i, j;
  // nothing to reconstruct if no variable has been shrunk
  if (n_active_ == n_alpha_)
    return;
  if (learner_typeid == 0) { // SVM_C: the linear term is 1 for every alpha
    for (i=n_active_; i<n_alpha_; i++) {
      grad_[i] = grad_bar_[i] + 1;
    }
  }
  else if (learner_typeid == 1) { // SVM_R: linear term is epsilon -/+ target
    for (i=n_active_; i<n_alpha_; i++) {
      index_t idx = active_set_[i];
      j = idx >= n_data_ ? (idx - n_data_) : idx;
      grad_[i] = grad_bar_[i] + epsilon_ - y_[i] * datamatrix_.get(datamatrix_.n_rows()-1, j);
    }
  }

  // add the contributions of the free alphas:
  // grad_[t] = linear_t - y_t * sum_s alpha_s y_s K(s, t)
  for (i=0; i<n_active_; i++) {
    if (alpha_status_[i] == ID_FREE) {
      for (j=n_active_; j<n_alpha_; j++) {
        grad_[j] -= y_[j] * y_[i] * alpha_[i] * CalcKernelValue_(i,j);
      }
    }
  }
}

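/**
 * Shrinking: move variables whose scaled gradient y_t * grad_[t] falls
 * outside the current [grad_min, grad_max] range out of the active set, so
 * later iterations only touch the remaining variables. Once the violation
 * gap is within SMO_UNSHRINKING_TOLERANCE, the full gradient is
 * reconstructed and wrongly shrunk variables are reactivated.
 */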
template<typename TKernel>
void SMO<TKernel>::Shrinking_() {
  index_t t;
  double yg;

  // bounds of the violation interval over the active variables
  double grad_max = -INFINITY;
  double grad_min = INFINITY;
  for (t=0; t<n_active_; t++) {
    if (y_[t] == 1) {
      if (!IsUpperBounded(t))
        if (grad_[t] >= grad_max) {
          grad_max = grad_[t];
        }
      if (!IsLowerBounded(t))
        if (grad_[t] <= grad_min) {
          grad_min = grad_[t];
        }
    }
    else {
      if (!IsLowerBounded(t))
        if (-grad_[t] >= grad_max) {
          grad_max = -grad_[t];
        }
      if (!IsUpperBounded(t))
        if (-grad_[t] <= grad_min) {
          grad_min = -grad_[t];
        }
    }
  }

  // shrink: move variables with y_t * grad_[t] outside [grad_min, grad_max]
  // past the end of the active set
  for (t=0; t<n_active_; t++) {
    yg = y_[t] * grad_[t];
    if ( yg < grad_min || yg > grad_max ) {
      n_active_ --;
      while (n_active_ > t) {
        yg = y_[n_active_] * grad_[n_active_];
        if ( yg >= grad_min && yg <= grad_max ) {
          // swap positions t and n_active_; every per-variable array is
          // indexed by active position, so all of them are swapped together
          std::swap(active_set_[t], active_set_[n_active_]);
          std::swap(y_[t], y_[n_active_]);
          std::swap(grad_[t], grad_[n_active_]);
          std::swap(grad_bar_[t], grad_bar_[n_active_]);
          std::swap(alpha_[t], alpha_[n_active_]);
          std::swap(alpha_status_[t], alpha_status_[n_active_]);
          break;
        }
        n_active_ --;
      }
    }
  }

  // unshrink once the violation gap becomes small
  if ( !unshrinked_ && grad_max - grad_min <= SMO_UNSHRINKING_TOLERANCE ) {
    // reconstruct the gradient of the shrunk variables
    ReconstructGradient_(learner_typeid_);
    // reactivate variables whose violation now lies inside the interval
    for ( t=n_alpha_-1; t>n_active_; t-- ) {
      yg = y_[t] * grad_[t];
      if (yg >= grad_min && yg <= grad_max) {
        while (n_active_ < t) {
          yg = y_[n_active_] * grad_[n_active_];
          if ( yg < grad_min || yg > grad_max ) {
            std::swap(active_set_[t], active_set_[n_active_]);
            std::swap(y_[t], y_[n_active_]);
            std::swap(grad_[t], grad_[n_active_]);
            std::swap(grad_bar_[t], grad_bar_[n_active_]);
            std::swap(alpha_[t], alpha_[n_active_]);
            std::swap(alpha_status_[t], alpha_status_[n_active_]);
            break;
          }
          n_active_ ++;
        }
        n_active_ ++;
      }
    }
    unshrinked_ = true;
  }
}

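/**
 * SMO training: alias the data, initialize alphas, gradient, statuses and
 * grad_bar_, then iterate working set selection and the analytic pair
 * update until optimality (or the iteration cap) is reached, shrinking
 * the active set periodically.
 */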
template<typename TKernel>
void SMO<TKernel>::Train(int learner_typeid, const Dataset* dataset_in) {
  index_t i,j;

  // alias the data; the last row of the matrix holds the labels
  dataset_ = dataset_in;
  datamatrix_.Alias(dataset_->matrix());
  n_data_ = datamatrix_.n_cols();
  n_features_ = datamatrix_.n_rows() - 1;

  budget_ = min(budget_, n_data_);
  bias_ = 0.0;
  n_sv_ = 0;
  unshrinked_ = false;
  i_cache_ = -1; j_cache_ = -1;
  cached_kernel_value_ = INFINITY;

  // learner-specific initialization of alphas, gradient and labels
  LearnersInit_(learner_typeid);

  alpha_status_.Init(n_alpha_);
  for (i=0; i<n_alpha_; i++)
    UpdateAlphaStatus_(i);

  // initialize grad_bar_, the part of the gradient contributed by
  // upper-bounded alphas; with zero-initialized alphas these loops are
  // skipped, but they keep warm starts with nonzero alphas consistent
  grad_bar_.Init(n_alpha_);
  grad_bar_.SetZero();
  for(i=0; i<n_alpha_; i++) {
    if(!IsLowerBounded(i))
    {
      for(j=0; j<n_alpha_; j++)
        grad_[j] -= y_[j] * y_[i] * alpha_[i] * CalcKernelValue_(i,j);
      if(IsUpperBounded(i))
        for(j=0; j<n_alpha_; j++)
          grad_bar_[j] -= y_[j] * y_[i] * GetC_(i) * CalcKernelValue_(i,j);
    }
  }

  ct_iter_ = 0;
  ct_shrinking_ = min(n_data_, NUM_FOR_SHRINKING) + 1;

  int stop_condition = 0;
  while (1) {
    VERBOSE_GOT_HERE(0);

    // periodically shrink the active set
    if (--ct_shrinking_ == 0) {
      Shrinking_();
      ct_shrinking_ = min(n_data_, NUM_FOR_SHRINKING);
    }

    stop_condition = TrainIteration_();

    if (stop_condition == 1) // optimality reached
      break;
    else if (stop_condition == 2) { // iteration cap reached
      fprintf(stderr, "Max # of iterations (%d) exceeded !!!\n", (int)MAX_NUM_ITER);
      break;
    }
  }

  // undo the permutation introduced by shrinking, so that alpha_[i] and
  // y_[i] are in data order again, as GetSVM() expects
  Vector alpha_tmp;
  alpha_tmp.Init(n_alpha_);
  ArrayList<int> y_tmp;
  y_tmp.Init(n_alpha_);
  for (i=0; i<n_alpha_; i++) {
    alpha_tmp[i] = alpha_[i];
    y_tmp[i] = y_[i];
  }
  for (i=0; i<n_alpha_; i++) {
    alpha_[active_set_[i]] = alpha_tmp[i];
    y_[active_set_[i]] = y_tmp[i];
  }
}

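/**
 * One SMO iteration. Returns 0 to continue, 1 when the stopping criterion
 * holds on the full variable set, and 2 when the iteration cap is hit.
 */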
template<typename TKernel>
int SMO<TKernel>::TrainIteration_() {
  ct_iter_ ++;
  index_t i,j;
  if (WorkingSetSelection_(i,j) == true) { // no violating pair in the active set
    // re-check optimality on the full variable set
    ReconstructGradient_(learner_typeid_);
    n_active_ = n_alpha_;
    if (WorkingSetSelection_(i,j) == true) { // optimality reached
      return 1;
    }
    else {
      ct_shrinking_ = 1; // shrink in the next iteration
      return 0;
    }
  }
  else if (ct_iter_ >= MAX_NUM_ITER) {
    return 2;
  }
  else{
    UpdatingGradientAlpha_(i, j);
    return 0;
  }
}

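/**
 * Working set selection: pick the pair (i, j) to optimize next. Variable i
 * is the one with the maximal violation; j is chosen either as the maximal
 * violating partner (scheme 1) or by the second-order gain of Fan, Chen
 * and Lin, JMLR 2005 (scheme 2). Returns true when the violation gap is
 * within SMO_OPT_TOLERANCE, i.e. the problem is solved.
 */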
template<typename TKernel>
bool SMO<TKernel>::WorkingSetSelection_(index_t &out_i, index_t &out_j) {
  double grad_max = -INFINITY;
  double grad_min = INFINITY;
  int idx_i = 0;
  int idx_j = 0;

  // choose i: the variable with the maximal violation
  index_t t;
  for (t=0; t<n_active_; t++) {
    if (y_[t] == 1) {
      if (!IsUpperBounded(t))
        if (grad_[t] >= grad_max) {
          grad_max = grad_[t];
          idx_i = t;
        }
    }
    else {
      if (!IsLowerBounded(t))
        if (-grad_[t] >= grad_max) {
          grad_max = -grad_[t];
          idx_i = t;
        }
    }
  }
  out_i = idx_i;

  // choose j, scheme 1: the maximal violating partner (first order)
  if (wss_ == 1) {
    for (t=0; t<n_active_; t++) {
      if (y_[t] == 1) {
        if (!IsLowerBounded(t))
          if (grad_[t] <= grad_min) {
            grad_min = grad_[t];
            idx_j = t;
          }
      }
      else {
        if (!IsUpperBounded(t))
          if (-grad_[t] <= grad_min) {
            grad_min = -grad_[t];
            idx_j = t;
          }
      }
    }
  }
  // choose j, scheme 2: maximize the second-order estimate of the
  // objective decrease for the pair (i, t)
  else if (wss_ == 2) {
    double K_ii = CalcKernelValue_(out_i, out_i);
    double opt_gain_max = -INFINITY;
    double grad_diff;
    double quad_kernel;
    double opt_gain = -INFINITY;
    for (t=0; t<n_active_; t++) {
      double K_it = CalcKernelValue_(out_i, t);
      double K_tt = CalcKernelValue_(t, t);
      if (y_[t] == 1) {
        if (!IsLowerBounded(t)) {
          // grad_min is still tracked for the stopping criterion
          if (grad_[t] <= grad_min)
            grad_min = grad_[t];
          // gain of the pair (i, t): grad_diff^2 / quad_kernel
          grad_diff = grad_max - grad_[t];
          if (grad_diff > 0) {
            quad_kernel = K_ii + K_tt - 2 * K_it;
            if (quad_kernel > 0)
              opt_gain = ( grad_diff * grad_diff ) / quad_kernel;
            else // guard against non-positive curvature
              opt_gain = ( grad_diff * grad_diff ) / TAU;
            if (opt_gain > opt_gain_max) {
              idx_j = t;
              opt_gain_max = opt_gain;
            }
          }
        }
      }
      else {
        if (!IsUpperBounded(t)) {
          if (-grad_[t] <= grad_min)
            grad_min = -grad_[t];
          grad_diff = grad_max + grad_[t];
          if (grad_diff > 0) {
            quad_kernel = K_ii + K_tt - 2 * K_it;
            if (quad_kernel > 0)
              opt_gain = ( grad_diff * grad_diff ) / quad_kernel;
            else
              opt_gain = ( grad_diff * grad_diff ) / TAU;
            if (opt_gain > opt_gain_max) {
              idx_j = t;
              opt_gain_max = opt_gain;
            }
          }
        }
      }
    }
  }
  out_j = idx_j;

  // optimal when the maximal violation gap is within tolerance
  if (grad_max - grad_min <= SMO_OPT_TOLERANCE)
    return true;

  return false;
}

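/**
 * Analytic update of the selected pair (i, j): take the Newton step along
 * the feasible direction, clipped to the box constraints, then keep the
 * gradient, the alpha statuses, grad_bar_ and the bias consistent.
 */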
template<typename TKernel>
void SMO<TKernel>::UpdatingGradientAlpha_(index_t i, index_t j) {
  index_t t;

  double a_i = alpha_[i];
  double a_j = alpha_[j];
  int y_i = y_[i];
  int y_j = y_[j];
  double C_i = GetC_(i);
  double C_j = GetC_(j);

  double K_ii, K_ij, K_jj;
  K_ii = CalcKernelValue_(i, i);
  K_ij = CalcKernelValue_(i, j);
  K_jj = CalcKernelValue_(j, j);

  // unconstrained Newton step along the feasible direction
  double first_order_diff = y_i * grad_[i] - y_j * grad_[j];
  double second_order_diff = K_ii + K_jj - 2 * K_ij;
  if (second_order_diff <= 0) // guard against non-positive curvature
    second_order_diff = TAU;
  double newton_step = first_order_diff / second_order_diff;

  // clip the step so that both alphas stay inside their boxes [0, C]
  double step_B, step_A;
  if (y_i == 1) {
    step_B = C_i - a_i;
  }
  else {
    step_B = a_i;
  }
  if (y_j == 1) {
    step_A = a_j;
  }
  else {
    step_A = C_j - a_j;
  }
  double min_step_temp = min(step_B, step_A);
  double min_step = min(min_step_temp, newton_step);

  // update the pair of alphas; the equality constraint is preserved
  alpha_[i] = a_i + y_i * min_step;
  alpha_[j] = a_j - y_j * min_step;

  // update the gradient of the active variables
  for (t=0; t<n_active_; t++) {
    grad_[t] = grad_[t] + min_step * y_[t] *( CalcKernelValue_(j, t) - CalcKernelValue_(i, t) );
  }

  // remember the old bound statuses, then reclassify both alphas
  bool ub_i = IsUpperBounded(i);
  bool ub_j = IsUpperBounded(j);
  UpdateAlphaStatus_(i);
  UpdateAlphaStatus_(j);

  // maintain grad_bar_ when a variable enters or leaves its upper bound:
  // grad_bar_[t] = -y_t * sum over upper-bounded s of y_s C_s K(s, t)
  if( ub_i != IsUpperBounded(i) ) {
    if(ub_i) // left the upper bound: remove its contribution
      for(t=0; t<n_alpha_; t++)
        grad_bar_[t] += y_[t] * y_i * C_i * CalcKernelValue_(i, t);
    else     // reached the upper bound: add its contribution
      for(t=0; t<n_alpha_; t++)
        grad_bar_[t] -= y_[t] * y_i * C_i * CalcKernelValue_(i, t);
  }

  if( ub_j != IsUpperBounded(j) ) {
    if(ub_j)
      for(t=0; t<n_alpha_; t++)
        grad_bar_[t] += y_[t] * y_j * C_j * CalcKernelValue_(j, t);
    else
      for(t=0; t<n_alpha_; t++)
        grad_bar_[t] -= y_[t] * y_j * C_j * CalcKernelValue_(j, t);
  }

  CalcBias_();

  VERBOSE_GOT_HERE(0);
}

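/**
 * Compute the bias term b from the KKT conditions: free variables
 * determine b exactly (averaged for stability); if none are free, take
 * the midpoint of the interval allowed by the bounded variables.
 */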
template<typename TKernel>
void SMO<TKernel>::CalcBias_() {
  double b;
  index_t n_free = 0;
  double ub = INFINITY, lb = -INFINITY, sum_free = 0;
  for (index_t i=0; i<n_active_; i++){
    double yg = y_[i] * grad_[i];

    if (IsUpperBounded(i)) {
      if(y_[i] == -1)
        ub = min(ub, yg);
      else
        lb = max(lb, yg);
    }
    else if (IsLowerBounded(i)) {
      if(y_[i] == +1)
        ub = min(ub, yg);
      else
        lb = max(lb, yg);
    }
    else { // free variable: pins down the bias exactly
      n_free++;
      sum_free += yg;
    }
  }

  if(n_free>0)
    b = - sum_free / n_free;
  else
    b = - (ub + lb) / 2;

  bias_ = b;
}

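/**
 * Collect the support vectors after training. For classification the
 * coefficient of sample i is alpha_i * y_i; for regression it is the
 * difference of the paired alphas. Alphas below SMO_ALPHA_ZERO are
 * treated as zero and excluded.
 */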
template<typename TKernel>
void SMO<TKernel>::GetSVM(ArrayList<index_t> &dataset_index, ArrayList<double> &coef, ArrayList<bool> &sv_indicator) {
  if (learner_typeid_ == 0) { // SVM_C
    for (index_t i = 0; i < n_data_; i++) {
      if (alpha_[i] >= SMO_ALPHA_ZERO) { // support vector
        coef.PushBack() = alpha_[i] * y_[i];
        sv_indicator[dataset_index[i]] = true;
        n_sv_++;
      }
      else {
        coef.PushBack() = 0;
      }
    }
  }
  else if (learner_typeid_ == 1) { // SVM_R
    for (index_t i = 0; i < n_data_; i++) {
      double alpha_diff = -alpha_[i] + alpha_[i+n_data_];
      if (fabs(alpha_diff) >= SMO_ALPHA_ZERO) { // support vector
        coef.PushBack() = alpha_diff;
        sv_indicator[dataset_index[i]] = true;
        n_sv_++;
      }
      else {
        coef.PushBack() = 0;
      }
    }
  }
}

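/* Usage sketch (hypothetical; the kernel type name and the dataset loading
   call are illustrative assumptions, not part of this header's API):

     Dataset dataset;               // last matrix row must hold the labels
     dataset.InitFromFile("train.csv");

     SMO<SVMLinearKernel> smo;      // any kernel with Eval(v_i, v_j, n)
     ArrayList<double> param;
     param.Init(4);                 // SVM_C: [budget, Cp, Cn, wss]
     param[0] = 1000; param[1] = 10.0; param[2] = 10.0; param[3] = 1;
     smo.InitPara(0, param);
     smo.Train(0, &dataset);        // learner_typeid 0: classification
     double b = smo.Bias();         // bias of the trained decision function
*/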
#endif