00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00040 #include "optimizers.h"
00041
00042 void NelderMead::Eval(double **pts) {
00043
00044 index_t dim = dimension(), num_func_eval;
00045 index_t i, j, ihi, ilo, inhi,mpts = dim + 1;
00046 double sum, swap, *psum;
00047 long double swap_y, rtol, ytry, ysave, TINY = 1.0e-10;
00048 long double *y;
00049 Vector param_passed;
00050 long double tol = fx_param_double(opt_module_,"tolerance", 1.0e-5);
00051 index_t NMAX = fx_param_int(opt_module_, "MAX_FUNC_EVAL", 50000);
00052
00053 param_passed.Init(dim);
00054 psum = (double*)malloc(dim * sizeof(double));
00055 num_func_eval = 0;
00056 y = (long double*)malloc(mpts*sizeof(long double));
00057 for(i = 0; i < mpts; i++) {
00058 param_passed.CopyValues(pts[i]);
00059 y[i] = (*func_ptr_)(param_passed, data());
00060 }
00061
00062 for(;;) {
00063 ilo = 0;
00064 ihi = y[0] > y[1] ? (inhi = 1,0) : (inhi = 0,1);
00065 for( i = 0; i < mpts; i++ ) {
00066 if(y[i] <= y[ilo]) ilo = i;
00067 if(y[i] > y[ihi]) {
00068 inhi = ihi;
00069 ihi = i;
00070 }
00071 else if((y[i] > y[inhi])&&(i != ihi)) inhi = i;
00072 }
00073
00074 rtol = 2.0 * fabs(y[ihi] - y[ilo]) / ( fabs(y[ihi]) + fabs(y[ilo]) + TINY ) ;
00075 if(rtol < tol) {
00076 swap_y = y[0];
00077 y[0] = y[ilo];
00078 y[ilo] = swap_y;
00079 for( i = 0; i < dim; i++ ) {
00080 swap = pts[0][i];
00081 pts[0][i] = pts[ilo][i] ;
00082 pts[ilo][i] = swap;
00083 }
00084 break;
00085 }
00086 if(num_func_eval > NMAX){
00087 NOTIFY("Maximum number of function evaluations exceeded");
00088 break;
00089 }
00090 num_func_eval += 2;
00091
00092
00093
00094
00095 for( j = 0 ; j < dim ; j++ ){
00096 sum = 0.0;
00097 for( i = 0 ; i < mpts ; i++ )
00098 if (i != ihi)
00099 sum += pts[i][j];
00100
00101 psum[j] = sum / dim;
00102 }
00103
00104 ytry = ModSimplex_(pts, y, psum, ihi, -1.0);
00105 if( ytry <= y[ilo] ) {
00106
00107
00108 ytry = ModSimplex_(pts, y, psum, ihi, 2.0);
00109 }
00110 else if( ytry >= y[ihi] ) {
00111
00112
00113
00114 ysave = y[ihi];
00115
00116 ytry = ModSimplex_(pts, y, psum, ihi, 0.5);
00117 if( ytry > y[ihi] ) {
00118
00119
00120 for( i = 0; i < mpts; i++ ) {
00121 if( i != ilo ) {
00122 for( j = 0; j < dim; j++ ) {
00123 pts[i][j] = psum[j] = 0.5 * ( pts[i][j] + pts[ilo][j] );
00124 }
00125 param_passed.CopyValues(psum);
00126 y[i] = (*func_ptr_)(param_passed, data());
00127 }
00128 }
00129 num_func_eval += dim;
00130 for( j = 0 ; j < dim ; j++ ){
00131 sum = 0.0;
00132 for( i = 0 ; i < mpts ; i++ )
00133 if (i != ihi)
00134 sum += pts[i][j];
00135 psum[j] = sum / dim;
00136 }
00137 }
00138 }
00139 else --num_func_eval;
00140 }
00141 fx_result_int(opt_module_, "func_evals", num_func_eval);
00142 return;
00143 }
00144
00145 long double NelderMead::ModSimplex_(double **pts, long double *y,
00146 double *psum, index_t ihi,
00147 float fac) {
00148
00149 index_t j, dim = dimension();
00150 long double ytry;
00151 double *ptry;
00152 Vector param_passed;
00153
00154 param_passed.Init(dim);
00155 ptry = (double*) malloc (dim * sizeof(double));
00156 for (j = 0; j < dim; j++) {
00157 ptry[j] = psum[j] * (1 - fac) + pts[ihi][j] * fac;
00158 }
00159 param_passed.CopyValues(ptry);
00160 ytry = (*func_ptr_)(param_passed, data());
00161 if (ytry < y[ihi]) {
00162 y[ihi] = ytry;
00163 for (j = 0; j < dim; j++) {
00164 pts[ihi][j] = ptry[j];
00165 }
00166 }
00167 return ytry;
00168 }
00169
00170 void QuasiNewton::Eval(double *pt) {
00171
00172 index_t n = dimension(), iters;
00173 index_t i, its, MAXIMUM_ITERATIONS = fx_param_int(opt_module_,"MAX_ITERS",200);
00174 long double temp_1, temp_2, temp_3, temp_4, f_previous, f_min,
00175 maximum_step_length, sum = 0.0, sumdg, sumxi, temp, test;
00176 Vector dgrad, grad, hdgrad, xi;
00177 Vector pold, pnew;
00178 Matrix hessian;
00179 double EPSILON = fx_param_double(opt_module_, "EPSILON", 3.0e-8);
00180 double TOLERANCE = fx_param_double(opt_module_, "TOLERANCE", 1.0e-5);
00181 double MAX_STEP_SIZE = fx_param_double(opt_module_, "MAX_STEP_SIZE", 100.0);
00182 double g_tol = fx_param_double(opt_module_, "gtol", 1.0e-7);
00183
00184 dgrad.Init(n);
00185 grad.Init(n);
00186 hdgrad.Init(n);
00187 hessian.Init(n,n);
00188 pnew.Init(n);
00189 xi.Init(n);
00190 pold.Copy(pt,n);
00191 f_previous = (*func_ptr_)(pold, data(), &grad);
00192 Vector tmp;
00193 tmp.Init(n);
00194 tmp.SetAll(1.0);
00195 hessian.SetDiagonal(tmp);
00196 la::ScaleOverwrite(-1.0, grad, &xi);
00197
00198 sum = la::Dot(pold, pold);
00199 double fmax;
00200 if( sqrt(sum) > (float)n ) {
00201 fmax = sqrt(sum);
00202 }
00203 else {
00204 fmax = (float)n;
00205 }
00206 maximum_step_length = MAX_STEP_SIZE*fmax;
00207
00208 for(its = 0; its < MAXIMUM_ITERATIONS; its++) {
00209
00210 dgrad.CopyValues(grad);
00211 LineSearch_(pold, f_previous, &grad, &xi,
00212 &pnew, &f_min, maximum_step_length);
00213 f_previous = f_min;
00214 la::SubOverwrite(pold, pnew, &xi);
00215 pold.CopyValues(pnew);
00216
00217 for(i = 0; i < n; i++) {
00218 pt[i] = pold.get(i);
00219 }
00220
00221 test = 0.0;
00222 for(i = 0; i < n; i++){
00223 if(fabs(pold.get(i)) > 1.0) fmax = fabs(pold.get(i));
00224 else{ fmax = 1.0; }
00225 temp = fabs(xi.get(i)) / fmax;
00226 if(temp > test) test = temp;
00227 }
00228 if(test < TOLERANCE) {
00229 iters = its;
00230 fx_result_int(opt_module_, "iters", iters);
00231 return;
00232 }
00233
00234 test = 0.0;
00235 if(f_min > 1.0) temp_1 = f_min;
00236 else{ temp_1 = 1.0; }
00237
00238 for(i = 0; i < n; i++) {
00239 if(fabs(pold.get(i)) > 1.0) fmax = pold.get(i);
00240 else{ fmax = 1.0; }
00241
00242 temp = fabs(grad.get(i))*fmax / temp_1;
00243 if(temp > test) test = temp;
00244 }
00245 if(test < g_tol) {
00246 iters = its;
00247 fx_result_int(opt_module_, "iters", iters);
00248 return;
00249 }
00250
00251 la::SubFrom(grad, &dgrad);
00252 la::Scale(-1.0, &dgrad);
00253 la::MulOverwrite(hessian,dgrad, &hdgrad);
00254
00255 temp_2 = la::Dot(dgrad, xi);
00256 temp_4 = la::Dot(dgrad, hdgrad);
00257 sumdg = la::Dot(dgrad, dgrad);
00258 sumxi = la::Dot(xi, xi);
00259
00260 if (temp_2 > sqrt(EPSILON*sumdg*sumxi)) {
00261 temp_2 = 1.0 / temp_2;
00262 temp_3 = 1.0 / temp_4;
00263
00264 la::ScaleOverwrite(temp_2, xi, &dgrad);
00265 la::AddExpert((-1.0*temp_3), hdgrad, &dgrad);
00266
00267 Matrix co, ro, tmp;
00268 co.AliasColVector(xi);
00269 ro.AliasRowVector(xi);
00270 la::MulInit(co, ro, &tmp);
00271 la::AddExpert(temp_2, tmp, &hessian);
00272
00273 co.Destruct();
00274 ro.Destruct();
00275 tmp.Destruct();
00276 co.AliasColVector(hdgrad);
00277 ro.AliasRowVector(hdgrad);
00278 la::MulInit(co, ro, &tmp);
00279 la::AddExpert((-1.0*temp_3), tmp, &hessian);
00280
00281 co.Destruct();
00282 ro.Destruct();
00283 tmp.Destruct();
00284 co.AliasColVector(dgrad);
00285 ro.AliasRowVector(dgrad);
00286 la::MulInit(co, ro, &tmp);
00287 la::AddExpert(temp_4, tmp, &hessian);
00288 }
00289 la::MulOverwrite(hessian, grad, &xi);
00290 la::Scale((-1.0), &xi);
00291 }
00292 NOTIFY("Too many iterations in Quasi Newton\n");
00293 }
00294
00295 void QuasiNewton::LineSearch_(Vector pold, long double fold,
00296 Vector *grad, Vector *xi,
00297 Vector *pnew, long double *f_min,
00298 long double maximum_step_length) {
00299
00300 index_t i, n = dimension();
00301 long double a, step_length, previous_step_length = 0.0,
00302 minimum_step_length, b, disc, previous_f_value = 0.0,
00303 rhs1, rhs2, slope, sum, temp, test, temp_step_length,
00304 MIN_DECREASE = 1.0e-4, TOLERANCE = 1.0e-7;
00305
00306 sum = la::Dot(*xi, *xi);
00307 sum = sqrt(sum);
00308 if(sum > maximum_step_length) {
00309 la::Scale((maximum_step_length/sum), xi);
00310 }
00311 slope = la::Dot(*grad, *xi);
00312 if(slope >= 0.0){
00313 return;
00314 }
00315 test = 0.0;
00316 for(i = 0; i < n; i++) {
00317 double fmax;
00318 fmax = (fabs(pold.get(i)) > 1.0 ? fabs(pold.get(i)) : 1.0);
00319 temp = fabs((*xi).get(i)) / fmax;
00320 if(temp > test) test = temp;
00321 }
00322
00323 minimum_step_length = TOLERANCE/test;
00324 step_length = 1.0;
00325 for(;;) {
00326
00327 pnew->CopyValues(pold);
00328 la::AddExpert(step_length, *xi, pnew);
00329
00330 *f_min = (*func_ptr_)((*pnew), data(), grad);
00331 if(step_length < minimum_step_length) {
00332 pnew->CopyValues(pold);
00333 return;
00334 }
00335 else if( *f_min <= fold + MIN_DECREASE*step_length*slope) {
00336 return;
00337 }
00338 else {
00339 if (step_length == 1.0) {
00340 temp_step_length = -slope/(2.0*(*f_min - fold - slope));
00341 }
00342 else {
00343 rhs1 = *f_min - fold - step_length*slope;
00344 rhs2 = previous_f_value - fold - previous_step_length*slope;
00345 a = (rhs1 / (step_length*step_length)
00346 - rhs2/(previous_step_length*previous_step_length))
00347 / (step_length-previous_step_length);
00348 b = (-previous_step_length*rhs1/(step_length*step_length)
00349 +step_length*rhs2/(previous_step_length*previous_step_length))
00350 / (step_length - previous_step_length);
00351 if(a == 0.0) {
00352 temp_step_length = -slope / (2.0*b);
00353 }
00354 else {
00355 disc = b*b - 3.0*a*slope;
00356 if(disc < 0.0) {
00357 temp_step_length = 0.5*step_length;
00358 }
00359 else if (b <= 0.0) {
00360 temp_step_length = (-b+sqrt(disc))/(3.0*a);
00361 }
00362 else {
00363 temp_step_length = -slope / (b+sqrt(disc));
00364 }
00365 }
00366 if(temp_step_length > 0.5*step_length) {
00367 temp_step_length = 0.5*step_length;
00368 }
00369 }
00370 }
00371 previous_step_length = step_length;
00372 previous_f_value = *f_min;
00373 step_length = (temp_step_length > 0.1*step_length
00374 ? temp_step_length : 0.1*step_length);
00375 }
00376 }