Mantid
Loading...
Searching...
No Matches
TrustRegionMinimizer.cpp
Go to the documentation of this file.
1// Mantid Repository : https://github.com/mantidproject/mantid
2//
3// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
4// NScD Oak Ridge National Laboratory, European Spallation Source,
5// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
6// SPDX - License - Identifier: GPL - 3.0 +
7// This code was originally translated from Fortran code on
8// https://ccpforge.cse.rl.ac.uk/gf/project/ral_nlls June 2016
9//----------------------------------------------------------------------
10// Includes
11//----------------------------------------------------------------------
16
17#include <cmath>
18
19#include <gsl/gsl_blas.h>
20
21#include <iostream>
22
24
25// clang-format off
27DECLARE_FUNCMINIMIZER(TrustRegionMinimizer, Trust Region)
29// clang-format on
30
32 declareProperty("InitialRadius", 100.0, "Initial radius of the trust region.");
33}
34
37std::string TrustRegionMinimizer::name() const { return "Trust Region"; }
38
45void TrustRegionMinimizer::initialize(API::ICostFunction_sptr costFunction, size_t maxIterations) {
46 m_leastSquares = std::dynamic_pointer_cast<CostFunctions::CostFuncLeastSquares>(costFunction);
47 if (!m_leastSquares) {
48 throw std::runtime_error("Trust-region minimizer can only be used with Least "
49 "squares cost function.");
50 }
51
52 m_function = m_leastSquares->getFittingFunction();
53 auto &values = *m_leastSquares->getValues();
54 auto n = static_cast<int>(m_leastSquares->nParams());
55 auto m = static_cast<int>(values.size());
56 if (n > m) {
57 throw std::runtime_error("More parameters than data.");
58 }
59 m_options.maxit = static_cast<int>(maxIterations);
61 m_x.allocate(n);
62 m_leastSquares->getParameters(m_x);
63 int j = 0;
64 for (size_t i = 0; i < m_function->nParams(); ++i) {
65 if (m_function->isActive(i)) {
66 m_J.m_index.emplace_back(j);
67 j++;
68 } else
69 m_J.m_index.emplace_back(-1);
70 }
71 m_options.initial_radius = getProperty("InitialRadius");
72}
73
79 m_leastSquares->setParameters(x);
80 auto &domain = *m_leastSquares->getDomain();
81 auto &values = *m_leastSquares->getValues();
82 m_function->function(domain, values);
83 auto m = static_cast<int>(values.size());
84 if (f.len() != m) {
85 f.allocate(m);
86 }
87 for (size_t i = 0; i < values.size(); ++i) {
88 f.set(i, (values.getCalculated(i) - values.getFitData(i)) * values.getFitWeight(i));
89 }
90}
91
97 m_leastSquares->setParameters(x);
98 auto &domain = *m_leastSquares->getDomain();
99 auto &values = *m_leastSquares->getValues();
100 auto n = static_cast<int>(m_leastSquares->nParams());
101 auto m = static_cast<int>(values.size());
102 if (J.len1() != m || J.len2() != n) {
103 J.allocate(m, n);
104 }
105
106 m_J.setJ(&J);
107 m_function->functionDeriv(domain, m_J);
108
109 for (int i = 1; i <= m; ++i) {
110 double w = values.getFitWeight(i - 1);
111 for (int j = 1; j <= n; ++j) {
112 J(i, j) *= w;
113 }
114 }
115}
116
123 DoubleFortranMatrix &h) const {
124 UNUSED_ARG(x);
125 UNUSED_ARG(f);
126 auto n = static_cast<int>(m_leastSquares->nParams());
127 if (h.len1() != n || h.len2() != n) {
128 h.allocate(n, n);
129 }
130 // Mantid fit functions don't calculate second derivatives.
131 // For now the Hessian will not be used.
132 h.zero();
133}
134
137bool TrustRegionMinimizer::iterate(size_t /*iteration*/) {
138 int max_tr_decrease = 100;
139 auto &w = m_workspace;
140 auto &options = m_options;
141 auto &inform = m_inform;
142 auto &X = m_x;
143 int n = m_x.len();
144 int m = static_cast<int>(m_leastSquares->getValues()->size());
145
146 if (w.first_call == 0) {
147
148 w.first_call = 1; // ?
149
150 // evaluate the residual
151 evalF(X, w.f);
152 inform.f_eval = inform.f_eval + 1;
153
154 // and evaluate the jacobian
155 evalJ(X, w.J);
156 inform.g_eval = inform.g_eval + 1;
157
158 if (options.relative_tr_radius == 1) {
159 // first, let's get diag(J^TJ)
160 double Jmax = 0.0;
161 for (int i = 1; i <= n; ++i) {
162 // note:: assumes column-storage of J
163 // JtJdiag = norm2( w.J( (i-1)*m + 1 : i*m ) );
164 double JtJdiag = 0.0;
165 for (int j = 1; j <= m; ++j) { // for_do(j, 1, m)
166 JtJdiag += pow(w.J(j, i), 2);
167 }
168 JtJdiag = sqrt(JtJdiag);
169 if (JtJdiag > Jmax)
170 Jmax = JtJdiag;
171 }
172 w.Delta = options.initial_radius_scale * (pow(Jmax, 2));
173 } else {
174 w.Delta = options.initial_radius;
175 }
176
177 if (options.calculate_svd_J) {
178 // calculate the svd of J (if needed)
179 NLLS::getSvdJ(w.J, w.smallest_sv(1), w.largest_sv(1));
180 }
181
182 w.normF = NLLS::norm2(w.f);
183 w.normF0 = w.normF;
184
185 // g = -J^Tf
186 NLLS::multJt(w.J, w.f, w.g);
187 w.g *= -1.0;
188 w.normJF = NLLS::norm2(w.g);
189 w.normJF0 = w.normJF;
190 w.normJFold = w.normJF;
191
192 // save some data
193 inform.obj = 0.5 * (pow(w.normF, 2));
194 inform.norm_g = w.normJF;
195 inform.scaled_g = w.normJF / w.normF;
196
197 // if we need to output vectors of the history of the residual
198 // and gradient, the set the initial values
199 if (options.output_progress_vectors) {
200 w.resvec(1) = inform.obj;
201 w.gradvec(1) = inform.norm_g;
202 }
203
204 // Select the order of the model to be used..
205 switch (options.model) {
206 case 1: // first-order
207 {
208 w.hf.zero();
209 w.use_second_derivatives = false;
210 break;
211 }
212 case 2: // second order
213 {
214 if (options.exact_second_derivatives) {
215 evalHF(X, w.f, w.hf);
216 inform.h_eval = inform.h_eval + 1;
217 } else {
218 // S_0 = 0 (see Dennis, Gay and Welsch)
219 w.hf.zero();
220 }
221 w.use_second_derivatives = true;
222 break;
223 }
224 case 3: // hybrid (MNT)
225 {
226 // set the tolerance :: make this relative
227 w.hybrid_tol = options.hybrid_tol * (w.normJF / (0.5 * (pow(w.normF, 2))));
228 // use first-order method initially
229 w.hf.zero();
230 w.use_second_derivatives = false;
231 if (!options.exact_second_derivatives) {
232 // initialize hf_temp too
233 w.hf_temp.zero();
234 }
235 break;
236 }
237 default:
238 throw std::logic_error("Unsupported model.");
239 }
240 }
241
242 w.iter = w.iter + 1;
243 inform.iter = w.iter;
244
245 bool success = false;
246 int no_reductions = 0;
247 double normFnew = 0.0;
248
249 while (!success) { // loop until successful
250 no_reductions = no_reductions + 1;
251 if (no_reductions > max_tr_decrease + 1) {
252 return true;
253 }
254 // Calculate the step d that the model thinks we should take next
255 calculateStep(w.J, w.f, w.hf, w.Delta, w.d, w.normd, options);
256
257 // Accept the step?
258 w.Xnew = X;
259 w.Xnew += w.d;
260 evalF(w.Xnew, w.fnew);
261 inform.f_eval = inform.f_eval + 1;
262 normFnew = NLLS::norm2(w.fnew);
263
264 // Get the value of the model
265 // md := m_k(d)
266 // evaluated at the new step
267 double md = evaluateModel(w.f, w.J, w.hf, w.d, options, w.evaluate_model_ws);
268
269 // Calculate the quantity
270 // rho = 0.5||f||^2 - 0.5||fnew||^2 = actual_reduction
271 // -------------------------- -------------------
272 // m_k(0) - m_k(d) predicted_reduction
273 //
274 // if model is good, rho should be close to one
275 auto rho = calculateRho(w.normF, normFnew, md, options);
276 if (!std::isfinite(rho) || rho <= options.eta_successful) {
277 if ((w.use_second_derivatives) && (options.model == 3) && (no_reductions == 1)) {
278 // recalculate rho based on the approx GN model
279 // (i.e. the Gauss-Newton model evaluated at the Quasi-Newton step)
280 double rho_gn = calculateRho(w.normF, normFnew, w.evaluate_model_ws.md_gn, options);
281 if (rho_gn > options.eta_successful) {
282 // switch back to gauss-newton
283 w.use_second_derivatives = false;
284 w.hf_temp.zero(); // discard S_k, as it's been polluted
285 w.hf.zero();
286 }
287 }
288 } else {
289 success = true;
290 }
291
292 // Update the TR radius
293 updateTrustRegionRadius(rho, options, w);
294
295 if (!success) {
296 // finally, check d makes progress
297 if (NLLS::norm2(w.d) < std::numeric_limits<double>::epsilon() * NLLS::norm2(w.Xnew)) {
298 m_errorString = "Failed to make progress.";
299 return false;
300 }
301 }
302 } // loop
303 // if we reach here, a successful step has been found
304
305 // update X and f
306 X = w.Xnew;
307 w.f = w.fnew;
308
309 if (!options.exact_second_derivatives) {
310 // first, let's save some old values...
311 // g_old = -J_k^T r_k
312 w.g_old = w.g;
313 // g_mixed = -J_k^T r_{k+1}
314 NLLS::multJt(w.J, w.fnew, w.g_mixed);
315 w.g_mixed *= -1.0;
316 }
317
318 // evaluate J and hf at the new point
319 evalJ(X, w.J);
320 inform.g_eval = inform.g_eval + 1;
321
322 if (options.calculate_svd_J) {
323 NLLS::getSvdJ(w.J, w.smallest_sv(w.iter + 1), w.largest_sv(w.iter + 1));
324 }
325
326 // g = -J^Tf
327 NLLS::multJt(w.J, w.f, w.g);
328 w.g *= -1.0;
329
330 w.normJFold = w.normJF;
331 w.normF = normFnew;
332 w.normJF = NLLS::norm2(w.g);
333
334 // setup the vectors needed if second derivatives are not available
335 if (!options.exact_second_derivatives) {
336 w.y = w.g_old;
337 w.y -= w.g;
338 w.y_sharp = w.g_mixed;
339 w.y_sharp -= w.g;
340 }
341
342 if (options.model == 3) {
343 // hybrid method -- check if we need second derivatives
344
345 if (w.use_second_derivatives) {
346 if (w.normJF > w.normJFold) {
347 // switch to Gauss-Newton
348 w.use_second_derivatives = false;
349 // save hf as hf_temp
350 w.hf_temp = w.hf;
351 w.hf.zero();
352 }
353 } else {
354 auto FunctionValue = 0.5 * (pow(w.normF, 2));
355 if (w.normJF / FunctionValue < w.hybrid_tol) {
356 w.hybrid_count = w.hybrid_count + 1;
357 if (w.hybrid_count == options.hybrid_switch_its) {
358 // use (Quasi-)Newton
359 w.use_second_derivatives = true;
360 w.hybrid_count = 0;
361 // copy hf from hf_temp
362 if (!options.exact_second_derivatives) {
363 w.hf = w.hf_temp;
364 }
365 }
366 } else {
367 w.hybrid_count = 0;
368 }
369 }
370
371 if (!w.use_second_derivatives) {
372 // call apply_second_order_info anyway, so that we update the
373 // second order approximation
374 if (!options.exact_second_derivatives) {
375 rankOneUpdate(w.hf_temp, w);
376 }
377 }
378 }
379
380 if (w.use_second_derivatives) {
381 // apply_second_order_info(n, m, X, w, evalHF, params, options, inform,
382 // weights);
383 if (options.exact_second_derivatives) {
384 evalHF(X, w.f, w.hf);
385 inform.h_eval = inform.h_eval + 1;
386 } else {
387 // use the rank-one approximation...
388 rankOneUpdate(w.hf, w);
389 }
390 }
391
392 // update the stats
393 inform.obj = 0.5 * (pow(w.normF, 2));
394 inform.norm_g = w.normJF;
395 inform.scaled_g = w.normJF / w.normF;
396 if (options.output_progress_vectors) {
397 w.resvec(w.iter + 1) = inform.obj;
398 w.gradvec(w.iter + 1) = inform.norm_g;
399 }
400
401 // Test convergence
402 testConvergence(w.normF, w.normJF, w.normF0, w.normJF0, options, inform);
403
404 if (inform.convergence_normf == 1 || inform.convergence_normg == 1) {
405 return false;
406 }
407
408 inform.iter = w.iter;
409 inform.resvec = w.resvec;
410 inform.gradvec = w.gradvec;
411
412 return true;
413}
414
416namespace {
417
418const double HUGEST = std::numeric_limits<double>::max();
419const double EPSILON_MCH = std::numeric_limits<double>::epsilon();
420const double LARGEST = HUGEST;
421const double LOWER_DEFAULT = -0.5 * LARGEST;
422const double UPPER_DEFAULT = LARGEST;
423const double POINT4 = 0.4;
424const double ZERO = 0.0;
425const double ONE = 1.0;
426const double TWO = 2.0;
427const double THREE = 3.0;
428const double FOUR = 4.0;
429const double SIX = 6.0;
430const double HALF = 0.5;
431const double ONESIXTH = ONE / SIX;
432const double SIXTH = ONESIXTH;
433const double ONE_THIRD = ONE / THREE;
434const double TWO_THIRDS = TWO / THREE;
435const double THREE_QUARTERS = 0.75;
436const double TWENTY_FOUR = 24.0;
437const int MAX_DEGREE = 3;
438const int HISTORY_MAX = 100;
439const double TEN_EPSILON_MCH = 10.0 * EPSILON_MCH;
440const double ROOTS_TOL = TEN_EPSILON_MCH;
441const double INFINITE_NUMBER = HUGEST;
442
445inline double sign(double x, double y) { return y >= 0.0 ? fabs(x) : -fabs(x); }
446
450struct control_type {
453
455 // zero
456 double h_min = EPSILON_MCH;
457
459 double lower = LOWER_DEFAULT;
460 double upper = UPPER_DEFAULT;
461
464 double stop_normal = EPSILON_MCH;
465 double stop_absolute_normal = EPSILON_MCH;
466
468 // constraint
470 bool equality_problem = false;
471};
472
476struct history_type {
477 //
479 double lambda = 0.0;
480
482 double x_norm = 0.0;
483};
484
488struct inform_type {
489 //
490
492 int len_history = 0;
493
495 double obj = HUGEST;
496
498 double x_norm = 0.0;
499
501 double multiplier = 0.0;
502
505 double pole = 0.0;
506
508 bool hard_case = false;
509
511 std::vector<history_type> history;
512};
513
520double biggest(double a, double b, double c, double d) { return std::max(std::max(a, b), std::max(c, d)); }
521
527double biggest(double a, double b, double c) { return std::max(std::max(a, b), c); }
528
532double maxAbsVal(const DoubleFortranVector &v) {
533 auto p = v.indicesOfMinMaxElements();
534 return std::max(fabs(v.get(p.first)), fabs(v.get(p.second)));
535}
536
542std::pair<double, double> minMaxValues(const DoubleFortranVector &v) {
543 auto p = v.indicesOfMinMaxElements();
544 return std::make_pair(v.get(p.first), v.get(p.second));
545}
546
551double twoNorm(const DoubleFortranVector &v) {
552 if (v.size() == 0)
553 return 0.0;
554 return v.norm();
555}
556
561double dotProduct(const DoubleFortranVector &v1, const DoubleFortranVector &v2) { return v1.dot(v2); }
562
567double maxVal(const DoubleFortranVector &v, int n) {
568 double res = std::numeric_limits<double>::lowest();
569 for (int i = 1; i <= n; ++i) {
570 auto val = v(i);
571 if (val > res) {
572 res = val;
573 }
574 }
575 return res;
576}
577
591void rootsQuadratic(double a0, double a1, double a2, double tol, int &nroots, double &root1, double &root2) {
592
593 auto rhs = tol * a1 * a1;
594 if (fabs(a0 * a2) > rhs) { // really is quadratic
595 root2 = a1 * a1 - FOUR * a2 * a0;
596 if (fabs(root2) <= pow(EPSILON_MCH * a1, 2)) { // numerical double root
597 nroots = 2;
598 root1 = -HALF * a1 / a2;
599 root2 = root1;
600 } else if (root2 < ZERO) { // complex not real roots
601 nroots = 0;
602 root1 = ZERO;
603 root2 = ZERO;
604 } else { // distint real roots
605 auto d = -HALF * (a1 + sign(sqrt(root2), a1));
606 nroots = 2;
607 root1 = d / a2;
608 root2 = a0 / d;
609 if (root1 > root2) {
610 d = root1;
611 root1 = root2;
612 root2 = d;
613 }
614 }
615 } else if (a2 == ZERO) {
616 if (a1 == ZERO) {
617 if (a0 == ZERO) { // the function is zero
618 nroots = 1;
619 root1 = ZERO;
620 root2 = ZERO;
621 } else { // the function is constant
622 nroots = 0;
623 root1 = ZERO;
624 root2 = ZERO;
625 }
626 } else { // the function is linear
627 nroots = 1;
628 root1 = -a0 / a1;
629 root2 = ZERO;
630 }
631 } else { // very ill-conditioned quadratic
632 nroots = 2;
633 if (-a1 / a2 > ZERO) {
634 root1 = ZERO;
635 root2 = -a1 / a2;
636 } else {
637 root1 = -a1 / a2;
638 root2 = ZERO;
639 }
640 }
641
642 // perfom a Newton iteration to ensure that the roots are accurate
643
644 if (nroots >= 1) {
645 auto p = (a2 * root1 + a1) * root1 + a0;
646 auto pprime = TWO * a2 * root1 + a1;
647 if (pprime != ZERO) {
648 root1 = root1 - p / pprime;
649 }
650 if (nroots == 2) {
651 p = (a2 * root2 + a1) * root2 + a0;
652 pprime = TWO * a2 * root2 + a1;
653 if (pprime != ZERO) {
654 root2 = root2 - p / pprime;
655 }
656 }
657 }
658}
659
675void rootsCubic(double a0, double a1, double a2, double a3, double tol, int &nroots, double &root1, double &root2,
676 double &root3) {
677
678 // Check to see if the cubic is actually a quadratic
679 if (a3 == ZERO) {
680 rootsQuadratic(a0, a1, a2, tol, nroots, root1, root2);
681 root3 = INFINITE_NUMBER;
682 return;
683 }
684
685 // Deflate the polnomial if the trailing coefficient is zero
686 if (a0 == ZERO) {
687 root1 = ZERO;
688 rootsQuadratic(a1, a2, a3, tol, nroots, root2, root3);
689 nroots = nroots + 1;
690 return;
691 }
692
693 // 1. Use Nonweiler's method (CACM 11:4, 1968, pp269)
694
695 double c0 = a0 / a3;
696 double c1 = a1 / a3;
697 double c2 = a2 / a3;
698
699 double s = c2 / THREE;
700 double t = s * c2;
701 double b = 0.5 * (s * (TWO_THIRDS * t - c1) + c0);
702 t = (t - c1) / THREE;
703 double c = t * t * t;
704 double d = b * b - c;
705
706 // 1 real + 2 equal real or 2 complex roots
707 if (d >= ZERO) {
708 d = pow(sqrt(d) + fabs(b), ONE_THIRD);
709 if (d != ZERO) {
710 if (b > ZERO) {
711 b = -d;
712 } else {
713 b = d;
714 }
715 c = t / b;
716 }
717 d = sqrt(THREE_QUARTERS) * (b - c);
718 b = b + c;
719 c = -0.5 * b - s;
720 root1 = b - s;
721 if (d == ZERO) {
722 nroots = 3;
723 root2 = c;
724 root3 = c;
725 } else {
726 nroots = 1;
727 }
728 } else { // 3 real roots
729 if (b == ZERO) {
730 d = TWO_THIRDS * atan(ONE);
731 } else {
732 d = atan(sqrt(-d) / fabs(b)) / THREE;
733 }
734 if (b < ZERO) {
735 b = TWO * sqrt(t);
736 } else {
737 b = -TWO * sqrt(t);
738 }
739 c = cos(d) * b;
740 t = -sqrt(THREE_QUARTERS) * sin(d) * b - HALF * c;
741 d = -t - c - s;
742 c = c - s;
743 t = t - s;
744 if (fabs(c) > fabs(t)) {
745 root3 = c;
746 } else {
747 root3 = t;
748 t = c;
749 }
750 if (fabs(d) > fabs(t)) {
751 root2 = d;
752 } else {
753 root2 = t;
754 t = d;
755 }
756 root1 = t;
757 nroots = 3;
758 }
759
760 // reorder the roots
761
762 if (nroots == 3) {
763 if (root1 > root2) {
764 double a = root2;
765 root2 = root1;
766 root1 = a;
767 }
768 if (root2 > root3) {
769 double a = root3;
770 if (root1 > root3) {
771 a = root1;
772 root1 = root3;
773 }
774 root3 = root2;
775 root2 = a;
776 }
777 }
778
779 // perfom a Newton iteration to ensure that the roots are accurate
780 double p = ((a3 * root1 + a2) * root1 + a1) * root1 + a0;
781 double pprime = (THREE * a3 * root1 + TWO * a2) * root1 + a1;
782 if (pprime != ZERO) {
783 root1 = root1 - p / pprime;
784 // p = ((a3 * root1 + a2) * root1 + a1) * root1 + a0; // never used
785 }
786
787 if (nroots == 3) {
788 p = ((a3 * root2 + a2) * root2 + a1) * root2 + a0;
789 pprime = (THREE * a3 * root2 + TWO * a2) * root2 + a1;
790 if (pprime != ZERO) {
791 root2 = root2 - p / pprime;
792 // p = ((a3 * root2 + a2) * root2 + a1) * root2 + a0; // never used
793 }
794
795 p = ((a3 * root3 + a2) * root3 + a1) * root3 + a0;
796 pprime = (THREE * a3 * root3 + TWO * a2) * root3 + a1;
797 if (pprime != ZERO) {
798 root3 = root3 - p / pprime;
799 // p = ((a3 * root3 + a2) * root3 + a1) * root3 + a0; // never used
800 }
801 }
802}
803
814void PiBetaDerivs(int max_order, double beta, const DoubleFortranVector &x_norm2,
815 DoubleFortranVector &pi_beta) { // cppcheck-suppress constParameterReference
816 double hbeta = HALF * beta;
817 pi_beta(0) = pow(x_norm2(0), hbeta);
818 pi_beta(1) = hbeta * (pow(x_norm2(0), (hbeta - ONE))) * x_norm2(1);
819 if (max_order == 1)
820 return;
821 pi_beta(2) =
822 hbeta * (pow(x_norm2(0), (hbeta - TWO))) * ((hbeta - ONE) * pow(x_norm2(1), 2) + x_norm2(0) * x_norm2(2));
823 if (max_order == 2)
824 return;
825 pi_beta(3) = hbeta * (pow(x_norm2(0), (hbeta - THREE))) *
826 (x_norm2(3) * pow(x_norm2(0), 2) +
827 (hbeta - ONE) * (THREE * x_norm2(0) * x_norm2(1) * x_norm2(2) + (hbeta - TWO) * pow(x_norm2(1), 3)));
828}
829
834void intitializeControl(control_type &control) {
835 control.stop_normal = pow(EPSILON_MCH, 0.75);
836 control.stop_absolute_normal = pow(EPSILON_MCH, 0.75);
837}
838
855void solveSubproblemMain(int n, double radius, double f, const DoubleFortranVector &c, const DoubleFortranVector &h,
856 DoubleFortranVector &x, const control_type &control, inform_type &inform) {
857
858 // set initial values
859
860 if (x.len() != n) {
861 x.allocate(n);
862 }
863 x.zero();
864 inform.x_norm = ZERO;
865 inform.obj = f;
866 inform.hard_case = false;
867
868 // Check that arguments are OK
869 if (n < 0) {
870 throw std::runtime_error("Number of unknowns for trust-region subproblem is negative.");
871 }
872 if (radius < 0) {
873 throw std::runtime_error("Trust-region radius for trust-region subproblem is negative");
874 }
875
876 DoubleFortranVector x_norm2(0, MAX_DEGREE), pi_beta(0, MAX_DEGREE);
877
878 // compute the two-norm of c and the extreme eigenvalues of H
879
880 double c_norm = twoNorm(c);
881 double lambda_min = 0.0;
882 double lambda_max = 0.0;
883 std::tie(lambda_min, lambda_max) = minMaxValues(h);
884
885 double lambda = 0.0;
887 if (c_norm == ZERO && lambda_min >= ZERO) {
888 if (control.equality_problem) {
889 int i_hard = 1; // TODO: is init value of 1 correct?
890 for (int i = 1; i <= n; ++i) { // do i = 1, n
891 if (h(i) == lambda_min) {
892 i_hard = i;
893 break;
894 }
895 }
896 x(i_hard) = ONE / radius;
897 inform.x_norm = radius;
898 inform.obj = f + lambda_min * radius * radius;
899 }
900 return;
901 }
902
903 // construct values lambda_l and lambda_u for which lambda_l <=
904 // lambda_optimal
905 // <= lambda_u, and ensure that all iterates satisfy lambda_l <= lambda
906 // <= lambda_u
907
908 double c_norm_over_radius = c_norm / radius;
909 double lambda_l = 0.0, lambda_u = 0.0;
910 if (control.equality_problem) {
911 lambda_l = biggest(control.lower, -lambda_min, c_norm_over_radius - lambda_max);
912 lambda_u = std::min(control.upper, c_norm_over_radius - lambda_min);
913 } else {
914 lambda_l = biggest(control.lower, ZERO, -lambda_min, c_norm_over_radius - lambda_max);
915 lambda_u = std::min(control.upper, std::max(ZERO, c_norm_over_radius - lambda_min));
916 }
917 lambda = lambda_l;
918
919 // check for the "hard case"
920 if (lambda == -lambda_min) {
921 int i_hard = 1; // TODO: is init value of 1 correct?
922 double c2 = ZERO;
923 inform.hard_case = true;
924 for (int i = 1; i <= n; ++i) { // for_do(i, 1, n)
925 if (h(i) == lambda_min) {
926 if (fabs(c(i)) > EPSILON_MCH * c_norm) {
927 inform.hard_case = false;
928 c2 = c2 + pow(c(i), 2);
929 } else {
930 i_hard = i;
931 }
932 }
933 }
934
935 // the hard case may occur
936 if (inform.hard_case) {
937 for (int i = 1; i <= n; ++i) { // for_do(i, 1, n)
938 if (h(i) != lambda_min) {
939 x(i) = -c(i) / (h(i) + lambda);
940 } else {
941 x(i) = ZERO;
942 }
943 }
944 inform.x_norm = twoNorm(x);
945
946 // the hard case does occur
947
948 if (inform.x_norm <= radius) {
949 if (inform.x_norm < radius) {
950
951 // compute the step alpha so that x + alpha e_i_hard lies on the
952 // trust-region
953 // boundary and gives the smaller value of q
954
955 auto utx = x(i_hard) / radius;
956 auto distx = (radius - inform.x_norm) * ((radius + inform.x_norm) / radius);
957 auto alpha = sign(distx / (fabs(utx) + sqrt(pow(utx, 2) + distx / radius)), utx);
958
959 // record the optimal values
960
961 x(i_hard) = x(i_hard) + alpha;
962 }
963 inform.x_norm = twoNorm(x);
964 inform.obj = f + HALF * (dotProduct(c, x) - lambda * pow(radius, 2));
965 return;
966
967 // the hard case didn't occur after all
968 } else {
969 inform.hard_case = false;
970
971 // compute the first derivative of ||x|(lambda)||^2 - radius^2
972 auto w_norm2 = ZERO;
973 for (int i = 1; i <= n; ++i) { // for_do(i, 1, n)
974 if (h(i) != lambda_min)
975 w_norm2 = w_norm2 + pow(c(i), 2) / pow((h(i) + lambda), 3);
976 }
977 x_norm2(1) = -TWO * w_norm2;
978
979 // compute the newton correction
980
981 lambda = lambda + (pow(inform.x_norm, 2) - pow(radius, 2)) / x_norm2(1);
982 lambda_l = std::max(lambda_l, lambda);
983 }
984
985 // there is a singularity at lambda. compute the point for which the
986 // sum of squares of the singular terms is equal to radius^2
987 } else {
988 lambda = lambda + std::max(sqrt(c2) / radius, lambda * EPSILON_MCH);
989 lambda_l = std::max(lambda_l, lambda);
990 }
991 }
992
993 // the iterates will all be in the L region. Prepare for the main loop
994 auto max_order = std::max(1, std::min(MAX_DEGREE, control.taylor_max_degree));
995
996 // start the main loop
997 for (;;) {
998
999 // if h(lambda) is positive definite, solve h(lambda) x = - c
1000
1001 for (int i = 1; i <= n; ++i) { // for_do(i, 1, n)
1002 x(i) = -c(i) / (h(i) + lambda);
1003 }
1004
1005 // compute the two-norm of x
1006 inform.x_norm = twoNorm(x);
1007 x_norm2(0) = pow(inform.x_norm, 2);
1008
1009 // if the newton step lies within the trust region, exit
1010
1011 if (lambda == ZERO && inform.x_norm <= radius) {
1012 inform.obj = f + HALF * dotProduct(c, x);
1013 return;
1014 }
1015
1018
1019 if (fabs(inform.x_norm - radius) <= std::max(control.stop_normal * radius, control.stop_absolute_normal)) {
1020 break;
1021 }
1022
1023 lambda_l = std::max(lambda_l, lambda);
1024
1025 // record, for the future, values of lambda which give small ||x||
1026 if (inform.len_history < HISTORY_MAX) {
1027 history_type history_item;
1028 history_item.lambda = lambda;
1029 history_item.x_norm = inform.x_norm;
1030 inform.history.emplace_back(history_item);
1031 inform.len_history = inform.len_history + 1;
1032 }
1033
1034 // a lambda in L has been found. It is now simply a matter of applying
1035 // a variety of Taylor-series-based methods starting from this lambda
1036
1037 // precaution against rounding producing lambda outside L
1038
1039 if (lambda > lambda_u) {
1040 throw std::runtime_error("Lambda for trust-region subproblem is ill conditioned");
1041 }
1042
1043 // compute first derivatives of x^T M x
1044
1045 // form ||w||^2 = x^T H^-1(lambda) x
1046
1047 double w_norm2 = ZERO;
1048 for (int i = 1; i <= n; ++i) { // for_do(i, 1, n)
1049 w_norm2 = w_norm2 + pow(c(i), 2) / pow(h(i) + lambda, 3);
1050 }
1051
1052 // compute the first derivative of x_norm2 = x^T M x
1053 x_norm2(1) = -TWO * w_norm2;
1054
1055 // compute pi_beta = ||x||^beta and its first derivative when beta = - 1
1056 double beta = -ONE;
1057 PiBetaDerivs(1, beta, x_norm2, pi_beta);
1058
1059 // compute the Newton correction (for beta = - 1)
1060
1061 auto delta_lambda = -(pi_beta(0) - pow((radius), beta)) / pi_beta(1);
1062
1063 DoubleFortranVector lambda_new(3);
1064 int n_lambda = 1;
1065 lambda_new(n_lambda) = lambda + delta_lambda;
1066
1067 if (max_order >= 3) {
1068
1069 // compute the second derivative of x^T x
1070
1071 double z_norm2 = ZERO;
1072 for (int i = 1; i <= n; ++i) { // for_do(i, 1, n)
1073 z_norm2 = z_norm2 + pow(c(i), 2) / pow((h(i) + lambda), 4);
1074 }
1075 x_norm2(2) = SIX * z_norm2;
1076
1077 // compute the third derivatives of x^T x
1078
1079 double v_norm2 = ZERO;
1080 for (int i = 1; i <= n; ++i) { // for_do(i, 1, n)
1081 v_norm2 = v_norm2 + pow(c(i), 2) / pow((h(i) + lambda), 5);
1082 }
1083 x_norm2(3) = -TWENTY_FOUR * v_norm2;
1084
1085 // compute pi_beta = ||x||^beta and its derivatives when beta = 2
1086
1087 beta = TWO;
1088 PiBetaDerivs(max_order, beta, x_norm2, pi_beta);
1089
1090 // compute the "cubic Taylor approximaton" step (beta = 2)
1091
1092 auto a_0 = pi_beta(0) - pow((radius), beta);
1093 auto a_1 = pi_beta(1);
1094 auto a_2 = HALF * pi_beta(2);
1095 auto a_3 = SIXTH * pi_beta(3);
1096 auto a_max = biggest(fabs(a_0), fabs(a_1), fabs(a_2), fabs(a_3));
1097 if (a_max > ZERO) {
1098 a_0 = a_0 / a_max;
1099 a_1 = a_1 / a_max;
1100 a_2 = a_2 / a_max;
1101 a_3 = a_3 / a_max;
1102 }
1103 int nroots = 0;
1104 double root1 = 0, root2 = 0, root3 = 0;
1105
1106 rootsCubic(a_0, a_1, a_2, a_3, ROOTS_TOL, nroots, root1, root2, root3);
1107 n_lambda = n_lambda + 1;
1108 if (nroots == 3) {
1109 lambda_new(n_lambda) = lambda + root3;
1110 } else {
1111 lambda_new(n_lambda) = lambda + root1;
1112 }
1113
1114 // compute pi_beta = ||x||^beta and its derivatives when beta = - 0.4
1115
1116 beta = -POINT4;
1117 PiBetaDerivs(max_order, beta, x_norm2, pi_beta);
1118
1119 // compute the "cubic Taylor approximaton" step (beta = - 0.4)
1120
1121 a_0 = pi_beta(0) - pow((radius), beta);
1122 a_1 = pi_beta(1);
1123 a_2 = HALF * pi_beta(2);
1124 a_3 = SIXTH * pi_beta(3);
1125 a_max = biggest(fabs(a_0), fabs(a_1), fabs(a_2), fabs(a_3));
1126 if (a_max > ZERO) {
1127 a_0 = a_0 / a_max;
1128 a_1 = a_1 / a_max;
1129 a_2 = a_2 / a_max;
1130 a_3 = a_3 / a_max;
1131 }
1132 rootsCubic(a_0, a_1, a_2, a_3, ROOTS_TOL, nroots, root1, root2, root3);
1133 n_lambda = n_lambda + 1;
1134 if (nroots == 3) {
1135 lambda_new(n_lambda) = lambda + root3;
1136 } else {
1137 lambda_new(n_lambda) = lambda + root1;
1138 }
1139 }
1140
1141 // compute the best Taylor improvement
1142
1143 auto lambda_plus = maxVal(lambda_new, n_lambda);
1144 delta_lambda = lambda_plus - lambda;
1145 lambda = lambda_plus;
1146
1147 // improve the lower bound if possible
1148
1149 lambda_l = std::max(lambda_l, lambda_plus);
1150
1151 // check that the best Taylor improvement is significant
1152
1153 if (std::abs(delta_lambda) < EPSILON_MCH * std::max(ONE, std::abs(lambda))) {
1154 break;
1155 }
1156
1157 } // for(;;)
1158}
1159
1176void solveSubproblem(int n, double radius, double f, const DoubleFortranVector &c, const DoubleFortranVector &h,
1177 DoubleFortranVector &x, const control_type &control, inform_type &inform) {
1178 // scale the problem to solve instead
1179 // minimize q_s(x_s) = 1/2 <x_s, H_s x_s> + <c_s, x_s> + f_s
1180 // subject to ||x_s||_2 <= radius_s or ||x_s||_2 = radius_s
1181
1182 // where H_s = H / s_h and c_s = c / s_c for scale factors s_h and s_c
1183
1184 // This corresponds to
1185 // radius_s = ( s_h / s_c ) radius,
1186 // f_s = ( s_h / s_c^2 ) f
1187 // and the solution may be recovered as
1188 // x = ( s_c / s_h ) x_s
1189 // lambda = s_h lambda_s
1190 // q(x) = ( s_c^2 / s_ h ) q_s(x_s)
1191
1192 // scale H by the largest H and remove relatively tiny H
1193
1194 DoubleFortranVector h_scale(n);
1195 auto scale_h = maxAbsVal(h); // MAXVAL( ABS( H ) )
1196 if (scale_h > ZERO) {
1197 for (int i = 1; i <= n; ++i) { // do i = 1, n
1198 if (fabs(h(i)) >= control.h_min * scale_h) {
1199 h_scale(i) = h(i) / scale_h;
1200 } else {
1201 h_scale(i) = ZERO;
1202 }
1203 }
1204 } else {
1205 scale_h = ONE;
1206 h_scale.zero();
1207 }
1208
1209 // scale c by the largest c and remove relatively tiny c
1210
1211 DoubleFortranVector c_scale(n);
1212 auto scale_c = maxAbsVal(c); // maxval( abs( c ) )
1213 if (scale_c > ZERO) {
1214 for (int i = 1; i <= n; ++i) { // do i = 1, n
1215 if (fabs(c(i)) >= control.h_min * scale_c) {
1216 c_scale(i) = c(i) / scale_c;
1217 } else {
1218 c_scale(i) = ZERO;
1219 }
1220 }
1221 } else {
1222 scale_c = ONE;
1223 c_scale.zero();
1224 }
1225
1226 double radius_scale = (scale_h / scale_c) * radius;
1227 double f_scale = (scale_h / pow(scale_c, 2)) * f;
1228
1229 auto control_scale = control;
1230 if (control_scale.lower != LOWER_DEFAULT) {
1231 control_scale.lower = control_scale.lower / scale_h;
1232 }
1233 if (control_scale.upper != UPPER_DEFAULT) {
1234 control_scale.upper = control_scale.upper / scale_h;
1235 }
1236
1237 // solve the scaled problem
1238
1239 solveSubproblemMain(n, radius_scale, f_scale, c_scale, h_scale, x, control_scale, inform);
1240
1241 // unscale the solution, function value, multiplier and related values
1242
1243 // x = ( scale_c / scale_h ) * x
1244 x *= scale_c / scale_h;
1245 inform.obj *= pow(scale_c, 2) / scale_h;
1246 inform.multiplier *= scale_h;
1247 inform.pole *= scale_h;
1248 for (auto &history_item : inform.history) { // do i = 1, inform.len_history
1249 history_item.lambda *= scale_h;
1250 history_item.x_norm *= scale_c / scale_h;
1251 }
1252}
1253
1254} // namespace
1255
1273 const DoubleFortranMatrix &hf, double Delta, DoubleFortranVector &d,
1274 double &normd, const NLLS::nlls_options &options) {
1275
1276 control_type controlOptions;
1277 inform_type inform;
1278
1279 // The code finds
1280 // d = arg min_p w^T p + 0.5 * p^T D p
1281 // s.t. ||p|| \leq Delta
1282 //
1283 // where D is diagonal
1284 //
1285 // our probem in naturally in the form
1286 //
1287 // d = arg min_p v^T p + 0.5 * p^T H p
1288 // s.t. ||p|| \leq Delta
1289 //
1290 // first, find the matrix H and vector v
1291 // Set A = J^T J
1293 // add any second order information...
1294 // so A = J^T J + HF
1295 m_A += hf;
1296
1297 // now form v = J^T f
1298 NLLS::multJt(J, f, m_v);
1299
1300 // if scaling needed, do it
1301 if (options.scale != 0) {
1302 applyScaling(J, m_A, m_v, m_scale, options);
1303 }
1304
1305 // Now that we have the unprocessed matrices, we need to get an
1306 // eigendecomposition to make A diagonal
1307 //
1309
1310 // We can now change variables, setting y = Vp, getting
1311 // Vd = arg min_(Vx) v^T p + 0.5 * (Vp)^T D (Vp)
1312 // s.t. ||x|| \leq Delta
1313 // <=>
1314 // Vd = arg min_(Vx) V^Tv^T (Vp) + 0.5 * (Vp)^T D (Vp)
1315 // s.t. ||x|| \leq Delta
1316 // <=>
1317 // we need to get the transformed vector v
1319
1320 // we've now got the vectors we need, pass to solveSubproblem
1321 intitializeControl(controlOptions);
1322
1323 auto n = J.len2();
1324 if (m_v_trans.len() != n) {
1326 }
1327
1328 for (int ii = 1; ii <= n; ++ii) { // for_do(ii, 1,n)
1329 if (fabs(m_v_trans(ii)) < EPSILON_MCH) {
1330 m_v_trans(ii) = ZERO;
1331 }
1332 if (fabs(m_ew(ii)) < EPSILON_MCH) {
1333 m_ew(ii) = ZERO;
1334 }
1335 }
1336
1337 solveSubproblem(n, Delta, ZERO, m_v_trans, m_ew, m_d_trans, controlOptions, inform);
1338
1339 // and return the un-transformed vector
1341
1342 normd = NLLS::norm2(d); // ! ||d||_D
1343
1344 if (options.scale != 0) {
1345 for (int ii = 1; ii <= n; ++ii) { // for_do(ii, 1, n)
1346 d(ii) = d(ii) / m_scale(ii);
1347 }
1348 }
1349
1350} // calculateStep
1351
1355
1356} // namespace Mantid::CurveFitting::FuncMinimisers
const std::vector< double > * lambda
const std::vector< double > & rhs
#define DECLARE_FUNCMINIMIZER(classname, username)
Macro for declaring a new type of minimizers to be used with the FuncMinimizerFactory.
#define fabs(x)
Definition Matrix.cpp:22
#define UNUSED_ARG(x)
Function arguments are sometimes unused in certain implmentations but are required for documentation ...
Definition System.h:48
int len_history
the number of (||x||_M,lambda) pairs in the history
bool hard_case
has the hard case occurred?
int taylor_max_degree
maximum degree of Taylor approximant allowed
double obj
the value of the quadratic function
double h_min
any entry of H that is smaller than h_min * MAXVAL( H ) we be treated as
std::vector< history_type > history
history information
double stop_absolute_normal
bool equality_problem
is the solution is REQUIRED to lie on the boundary (i.e., is the
double multiplier
the Lagrange multiplier corresponding to the trust-region constraint
double lower
lower and upper bounds on the multiplier, if known
double stop_normal
stop when | ||x|| - radius | <= max( stop_normal * radius, stop_absolute_normal )
double pole
a lower bound max(0,-lambda_1), where lambda_1 is the left-most eigenvalue of (H,M)
double x_norm
corresponding value of ||x(lambda)||_M
double upper
std::string m_errorString
Error string.
void zero()
Set all elements to zero.
void set(const size_t i, const double value)
Set an element.
void allocate(const int iFrom, const int iTo, const int jFrom, const int jTo)
Resize the matrix.
int len2() const
Get the size along the second dimension as an int.
int len1() const
Get the size along the first dimension as an int.
void allocate(int firstIndex, int lastIndex)
Resize the vector.
int len() const
Get the length of the vector as an int.
Trust Region minimizer class using the DTRS method of GALAHAD.
std::string name() const override
Name of the minimizer.
void calculateStep(const DoubleFortranMatrix &J, const DoubleFortranVector &f, const DoubleFortranMatrix &hf, double Delta, DoubleFortranVector &d, double &normd, const NLLS::nlls_options &options)
Find a correction vector to the parameters.
double costFunctionVal() override
Return current value of the cost function.
void evalF(const DoubleFortranVector &x, DoubleFortranVector &f) const
Evaluate the fitting function and calculate the residuals.
NLLS::NLLS_workspace m_workspace
Temporary and helper objects.
void evalJ(const DoubleFortranVector &x, DoubleFortranMatrix &J) const
Evaluate the Jacobian.
std::shared_ptr< CostFunctions::CostFuncLeastSquares > m_leastSquares
Stored cost function.
void evalHF(const DoubleFortranVector &x, const DoubleFortranVector &f, DoubleFortranMatrix &h) const
Evaluate the Hessian.
NLLS::nlls_inform m_inform
Information about the fitting.
void initialize(API::ICostFunction_sptr costFunction, size_t maxIterations=0) override
Initialize minimizer, i.e. pass a function to minimize.
JacobianImpl1< DoubleFortranMatrix > m_J
The Jacobian.
API::IFunction_sptr m_function
Stored to access IFunction interface in iterate()
TypedValue getProperty(const std::string &name) const override
Get the value of a property.
std::shared_ptr< ICostFunction > ICostFunction_sptr
define a shared pointer to a cost function
void MANTID_CURVEFITTING_DLL matmultInner(const DoubleFortranMatrix &J, DoubleFortranMatrix &A)
Takes an m x n matrix J and forms the n x n matrix A given by A = J' * J.
double MANTID_CURVEFITTING_DLL norm2(const DoubleFortranVector &v)
Compute the 2-norm of a vector which is a square root of the sum of squares of its elements.
void allEigSymm(const DoubleFortranMatrix &A, DoubleFortranVector &ew, DoubleFortranMatrix &ev)
Calculate all the eigenvalues of a symmetric matrix.
void MANTID_CURVEFITTING_DLL multJ(const DoubleFortranMatrix &J, const DoubleFortranVector &x, DoubleFortranVector &Jx)
Multiply a matrix by a vector.
void MANTID_CURVEFITTING_DLL multJt(const DoubleFortranMatrix &J, const DoubleFortranVector &x, DoubleFortranVector &Jtx)
Multiply a transposed matrix by a vector.
double dotProduct(const DoubleFortranVector &x, const DoubleFortranVector &y)
Dot product of two vectors.
void MANTID_CURVEFITTING_DLL getSvdJ(const DoubleFortranMatrix &J, double &s1, double &sn)
Given an (m x n) matrix J held by columns as a vector, this routine returns the largest and smallest ...
FortranVector< EigenVector > DoubleFortranVector
void initialize(int n, int m, const nlls_options &options)
Initialize the workspace.
int scale
scale the variables? 0 - no scaling 1 - use the scaling in GSL (W s.t.
Definition Workspaces.h:125
int maxit
the maximum number of iterations performed
Definition Workspaces.h:38
double initial_radius
if relative_tr_radius /= 1, then set the initial value for the trust-region radius (-ve => ||g_0||)
Definition Workspaces.h:77