Mantid
Loading...
Searching...
No Matches
TrustRegionMinimizer.cpp
Go to the documentation of this file.
1// Mantid Repository : https://github.com/mantidproject/mantid
2//
3// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
4// NScD Oak Ridge National Laboratory, European Spallation Source,
5// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
6// SPDX - License - Identifier: GPL - 3.0 +
7// This code was originally translated from Fortran code on
8// https://ccpforge.cse.rl.ac.uk/gf/project/ral_nlls June 2016
9//----------------------------------------------------------------------
10// Includes
11//----------------------------------------------------------------------
16
17#include <cmath>
18
19#include <gsl/gsl_blas.h>
20
21#include <iostream>
22
24
25// clang-format off
27DECLARE_FUNCMINIMIZER(TrustRegionMinimizer, Trust Region)
29// clang-format on
30
32 declareProperty("InitialRadius", 100.0, "Initial radius of the trust region.");
33}
34
37std::string TrustRegionMinimizer::name() const { return "Trust Region"; }
38
45void TrustRegionMinimizer::initialize(API::ICostFunction_sptr costFunction, size_t maxIterations) {
46 m_leastSquares = std::dynamic_pointer_cast<CostFunctions::CostFuncLeastSquares>(costFunction);
47 if (!m_leastSquares) {
48 throw std::runtime_error("Trust-region minimizer can only be used with Least "
49 "squares cost function.");
50 }
51
52 m_function = m_leastSquares->getFittingFunction();
53 auto &values = *m_leastSquares->getValues();
54 auto n = static_cast<int>(m_leastSquares->nParams());
55 auto m = static_cast<int>(values.size());
56 if (n > m) {
57 throw std::runtime_error("More parameters than data.");
58 }
59 m_options.maxit = static_cast<int>(maxIterations);
61 m_x.allocate(n);
62 m_leastSquares->getParameters(m_x);
63 int j = 0;
64 for (size_t i = 0; i < m_function->nParams(); ++i) {
65 if (m_function->isActive(i)) {
66 m_J.m_index.emplace_back(j);
67 j++;
68 } else
69 m_J.m_index.emplace_back(-1);
70 }
71 m_options.initial_radius = getProperty("InitialRadius");
72}
73
79 m_leastSquares->setParameters(x);
80 auto &domain = *m_leastSquares->getDomain();
81 auto &values = *m_leastSquares->getValues();
82 m_function->function(domain, values);
83 auto m = static_cast<int>(values.size());
84 if (f.len() != m) {
85 f.allocate(m);
86 }
87 for (size_t i = 0; i < values.size(); ++i) {
88 f.set(i, (values.getCalculated(i) - values.getFitData(i)) * values.getFitWeight(i));
89 }
90}
91
97 m_leastSquares->setParameters(x);
98 auto &domain = *m_leastSquares->getDomain();
99 auto &values = *m_leastSquares->getValues();
100 auto n = static_cast<int>(m_leastSquares->nParams());
101 auto m = static_cast<int>(values.size());
102 if (J.len1() != m || J.len2() != n) {
103 J.allocate(m, n);
104 }
105
106 m_J.setJ(&J);
107 m_function->functionDeriv(domain, m_J);
108
109 for (int i = 1; i <= m; ++i) {
110 double w = values.getFitWeight(i - 1);
111 for (int j = 1; j <= n; ++j) {
112 J(i, j) *= w;
113 }
114 }
115}
116
123 DoubleFortranMatrix &h) const {
124 UNUSED_ARG(x);
125 UNUSED_ARG(f);
126 auto n = static_cast<int>(m_leastSquares->nParams());
127 if (h.len1() != n || h.len2() != n) {
128 h.allocate(n, n);
129 }
130 // Mantid fit functions don't calculate second derivatives.
131 // For now the Hessian will not be used.
132 h.zero();
133}
134
137bool TrustRegionMinimizer::iterate(size_t /*iteration*/) {
138 int max_tr_decrease = 100;
139 auto &w = m_workspace;
140 auto &options = m_options;
141 auto &inform = m_inform;
142 auto &X = m_x;
143 int n = m_x.len();
144 int m = static_cast<int>(m_leastSquares->getValues()->size());
145
146 if (w.first_call == 0) {
147
148 w.first_call = 1; // ?
149
150 // evaluate the residual
151 evalF(X, w.f);
152 inform.f_eval = inform.f_eval + 1;
153
154 // and evaluate the jacobian
155 evalJ(X, w.J);
156 inform.g_eval = inform.g_eval + 1;
157
158 if (options.relative_tr_radius == 1) {
159 // first, let's get diag(J^TJ)
160 double Jmax = 0.0;
161 for (int i = 1; i <= n; ++i) {
162 // note:: assumes column-storage of J
163 // JtJdiag = norm2( w.J( (i-1)*m + 1 : i*m ) );
164 double JtJdiag = 0.0;
165 for (int j = 1; j <= m; ++j) { // for_do(j, 1, m)
166 JtJdiag += pow(w.J(j, i), 2);
167 }
168 JtJdiag = sqrt(JtJdiag);
169 if (JtJdiag > Jmax)
170 Jmax = JtJdiag;
171 }
172 w.Delta = options.initial_radius_scale * (pow(Jmax, 2));
173 } else {
174 w.Delta = options.initial_radius;
175 }
176
177 if (options.calculate_svd_J) {
178 // calculate the svd of J (if needed)
179 NLLS::getSvdJ(w.J, w.smallest_sv(1), w.largest_sv(1));
180 }
181
182 w.normF = NLLS::norm2(w.f);
183 w.normF0 = w.normF;
184
185 // g = -J^Tf
186 NLLS::multJt(w.J, w.f, w.g);
187 w.g *= -1.0;
188 w.normJF = NLLS::norm2(w.g);
189 w.normJF0 = w.normJF;
190 w.normJFold = w.normJF;
191
192 // save some data
193 inform.obj = 0.5 * (pow(w.normF, 2));
194 inform.norm_g = w.normJF;
195 inform.scaled_g = w.normJF / w.normF;
196
197 // if we need to output vectors of the history of the residual
198 // and gradient, the set the initial values
199 if (options.output_progress_vectors) {
200 w.resvec(1) = inform.obj;
201 w.gradvec(1) = inform.norm_g;
202 }
203
204 // Select the order of the model to be used..
205 switch (options.model) {
206 case 1: // first-order
207 {
208 w.hf.zero();
209 w.use_second_derivatives = false;
210 break;
211 }
212 case 2: // second order
213 {
214 if (options.exact_second_derivatives) {
215 evalHF(X, w.f, w.hf);
216 inform.h_eval = inform.h_eval + 1;
217 } else {
218 // S_0 = 0 (see Dennis, Gay and Welsch)
219 w.hf.zero();
220 }
221 w.use_second_derivatives = true;
222 break;
223 }
224 case 3: // hybrid (MNT)
225 {
226 // set the tolerance :: make this relative
227 w.hybrid_tol = options.hybrid_tol * (w.normJF / (0.5 * (pow(w.normF, 2))));
228 // use first-order method initially
229 w.hf.zero();
230 w.use_second_derivatives = false;
231 if (!options.exact_second_derivatives) {
232 // initialize hf_temp too
233 w.hf_temp.zero();
234 }
235 break;
236 }
237 default:
238 throw std::logic_error("Unsupported model.");
239 }
240 }
241
242 w.iter = w.iter + 1;
243 inform.iter = w.iter;
244
245 bool success = false;
246 int no_reductions = 0;
247 double normFnew = 0.0;
248
249 while (!success) { // loop until successful
250 no_reductions = no_reductions + 1;
251 if (no_reductions > max_tr_decrease + 1) {
252 return true;
253 }
254 // Calculate the step d that the model thinks we should take next
255 calculateStep(w.J, w.f, w.hf, w.Delta, w.d, w.normd, options);
256
257 // Accept the step?
258 w.Xnew = X;
259 w.Xnew += w.d;
260 evalF(w.Xnew, w.fnew);
261 inform.f_eval = inform.f_eval + 1;
262 normFnew = NLLS::norm2(w.fnew);
263
264 // Get the value of the model
265 // md := m_k(d)
266 // evaluated at the new step
267 double md = evaluateModel(w.f, w.J, w.hf, w.d, options, w.evaluate_model_ws);
268
269 // Calculate the quantity
270 // rho = 0.5||f||^2 - 0.5||fnew||^2 = actual_reduction
271 // -------------------------- -------------------
272 // m_k(0) - m_k(d) predicted_reduction
273 //
274 // if model is good, rho should be close to one
275 auto rho = calculateRho(w.normF, normFnew, md, options);
276 if (!std::isfinite(rho) || rho <= options.eta_successful) {
277 if ((w.use_second_derivatives) && (options.model == 3) && (no_reductions == 1)) {
278 // recalculate rho based on the approx GN model
279 // (i.e. the Gauss-Newton model evaluated at the Quasi-Newton step)
280 double rho_gn = calculateRho(w.normF, normFnew, w.evaluate_model_ws.md_gn, options);
281 if (rho_gn > options.eta_successful) {
282 // switch back to gauss-newton
283 w.use_second_derivatives = false;
284 w.hf_temp.zero(); // discard S_k, as it's been polluted
285 w.hf.zero();
286 }
287 }
288 } else {
289 success = true;
290 }
291
292 // Update the TR radius
293 updateTrustRegionRadius(rho, options, w);
294
295 if (!success) {
296 // finally, check d makes progress
297 if (NLLS::norm2(w.d) < std::numeric_limits<double>::epsilon() * NLLS::norm2(w.Xnew)) {
298 m_errorString = "Failed to make progress.";
299 return false;
300 }
301 }
302 } // loop
303 // if we reach here, a successful step has been found
304
305 // update X and f
306 X = w.Xnew;
307 w.f = w.fnew;
308
309 if (!options.exact_second_derivatives) {
310 // first, let's save some old values...
311 // g_old = -J_k^T r_k
312 w.g_old = w.g;
313 // g_mixed = -J_k^T r_{k+1}
314 NLLS::multJt(w.J, w.fnew, w.g_mixed);
315 w.g_mixed *= -1.0;
316 }
317
318 // evaluate J and hf at the new point
319 evalJ(X, w.J);
320 inform.g_eval = inform.g_eval + 1;
321
322 if (options.calculate_svd_J) {
323 NLLS::getSvdJ(w.J, w.smallest_sv(w.iter + 1), w.largest_sv(w.iter + 1));
324 }
325
326 // g = -J^Tf
327 NLLS::multJt(w.J, w.f, w.g);
328 w.g *= -1.0;
329
330 w.normJFold = w.normJF;
331 w.normF = normFnew;
332 w.normJF = NLLS::norm2(w.g);
333
334 // setup the vectors needed if second derivatives are not available
335 if (!options.exact_second_derivatives) {
336 w.y = w.g_old;
337 w.y -= w.g;
338 w.y_sharp = w.g_mixed;
339 w.y_sharp -= w.g;
340 }
341
342 if (options.model == 3) {
343 // hybrid method -- check if we need second derivatives
344
345 if (w.use_second_derivatives) {
346 if (w.normJF > w.normJFold) {
347 // switch to Gauss-Newton
348 w.use_second_derivatives = false;
349 // save hf as hf_temp
350 w.hf_temp = w.hf;
351 w.hf.zero();
352 }
353 } else {
354 auto FunctionValue = 0.5 * (pow(w.normF, 2));
355 if (w.normJF / FunctionValue < w.hybrid_tol) {
356 w.hybrid_count = w.hybrid_count + 1;
357 if (w.hybrid_count == options.hybrid_switch_its) {
358 // use (Quasi-)Newton
359 w.use_second_derivatives = true;
360 w.hybrid_count = 0;
361 // copy hf from hf_temp
362 if (!options.exact_second_derivatives) {
363 w.hf = w.hf_temp;
364 }
365 }
366 } else {
367 w.hybrid_count = 0;
368 }
369 }
370
371 if (!w.use_second_derivatives) {
372 // call apply_second_order_info anyway, so that we update the
373 // second order approximation
374 if (!options.exact_second_derivatives) {
375 rankOneUpdate(w.hf_temp, w);
376 }
377 }
378 }
379
380 if (w.use_second_derivatives) {
381 // apply_second_order_info(n, m, X, w, evalHF, params, options, inform,
382 // weights);
383 if (options.exact_second_derivatives) {
384 evalHF(X, w.f, w.hf);
385 inform.h_eval = inform.h_eval + 1;
386 } else {
387 // use the rank-one approximation...
388 rankOneUpdate(w.hf, w);
389 }
390 }
391
392 // update the stats
393 inform.obj = 0.5 * (pow(w.normF, 2));
394 inform.norm_g = w.normJF;
395 inform.scaled_g = w.normJF / w.normF;
396 if (options.output_progress_vectors) {
397 w.resvec(w.iter + 1) = inform.obj;
398 w.gradvec(w.iter + 1) = inform.norm_g;
399 }
400
401 // Test convergence
402 testConvergence(w.normF, w.normJF, w.normF0, w.normJF0, options, inform);
403
404 if (inform.convergence_normf == 1 || inform.convergence_normg == 1) {
405 return false;
406 }
407
408 inform.iter = w.iter;
409 inform.resvec = w.resvec;
410 inform.gradvec = w.gradvec;
411
412 return true;
413}
414
416namespace {
417
418const double HUGEST = std::numeric_limits<double>::max();
419const double EPSILON_MCH = std::numeric_limits<double>::epsilon();
420const double LARGEST = HUGEST;
421const double LOWER_DEFAULT = -0.5 * LARGEST;
422const double UPPER_DEFAULT = LARGEST;
423const double POINT4 = 0.4;
424const double ZERO = 0.0;
425const double ONE = 1.0;
426const double TWO = 2.0;
427const double THREE = 3.0;
428const double FOUR = 4.0;
429const double SIX = 6.0;
430const double HALF = 0.5;
431const double ONESIXTH = ONE / SIX;
432const double SIXTH = ONESIXTH;
433const double ONE_THIRD = ONE / THREE;
434const double TWO_THIRDS = TWO / THREE;
435const double THREE_QUARTERS = 0.75;
436const double TWENTY_FOUR = 24.0;
437const int MAX_DEGREE = 3;
438const int HISTORY_MAX = 100;
439const double TEN_EPSILON_MCH = 10.0 * EPSILON_MCH;
440const double ROOTS_TOL = TEN_EPSILON_MCH;
441const double INFINITE_NUMBER = HUGEST;
442
445inline double sign(double x, double y) { return y >= 0.0 ? fabs(x) : -fabs(x); }
446
450struct control_type {
453
455 // zero
456 double h_min = EPSILON_MCH;
457
459 double lower = LOWER_DEFAULT;
460 double upper = UPPER_DEFAULT;
461
464 double stop_normal = EPSILON_MCH;
465 double stop_absolute_normal = EPSILON_MCH;
466
468 // constraint
470 bool equality_problem = false;
471};
472
476struct history_type {
477 //
479 double lambda = 0.0;
480
482 double x_norm = 0.0;
483};
484
488struct inform_type {
489 //
490
492 int len_history = 0;
493
495 double obj = HUGEST;
496
498 double x_norm = 0.0;
499
501 double multiplier = 0.0;
502
505 double pole = 0.0;
506
508 bool hard_case = false;
509
511 std::vector<history_type> history;
512};
513
520double biggest(double a, double b, double c, double d) { return std::max(std::max(a, b), std::max(c, d)); }
521
527double biggest(double a, double b, double c) { return std::max(std::max(a, b), c); }
528
532double maxAbsVal(const DoubleFortranVector &v) {
533 auto p = v.indicesOfMinMaxElements();
534 return std::max(fabs(v.get(p.first)), fabs(v.get(p.second)));
535}
536
542std::pair<double, double> minMaxValues(const DoubleFortranVector &v) {
543 auto p = v.indicesOfMinMaxElements();
544 return std::make_pair(v.get(p.first), v.get(p.second));
545}
546
551double twoNorm(const DoubleFortranVector &v) {
552 if (v.size() == 0)
553 return 0.0;
554 return v.norm();
555}
556
561double dotProduct(const DoubleFortranVector &v1, const DoubleFortranVector &v2) { return v1.dot(v2); }
562
567double maxVal(const DoubleFortranVector &v, int n) {
568 double res = std::numeric_limits<double>::lowest();
569 for (int i = 1; i <= n; ++i) {
570 auto val = v(i);
571 if (val > res) {
572 res = val;
573 }
574 }
575 return res;
576}
577
591void rootsQuadratic(double a0, double a1, double a2, double tol, int &nroots, double &root1, double &root2) {
592
593 auto rhs = tol * a1 * a1;
594 if (fabs(a0 * a2) > rhs) { // really is quadratic
595 root2 = a1 * a1 - FOUR * a2 * a0;
596 if (fabs(root2) <= pow(EPSILON_MCH * a1, 2)) { // numerical double root
597 nroots = 2;
598 root1 = -HALF * a1 / a2;
599 root2 = root1;
600 } else if (root2 < ZERO) { // complex not real roots
601 nroots = 0;
602 root1 = ZERO;
603 root2 = ZERO;
604 } else { // distint real roots
605 auto d = -HALF * (a1 + sign(sqrt(root2), a1));
606 nroots = 2;
607 root1 = d / a2;
608 root2 = a0 / d;
609 if (root1 > root2) {
610 d = root1;
611 root1 = root2;
612 root2 = d;
613 }
614 }
615 } else if (a2 == ZERO) {
616 if (a1 == ZERO) {
617 if (a0 == ZERO) { // the function is zero
618 nroots = 1;
619 root1 = ZERO;
620 root2 = ZERO;
621 } else { // the function is constant
622 nroots = 0;
623 root1 = ZERO;
624 root2 = ZERO;
625 }
626 } else { // the function is linear
627 nroots = 1;
628 root1 = -a0 / a1;
629 root2 = ZERO;
630 }
631 } else { // very ill-conditioned quadratic
632 nroots = 2;
633 if (-a1 / a2 > ZERO) {
634 root1 = ZERO;
635 root2 = -a1 / a2;
636 } else {
637 root1 = -a1 / a2;
638 root2 = ZERO;
639 }
640 }
641
642 // perfom a Newton iteration to ensure that the roots are accurate
643
644 if (nroots >= 1) {
645 auto p = (a2 * root1 + a1) * root1 + a0;
646 auto pprime = TWO * a2 * root1 + a1;
647 if (pprime != ZERO) {
648 root1 = root1 - p / pprime;
649 }
650 if (nroots == 2) {
651 p = (a2 * root2 + a1) * root2 + a0;
652 pprime = TWO * a2 * root2 + a1;
653 if (pprime != ZERO) {
654 root2 = root2 - p / pprime;
655 }
656 }
657 }
658}
659
675void rootsCubic(double a0, double a1, double a2, double a3, double tol, int &nroots, double &root1, double &root2,
676 double &root3) {
677
678 // Check to see if the cubic is actually a quadratic
679 if (a3 == ZERO) {
680 rootsQuadratic(a0, a1, a2, tol, nroots, root1, root2);
681 root3 = INFINITE_NUMBER;
682 return;
683 }
684
685 // Deflate the polnomial if the trailing coefficient is zero
686 if (a0 == ZERO) {
687 root1 = ZERO;
688 rootsQuadratic(a1, a2, a3, tol, nroots, root2, root3);
689 nroots = nroots + 1;
690 return;
691 }
692
693 // 1. Use Nonweiler's method (CACM 11:4, 1968, pp269)
694
695 double c0 = a0 / a3;
696 double c1 = a1 / a3;
697 double c2 = a2 / a3;
698
699 double s = c2 / THREE;
700 double t = s * c2;
701 double b = 0.5 * (s * (TWO_THIRDS * t - c1) + c0);
702 t = (t - c1) / THREE;
703 double c = t * t * t;
704 double d = b * b - c;
705
706 // 1 real + 2 equal real or 2 complex roots
707 if (d >= ZERO) {
708 d = pow(sqrt(d) + fabs(b), ONE_THIRD);
709 if (d != ZERO) {
710 if (b > ZERO) {
711 b = -d;
712 } else {
713 b = d;
714 }
715 c = t / b;
716 }
717 d = sqrt(THREE_QUARTERS) * (b - c);
718 b = b + c;
719 c = -0.5 * b - s;
720 root1 = b - s;
721 if (d == ZERO) {
722 nroots = 3;
723 root2 = c;
724 root3 = c;
725 } else {
726 nroots = 1;
727 }
728 } else { // 3 real roots
729 if (b == ZERO) {
730 d = TWO_THIRDS * atan(ONE);
731 } else {
732 d = atan(sqrt(-d) / fabs(b)) / THREE;
733 }
734 if (b < ZERO) {
735 b = TWO * sqrt(t);
736 } else {
737 b = -TWO * sqrt(t);
738 }
739 c = cos(d) * b;
740 t = -sqrt(THREE_QUARTERS) * sin(d) * b - HALF * c;
741 d = -t - c - s;
742 c = c - s;
743 t = t - s;
744 if (fabs(c) > fabs(t)) {
745 root3 = c;
746 } else {
747 root3 = t;
748 t = c;
749 }
750 if (fabs(d) > fabs(t)) {
751 root2 = d;
752 } else {
753 root2 = t;
754 t = d;
755 }
756 root1 = t;
757 nroots = 3;
758 }
759
760 // reorder the roots
761
762 if (nroots == 3) {
763 if (root1 > root2) {
764 double a = root2;
765 root2 = root1;
766 root1 = a;
767 }
768 if (root2 > root3) {
769 double a = root3;
770 if (root1 > root3) {
771 a = root1;
772 root1 = root3;
773 }
774 root3 = root2;
775 root2 = a;
776 }
777 }
778
779 // perfom a Newton iteration to ensure that the roots are accurate
780 double p = ((a3 * root1 + a2) * root1 + a1) * root1 + a0;
781 double pprime = (THREE * a3 * root1 + TWO * a2) * root1 + a1;
782 if (pprime != ZERO) {
783 root1 = root1 - p / pprime;
784 // p = ((a3 * root1 + a2) * root1 + a1) * root1 + a0; // never used
785 }
786
787 if (nroots == 3) {
788 p = ((a3 * root2 + a2) * root2 + a1) * root2 + a0;
789 pprime = (THREE * a3 * root2 + TWO * a2) * root2 + a1;
790 if (pprime != ZERO) {
791 root2 = root2 - p / pprime;
792 // p = ((a3 * root2 + a2) * root2 + a1) * root2 + a0; // never used
793 }
794
795 p = ((a3 * root3 + a2) * root3 + a1) * root3 + a0;
796 pprime = (THREE * a3 * root3 + TWO * a2) * root3 + a1;
797 if (pprime != ZERO) {
798 root3 = root3 - p / pprime;
799 // p = ((a3 * root3 + a2) * root3 + a1) * root3 + a0; // never used
800 }
801 }
802}
803
814void PiBetaDerivs(int max_order, double beta, const DoubleFortranVector &x_norm2, DoubleFortranVector &pi_beta) {
815 double hbeta = HALF * beta;
816 pi_beta(0) = pow(x_norm2(0), hbeta);
817 pi_beta(1) = hbeta * (pow(x_norm2(0), (hbeta - ONE))) * x_norm2(1);
818 if (max_order == 1)
819 return;
820 pi_beta(2) =
821 hbeta * (pow(x_norm2(0), (hbeta - TWO))) * ((hbeta - ONE) * pow(x_norm2(1), 2) + x_norm2(0) * x_norm2(2));
822 if (max_order == 2)
823 return;
824 pi_beta(3) = hbeta * (pow(x_norm2(0), (hbeta - THREE))) *
825 (x_norm2(3) * pow(x_norm2(0), 2) +
826 (hbeta - ONE) * (THREE * x_norm2(0) * x_norm2(1) * x_norm2(2) + (hbeta - TWO) * pow(x_norm2(1), 3)));
827}
828
833void intitializeControl(control_type &control) {
834 control.stop_normal = pow(EPSILON_MCH, 0.75);
835 control.stop_absolute_normal = pow(EPSILON_MCH, 0.75);
836}
837
854void solveSubproblemMain(int n, double radius, double f, const DoubleFortranVector &c, const DoubleFortranVector &h,
855 DoubleFortranVector &x, const control_type &control, inform_type &inform) {
856
857 // set initial values
858
859 if (x.len() != n) {
860 x.allocate(n);
861 }
862 x.zero();
863 inform.x_norm = ZERO;
864 inform.obj = f;
865 inform.hard_case = false;
866
867 // Check that arguments are OK
868 if (n < 0) {
869 throw std::runtime_error("Number of unknowns for trust-region subproblem is negative.");
870 }
871 if (radius < 0) {
872 throw std::runtime_error("Trust-region radius for trust-region subproblem is negative");
873 }
874
875 DoubleFortranVector x_norm2(0, MAX_DEGREE), pi_beta(0, MAX_DEGREE);
876
877 // compute the two-norm of c and the extreme eigenvalues of H
878
879 double c_norm = twoNorm(c);
880 double lambda_min = 0.0;
881 double lambda_max = 0.0;
882 std::tie(lambda_min, lambda_max) = minMaxValues(h);
883
884 double lambda = 0.0;
886 if (c_norm == ZERO && lambda_min >= ZERO) {
887 if (control.equality_problem) {
888 int i_hard = 1; // TODO: is init value of 1 correct?
889 for (int i = 1; i <= n; ++i) { // do i = 1, n
890 if (h(i) == lambda_min) {
891 i_hard = i;
892 break;
893 }
894 }
895 x(i_hard) = ONE / radius;
896 inform.x_norm = radius;
897 inform.obj = f + lambda_min * radius * radius;
898 }
899 return;
900 }
901
902 // construct values lambda_l and lambda_u for which lambda_l <=
903 // lambda_optimal
904 // <= lambda_u, and ensure that all iterates satisfy lambda_l <= lambda
905 // <= lambda_u
906
907 double c_norm_over_radius = c_norm / radius;
908 double lambda_l = 0.0, lambda_u = 0.0;
909 if (control.equality_problem) {
910 lambda_l = biggest(control.lower, -lambda_min, c_norm_over_radius - lambda_max);
911 lambda_u = std::min(control.upper, c_norm_over_radius - lambda_min);
912 } else {
913 lambda_l = biggest(control.lower, ZERO, -lambda_min, c_norm_over_radius - lambda_max);
914 lambda_u = std::min(control.upper, std::max(ZERO, c_norm_over_radius - lambda_min));
915 }
916 lambda = lambda_l;
917
918 // check for the "hard case"
919 if (lambda == -lambda_min) {
920 int i_hard = 1; // TODO: is init value of 1 correct?
921 double c2 = ZERO;
922 inform.hard_case = true;
923 for (int i = 1; i <= n; ++i) { // for_do(i, 1, n)
924 if (h(i) == lambda_min) {
925 if (fabs(c(i)) > EPSILON_MCH * c_norm) {
926 inform.hard_case = false;
927 c2 = c2 + pow(c(i), 2);
928 } else {
929 i_hard = i;
930 }
931 }
932 }
933
934 // the hard case may occur
935 if (inform.hard_case) {
936 for (int i = 1; i <= n; ++i) { // for_do(i, 1, n)
937 if (h(i) != lambda_min) {
938 x(i) = -c(i) / (h(i) + lambda);
939 } else {
940 x(i) = ZERO;
941 }
942 }
943 inform.x_norm = twoNorm(x);
944
945 // the hard case does occur
946
947 if (inform.x_norm <= radius) {
948 if (inform.x_norm < radius) {
949
950 // compute the step alpha so that x + alpha e_i_hard lies on the
951 // trust-region
952 // boundary and gives the smaller value of q
953
954 auto utx = x(i_hard) / radius;
955 auto distx = (radius - inform.x_norm) * ((radius + inform.x_norm) / radius);
956 auto alpha = sign(distx / (fabs(utx) + sqrt(pow(utx, 2) + distx / radius)), utx);
957
958 // record the optimal values
959
960 x(i_hard) = x(i_hard) + alpha;
961 }
962 inform.x_norm = twoNorm(x);
963 inform.obj = f + HALF * (dotProduct(c, x) - lambda * pow(radius, 2));
964 return;
965
966 // the hard case didn't occur after all
967 } else {
968 inform.hard_case = false;
969
970 // compute the first derivative of ||x|(lambda)||^2 - radius^2
971 auto w_norm2 = ZERO;
972 for (int i = 1; i <= n; ++i) { // for_do(i, 1, n)
973 if (h(i) != lambda_min)
974 w_norm2 = w_norm2 + pow(c(i), 2) / pow((h(i) + lambda), 3);
975 }
976 x_norm2(1) = -TWO * w_norm2;
977
978 // compute the newton correction
979
980 lambda = lambda + (pow(inform.x_norm, 2) - pow(radius, 2)) / x_norm2(1);
981 lambda_l = std::max(lambda_l, lambda);
982 }
983
984 // there is a singularity at lambda. compute the point for which the
985 // sum of squares of the singular terms is equal to radius^2
986 } else {
987 lambda = lambda + std::max(sqrt(c2) / radius, lambda * EPSILON_MCH);
988 lambda_l = std::max(lambda_l, lambda);
989 }
990 }
991
992 // the iterates will all be in the L region. Prepare for the main loop
993 auto max_order = std::max(1, std::min(MAX_DEGREE, control.taylor_max_degree));
994
995 // start the main loop
996 for (;;) {
997
998 // if h(lambda) is positive definite, solve h(lambda) x = - c
999
1000 for (int i = 1; i <= n; ++i) { // for_do(i, 1, n)
1001 x(i) = -c(i) / (h(i) + lambda);
1002 }
1003
1004 // compute the two-norm of x
1005 inform.x_norm = twoNorm(x);
1006 x_norm2(0) = pow(inform.x_norm, 2);
1007
1008 // if the newton step lies within the trust region, exit
1009
1010 if (lambda == ZERO && inform.x_norm <= radius) {
1011 inform.obj = f + HALF * dotProduct(c, x);
1012 return;
1013 }
1014
1017
1018 if (fabs(inform.x_norm - radius) <= std::max(control.stop_normal * radius, control.stop_absolute_normal)) {
1019 break;
1020 }
1021
1022 lambda_l = std::max(lambda_l, lambda);
1023
1024 // record, for the future, values of lambda which give small ||x||
1025 if (inform.len_history < HISTORY_MAX) {
1026 history_type history_item;
1027 history_item.lambda = lambda;
1028 history_item.x_norm = inform.x_norm;
1029 inform.history.emplace_back(history_item);
1030 inform.len_history = inform.len_history + 1;
1031 }
1032
1033 // a lambda in L has been found. It is now simply a matter of applying
1034 // a variety of Taylor-series-based methods starting from this lambda
1035
1036 // precaution against rounding producing lambda outside L
1037
1038 if (lambda > lambda_u) {
1039 throw std::runtime_error("Lambda for trust-region subproblem is ill conditioned");
1040 }
1041
1042 // compute first derivatives of x^T M x
1043
1044 // form ||w||^2 = x^T H^-1(lambda) x
1045
1046 double w_norm2 = ZERO;
1047 for (int i = 1; i <= n; ++i) { // for_do(i, 1, n)
1048 w_norm2 = w_norm2 + pow(c(i), 2) / pow(h(i) + lambda, 3);
1049 }
1050
1051 // compute the first derivative of x_norm2 = x^T M x
1052 x_norm2(1) = -TWO * w_norm2;
1053
1054 // compute pi_beta = ||x||^beta and its first derivative when beta = - 1
1055 double beta = -ONE;
1056 PiBetaDerivs(1, beta, x_norm2, pi_beta);
1057
1058 // compute the Newton correction (for beta = - 1)
1059
1060 auto delta_lambda = -(pi_beta(0) - pow((radius), beta)) / pi_beta(1);
1061
1062 DoubleFortranVector lambda_new(3);
1063 int n_lambda = 1;
1064 lambda_new(n_lambda) = lambda + delta_lambda;
1065
1066 if (max_order >= 3) {
1067
1068 // compute the second derivative of x^T x
1069
1070 double z_norm2 = ZERO;
1071 for (int i = 1; i <= n; ++i) { // for_do(i, 1, n)
1072 z_norm2 = z_norm2 + pow(c(i), 2) / pow((h(i) + lambda), 4);
1073 }
1074 x_norm2(2) = SIX * z_norm2;
1075
1076 // compute the third derivatives of x^T x
1077
1078 double v_norm2 = ZERO;
1079 for (int i = 1; i <= n; ++i) { // for_do(i, 1, n)
1080 v_norm2 = v_norm2 + pow(c(i), 2) / pow((h(i) + lambda), 5);
1081 }
1082 x_norm2(3) = -TWENTY_FOUR * v_norm2;
1083
1084 // compute pi_beta = ||x||^beta and its derivatives when beta = 2
1085
1086 beta = TWO;
1087 PiBetaDerivs(max_order, beta, x_norm2, pi_beta);
1088
1089 // compute the "cubic Taylor approximaton" step (beta = 2)
1090
1091 auto a_0 = pi_beta(0) - pow((radius), beta);
1092 auto a_1 = pi_beta(1);
1093 auto a_2 = HALF * pi_beta(2);
1094 auto a_3 = SIXTH * pi_beta(3);
1095 auto a_max = biggest(fabs(a_0), fabs(a_1), fabs(a_2), fabs(a_3));
1096 if (a_max > ZERO) {
1097 a_0 = a_0 / a_max;
1098 a_1 = a_1 / a_max;
1099 a_2 = a_2 / a_max;
1100 a_3 = a_3 / a_max;
1101 }
1102 int nroots = 0;
1103 double root1 = 0, root2 = 0, root3 = 0;
1104
1105 rootsCubic(a_0, a_1, a_2, a_3, ROOTS_TOL, nroots, root1, root2, root3);
1106 n_lambda = n_lambda + 1;
1107 if (nroots == 3) {
1108 lambda_new(n_lambda) = lambda + root3;
1109 } else {
1110 lambda_new(n_lambda) = lambda + root1;
1111 }
1112
1113 // compute pi_beta = ||x||^beta and its derivatives when beta = - 0.4
1114
1115 beta = -POINT4;
1116 PiBetaDerivs(max_order, beta, x_norm2, pi_beta);
1117
1118 // compute the "cubic Taylor approximaton" step (beta = - 0.4)
1119
1120 a_0 = pi_beta(0) - pow((radius), beta);
1121 a_1 = pi_beta(1);
1122 a_2 = HALF * pi_beta(2);
1123 a_3 = SIXTH * pi_beta(3);
1124 a_max = biggest(fabs(a_0), fabs(a_1), fabs(a_2), fabs(a_3));
1125 if (a_max > ZERO) {
1126 a_0 = a_0 / a_max;
1127 a_1 = a_1 / a_max;
1128 a_2 = a_2 / a_max;
1129 a_3 = a_3 / a_max;
1130 }
1131 rootsCubic(a_0, a_1, a_2, a_3, ROOTS_TOL, nroots, root1, root2, root3);
1132 n_lambda = n_lambda + 1;
1133 if (nroots == 3) {
1134 lambda_new(n_lambda) = lambda + root3;
1135 } else {
1136 lambda_new(n_lambda) = lambda + root1;
1137 }
1138 }
1139
1140 // compute the best Taylor improvement
1141
1142 auto lambda_plus = maxVal(lambda_new, n_lambda);
1143 delta_lambda = lambda_plus - lambda;
1144 lambda = lambda_plus;
1145
1146 // improve the lower bound if possible
1147
1148 lambda_l = std::max(lambda_l, lambda_plus);
1149
1150 // check that the best Taylor improvement is significant
1151
1152 if (std::abs(delta_lambda) < EPSILON_MCH * std::max(ONE, std::abs(lambda))) {
1153 break;
1154 }
1155
1156 } // for(;;)
1157}
1158
1175void solveSubproblem(int n, double radius, double f, const DoubleFortranVector &c, const DoubleFortranVector &h,
1176 DoubleFortranVector &x, const control_type &control, inform_type &inform) {
1177 // scale the problem to solve instead
1178 // minimize q_s(x_s) = 1/2 <x_s, H_s x_s> + <c_s, x_s> + f_s
1179 // subject to ||x_s||_2 <= radius_s or ||x_s||_2 = radius_s
1180
1181 // where H_s = H / s_h and c_s = c / s_c for scale factors s_h and s_c
1182
1183 // This corresponds to
1184 // radius_s = ( s_h / s_c ) radius,
1185 // f_s = ( s_h / s_c^2 ) f
1186 // and the solution may be recovered as
1187 // x = ( s_c / s_h ) x_s
1188 // lambda = s_h lambda_s
1189 // q(x) = ( s_c^2 / s_ h ) q_s(x_s)
1190
1191 // scale H by the largest H and remove relatively tiny H
1192
1193 DoubleFortranVector h_scale(n);
1194 auto scale_h = maxAbsVal(h); // MAXVAL( ABS( H ) )
1195 if (scale_h > ZERO) {
1196 for (int i = 1; i <= n; ++i) { // do i = 1, n
1197 if (fabs(h(i)) >= control.h_min * scale_h) {
1198 h_scale(i) = h(i) / scale_h;
1199 } else {
1200 h_scale(i) = ZERO;
1201 }
1202 }
1203 } else {
1204 scale_h = ONE;
1205 h_scale.zero();
1206 }
1207
1208 // scale c by the largest c and remove relatively tiny c
1209
1210 DoubleFortranVector c_scale(n);
1211 auto scale_c = maxAbsVal(c); // maxval( abs( c ) )
1212 if (scale_c > ZERO) {
1213 for (int i = 1; i <= n; ++i) { // do i = 1, n
1214 if (fabs(c(i)) >= control.h_min * scale_c) {
1215 c_scale(i) = c(i) / scale_c;
1216 } else {
1217 c_scale(i) = ZERO;
1218 }
1219 }
1220 } else {
1221 scale_c = ONE;
1222 c_scale.zero();
1223 }
1224
1225 double radius_scale = (scale_h / scale_c) * radius;
1226 double f_scale = (scale_h / pow(scale_c, 2)) * f;
1227
1228 auto control_scale = control;
1229 if (control_scale.lower != LOWER_DEFAULT) {
1230 control_scale.lower = control_scale.lower / scale_h;
1231 }
1232 if (control_scale.upper != UPPER_DEFAULT) {
1233 control_scale.upper = control_scale.upper / scale_h;
1234 }
1235
1236 // solve the scaled problem
1237
1238 solveSubproblemMain(n, radius_scale, f_scale, c_scale, h_scale, x, control_scale, inform);
1239
1240 // unscale the solution, function value, multiplier and related values
1241
1242 // x = ( scale_c / scale_h ) * x
1243 x *= scale_c / scale_h;
1244 inform.obj *= pow(scale_c, 2) / scale_h;
1245 inform.multiplier *= scale_h;
1246 inform.pole *= scale_h;
1247 for (auto &history_item : inform.history) { // do i = 1, inform.len_history
1248 history_item.lambda *= scale_h;
1249 history_item.x_norm *= scale_c / scale_h;
1250 }
1251}
1252
1253} // namespace
1254
1272 const DoubleFortranMatrix &hf, double Delta, DoubleFortranVector &d,
1273 double &normd, const NLLS::nlls_options &options) {
1274
1275 control_type controlOptions;
1276 inform_type inform;
1277
1278 // The code finds
1279 // d = arg min_p w^T p + 0.5 * p^T D p
1280 // s.t. ||p|| \leq Delta
1281 //
1282 // where D is diagonal
1283 //
1284 // our probem in naturally in the form
1285 //
1286 // d = arg min_p v^T p + 0.5 * p^T H p
1287 // s.t. ||p|| \leq Delta
1288 //
1289 // first, find the matrix H and vector v
1290 // Set A = J^T J
1292 // add any second order information...
1293 // so A = J^T J + HF
1294 m_A += hf;
1295
1296 // now form v = J^T f
1297 NLLS::multJt(J, f, m_v);
1298
1299 // if scaling needed, do it
1300 if (options.scale != 0) {
1301 applyScaling(J, m_A, m_v, m_scale, options);
1302 }
1303
1304 // Now that we have the unprocessed matrices, we need to get an
1305 // eigendecomposition to make A diagonal
1306 //
1308
1309 // We can now change variables, setting y = Vp, getting
1310 // Vd = arg min_(Vx) v^T p + 0.5 * (Vp)^T D (Vp)
1311 // s.t. ||x|| \leq Delta
1312 // <=>
1313 // Vd = arg min_(Vx) V^Tv^T (Vp) + 0.5 * (Vp)^T D (Vp)
1314 // s.t. ||x|| \leq Delta
1315 // <=>
1316 // we need to get the transformed vector v
1318
1319 // we've now got the vectors we need, pass to solveSubproblem
1320 intitializeControl(controlOptions);
1321
1322 auto n = J.len2();
1323 if (m_v_trans.len() != n) {
1325 }
1326
1327 for (int ii = 1; ii <= n; ++ii) { // for_do(ii, 1,n)
1328 if (fabs(m_v_trans(ii)) < EPSILON_MCH) {
1329 m_v_trans(ii) = ZERO;
1330 }
1331 if (fabs(m_ew(ii)) < EPSILON_MCH) {
1332 m_ew(ii) = ZERO;
1333 }
1334 }
1335
1336 solveSubproblem(n, Delta, ZERO, m_v_trans, m_ew, m_d_trans, controlOptions, inform);
1337
1338 // and return the un-transformed vector
1340
1341 normd = NLLS::norm2(d); // ! ||d||_D
1342
1343 if (options.scale != 0) {
1344 for (int ii = 1; ii <= n; ++ii) { // for_do(ii, 1, n)
1345 d(ii) = d(ii) / m_scale(ii);
1346 }
1347 }
1348
1349} // calculateStep
1350
1354
1355} // namespace Mantid::CurveFitting::FuncMinimisers
const std::vector< double > * lambda
const std::vector< double > & rhs
#define DECLARE_FUNCMINIMIZER(classname, username)
Macro for declaring a new type of minimizers to be used with the FuncMinimizerFactory.
#define fabs(x)
Definition: Matrix.cpp:22
double radius
Definition: Rasterize.cpp:31
#define UNUSED_ARG(x)
Function arguments are sometimes unused in certain implmentations but are required for documentation ...
Definition: System.h:64
int len_history
the number of (||x||_M,lambda) pairs in the history
bool hard_case
has the hard case occurred?
int taylor_max_degree
maximum degree of Taylor approximant allowed
double obj
the value of the quadratic function
double h_min
any entry of H that is smaller than h_min * MAXVAL( H ) we be treated as
std::vector< history_type > history
history information
double stop_absolute_normal
bool equality_problem
is the solution is REQUIRED to lie on the boundary (i.e., is the
double multiplier
the Lagrange multiplier corresponding to the trust-region constraint
double lower
lower and upper bounds on the multiplier, if known
double stop_normal
stop when | ||x|| - radius | <= max( stop_normal * radius, stop_absolute_normal )
double pole
a lower bound max(0,-lambda_1), where lambda_1 is the left-most eigenvalue of (H,M)
double x_norm
corresponding value of ||x(lambda)||_M
double upper
std::string m_errorString
Error string.
void zero()
Set all elements to zero.
void set(const size_t i, const double value)
Set an element.
void allocate(const int iFrom, const int iTo, const int jFrom, const int jTo)
Resize the matrix.
int len2() const
Get the size along the second dimension as an int.
int len1() const
Get the size along the first dimension as an int.
void allocate(int firstIndex, int lastIndex)
Resize the vector.
int len() const
Get the length of the vector as an int.
Trust Region minimizer class using the DTRS method of GALAHAD.
std::string name() const override
Name of the minimizer.
void calculateStep(const DoubleFortranMatrix &J, const DoubleFortranVector &f, const DoubleFortranMatrix &hf, double Delta, DoubleFortranVector &d, double &normd, const NLLS::nlls_options &options)
Find a correction vector to the parameters.
double costFunctionVal() override
Return current value of the cost function.
void evalF(const DoubleFortranVector &x, DoubleFortranVector &f) const
Evaluate the fitting function and calculate the residuals.
NLLS::NLLS_workspace m_workspace
Temporary and helper objects.
void evalJ(const DoubleFortranVector &x, DoubleFortranMatrix &J) const
Evaluate the Jacobian.
std::shared_ptr< CostFunctions::CostFuncLeastSquares > m_leastSquares
Stored cost function.
void evalHF(const DoubleFortranVector &x, const DoubleFortranVector &f, DoubleFortranMatrix &h) const
Evaluate the Hessian.
NLLS::nlls_inform m_inform
Information about the fitting.
void initialize(API::ICostFunction_sptr costFunction, size_t maxIterations=0) override
Initialize minimizer, i.e. pass a function to minimize.
JacobianImpl1< DoubleFortranMatrix > m_J
The Jacobian.
API::IFunction_sptr m_function
Stored to access IFunction interface in iterate()
TypedValue getProperty(const std::string &name) const override
Get the value of a property.
std::shared_ptr< ICostFunction > ICostFunction_sptr
define a shared pointer to a cost function
Definition: ICostFunction.h:60
void MANTID_CURVEFITTING_DLL matmultInner(const DoubleFortranMatrix &J, DoubleFortranMatrix &A)
Takes an m x n matrix J and forms the n x n matrix A given by A = J' * J.
Definition: TrustRegion.cpp:31
double MANTID_CURVEFITTING_DLL norm2(const DoubleFortranVector &v)
Compute the 2-norm of a vector which is a square root of the sum of squares of its elements.
Definition: TrustRegion.cpp:57
void allEigSymm(const DoubleFortranMatrix &A, DoubleFortranVector &ew, DoubleFortranMatrix &ev)
Calculate all the eigenvalues of a symmetric matrix.
void MANTID_CURVEFITTING_DLL multJ(const DoubleFortranMatrix &J, const DoubleFortranVector &x, DoubleFortranVector &Jx)
Multiply a matrix by a vector.
Definition: TrustRegion.cpp:68
void MANTID_CURVEFITTING_DLL multJt(const DoubleFortranMatrix &J, const DoubleFortranVector &x, DoubleFortranVector &Jtx)
Multiply a transposed matrix by a vector.
Definition: TrustRegion.cpp:82
double dotProduct(const DoubleFortranVector &x, const DoubleFortranVector &y)
Dot product of two vectors.
Definition: TrustRegion.cpp:93
void MANTID_CURVEFITTING_DLL getSvdJ(const DoubleFortranMatrix &J, double &s1, double &sn)
Given an (m x n) matrix J held by columns as a vector, this routine returns the largest and smallest ...
Definition: TrustRegion.cpp:45
FortranVector< EigenVector > DoubleFortranVector
void initialize(int n, int m, const nlls_options &options)
Initialize the workspace.
Definition: Workspaces.cpp:22
int scale
scale the variables? 0 - no scaling 1 - use the scaling in GSL (W s.t.
Definition: Workspaces.h:125
int maxit
the maximum number of iterations performed
Definition: Workspaces.h:38
double initial_radius
if relative_tr_radius /= 1, then set the initial value for the trust-region radius (-ve => ||g_0||)
Definition: Workspaces.h:77