Mantid
TrustRegion.cpp
// Mantid Repository : https://github.com/mantidproject/mantid
//
// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
// NScD Oak Ridge National Laboratory, European Spallation Source,
// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
// SPDX - License - Identifier: GPL - 3.0 +
// This code was originally translated from Fortran code on
// https://ccpforge.cse.rl.ac.uk/gf/project/ral_nlls June 2016
//----------------------------------------------------------------------
// Includes
//----------------------------------------------------------------------
#include "MantidCurveFitting/RalNlls/TrustRegion.h"

#include <algorithm>
#include <cmath>
#include <functional>
#include <limits>
#include <string>

namespace Mantid::CurveFitting::NLLS {

/// Too small values don't work well with numerical derivatives.
const double EPSILON_MCH = std::numeric_limits<double>::epsilon();

/// Takes an m x n matrix J and forms the n x n matrix A given by A = J' * J.
void matmultInner(const DoubleFortranMatrix &J, DoubleFortranMatrix &A) {
  auto n = J.len2();
  A.allocate(n, n);

  A.mutator() = J.inspector().transpose() * J.inspector();
}

void getSvdJ(const DoubleFortranMatrix &J, double &s1, double &sn) {
  Eigen::BDCSVD<Eigen::MatrixXd> svd(J.inspector());
  auto S = svd.singularValues();

  s1 = S(0);
  sn = S(S.size() - 1);
}

double norm2(const DoubleFortranVector &v) {
  if (v.size() == 0)
    return 0.0;
  return v.norm();
}

/// Multiply a matrix by a vector.
void multJ(const DoubleFortranMatrix &J, const DoubleFortranVector &x, DoubleFortranVector &Jx) {
  // dgemv('N',m,n,alpha,J,m,x,1,beta,Jx,1);
  if (Jx.len() != J.len1()) {
    Jx.allocate(J.len1());
  }

  Jx.mutator() = J.inspector() * x.inspector();
}

/// Multiply a transposed matrix by a vector.
void multJt(const DoubleFortranMatrix &J, const DoubleFortranVector &x, DoubleFortranVector &Jtx) {
  // dgemv('T',m,n,alpha,J,m,x,1,beta,Jtx,1)
  if (Jtx.len() != J.len2()) {
    Jtx.allocate(J.len2());
  }

  Jtx.mutator() = J.inspector().transpose() * x.inspector();
}

double dotProduct(const DoubleFortranVector &x, const DoubleFortranVector &y) { return x.dot(y); }

/// Evaluate the model m_k(d) at the trial step d.
/// Input: f = f(x_k), J = J(x_k), hf = \sum_{i=1}^m f_i(x_k) \nabla^2 f_i(x_k) (or an approx).
double evaluateModel(const DoubleFortranVector &f, const DoubleFortranMatrix &J, const DoubleFortranMatrix &hf,
                     const DoubleFortranVector &d, const nlls_options &options, evaluate_model_work &w) {

  // Jd = J*d
  multJ(J, d, w.Jd);

  // First, get the base
  // 0.5 (f^T f + f^T J d + d^T J^T J d)
  DoubleFortranVector temp = f;
  temp += w.Jd;
  w.md_gn = 0.5 * pow(norm2(temp), 2);
  double md = 0.0;
  switch (options.model) {
  case 1: // first-order (no Hessian)
    md = w.md_gn;
    break;
  default:
    // these have a dynamic H -- recalculate
    // H = J^T J + HF, HF is (an approx?) to the Hessian
    multJ(hf, d, w.Hd);
    md = w.md_gn + 0.5 * dotProduct(d, w.Hd);
  }
  return md;
}

double calculateRho(double normf, double normfnew, double md, const nlls_options &options) {
  UNUSED_ARG(options);
  auto actual_reduction = (0.5 * pow(normf, 2)) - (0.5 * pow(normfnew, 2));
  auto predicted_reduction = ((0.5 * pow(normf, 2)) - md);
  double rho = 0.0;
  if (fabs(actual_reduction) < 10 * EPSILON_MCH) {
    rho = ONE;
  } else if (fabs(predicted_reduction) < 10 * EPSILON_MCH) {
    rho = ONE;
  } else {
    rho = actual_reduction / predicted_reduction;
  }
  return rho;
}

/// Update the Hessian matrix without actually evaluating it (quasi-Newton).
void rankOneUpdate(DoubleFortranMatrix &hf, NLLS_workspace &w) {

  auto yts = dotProduct(w.d, w.y);
  if (fabs(yts) < sqrt(10 * EPSILON_MCH)) {
    // Skip the update if y^T d is too small to divide by safely.
    return;
  }

  multJ(hf, w.d, w.Sks); // hfs = S_k * d

  w.ysharpSks = w.y_sharp;
  w.ysharpSks -= w.Sks;

  // now, let's scale hd (Nocedal and Wright, Section 10.2)
  auto dSks = fabs(dotProduct(w.d, w.Sks));
  auto alpha = fabs(dotProduct(w.d, w.y_sharp)) / dSks;
  alpha = std::min(ONE, alpha);
  hf *= alpha;

  // update S_k (again, as in N&W, Section 10.2)

  // hf = hf + (1/yts) (y# - Sk d)^T y:
  alpha = 1 / yts;

  w.hf.mutator() = alpha * w.ysharpSks.mutator() * w.y.mutator().transpose() + w.hf.mutator();
  w.hf.mutator() = alpha * w.y.mutator() * w.ysharpSks.mutator().transpose() + w.hf.mutator();
  alpha = -dotProduct(w.ysharpSks, w.d) / (pow(yts, 2));
  w.hf.mutator() = alpha * w.y.mutator() * w.y.mutator().transpose() + w.hf.mutator();
}

void updateTrustRegionRadius(double &rho, const nlls_options &options, NLLS_workspace &w) {

  switch (options.tr_update_strategy) {
  case 1: // default, step-function
    if (!std::isfinite(rho)) {
      w.Delta = std::max(options.radius_reduce, options.radius_reduce_max) * w.Delta;
      rho = -ONE; // set to be negative, so that the logic works....
    } else if (rho < options.eta_success_but_reduce) {
      // unsuccessful....reduce Delta
      w.Delta = std::max(options.radius_reduce, options.radius_reduce_max) * w.Delta;
    } else if (rho < options.eta_very_successful) {
      // doing ok...retain status quo
    } else if (rho < options.eta_too_successful) {
      // more than very successful -- increase delta
      w.Delta = std::min(options.maximum_radius, options.radius_increase * w.normd);
      // increase based on normd = ||d||_D
      // if d is on the tr boundary, this is Delta
      // otherwise, point was within the tr, and there's no point
      // increasing the radius
    } else {
      // too successful....accept step, but don't change w.Delta
    }
    break;
  case 2: // Continuous method
    // Based on that proposed by Hans Bruun Nielsen, TR
    // IMM-REP-1999-05
    // http://www2.imm.dtu.dk/documents/ftp/tr99/tr05_99.pdf
    if (!std::isfinite(rho)) {
      w.Delta = std::max(options.radius_reduce, options.radius_reduce_max) * w.Delta;
      rho = -ONE; // set to be negative, so that the logic works....
    } else if (rho >= options.eta_too_successful) {
      // too successful....accept step, but don't change w.Delta
    } else if (rho > options.eta_successful) {
      w.Delta = w.Delta * std::min(options.radius_increase,
                                   std::max(options.radius_reduce,
                                            1 - ((options.radius_increase - 1) * (pow((1 - 2 * rho), w.tr_p)))));
      w.tr_nu = options.radius_reduce;
    } else {
      w.Delta = w.Delta * w.tr_nu;
      w.tr_nu = w.tr_nu * 0.5;
    }
    break;
  default:
    throw std::runtime_error("Bad strategy.");
  }
}

void testConvergence(double normF, double normJF, double normF0, double normJF0, const nlls_options &options,
                     nlls_inform &inform) {

  if (normF <= std::max(options.stop_g_absolute, options.stop_g_relative * normF0)) {
    inform.convergence_normf = 1;
    return;
  }

  if ((normJF / normF) <= std::max(options.stop_g_absolute, options.stop_g_relative * (normJF0 / normF0))) {
    inform.convergence_normg = 1;
  }
}

void applyScaling(const DoubleFortranMatrix &J, DoubleFortranMatrix &A, // cppcheck-suppress constParameterReference
                  DoubleFortranVector &v, DoubleFortranVector &scale,   // cppcheck-suppress constParameterReference
                  const nlls_options &options) {
  auto m = J.len1();
  auto n = J.len2();
  if (scale.len() != n) {
    scale.allocate(n);
  }

  switch (options.scale) {
  case 1:
  case 2:
    for (int ii = 1; ii <= n; ++ii) { // do ii = 1,n
      double temp = ZERO;
      if (options.scale == 1) {
        for (int jj = 1; jj <= m; ++jj) { // for_do(jj, 1,m)
          // get_element_of_matrix(J,m,jj,ii,Jij);
          temp = temp + pow(J(jj, ii), 2);
        }
      } else if (options.scale == 2) {
        for (int jj = 1; jj <= n; ++jj) { // for_do(jj, 1,n)
          temp = temp + pow(A(ii, jj), 2);
        }
      }
      if (temp < options.scale_min) {
        if (options.scale_trim_min) {
          temp = options.scale_min;
        } else {
          temp = ONE;
        }
      } else if (temp > options.scale_max) {
        if (options.scale_trim_max) {
          temp = options.scale_max;
        } else {
          temp = ONE;
        }
      }
      temp = sqrt(temp);
      if (options.scale_require_increase) {
        scale(ii) = std::max(temp, scale(ii));
      } else {
        scale(ii) = temp;
      }
    }
    break;
  default:
    throw std::runtime_error("Scaling error.");
  }

  // Now we have the diagonal scaling matrix, actually scale the
  // Hessian approximation and J^Tf
  for (int ii = 1; ii <= n; ++ii) { // for_do(ii, 1,n)
    double temp = scale(ii);
    v(ii) = v(ii) / temp;
    for (int jj = 1; jj <= n; ++jj) { // for_do(jj,1,n)
      A(ii, jj) = A(ii, jj) / temp;
      A(jj, ii) = A(jj, ii) / temp;
    }
  }
}

/// Calculate all the eigenvalues of a symmetric matrix.
void allEigSymm(const DoubleFortranMatrix &A, DoubleFortranVector &ew, DoubleFortranMatrix &ev) {
  auto M = A;
  M.eigenSystem(ew, ev);
}

// This isn't used because we don't calculate second derivatives in Mantid
// If we start using them the method should be un-commented and used here
//
// void apply_second_order_info(int n, int m, const DoubleFortranVector &X,
//                              NLLS_workspace &w, eval_hf_type evalHF,
//                              params_base_type params,
//                              const nlls_options &options, nlls_inform &inform,
//                              const DoubleFortranVector &weights) {
//
//   if (options.exact_second_derivatives) {
//     DoubleFortranVector temp = w.f;
//     temp *= weights;
//     evalHF(inform.external_return, n, m, X, temp, w.hf, params);
//     inform.h_eval = inform.h_eval + 1;
//   } else {
//     // use the rank-one approximation...
//     rankOneUpdate(w.hf, w, n);
//   }
//}

} // namespace Mantid::CurveFitting::NLLS
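
For orientation, the sketch below shows one way the free functions in this file can be combined to assess a single trial step of the trust-region iteration. It is illustrative only, not part of TrustRegion.cpp: the caller, the step computation, and all argument names (fnew, Jnew, normF0, normJF0, the workspaces) are assumptions for this sketch; only the calls follow the signatures documented below.

// Sketch only (hypothetical helper, not in the Mantid sources): gauge the quality of a
// trial step d and adapt the trust-region radius accordingly. Assumes this code lives in
// (or uses) namespace Mantid::CurveFitting::NLLS and that the caller has already produced
// the new residuals fnew and the new Jacobian Jnew at x + d.
double assessTrialStep(const DoubleFortranVector &f, const DoubleFortranVector &fnew,
                       const DoubleFortranMatrix &J, const DoubleFortranMatrix &Jnew,
                       const DoubleFortranMatrix &hf, const DoubleFortranVector &d,
                       double normF0, double normJF0, const nlls_options &options,
                       nlls_inform &inform, NLLS_workspace &w, evaluate_model_work &ew) {
  double md = evaluateModel(f, J, hf, d, options, ew);           // model value m_k(d)
  double rho = calculateRho(norm2(f), norm2(fnew), md, options); // actual vs. predicted reduction
  updateTrustRegionRadius(rho, options, w);                      // grows or shrinks w.Delta
  if (rho > options.eta_successful) {
    DoubleFortranVector gnew;
    multJt(Jnew, fnew, gnew); // gradient J^T f at the accepted point
    testConvergence(norm2(fnew), norm2(gnew), normF0, normJF0, options, inform);
  }
  return rho;
}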
#define fabs(x)
Definition Matrix.cpp:22
#define UNUSED_ARG(x)
Function arguments are sometimes unused in certain implementations but are required for documentation ...
Definition System.h:48
void eigenSystem(EigenVector &eigenValues, EigenMatrix &eigenVectors)
Calculate the eigensystem of a symmetric matrix.
map_type & mutator()
Get the map to Eigen matrix.
Definition EigenMatrix.h:56
const map_type inspector() const
Get a const copy of the Eigen matrix.
Definition EigenMatrix.h:58
vec_map_type & mutator()
Get the map of the eigen vector.
Definition EigenVector.h:51
double norm() const
Get vector norm (length)
size_t size() const
Size of the vector.
void allocate(const int iFrom, const int iTo, const int jFrom, const int jTo)
Resize the matrix.
int len2() const
Get the size along the second dimension as an int.
int len1() const
Get the size along the first dimension as an int.
void allocate(int firstIndex, int lastIndex)
Resize the vector.
int len() const
Get the length of the vector as an int.
void rankOneUpdate(DoubleFortranMatrix &hf, NLLS_workspace &w)
Update the Hessian matrix without actually evaluating it (quasi-Newton?)
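Reading the implementation above with H = hf, d = w.d, y = w.y, y# = w.y_sharp (so that w.Sks = H d), and after H is first rescaled by \alpha = \min(1, |d^T y^{\#}| / |d^T H d|), the update it applies can be written as (my notation, not from the source):
H \leftarrow H + \frac{(y^{\#} - Hd)\,y^{T} + y\,(y^{\#} - Hd)^{T}}{y^{T} d} - \frac{(y^{\#} - Hd)^{T} d}{(y^{T} d)^{2}}\, y\, y^{T},
i.e. the scaled rank-two secant update from Nocedal and Wright, Section 10.2, which the code comments cite.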
double calculateRho(double normf, double normfnew, double md, const nlls_options &options)
Calculate the quantity rho = (0.5||f||^2 - 0.5||fnew||^2) / (0.5||f||^2 - md) = actual_reduction / predicted_reduction.
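A purely illustrative example (numbers invented here, not taken from the code): with \|f\|^2 = 4, \|f_{new}\|^2 = 1 and md = 0.8,
\rho = \frac{0.5\cdot 4 - 0.5\cdot 1}{0.5\cdot 4 - 0.8} = \frac{1.5}{1.2} = 1.25,
so the step reduced the objective slightly more than the model predicted; how updateTrustRegionRadius reacts then depends on the eta_* thresholds in nlls_options.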
double evaluateModel(const DoubleFortranVector &f, const DoubleFortranMatrix &J, const DoubleFortranMatrix &hf, const DoubleFortranVector &d, const nlls_options &options, evaluate_model_work &w)
Input: f = f(x_k), J = J(x_k), hf = \sum_{i=1}^m f_i(x_k) \nabla^2 f_i(x_k) (or an approx)
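Spelled out from the code above (with HF the approximate Hessian stored in hf), the returned model value is
m_k(d) = \tfrac{1}{2}\,\|f + J d\|^{2} + \tfrac{1}{2}\, d^{T}\, HF\, d,
where the second term is skipped when options.model == 1, leaving the Gauss-Newton value stored in w.md_gn.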
void MANTID_CURVEFITTING_DLL matmultInner(const DoubleFortranMatrix &J, DoubleFortranMatrix &A)
Takes an m x n matrix J and forms the n x n matrix A given by A = J' * J.
double MANTID_CURVEFITTING_DLL norm2(const DoubleFortranVector &v)
Compute the 2-norm of a vector, i.e. the square root of the sum of the squares of its elements.
void testConvergence(double normF, double normJF, double normF0, double normJF0, const nlls_options &options, nlls_inform &inform)
Test the convergence.
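The two tests coded above, written out with F the current residual vector and a subscript 0 marking values at the starting point:
\|F\| \le \max(stop_g_absolute,\ stop_g_relative \cdot \|F_0\|) \Rightarrow convergence_normf = 1,
\|J^{T}F\| / \|F\| \le \max(stop_g_absolute,\ stop_g_relative \cdot \|J_0^{T}F_0\| / \|F_0\|) \Rightarrow convergence_normg = 1.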
void updateTrustRegionRadius(double &rho, const nlls_options &options, NLLS_workspace &w)
Update the trust region radius which is hidden in NLLS_workspace w (w.Delta).
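For tr_update_strategy == 2 (Nielsen's continuous method) the code above adjusts the radius as a smooth function of rho: on a successful but not "too successful" step it applies
\Delta \leftarrow \Delta \cdot \min\Big(radius_increase,\ \max\big(radius_reduce,\ 1 - (radius_increase - 1)(1 - 2\rho)^{p}\big)\Big), \quad p = w.tr\_p,
while an unsuccessful step sets \Delta \leftarrow \nu\,\Delta and halves \nu (stored in w.tr_nu).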
void allEigSymm(const DoubleFortranMatrix &A, DoubleFortranVector &ew, DoubleFortranMatrix &ev)
Calculate all the eigenvalues of a symmetric matrix.
void MANTID_CURVEFITTING_DLL multJ(const DoubleFortranMatrix &J, const DoubleFortranVector &x, DoubleFortranVector &Jx)
Multiply a matrix by a vector.
void MANTID_CURVEFITTING_DLL multJt(const DoubleFortranMatrix &J, const DoubleFortranVector &x, DoubleFortranVector &Jtx)
Multiply a transposed matrix by a vector.
double dotProduct(const DoubleFortranVector &x, const DoubleFortranVector &y)
Dot product of two vectors.
void MANTID_CURVEFITTING_DLL getSvdJ(const DoubleFortranMatrix &J, double &s1, double &sn)
Given an (m x n) matrix J held by columns as a vector, this routine returns the largest and smallest ...
const double EPSILON_MCH
Too small values don't work well with numerical derivatives.
void applyScaling(const DoubleFortranMatrix &J, DoubleFortranMatrix &A, DoubleFortranVector &v, DoubleFortranVector &scale, const nlls_options &options)
applyScaling: input Jacobian matrix J; output scaled Hessian H and J^T f, v.
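In matrix form (my summary of the loops above): with D = diag(scale), where scale_i is the clipped value of \sqrt{\sum_j J_{ji}^2} for options.scale == 1 or \sqrt{\sum_j A_{ij}^2} for options.scale == 2, the routine applies
A \leftarrow D^{-1} A\, D^{-1}, \qquad v \leftarrow D^{-1} v.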
all workspaces called from the top level
Definition Workspaces.h:251
workspace for subroutine evaluateModel
Definition Workspaces.h:222
inform derived type with component defaults
Definition Workspaces.h:157
int convergence_normf
test on the size of f satisfied?
Definition Workspaces.h:172
int convergence_normg
test on the size of the gradient satisfied?
Definition Workspaces.h:175
double eta_successful
a potential iterate will only be accepted if the actual decrease f - f(x_new) is larger than ....
Definition Workspaces.h:88
int model
specify the model used.
Definition Workspaces.h:46
int scale
scale the variables? 0 - no scaling 1 - use the scaling in GSL (W s.t.
Definition Workspaces.h:125
int tr_update_strategy
Trust region update strategy 1 - usual step function 2 - continuous method of Hans Bruun Nielsen (IMM...
Definition Workspaces.h:106
double stop_g_absolute
overall convergence tolerances.
Definition Workspaces.h:63
double maximum_radius
maximum permitted trust-region radius
Definition Workspaces.h:80
double radius_increase
on very successful iterations, the trust-region radius will be increased by the factor ....
Definition Workspaces.h:99