Mantid
Loading...
Searching...
No Matches
TrustRegion.cpp
Go to the documentation of this file.
1// Mantid Repository : https://github.com/mantidproject/mantid
2//
3// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
4// NScD Oak Ridge National Laboratory, European Spallation Source,
5// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
6// SPDX - License - Identifier: GPL - 3.0 +
7// This code was originally translated from Fortran code on
8// https://ccpforge.cse.rl.ac.uk/gf/project/ral_nlls June 2016
9//----------------------------------------------------------------------
10// Includes
11//----------------------------------------------------------------------
13
14#include <algorithm>
15#include <cmath>
16#include <functional>
17#include <limits>
18#include <string>
19
21
23const double EPSILON_MCH = std::numeric_limits<double>::epsilon();
24
32 auto n = J.len2();
33 A.allocate(n, n);
34
35 A.mutator() = J.inspector().transpose() * J.inspector();
36}
37
45void getSvdJ(const DoubleFortranMatrix &J, double &s1, double &sn) {
46 Eigen::BDCSVD<Eigen::MatrixXd> svd(J.inspector());
47 auto S = svd.singularValues();
48
49 s1 = S(0);
50 sn = S(S.size() - 1);
51}
52
57double norm2(const DoubleFortranVector &v) {
58 if (v.size() == 0)
59 return 0.0;
60 return v.norm();
61}
62
69 // dgemv('N',m,n,alpha,J,m,x,1,beta,Jx,1);
70 if (Jx.len() != J.len1()) {
71 Jx.allocate(J.len1());
72 }
73
74 Jx.mutator() = J.inspector() * x.inspector();
75}
76
83 // dgemv('T',m,n,alpha,J,m,x,1,beta,Jtx,1)
84 if (Jtx.len() != J.len2()) {
85 Jtx.allocate(J.len2());
86 }
87
88 Jtx.mutator() = J.inspector().transpose() * x.inspector();
89}
90
93double dotProduct(const DoubleFortranVector &x, const DoubleFortranVector &y) { return x.dot(y); }
94
113 const DoubleFortranVector &d, const nlls_options &options, evaluate_model_work &w) {
114
115 // Jd = J*d
116 multJ(J, d, w.Jd);
117
118 // First, get the base
119 // 0.5 (f^T f + f^T J d + d^T' J ^T J d )
120 DoubleFortranVector temp = f;
121 temp += w.Jd;
122 w.md_gn = 0.5 * pow(norm2(temp), 2);
123 double md = 0.0;
124 switch (options.model) {
125 case 1: // first-order (no Hessian)
126 md = w.md_gn;
127 break;
128 default:
129 // these have a dynamic H -- recalculate
130 // H = J^T J + HF, HF is (an approx?) to the Hessian
131 multJ(hf, d, w.Hd);
132 md = w.md_gn + 0.5 * dotProduct(d, w.Hd);
133 }
134 return md;
135}
136
148double calculateRho(double normf, double normfnew, double md, const nlls_options &options) {
149 UNUSED_ARG(options);
150 auto actual_reduction = (0.5 * pow(normf, 2)) - (0.5 * pow(normfnew, 2));
151 auto predicted_reduction = ((0.5 * pow(normf, 2)) - md);
152 double rho = 0.0;
153 if (fabs(actual_reduction) < 10 * EPSILON_MCH) {
154 rho = ONE;
155 } else if (fabs(predicted_reduction) < 10 * EPSILON_MCH) {
156 rho = ONE;
157 } else {
158 rho = actual_reduction / predicted_reduction;
159 }
160 return rho;
161}
162
168
169 auto yts = dotProduct(w.d, w.y);
170 if (fabs(yts) < sqrt(10 * EPSILON_MCH)) {
172 return;
173 }
174
175 multJ(hf, w.d, w.Sks); // hfs = S_k * d
176
177 w.ysharpSks = w.y_sharp;
178 w.ysharpSks -= w.Sks;
179
180 // now, let's scale hd (Nocedal and Wright, Section 10.2)
181 auto dSks = fabs(dotProduct(w.d, w.Sks));
182 auto alpha = fabs(dotProduct(w.d, w.y_sharp)) / dSks;
183 alpha = std::min(ONE, alpha);
184 hf *= alpha;
185
186 // update S_k (again, as in N&W, Section 10.2)
187
188 // hf = hf + (1/yts) (y# - Sk d)^T y:
189 alpha = 1 / yts;
190
191 w.hf.mutator() = alpha * w.ysharpSks.mutator() * w.y.mutator().transpose() + w.hf.mutator();
192 w.hf.mutator() = alpha * w.y.mutator() * w.ysharpSks.mutator().transpose() + w.hf.mutator();
193 alpha = -dotProduct(w.ysharpSks, w.d) / (pow(yts, 2));
194 w.hf.mutator() = alpha * w.y.mutator() * w.y.mutator().transpose() + w.hf.mutator();
195}
196
205void updateTrustRegionRadius(double &rho, const nlls_options &options, NLLS_workspace &w) {
206
207 switch (options.tr_update_strategy) {
208 case 1: // default, step-function
209 if (!std::isfinite(rho)) {
210 w.Delta = std::max(options.radius_reduce, options.radius_reduce_max) * w.Delta;
211 rho = -ONE; // set to be negative, so that the logic works....
212 } else if (rho < options.eta_success_but_reduce) {
213 // unsuccessful....reduce Delta
214 w.Delta = std::max(options.radius_reduce, options.radius_reduce_max) * w.Delta;
215 } else if (rho < options.eta_very_successful) {
216 // doing ok...retain status quo
217 } else if (rho < options.eta_too_successful) {
218 // more than very successful -- increase delta
219 w.Delta = std::min(options.maximum_radius, options.radius_increase * w.normd);
220 // increase based on normd = ||d||_D
221 // if d is on the tr boundary, this is Delta
222 // otherwise, point was within the tr, and there's no point
223 // increasing the radius
224 } else {
225 // too successful....accept step, but don't change w.Delta
226 }
227 break;
228 case 2: // Continuous method
229 // Based on that proposed by Hans Bruun Nielsen, TR
230 // IMM-REP-1999-05
231 // http://www2.imm.dtu.dk/documents/ftp/tr99/tr05_99.pdf
232 if (!std::isfinite(rho)) {
233 w.Delta = std::max(options.radius_reduce, options.radius_reduce_max) * w.Delta;
234 rho = -ONE; // set to be negative, so that the logic works....
235 } else if (rho >= options.eta_too_successful) {
236 // too successful....accept step, but don't change w.Delta
237 } else if (rho > options.eta_successful) {
238 w.Delta = w.Delta * std::min(options.radius_increase,
239 std::max(options.radius_reduce,
240 1 - ((options.radius_increase - 1) * (pow((1 - 2 * rho), w.tr_p)))));
241 w.tr_nu = options.radius_reduce;
242 } else {
243 w.Delta = w.Delta * w.tr_nu;
244 w.tr_nu = w.tr_nu * 0.5;
245 }
246 break;
247 default:
248 throw std::runtime_error("Bad strategy.");
249 }
250}
251
254void testConvergence(double normF, double normJF, double normF0, double normJF0, const nlls_options &options,
255 nlls_inform &inform) {
256
257 if (normF <= std::max(options.stop_g_absolute, options.stop_g_relative * normF0)) {
258 inform.convergence_normf = 1;
259 return;
260 }
261
262 if ((normJF / normF) <= std::max(options.stop_g_absolute, options.stop_g_relative * (normJF0 / normF0))) {
263 inform.convergence_normg = 1;
264 }
265}
266
281 DoubleFortranVector &scale, const nlls_options &options) {
282 auto m = J.len1();
283 auto n = J.len2();
284 if (scale.len() != n) {
285 scale.allocate(n);
286 }
287
288 switch (options.scale) {
289 case 1:
290 case 2:
291 for (int ii = 1; ii <= n; ++ii) { // do ii = 1,n
292 double temp = ZERO;
293 if (options.scale == 1) {
296 for (int jj = 1; jj <= m; ++jj) { // for_do(jj, 1,m)
297 // get_element_of_matrix(J,m,jj,ii,Jij);
298 temp = temp + pow(J(jj, ii), 2);
299 }
300 } else if (options.scale == 2) {
302 for (int jj = 1; jj <= n; ++jj) { // for_do(jj, 1,n)
303 temp = temp + pow(A(ii, jj), 2);
304 }
305 }
306 if (temp < options.scale_min) {
307 if (options.scale_trim_min) {
308 temp = options.scale_min;
309 } else {
310 temp = ONE;
311 }
312 } else if (temp > options.scale_max) {
313 if (options.scale_trim_max) {
314 temp = options.scale_max;
315 } else {
316 temp = ONE;
317 }
318 }
319 temp = sqrt(temp);
320 if (options.scale_require_increase) {
321 scale(ii) = std::max(temp, scale(ii));
322 } else {
323 scale(ii) = temp;
324 }
325 }
326 break;
327 default:
328 throw std::runtime_error("Scaling error.");
329 }
330
331 // Now we have the w.diagonal scaling matrix, actually scale the
332 // Hessian approximation and J^Tf
333 for (int ii = 1; ii <= n; ++ii) { // for_do(ii, 1,n)
334 double temp = scale(ii);
335 v(ii) = v(ii) / temp;
336 for (int jj = 1; jj <= n; ++jj) { // for_do(jj,1,n)
337 A(ii, jj) = A(ii, jj) / temp;
338 A(jj, ii) = A(jj, ii) / temp;
339 }
340 }
341}
342
349 auto M = A;
350 M.eigenSystem(ew, ev);
351}
352
353// This isn't used because we don't calculate second derivatives in Mantid
354// If we start using them the method should be un-commented and used here
355//
356// void apply_second_order_info(int n, int m, const DoubleFortranVector &X,
357// NLLS_workspace &w, eval_hf_type evalHF,
358// params_base_type params,
359// const nlls_options &options, nlls_inform &inform,
360// const DoubleFortranVector &weights) {
361//
362// if (options.exact_second_derivatives) {
363// DoubleFortranVector temp = w.f;
364// temp *= weights;
365// evalHF(inform.external_return, n, m, X, temp, w.hf, params);
366// inform.h_eval = inform.h_eval + 1;
367// } else {
368// // use the rank-one approximation...
369// rankOneUpdate(w.hf, w, n);
370// }
371//}
372
373} // namespace Mantid::CurveFitting::NLLS
#define fabs(x)
Definition: Matrix.cpp:22
#define UNUSED_ARG(x)
Function arguments are sometimes unused in certain implmentations but are required for documentation ...
Definition: System.h:64
void eigenSystem(EigenVector &eigenValues, EigenMatrix &eigenVectors)
Calculate the eigensystem of a symmetric matrix.
map_type & mutator()
Get the map to Eigen matrix.
Definition: EigenMatrix.h:56
const map_type inspector() const
Get a const copy of the Eigen matrix.
Definition: EigenMatrix.h:58
vec_map_type & mutator()
Get the map of the eigen vector.
Definition: EigenVector.h:51
double norm() const
Get vector norm (length)
size_t size() const
Size of the vector.
void allocate(const int iFrom, const int iTo, const int jFrom, const int jTo)
Resize the matrix.
int len2() const
Get the size along the second dimension as an int.
int len1() const
Get the size along the first dimension as an int.
void allocate(int firstIndex, int lastIndex)
Resize the vector.
int len() const
Get the length of the vector as an int.
void rankOneUpdate(DoubleFortranMatrix &hf, NLLS_workspace &w)
Update the Hessian matrix without actually evaluating it (quasi-Newton?)
double calculateRho(double normf, double normfnew, double md, const nlls_options &options)
Calculate the quantity 0.5||f||^2 - 0.5||fnew||^2 actual_reduction rho = -----------------------— = -...
double evaluateModel(const DoubleFortranVector &f, const DoubleFortranMatrix &J, const DoubleFortranMatrix &hf, const DoubleFortranVector &d, const nlls_options &options, evaluate_model_work &w)
Input: f = f(x_k), J = J(x_k), hf = \sum_{i=1}^m f_i(x_k) \nabla^2 f_i(x_k) (or an approx)
void MANTID_CURVEFITTING_DLL matmultInner(const DoubleFortranMatrix &J, DoubleFortranMatrix &A)
Takes an m x n matrix J and forms the n x n matrix A given by A = J' * J.
Definition: TrustRegion.cpp:31
double MANTID_CURVEFITTING_DLL norm2(const DoubleFortranVector &v)
Compute the 2-norm of a vector which is a square root of the sum of squares of its elements.
Definition: TrustRegion.cpp:57
void testConvergence(double normF, double normJF, double normF0, double normJF0, const nlls_options &options, nlls_inform &inform)
Test the convergence.
void updateTrustRegionRadius(double &rho, const nlls_options &options, NLLS_workspace &w)
Update the trust region radius which is hidden in NLLS_workspace w (w.Delta).
void allEigSymm(const DoubleFortranMatrix &A, DoubleFortranVector &ew, DoubleFortranMatrix &ev)
Calculate all the eigenvalues of a symmetric matrix.
void MANTID_CURVEFITTING_DLL multJ(const DoubleFortranMatrix &J, const DoubleFortranVector &x, DoubleFortranVector &Jx)
Multiply a matrix by a vector.
Definition: TrustRegion.cpp:68
void MANTID_CURVEFITTING_DLL multJt(const DoubleFortranMatrix &J, const DoubleFortranVector &x, DoubleFortranVector &Jtx)
Multiply a transposed matrix by a vector.
Definition: TrustRegion.cpp:82
double dotProduct(const DoubleFortranVector &x, const DoubleFortranVector &y)
Dot product of two vectors.
Definition: TrustRegion.cpp:93
void MANTID_CURVEFITTING_DLL getSvdJ(const DoubleFortranMatrix &J, double &s1, double &sn)
Given an (m x n) matrix J held by columns as a vector, this routine returns the largest and smallest ...
Definition: TrustRegion.cpp:45
const double EPSILON_MCH
Too small values don't work well with numerical derivatives.
Definition: TrustRegion.cpp:23
void applyScaling(const DoubleFortranMatrix &J, DoubleFortranMatrix &A, DoubleFortranVector &v, DoubleFortranVector &scale, const nlls_options &options)
Apply_scaling input Jacobian matrix, J ouput scaled Hessisan, H, and J^Tf, v.
all workspaces called from the top level
Definition: Workspaces.h:251
workspace for subroutine evaluateModel
Definition: Workspaces.h:222
inform derived type with component defaults
Definition: Workspaces.h:157
int convergence_normf
test on the size of f satisfied?
Definition: Workspaces.h:172
int convergence_normg
test on the size of the gradient satisfied?
Definition: Workspaces.h:175
double eta_successful
a potential iterate will only be accepted if the actual decrease f - f(x_new) is larger than ....
Definition: Workspaces.h:88
int model
specify the model used.
Definition: Workspaces.h:46
int scale
scale the variables? 0 - no scaling 1 - use the scaling in GSL (W s.t.
Definition: Workspaces.h:125
int tr_update_strategy
Trust region update strategy 1 - usual step function 2 - continuous method of Hans Bruun Nielsen (IMM...
Definition: Workspaces.h:106
double stop_g_absolute
overall convergence tolerances.
Definition: Workspaces.h:63
double maximum_radius
maximum permitted trust-region radius
Definition: Workspaces.h:80
double radius_increase
on very successful iterations, the trust-region radius will be increased by the factor ....
Definition: Workspaces.h:99