19#include <gsl/gsl_blas.h>
32 declareProperty(
"InitialRadius", 100.0,
"Initial radius of the trust region.");
46 m_leastSquares = std::dynamic_pointer_cast<CostFunctions::CostFuncLeastSquares>(costFunction);
48 throw std::runtime_error(
"Trust-region minimizer can only be used with Least "
49 "squares cost function.");
55 auto m =
static_cast<int>(values.size());
57 throw std::runtime_error(
"More parameters than data.");
64 for (
size_t i = 0; i <
m_function->nParams(); ++i) {
66 m_J.m_index.emplace_back(j);
69 m_J.m_index.emplace_back(-1);
83 auto m =
static_cast<int>(values.size());
87 for (
size_t i = 0; i < values.size(); ++i) {
88 f.
set(i, (values.getCalculated(i) - values.getFitData(i)) * values.getFitWeight(i));
101 auto m =
static_cast<int>(values.size());
109 for (
int i = 1; i <=
m; ++i) {
110 double w = values.getFitWeight(i - 1);
111 for (
int j = 1; j <=
n; ++j) {
138 int max_tr_decrease = 100;
146 if (w.first_call == 0) {
152 inform.f_eval = inform.f_eval + 1;
156 inform.g_eval = inform.g_eval + 1;
158 if (options.relative_tr_radius == 1) {
161 for (
int i = 1; i <=
n; ++i) {
164 double JtJdiag = 0.0;
165 for (
int j = 1; j <=
m; ++j) {
166 JtJdiag += pow(w.J(j, i), 2);
168 JtJdiag = sqrt(JtJdiag);
172 w.Delta = options.initial_radius_scale * (pow(Jmax, 2));
174 w.Delta = options.initial_radius;
177 if (options.calculate_svd_J) {
189 w.normJF0 = w.normJF;
190 w.normJFold = w.normJF;
193 inform.obj = 0.5 * (pow(w.normF, 2));
194 inform.norm_g = w.normJF;
195 inform.scaled_g = w.normJF / w.normF;
199 if (options.output_progress_vectors) {
200 w.resvec(1) = inform.obj;
201 w.gradvec(1) = inform.norm_g;
205 switch (options.model) {
209 w.use_second_derivatives =
false;
214 if (options.exact_second_derivatives) {
216 inform.h_eval = inform.h_eval + 1;
221 w.use_second_derivatives =
true;
227 w.hybrid_tol = options.hybrid_tol * (w.normJF / (0.5 * (pow(w.normF, 2))));
230 w.use_second_derivatives =
false;
231 if (!options.exact_second_derivatives) {
238 throw std::logic_error(
"Unsupported model.");
243 inform.iter = w.iter;
245 bool success =
false;
246 int no_reductions = 0;
247 double normFnew = 0.0;
250 no_reductions = no_reductions + 1;
251 if (no_reductions > max_tr_decrease + 1) {
255 calculateStep(w.J, w.f, w.hf, w.Delta, w.d, w.normd, options);
260 evalF(w.Xnew, w.fnew);
261 inform.f_eval = inform.f_eval + 1;
267 double md = evaluateModel(w.f, w.J, w.hf, w.d, options, w.evaluate_model_ws);
275 auto rho = calculateRho(w.normF, normFnew, md, options);
276 if (!std::isfinite(
rho) ||
rho <= options.eta_successful) {
277 if ((w.use_second_derivatives) && (options.model == 3) && (no_reductions == 1)) {
280 double rho_gn = calculateRho(w.normF, normFnew, w.evaluate_model_ws.md_gn, options);
281 if (rho_gn > options.eta_successful) {
283 w.use_second_derivatives =
false;
293 updateTrustRegionRadius(
rho, options, w);
309 if (!options.exact_second_derivatives) {
320 inform.g_eval = inform.g_eval + 1;
322 if (options.calculate_svd_J) {
323 NLLS::getSvdJ(w.J, w.smallest_sv(w.iter + 1), w.largest_sv(w.iter + 1));
330 w.normJFold = w.normJF;
335 if (!options.exact_second_derivatives) {
338 w.y_sharp = w.g_mixed;
342 if (options.model == 3) {
345 if (w.use_second_derivatives) {
346 if (w.normJF > w.normJFold) {
348 w.use_second_derivatives =
false;
354 auto FunctionValue = 0.5 * (pow(w.normF, 2));
355 if (w.normJF / FunctionValue < w.hybrid_tol) {
356 w.hybrid_count = w.hybrid_count + 1;
357 if (w.hybrid_count == options.hybrid_switch_its) {
359 w.use_second_derivatives =
true;
362 if (!options.exact_second_derivatives) {
371 if (!w.use_second_derivatives) {
374 if (!options.exact_second_derivatives) {
375 rankOneUpdate(w.hf_temp, w);
380 if (w.use_second_derivatives) {
383 if (options.exact_second_derivatives) {
385 inform.h_eval = inform.h_eval + 1;
388 rankOneUpdate(w.hf, w);
393 inform.obj = 0.5 * (pow(w.normF, 2));
394 inform.norm_g = w.normJF;
395 inform.scaled_g = w.normJF / w.normF;
396 if (options.output_progress_vectors) {
397 w.resvec(w.iter + 1) = inform.obj;
398 w.gradvec(w.iter + 1) = inform.norm_g;
402 testConvergence(w.normF, w.normJF, w.normF0, w.normJF0, options, inform);
404 if (inform.convergence_normf == 1 || inform.convergence_normg == 1) {
408 inform.iter = w.iter;
409 inform.resvec = w.resvec;
410 inform.gradvec = w.gradvec;
418const double HUGEST = std::numeric_limits<double>::max();
419const double EPSILON_MCH = std::numeric_limits<double>::epsilon();
420const double LARGEST = HUGEST;
421const double LOWER_DEFAULT = -0.5 * LARGEST;
422const double UPPER_DEFAULT = LARGEST;
423const double POINT4 = 0.4;
424const double ZERO = 0.0;
425const double ONE = 1.0;
426const double TWO = 2.0;
427const double THREE = 3.0;
428const double FOUR = 4.0;
429const double SIX = 6.0;
430const double HALF = 0.5;
431const double ONESIXTH = ONE / SIX;
432const double SIXTH = ONESIXTH;
433const double ONE_THIRD = ONE / THREE;
434const double TWO_THIRDS = TWO / THREE;
435const double THREE_QUARTERS = 0.75;
436const double TWENTY_FOUR = 24.0;
437const int MAX_DEGREE = 3;
438const int HISTORY_MAX = 100;
439const double TEN_EPSILON_MCH = 10.0 * EPSILON_MCH;
440const double ROOTS_TOL = TEN_EPSILON_MCH;
441const double INFINITE_NUMBER = HUGEST;
445inline double sign(
double x,
double y) {
return y >= 0.0 ?
fabs(
x) : -
fabs(
x); }
520double biggest(
double a,
double b,
double c,
double d) {
return std::max(std::max(a, b), std::max(c,
d)); }
527double biggest(
double a,
double b,
double c) {
return std::max(std::max(a, b), c); }
533 auto p = v.indicesOfMinMaxElements();
534 return std::max(
fabs(v.get(p.first)),
fabs(v.get(p.second)));
543 auto p = v.indicesOfMinMaxElements();
544 return std::make_pair(v.get(p.first), v.get(p.second));
568 double res = std::numeric_limits<double>::lowest();
569 for (
int i = 1; i <=
n; ++i) {
591void rootsQuadratic(
double a0,
double a1,
double a2,
double tol,
int &nroots,
double &root1,
double &root2) {
593 auto rhs = tol * a1 * a1;
595 root2 = a1 * a1 - FOUR * a2 * a0;
596 if (
fabs(root2) <= pow(EPSILON_MCH * a1, 2)) {
598 root1 = -HALF * a1 / a2;
600 }
else if (root2 < ZERO) {
605 auto d = -HALF * (a1 + sign(sqrt(root2), a1));
615 }
else if (a2 == ZERO) {
633 if (-a1 / a2 > ZERO) {
645 auto p = (a2 * root1 + a1) * root1 + a0;
646 auto pprime = TWO * a2 * root1 + a1;
647 if (pprime != ZERO) {
648 root1 = root1 - p / pprime;
651 p = (a2 * root2 + a1) * root2 + a0;
652 pprime = TWO * a2 * root2 + a1;
653 if (pprime != ZERO) {
654 root2 = root2 - p / pprime;
675void rootsCubic(
double a0,
double a1,
double a2,
double a3,
double tol,
int &nroots,
double &root1,
double &root2,
680 rootsQuadratic(a0, a1, a2, tol, nroots, root1, root2);
681 root3 = INFINITE_NUMBER;
688 rootsQuadratic(a1, a2, a3, tol, nroots, root2, root3);
699 double s = c2 / THREE;
701 double b = 0.5 * (s * (TWO_THIRDS * t - c1) + c0);
702 t = (t - c1) / THREE;
703 double c = t * t * t;
704 double d = b * b - c;
708 d = pow(sqrt(
d) +
fabs(b), ONE_THIRD);
717 d = sqrt(THREE_QUARTERS) * (b - c);
730 d = TWO_THIRDS * atan(ONE);
732 d = atan(sqrt(-
d) /
fabs(b)) / THREE;
740 t = -sqrt(THREE_QUARTERS) * sin(
d) * b - HALF * c;
780 double p = ((a3 * root1 + a2) * root1 + a1) * root1 + a0;
781 double pprime = (THREE * a3 * root1 + TWO * a2) * root1 + a1;
782 if (pprime != ZERO) {
783 root1 = root1 - p / pprime;
788 p = ((a3 * root2 + a2) * root2 + a1) * root2 + a0;
789 pprime = (THREE * a3 * root2 + TWO * a2) * root2 + a1;
790 if (pprime != ZERO) {
791 root2 = root2 - p / pprime;
795 p = ((a3 * root3 + a2) * root3 + a1) * root3 + a0;
796 pprime = (THREE * a3 * root3 + TWO * a2) * root3 + a1;
797 if (pprime != ZERO) {
798 root3 = root3 - p / pprime;
815 double hbeta = HALF * beta;
816 pi_beta(0) = pow(x_norm2(0), hbeta);
817 pi_beta(1) = hbeta * (pow(x_norm2(0), (hbeta - ONE))) * x_norm2(1);
821 hbeta * (pow(x_norm2(0), (hbeta - TWO))) * ((hbeta - ONE) * pow(x_norm2(1), 2) + x_norm2(0) * x_norm2(2));
824 pi_beta(3) = hbeta * (pow(x_norm2(0), (hbeta - THREE))) *
825 (x_norm2(3) * pow(x_norm2(0), 2) +
826 (hbeta - ONE) * (THREE * x_norm2(0) * x_norm2(1) * x_norm2(2) + (hbeta - TWO) * pow(x_norm2(1), 3)));
833void intitializeControl(control_type &control) {
834 control.stop_normal = pow(EPSILON_MCH, 0.75);
835 control.stop_absolute_normal = pow(EPSILON_MCH, 0.75);
863 inform.x_norm = ZERO;
865 inform.hard_case =
false;
869 throw std::runtime_error(
"Number of unknowns for trust-region subproblem is negative.");
872 throw std::runtime_error(
"Trust-region radius for trust-region subproblem is negative");
879 double c_norm = twoNorm(c);
880 double lambda_min = 0.0;
881 double lambda_max = 0.0;
882 std::tie(lambda_min, lambda_max) = minMaxValues(h);
886 if (c_norm == ZERO && lambda_min >= ZERO) {
887 if (control.equality_problem) {
889 for (
int i = 1; i <=
n; ++i) {
890 if (h(i) == lambda_min) {
907 double c_norm_over_radius = c_norm /
radius;
908 double lambda_l = 0.0, lambda_u = 0.0;
909 if (control.equality_problem) {
910 lambda_l = biggest(control.lower, -lambda_min, c_norm_over_radius - lambda_max);
911 lambda_u = std::min(control.upper, c_norm_over_radius - lambda_min);
913 lambda_l = biggest(control.lower, ZERO, -lambda_min, c_norm_over_radius - lambda_max);
914 lambda_u = std::min(control.upper, std::max(ZERO, c_norm_over_radius - lambda_min));
919 if (
lambda == -lambda_min) {
922 inform.hard_case =
true;
923 for (
int i = 1; i <=
n; ++i) {
924 if (h(i) == lambda_min) {
925 if (
fabs(c(i)) > EPSILON_MCH * c_norm) {
926 inform.hard_case =
false;
927 c2 = c2 + pow(c(i), 2);
935 if (inform.hard_case) {
936 for (
int i = 1; i <=
n; ++i) {
937 if (h(i) != lambda_min) {
938 x(i) = -c(i) / (h(i) +
lambda);
943 inform.x_norm = twoNorm(
x);
947 if (inform.x_norm <=
radius) {
948 if (inform.x_norm <
radius) {
956 auto alpha = sign(distx / (
fabs(utx) + sqrt(pow(utx, 2) + distx /
radius)), utx);
960 x(i_hard) =
x(i_hard) + alpha;
962 inform.x_norm = twoNorm(
x);
968 inform.hard_case =
false;
972 for (
int i = 1; i <=
n; ++i) {
973 if (h(i) != lambda_min)
974 w_norm2 = w_norm2 + pow(c(i), 2) / pow((h(i) +
lambda), 3);
976 x_norm2(1) = -TWO * w_norm2;
981 lambda_l = std::max(lambda_l,
lambda);
988 lambda_l = std::max(lambda_l,
lambda);
993 auto max_order = std::max(1, std::min(MAX_DEGREE, control.taylor_max_degree));
1000 for (
int i = 1; i <=
n; ++i) {
1001 x(i) = -c(i) / (h(i) +
lambda);
1005 inform.x_norm = twoNorm(
x);
1006 x_norm2(0) = pow(inform.x_norm, 2);
1018 if (
fabs(inform.x_norm -
radius) <= std::max(control.stop_normal *
radius, control.stop_absolute_normal)) {
1022 lambda_l = std::max(lambda_l,
lambda);
1025 if (inform.len_history < HISTORY_MAX) {
1026 history_type history_item;
1027 history_item.lambda =
lambda;
1028 history_item.x_norm = inform.x_norm;
1029 inform.history.emplace_back(history_item);
1030 inform.len_history = inform.len_history + 1;
1039 throw std::runtime_error(
"Lambda for trust-region subproblem is ill conditioned");
1046 double w_norm2 = ZERO;
1047 for (
int i = 1; i <=
n; ++i) {
1048 w_norm2 = w_norm2 + pow(c(i), 2) / pow(h(i) +
lambda, 3);
1052 x_norm2(1) = -TWO * w_norm2;
1056 PiBetaDerivs(1, beta, x_norm2, pi_beta);
1060 auto delta_lambda = -(pi_beta(0) - pow((
radius), beta)) / pi_beta(1);
1064 lambda_new(n_lambda) =
lambda + delta_lambda;
1066 if (max_order >= 3) {
1070 double z_norm2 = ZERO;
1071 for (
int i = 1; i <=
n; ++i) {
1072 z_norm2 = z_norm2 + pow(c(i), 2) / pow((h(i) +
lambda), 4);
1074 x_norm2(2) = SIX * z_norm2;
1078 double v_norm2 = ZERO;
1079 for (
int i = 1; i <=
n; ++i) {
1080 v_norm2 = v_norm2 + pow(c(i), 2) / pow((h(i) +
lambda), 5);
1082 x_norm2(3) = -TWENTY_FOUR * v_norm2;
1087 PiBetaDerivs(max_order, beta, x_norm2, pi_beta);
1091 auto a_0 = pi_beta(0) - pow((
radius), beta);
1092 auto a_1 = pi_beta(1);
1093 auto a_2 = HALF * pi_beta(2);
1094 auto a_3 = SIXTH * pi_beta(3);
1103 double root1 = 0, root2 = 0, root3 = 0;
1105 rootsCubic(a_0, a_1, a_2, a_3, ROOTS_TOL, nroots, root1, root2, root3);
1106 n_lambda = n_lambda + 1;
1108 lambda_new(n_lambda) =
lambda + root3;
1110 lambda_new(n_lambda) =
lambda + root1;
1116 PiBetaDerivs(max_order, beta, x_norm2, pi_beta);
1120 a_0 = pi_beta(0) - pow((
radius), beta);
1122 a_2 = HALF * pi_beta(2);
1123 a_3 = SIXTH * pi_beta(3);
1131 rootsCubic(a_0, a_1, a_2, a_3, ROOTS_TOL, nroots, root1, root2, root3);
1132 n_lambda = n_lambda + 1;
1134 lambda_new(n_lambda) =
lambda + root3;
1136 lambda_new(n_lambda) =
lambda + root1;
1142 auto lambda_plus = maxVal(lambda_new, n_lambda);
1143 delta_lambda = lambda_plus -
lambda;
1148 lambda_l = std::max(lambda_l, lambda_plus);
1152 if (std::abs(delta_lambda) < EPSILON_MCH * std::max(ONE, std::abs(
lambda))) {
1194 auto scale_h = maxAbsVal(h);
1195 if (scale_h > ZERO) {
1196 for (
int i = 1; i <=
n; ++i) {
1197 if (
fabs(h(i)) >= control.h_min * scale_h) {
1198 h_scale(i) = h(i) / scale_h;
1211 auto scale_c = maxAbsVal(c);
1212 if (scale_c > ZERO) {
1213 for (
int i = 1; i <=
n; ++i) {
1214 if (
fabs(c(i)) >= control.h_min * scale_c) {
1215 c_scale(i) = c(i) / scale_c;
1225 double radius_scale = (scale_h / scale_c) *
radius;
1226 double f_scale = (scale_h / pow(scale_c, 2)) * f;
1228 auto control_scale = control;
1229 if (control_scale.lower != LOWER_DEFAULT) {
1230 control_scale.lower = control_scale.lower / scale_h;
1232 if (control_scale.upper != UPPER_DEFAULT) {
1233 control_scale.upper = control_scale.upper / scale_h;
1238 solveSubproblemMain(
n, radius_scale, f_scale, c_scale, h_scale,
x, control_scale, inform);
1243 x *= scale_c / scale_h;
1244 inform.obj *= pow(scale_c, 2) / scale_h;
1245 inform.multiplier *= scale_h;
1246 inform.pole *= scale_h;
1247 for (
auto &history_item : inform.history) {
1248 history_item.lambda *= scale_h;
1249 history_item.x_norm *= scale_c / scale_h;
1275 control_type controlOptions;
1300 if (options.
scale != 0) {
1320 intitializeControl(controlOptions);
1327 for (
int ii = 1; ii <=
n; ++ii) {
1331 if (
fabs(
m_ew(ii)) < EPSILON_MCH) {
1343 if (options.
scale != 0) {
1344 for (
int ii = 1; ii <=
n; ++ii) {
const std::vector< double > * lambda
const std::vector< double > & rhs
#define DECLARE_FUNCMINIMIZER(classname, username)
Macro for declaring a new type of minimizers to be used with the FuncMinimizerFactory.
#define UNUSED_ARG(x)
Function arguments are sometimes unused in certain implmentations but are required for documentation ...
int len_history
the number of (||x||_M,lambda) pairs in the history
bool hard_case
has the hard case occurred?
int taylor_max_degree
maximum degree of Taylor approximant allowed
double obj
the value of the quadratic function
double h_min
any entry of H that is smaller than h_min * MAXVAL( H ) we be treated as
std::vector< history_type > history
history information
double stop_absolute_normal
bool equality_problem
is the solution is REQUIRED to lie on the boundary (i.e., is the
double multiplier
the Lagrange multiplier corresponding to the trust-region constraint
double lower
lower and upper bounds on the multiplier, if known
double stop_normal
stop when | ||x|| - radius | <= max( stop_normal * radius, stop_absolute_normal )
double pole
a lower bound max(0,-lambda_1), where lambda_1 is the left-most eigenvalue of (H,M)
double x_norm
corresponding value of ||x(lambda)||_M
std::string m_errorString
Error string.
void zero()
Set all elements to zero.
void set(const size_t i, const double value)
Set an element.
void allocate(const int iFrom, const int iTo, const int jFrom, const int jTo)
Resize the matrix.
int len2() const
Get the size along the second dimension as an int.
int len1() const
Get the size along the first dimension as an int.
void allocate(int firstIndex, int lastIndex)
Resize the vector.
int len() const
Get the length of the vector as an int.
Trust Region minimizer class using the DTRS method of GALAHAD.
std::string name() const override
Name of the minimizer.
void calculateStep(const DoubleFortranMatrix &J, const DoubleFortranVector &f, const DoubleFortranMatrix &hf, double Delta, DoubleFortranVector &d, double &normd, const NLLS::nlls_options &options)
Find a correction vector to the parameters.
double costFunctionVal() override
Return current value of the cost function.
void evalF(const DoubleFortranVector &x, DoubleFortranVector &f) const
Evaluate the fitting function and calculate the residuals.
NLLS::NLLS_workspace m_workspace
Temporary and helper objects.
void evalJ(const DoubleFortranVector &x, DoubleFortranMatrix &J) const
Evaluate the Jacobian.
DoubleFortranVector m_d_trans
DoubleFortranVector m_scale
std::shared_ptr< CostFunctions::CostFuncLeastSquares > m_leastSquares
Stored cost function.
bool iterate(size_t) override
Do one iteration.
NLLS::nlls_options m_options
Options.
void evalHF(const DoubleFortranVector &x, const DoubleFortranVector &f, DoubleFortranMatrix &h) const
Evaluate the Hessian.
NLLS::nlls_inform m_inform
Information about the fitting.
void initialize(API::ICostFunction_sptr costFunction, size_t maxIterations=0) override
Initialize minimizer, i.e. pass a function to minimize.
JacobianImpl1< DoubleFortranMatrix > m_J
The Jacobian.
DoubleFortranVector m_v_trans
DoubleFortranVector m_x
Fitting parameters.
API::IFunction_sptr m_function
Stored to access IFunction interface in iterate()
TypedValue getProperty(const std::string &name) const override
Get the value of a property.
std::shared_ptr< ICostFunction > ICostFunction_sptr
define a shared pointer to a cost function
void MANTID_CURVEFITTING_DLL matmultInner(const DoubleFortranMatrix &J, DoubleFortranMatrix &A)
Takes an m x n matrix J and forms the n x n matrix A given by A = J' * J.
double MANTID_CURVEFITTING_DLL norm2(const DoubleFortranVector &v)
Compute the 2-norm of a vector which is a square root of the sum of squares of its elements.
void allEigSymm(const DoubleFortranMatrix &A, DoubleFortranVector &ew, DoubleFortranMatrix &ev)
Calculate all the eigenvalues of a symmetric matrix.
void MANTID_CURVEFITTING_DLL multJ(const DoubleFortranMatrix &J, const DoubleFortranVector &x, DoubleFortranVector &Jx)
Multiply a matrix by a vector.
void MANTID_CURVEFITTING_DLL multJt(const DoubleFortranMatrix &J, const DoubleFortranVector &x, DoubleFortranVector &Jtx)
Multiply a transposed matrix by a vector.
double dotProduct(const DoubleFortranVector &x, const DoubleFortranVector &y)
Dot product of two vectors.
void MANTID_CURVEFITTING_DLL getSvdJ(const DoubleFortranMatrix &J, double &s1, double &sn)
Given an (m x n) matrix J held by columns as a vector, this routine returns the largest and smallest ...
FortranVector< EigenVector > DoubleFortranVector
void initialize(int n, int m, const nlls_options &options)
Initialize the workspace.
int scale
scale the variables? 0 - no scaling 1 - use the scaling in GSL (W s.t.
int maxit
the maximum number of iterations performed
double initial_radius
if relative_tr_radius /= 1, then set the initial value for the trust-region radius (-ve => ||g_0||)