19#include <gsl/gsl_blas.h>
32 declareProperty(
"InitialRadius", 100.0,
"Initial radius of the trust region.");
46 m_leastSquares = std::dynamic_pointer_cast<CostFunctions::CostFuncLeastSquares>(costFunction);
48 throw std::runtime_error(
"Trust-region minimizer can only be used with Least "
49 "squares cost function.");
55 auto m =
static_cast<int>(values.size());
57 throw std::runtime_error(
"More parameters than data.");
64 for (
size_t i = 0; i <
m_function->nParams(); ++i) {
66 m_J.m_index.emplace_back(j);
69 m_J.m_index.emplace_back(-1);
83 auto m =
static_cast<int>(values.size());
87 for (
size_t i = 0; i < values.size(); ++i) {
88 f.
set(i, (values.getCalculated(i) - values.getFitData(i)) * values.getFitWeight(i));
101 auto m =
static_cast<int>(values.size());
109 for (
int i = 1; i <=
m; ++i) {
110 double w = values.getFitWeight(i - 1);
111 for (
int j = 1; j <=
n; ++j) {
138 int max_tr_decrease = 100;
146 if (w.first_call == 0) {
152 inform.f_eval = inform.f_eval + 1;
156 inform.g_eval = inform.g_eval + 1;
158 if (options.relative_tr_radius == 1) {
161 for (
int i = 1; i <=
n; ++i) {
164 double JtJdiag = 0.0;
165 for (
int j = 1; j <=
m; ++j) {
166 JtJdiag += pow(w.J(j, i), 2);
168 JtJdiag = sqrt(JtJdiag);
172 w.Delta = options.initial_radius_scale * (pow(Jmax, 2));
174 w.Delta = options.initial_radius;
177 if (options.calculate_svd_J) {
189 w.normJF0 = w.normJF;
190 w.normJFold = w.normJF;
193 inform.obj = 0.5 * (pow(w.normF, 2));
194 inform.norm_g = w.normJF;
195 inform.scaled_g = w.normJF / w.normF;
199 if (options.output_progress_vectors) {
200 w.resvec(1) = inform.obj;
201 w.gradvec(1) = inform.norm_g;
205 switch (options.model) {
209 w.use_second_derivatives =
false;
214 if (options.exact_second_derivatives) {
216 inform.h_eval = inform.h_eval + 1;
221 w.use_second_derivatives =
true;
227 w.hybrid_tol = options.hybrid_tol * (w.normJF / (0.5 * (pow(w.normF, 2))));
230 w.use_second_derivatives =
false;
231 if (!options.exact_second_derivatives) {
238 throw std::logic_error(
"Unsupported model.");
243 inform.iter = w.iter;
245 bool success =
false;
246 int no_reductions = 0;
247 double normFnew = 0.0;
250 no_reductions = no_reductions + 1;
251 if (no_reductions > max_tr_decrease + 1) {
255 calculateStep(w.J, w.f, w.hf, w.Delta, w.d, w.normd, options);
260 evalF(w.Xnew, w.fnew);
261 inform.f_eval = inform.f_eval + 1;
267 double md = evaluateModel(w.f, w.J, w.hf, w.d, options, w.evaluate_model_ws);
275 auto rho = calculateRho(w.normF, normFnew, md, options);
276 if (!std::isfinite(
rho) ||
rho <= options.eta_successful) {
277 if ((w.use_second_derivatives) && (options.model == 3) && (no_reductions == 1)) {
280 double rho_gn = calculateRho(w.normF, normFnew, w.evaluate_model_ws.md_gn, options);
281 if (rho_gn > options.eta_successful) {
283 w.use_second_derivatives =
false;
293 updateTrustRegionRadius(
rho, options, w);
309 if (!options.exact_second_derivatives) {
320 inform.g_eval = inform.g_eval + 1;
322 if (options.calculate_svd_J) {
323 NLLS::getSvdJ(w.J, w.smallest_sv(w.iter + 1), w.largest_sv(w.iter + 1));
330 w.normJFold = w.normJF;
335 if (!options.exact_second_derivatives) {
338 w.y_sharp = w.g_mixed;
342 if (options.model == 3) {
345 if (w.use_second_derivatives) {
346 if (w.normJF > w.normJFold) {
348 w.use_second_derivatives =
false;
354 auto FunctionValue = 0.5 * (pow(w.normF, 2));
355 if (w.normJF / FunctionValue < w.hybrid_tol) {
356 w.hybrid_count = w.hybrid_count + 1;
357 if (w.hybrid_count == options.hybrid_switch_its) {
359 w.use_second_derivatives =
true;
362 if (!options.exact_second_derivatives) {
371 if (!w.use_second_derivatives) {
374 if (!options.exact_second_derivatives) {
375 rankOneUpdate(w.hf_temp, w);
380 if (w.use_second_derivatives) {
383 if (options.exact_second_derivatives) {
385 inform.h_eval = inform.h_eval + 1;
388 rankOneUpdate(w.hf, w);
393 inform.obj = 0.5 * (pow(w.normF, 2));
394 inform.norm_g = w.normJF;
395 inform.scaled_g = w.normJF / w.normF;
396 if (options.output_progress_vectors) {
397 w.resvec(w.iter + 1) = inform.obj;
398 w.gradvec(w.iter + 1) = inform.norm_g;
402 testConvergence(w.normF, w.normJF, w.normF0, w.normJF0, options, inform);
404 if (inform.convergence_normf == 1 || inform.convergence_normg == 1) {
408 inform.iter = w.iter;
409 inform.resvec = w.resvec;
410 inform.gradvec = w.gradvec;
418const double HUGEST = std::numeric_limits<double>::max();
419const double EPSILON_MCH = std::numeric_limits<double>::epsilon();
420const double LARGEST = HUGEST;
421const double LOWER_DEFAULT = -0.5 * LARGEST;
422const double UPPER_DEFAULT = LARGEST;
423const double POINT4 = 0.4;
424const double ZERO = 0.0;
425const double ONE = 1.0;
426const double TWO = 2.0;
427const double THREE = 3.0;
428const double FOUR = 4.0;
429const double SIX = 6.0;
430const double HALF = 0.5;
431const double ONESIXTH = ONE / SIX;
432const double SIXTH = ONESIXTH;
433const double ONE_THIRD = ONE / THREE;
434const double TWO_THIRDS = TWO / THREE;
435const double THREE_QUARTERS = 0.75;
436const double TWENTY_FOUR = 24.0;
437const int MAX_DEGREE = 3;
438const int HISTORY_MAX = 100;
439const double TEN_EPSILON_MCH = 10.0 * EPSILON_MCH;
440const double ROOTS_TOL = TEN_EPSILON_MCH;
441const double INFINITE_NUMBER = HUGEST;
445inline double sign(
double x,
double y) {
return y >= 0.0 ?
fabs(
x) : -
fabs(
x); }
520double biggest(
double a,
double b,
double c,
double d) {
return std::max(std::max(a, b), std::max(c,
d)); }
527double biggest(
double a,
double b,
double c) {
return std::max(std::max(a, b), c); }
533 auto p = v.indicesOfMinMaxElements();
534 return std::max(
fabs(v.get(p.first)),
fabs(v.get(p.second)));
543 auto p = v.indicesOfMinMaxElements();
544 return std::make_pair(v.get(p.first), v.get(p.second));
568 double res = std::numeric_limits<double>::lowest();
569 for (
int i = 1; i <=
n; ++i) {
591void rootsQuadratic(
double a0,
double a1,
double a2,
double tol,
int &nroots,
double &root1,
double &root2) {
593 auto rhs = tol * a1 * a1;
595 root2 = a1 * a1 - FOUR * a2 * a0;
596 if (
fabs(root2) <= pow(EPSILON_MCH * a1, 2)) {
598 root1 = -HALF * a1 / a2;
600 }
else if (root2 < ZERO) {
605 auto d = -HALF * (a1 + sign(sqrt(root2), a1));
615 }
else if (a2 == ZERO) {
633 if (-a1 / a2 > ZERO) {
645 auto p = (a2 * root1 + a1) * root1 + a0;
646 auto pprime = TWO * a2 * root1 + a1;
647 if (pprime != ZERO) {
648 root1 = root1 - p / pprime;
651 p = (a2 * root2 + a1) * root2 + a0;
652 pprime = TWO * a2 * root2 + a1;
653 if (pprime != ZERO) {
654 root2 = root2 - p / pprime;
675void rootsCubic(
double a0,
double a1,
double a2,
double a3,
double tol,
int &nroots,
double &root1,
double &root2,
680 rootsQuadratic(a0, a1, a2, tol, nroots, root1, root2);
681 root3 = INFINITE_NUMBER;
688 rootsQuadratic(a1, a2, a3, tol, nroots, root2, root3);
699 double s = c2 / THREE;
701 double b = 0.5 * (s * (TWO_THIRDS * t - c1) + c0);
702 t = (t - c1) / THREE;
703 double c = t * t * t;
704 double d = b * b - c;
708 d = pow(sqrt(
d) +
fabs(b), ONE_THIRD);
717 d = sqrt(THREE_QUARTERS) * (b - c);
730 d = TWO_THIRDS * atan(ONE);
732 d = atan(sqrt(-
d) /
fabs(b)) / THREE;
740 t = -sqrt(THREE_QUARTERS) * sin(
d) * b - HALF * c;
780 double p = ((a3 * root1 + a2) * root1 + a1) * root1 + a0;
781 double pprime = (THREE * a3 * root1 + TWO * a2) * root1 + a1;
782 if (pprime != ZERO) {
783 root1 = root1 - p / pprime;
788 p = ((a3 * root2 + a2) * root2 + a1) * root2 + a0;
789 pprime = (THREE * a3 * root2 + TWO * a2) * root2 + a1;
790 if (pprime != ZERO) {
791 root2 = root2 - p / pprime;
795 p = ((a3 * root3 + a2) * root3 + a1) * root3 + a0;
796 pprime = (THREE * a3 * root3 + TWO * a2) * root3 + a1;
797 if (pprime != ZERO) {
798 root3 = root3 - p / pprime;
816 double hbeta = HALF * beta;
817 pi_beta(0) = pow(x_norm2(0), hbeta);
818 pi_beta(1) = hbeta * (pow(x_norm2(0), (hbeta - ONE))) * x_norm2(1);
822 hbeta * (pow(x_norm2(0), (hbeta - TWO))) * ((hbeta - ONE) * pow(x_norm2(1), 2) + x_norm2(0) * x_norm2(2));
825 pi_beta(3) = hbeta * (pow(x_norm2(0), (hbeta - THREE))) *
826 (x_norm2(3) * pow(x_norm2(0), 2) +
827 (hbeta - ONE) * (THREE * x_norm2(0) * x_norm2(1) * x_norm2(2) + (hbeta - TWO) * pow(x_norm2(1), 3)));
834void intitializeControl(control_type &control) {
835 control.stop_normal = pow(EPSILON_MCH, 0.75);
836 control.stop_absolute_normal = pow(EPSILON_MCH, 0.75);
864 inform.x_norm = ZERO;
866 inform.hard_case =
false;
870 throw std::runtime_error(
"Number of unknowns for trust-region subproblem is negative.");
873 throw std::runtime_error(
"Trust-region radius for trust-region subproblem is negative");
880 double c_norm = twoNorm(c);
881 double lambda_min = 0.0;
882 double lambda_max = 0.0;
883 std::tie(lambda_min, lambda_max) = minMaxValues(h);
887 if (c_norm == ZERO && lambda_min >= ZERO) {
888 if (control.equality_problem) {
890 for (
int i = 1; i <=
n; ++i) {
891 if (h(i) == lambda_min) {
896 x(i_hard) = ONE / radius;
897 inform.x_norm = radius;
898 inform.obj = f + lambda_min * radius * radius;
908 double c_norm_over_radius = c_norm / radius;
909 double lambda_l = 0.0, lambda_u = 0.0;
910 if (control.equality_problem) {
911 lambda_l = biggest(control.lower, -lambda_min, c_norm_over_radius - lambda_max);
912 lambda_u = std::min(control.upper, c_norm_over_radius - lambda_min);
914 lambda_l = biggest(control.lower, ZERO, -lambda_min, c_norm_over_radius - lambda_max);
915 lambda_u = std::min(control.upper, std::max(ZERO, c_norm_over_radius - lambda_min));
920 if (
lambda == -lambda_min) {
923 inform.hard_case =
true;
924 for (
int i = 1; i <=
n; ++i) {
925 if (h(i) == lambda_min) {
926 if (
fabs(c(i)) > EPSILON_MCH * c_norm) {
927 inform.hard_case =
false;
928 c2 = c2 + pow(c(i), 2);
936 if (inform.hard_case) {
937 for (
int i = 1; i <=
n; ++i) {
938 if (h(i) != lambda_min) {
939 x(i) = -c(i) / (h(i) +
lambda);
944 inform.x_norm = twoNorm(
x);
948 if (inform.x_norm <= radius) {
949 if (inform.x_norm < radius) {
955 auto utx =
x(i_hard) / radius;
956 auto distx = (radius - inform.x_norm) * ((radius + inform.x_norm) / radius);
957 auto alpha = sign(distx / (
fabs(utx) + sqrt(pow(utx, 2) + distx / radius)), utx);
961 x(i_hard) =
x(i_hard) + alpha;
963 inform.x_norm = twoNorm(
x);
969 inform.hard_case =
false;
973 for (
int i = 1; i <=
n; ++i) {
974 if (h(i) != lambda_min)
975 w_norm2 = w_norm2 + pow(c(i), 2) / pow((h(i) +
lambda), 3);
977 x_norm2(1) = -TWO * w_norm2;
981 lambda =
lambda + (pow(inform.x_norm, 2) - pow(radius, 2)) / x_norm2(1);
982 lambda_l = std::max(lambda_l,
lambda);
989 lambda_l = std::max(lambda_l,
lambda);
994 auto max_order = std::max(1, std::min(MAX_DEGREE, control.taylor_max_degree));
1001 for (
int i = 1; i <=
n; ++i) {
1002 x(i) = -c(i) / (h(i) +
lambda);
1006 inform.x_norm = twoNorm(
x);
1007 x_norm2(0) = pow(inform.x_norm, 2);
1011 if (
lambda == ZERO && inform.x_norm <= radius) {
1019 if (
fabs(inform.x_norm - radius) <= std::max(control.stop_normal * radius, control.stop_absolute_normal)) {
1023 lambda_l = std::max(lambda_l,
lambda);
1026 if (inform.len_history < HISTORY_MAX) {
1027 history_type history_item;
1028 history_item.lambda =
lambda;
1029 history_item.x_norm = inform.x_norm;
1030 inform.history.emplace_back(history_item);
1031 inform.len_history = inform.len_history + 1;
1040 throw std::runtime_error(
"Lambda for trust-region subproblem is ill conditioned");
1047 double w_norm2 = ZERO;
1048 for (
int i = 1; i <=
n; ++i) {
1049 w_norm2 = w_norm2 + pow(c(i), 2) / pow(h(i) +
lambda, 3);
1053 x_norm2(1) = -TWO * w_norm2;
1057 PiBetaDerivs(1, beta, x_norm2, pi_beta);
1061 auto delta_lambda = -(pi_beta(0) - pow((radius), beta)) / pi_beta(1);
1065 lambda_new(n_lambda) =
lambda + delta_lambda;
1067 if (max_order >= 3) {
1071 double z_norm2 = ZERO;
1072 for (
int i = 1; i <=
n; ++i) {
1073 z_norm2 = z_norm2 + pow(c(i), 2) / pow((h(i) +
lambda), 4);
1075 x_norm2(2) = SIX * z_norm2;
1079 double v_norm2 = ZERO;
1080 for (
int i = 1; i <=
n; ++i) {
1081 v_norm2 = v_norm2 + pow(c(i), 2) / pow((h(i) +
lambda), 5);
1083 x_norm2(3) = -TWENTY_FOUR * v_norm2;
1088 PiBetaDerivs(max_order, beta, x_norm2, pi_beta);
1092 auto a_0 = pi_beta(0) - pow((radius), beta);
1093 auto a_1 = pi_beta(1);
1094 auto a_2 = HALF * pi_beta(2);
1095 auto a_3 = SIXTH * pi_beta(3);
1104 double root1 = 0, root2 = 0, root3 = 0;
1106 rootsCubic(a_0, a_1, a_2, a_3, ROOTS_TOL, nroots, root1, root2, root3);
1107 n_lambda = n_lambda + 1;
1109 lambda_new(n_lambda) =
lambda + root3;
1111 lambda_new(n_lambda) =
lambda + root1;
1117 PiBetaDerivs(max_order, beta, x_norm2, pi_beta);
1121 a_0 = pi_beta(0) - pow((radius), beta);
1123 a_2 = HALF * pi_beta(2);
1124 a_3 = SIXTH * pi_beta(3);
1132 rootsCubic(a_0, a_1, a_2, a_3, ROOTS_TOL, nroots, root1, root2, root3);
1133 n_lambda = n_lambda + 1;
1135 lambda_new(n_lambda) =
lambda + root3;
1137 lambda_new(n_lambda) =
lambda + root1;
1143 auto lambda_plus = maxVal(lambda_new, n_lambda);
1144 delta_lambda = lambda_plus -
lambda;
1149 lambda_l = std::max(lambda_l, lambda_plus);
1153 if (std::abs(delta_lambda) < EPSILON_MCH * std::max(ONE, std::abs(
lambda))) {
1195 auto scale_h = maxAbsVal(h);
1196 if (scale_h > ZERO) {
1197 for (
int i = 1; i <=
n; ++i) {
1198 if (
fabs(h(i)) >= control.h_min * scale_h) {
1199 h_scale(i) = h(i) / scale_h;
1212 auto scale_c = maxAbsVal(c);
1213 if (scale_c > ZERO) {
1214 for (
int i = 1; i <=
n; ++i) {
1215 if (
fabs(c(i)) >= control.h_min * scale_c) {
1216 c_scale(i) = c(i) / scale_c;
1226 double radius_scale = (scale_h / scale_c) * radius;
1227 double f_scale = (scale_h / pow(scale_c, 2)) * f;
1229 auto control_scale = control;
1230 if (control_scale.lower != LOWER_DEFAULT) {
1231 control_scale.lower = control_scale.lower / scale_h;
1233 if (control_scale.upper != UPPER_DEFAULT) {
1234 control_scale.upper = control_scale.upper / scale_h;
1239 solveSubproblemMain(
n, radius_scale, f_scale, c_scale, h_scale,
x, control_scale, inform);
1244 x *= scale_c / scale_h;
1245 inform.obj *= pow(scale_c, 2) / scale_h;
1246 inform.multiplier *= scale_h;
1247 inform.pole *= scale_h;
1248 for (
auto &history_item : inform.
history) {
1249 history_item.lambda *= scale_h;
1250 history_item.x_norm *= scale_c / scale_h;
1276 control_type controlOptions;
1301 if (options.
scale != 0) {
1321 intitializeControl(controlOptions);
1328 for (
int ii = 1; ii <=
n; ++ii) {
1332 if (
fabs(
m_ew(ii)) < EPSILON_MCH) {
1344 if (options.
scale != 0) {
1345 for (
int ii = 1; ii <=
n; ++ii) {
const std::vector< double > * lambda
const std::vector< double > & rhs
#define DECLARE_FUNCMINIMIZER(classname, username)
Macro for declaring a new type of minimizers to be used with the FuncMinimizerFactory.
#define UNUSED_ARG(x)
Function arguments are sometimes unused in certain implmentations but are required for documentation ...
int len_history
the number of (||x||_M,lambda) pairs in the history
bool hard_case
has the hard case occurred?
int taylor_max_degree
maximum degree of Taylor approximant allowed
double obj
the value of the quadratic function
double h_min
any entry of H that is smaller than h_min * MAXVAL( H ) we be treated as
std::vector< history_type > history
history information
double stop_absolute_normal
bool equality_problem
is the solution is REQUIRED to lie on the boundary (i.e., is the
double multiplier
the Lagrange multiplier corresponding to the trust-region constraint
double lower
lower and upper bounds on the multiplier, if known
double stop_normal
stop when | ||x|| - radius | <= max( stop_normal * radius, stop_absolute_normal )
double pole
a lower bound max(0,-lambda_1), where lambda_1 is the left-most eigenvalue of (H,M)
double x_norm
corresponding value of ||x(lambda)||_M
std::string m_errorString
Error string.
void zero()
Set all elements to zero.
void set(const size_t i, const double value)
Set an element.
void allocate(const int iFrom, const int iTo, const int jFrom, const int jTo)
Resize the matrix.
int len2() const
Get the size along the second dimension as an int.
int len1() const
Get the size along the first dimension as an int.
void allocate(int firstIndex, int lastIndex)
Resize the vector.
int len() const
Get the length of the vector as an int.
Trust Region minimizer class using the DTRS method of GALAHAD.
std::string name() const override
Name of the minimizer.
void calculateStep(const DoubleFortranMatrix &J, const DoubleFortranVector &f, const DoubleFortranMatrix &hf, double Delta, DoubleFortranVector &d, double &normd, const NLLS::nlls_options &options)
Find a correction vector to the parameters.
double costFunctionVal() override
Return current value of the cost function.
void evalF(const DoubleFortranVector &x, DoubleFortranVector &f) const
Evaluate the fitting function and calculate the residuals.
NLLS::NLLS_workspace m_workspace
Temporary and helper objects.
void evalJ(const DoubleFortranVector &x, DoubleFortranMatrix &J) const
Evaluate the Jacobian.
DoubleFortranVector m_d_trans
DoubleFortranVector m_scale
std::shared_ptr< CostFunctions::CostFuncLeastSquares > m_leastSquares
Stored cost function.
bool iterate(size_t) override
Do one iteration.
NLLS::nlls_options m_options
Options.
void evalHF(const DoubleFortranVector &x, const DoubleFortranVector &f, DoubleFortranMatrix &h) const
Evaluate the Hessian.
NLLS::nlls_inform m_inform
Information about the fitting.
void initialize(API::ICostFunction_sptr costFunction, size_t maxIterations=0) override
Initialize minimizer, i.e. pass a function to minimize.
JacobianImpl1< DoubleFortranMatrix > m_J
The Jacobian.
DoubleFortranVector m_v_trans
DoubleFortranVector m_x
Fitting parameters.
API::IFunction_sptr m_function
Stored to access IFunction interface in iterate()
TypedValue getProperty(const std::string &name) const override
Get the value of a property.
std::shared_ptr< ICostFunction > ICostFunction_sptr
define a shared pointer to a cost function
void MANTID_CURVEFITTING_DLL matmultInner(const DoubleFortranMatrix &J, DoubleFortranMatrix &A)
Takes an m x n matrix J and forms the n x n matrix A given by A = J' * J.
double MANTID_CURVEFITTING_DLL norm2(const DoubleFortranVector &v)
Compute the 2-norm of a vector which is a square root of the sum of squares of its elements.
void allEigSymm(const DoubleFortranMatrix &A, DoubleFortranVector &ew, DoubleFortranMatrix &ev)
Calculate all the eigenvalues of a symmetric matrix.
void MANTID_CURVEFITTING_DLL multJ(const DoubleFortranMatrix &J, const DoubleFortranVector &x, DoubleFortranVector &Jx)
Multiply a matrix by a vector.
void MANTID_CURVEFITTING_DLL multJt(const DoubleFortranMatrix &J, const DoubleFortranVector &x, DoubleFortranVector &Jtx)
Multiply a transposed matrix by a vector.
double dotProduct(const DoubleFortranVector &x, const DoubleFortranVector &y)
Dot product of two vectors.
void MANTID_CURVEFITTING_DLL getSvdJ(const DoubleFortranMatrix &J, double &s1, double &sn)
Given an (m x n) matrix J held by columns as a vector, this routine returns the largest and smallest ...
FortranVector< EigenVector > DoubleFortranVector
void initialize(int n, int m, const nlls_options &options)
Initialize the workspace.
int scale
scale the variables? 0 - no scaling 1 - use the scaling in GSL (W s.t.
int maxit
the maximum number of iterations performed
double initial_radius
if relative_tr_radius /= 1, then set the initial value for the trust-region radius (-ve => ||g_0||)