Mantid
TrustRegion.cpp
// Mantid Repository : https://github.com/mantidproject/mantid
//
// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
// NScD Oak Ridge National Laboratory, European Spallation Source,
// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
// SPDX - License - Identifier: GPL - 3.0 +
// This code was originally translated from Fortran code on
// https://ccpforge.cse.rl.ac.uk/gf/project/ral_nlls June 2016
//----------------------------------------------------------------------
// Includes
//----------------------------------------------------------------------
#include "MantidCurveFitting/RalNlls/TrustRegion.h"

#include <algorithm>
#include <cmath>
#include <functional>
#include <limits>
#include <string>

namespace Mantid::CurveFitting::NLLS {

/// Too small values don't work well with numerical derivatives.
const double EPSILON_MCH = std::numeric_limits<double>::epsilon();

/// Takes an m x n matrix J and forms the n x n matrix A given by A = J' * J.
void matmultInner(const DoubleFortranMatrix &J, DoubleFortranMatrix &A) {
  auto n = J.len2();
  A.allocate(n, n);

  A.mutator() = J.inspector().transpose() * J.inspector();
}

void getSvdJ(const DoubleFortranMatrix &J, double &s1, double &sn) {
  Eigen::BDCSVD<Eigen::MatrixXd> svd(J.inspector());
  auto S = svd.singularValues();

  s1 = S(0);
  sn = S(S.size() - 1);
}

double norm2(const DoubleFortranVector &v) {
  if (v.size() == 0)
    return 0.0;
  return v.norm();
}

/// Multiply a matrix by a vector.
void multJ(const DoubleFortranMatrix &J, const DoubleFortranVector &x, DoubleFortranVector &Jx) {
  // dgemv('N',m,n,alpha,J,m,x,1,beta,Jx,1);
  if (Jx.len() != J.len1()) {
    Jx.allocate(J.len1());
  }

  Jx.mutator() = J.inspector() * x.inspector();
}

/// Multiply a transposed matrix by a vector.
void multJt(const DoubleFortranMatrix &J, const DoubleFortranVector &x, DoubleFortranVector &Jtx) {
  // dgemv('T',m,n,alpha,J,m,x,1,beta,Jtx,1)
  if (Jtx.len() != J.len2()) {
    Jtx.allocate(J.len2());
  }

  Jtx.mutator() = J.inspector().transpose() * x.inspector();
}

double dotProduct(const DoubleFortranVector &x, const DoubleFortranVector &y) { return x.dot(y); }

/// Evaluate the model m_k(d) at the trial step d.
/// Input: f = f(x_k), J = J(x_k), hf = \sum_{i=1}^m f_i(x_k) \nabla^2 f_i(x_k) (or an approx).
double evaluateModel(const DoubleFortranVector &f, const DoubleFortranMatrix &J, const DoubleFortranMatrix &hf,
                     const DoubleFortranVector &d, const nlls_options &options, evaluate_model_work &w) {

  // Jd = J*d
  multJ(J, d, w.Jd);

  // First, get the base
  // 0.5 (f^T f + f^T J d + d^T J^T J d)
  DoubleFortranVector temp = f;
  temp += w.Jd;
  w.md_gn = 0.5 * pow(norm2(temp), 2);
  double md = 0.0;
  switch (options.model) {
  case 1: // first-order (no Hessian)
    md = w.md_gn;
    break;
  default:
    // these have a dynamic H -- recalculate
    // H = J^T J + HF, HF is (an approx?) to the Hessian
    multJ(hf, d, w.Hd);
    md = w.md_gn + 0.5 * dotProduct(d, w.Hd);
  }
  return md;
}

double calculateRho(double normf, double normfnew, double md, const nlls_options &options) {
  UNUSED_ARG(options);
  auto actual_reduction = (0.5 * pow(normf, 2)) - (0.5 * pow(normfnew, 2));
  auto predicted_reduction = ((0.5 * pow(normf, 2)) - md);
  double rho = 0.0;
  if (fabs(actual_reduction) < 10 * EPSILON_MCH) {
    rho = ONE;
  } else if (fabs(predicted_reduction) < 10 * EPSILON_MCH) {
    rho = ONE;
  } else {
    rho = actual_reduction / predicted_reduction;
  }
  return rho;
}

/// Update the Hessian matrix without actually evaluating it (quasi-Newton).
void rankOneUpdate(DoubleFortranMatrix &hf, NLLS_workspace &w) {

  auto yts = dotProduct(w.d, w.y);
  if (fabs(yts) < sqrt(10 * EPSILON_MCH)) {
    // Skip the update if y^T d is too small to divide by safely.
    return;
  }

  multJ(hf, w.d, w.Sks); // hfs = S_k * d

  w.ysharpSks = w.y_sharp;
  w.ysharpSks -= w.Sks;

  // now, let's scale hd (Nocedal and Wright, Section 10.2)
  auto dSks = fabs(dotProduct(w.d, w.Sks));
  auto alpha = fabs(dotProduct(w.d, w.y_sharp)) / dSks;
  alpha = std::min(ONE, alpha);
  hf *= alpha;

  // update S_k (again, as in N&W, Section 10.2)

  // hf = hf + (1/yts) (y# - Sk d)^T y:
  alpha = 1 / yts;

  w.hf.mutator() = alpha * w.ysharpSks.mutator() * w.y.mutator().transpose() + w.hf.mutator();
  w.hf.mutator() = alpha * w.y.mutator() * w.ysharpSks.mutator().transpose() + w.hf.mutator();
  alpha = -dotProduct(w.ysharpSks, w.d) / (pow(yts, 2));
  w.hf.mutator() = alpha * w.y.mutator() * w.y.mutator().transpose() + w.hf.mutator();
}

void updateTrustRegionRadius(double &rho, const nlls_options &options, NLLS_workspace &w) {

  switch (options.tr_update_strategy) {
  case 1: // default, step-function
    if (!std::isfinite(rho)) {
      w.Delta = std::max(options.radius_reduce, options.radius_reduce_max) * w.Delta;
      rho = -ONE; // set to be negative, so that the logic works....
    } else if (rho < options.eta_success_but_reduce) {
      // unsuccessful....reduce Delta
      w.Delta = std::max(options.radius_reduce, options.radius_reduce_max) * w.Delta;
    } else if (rho < options.eta_very_successful) {
      // doing ok...retain status quo
    } else if (rho < options.eta_too_successful) {
      // more than very successful -- increase delta
      w.Delta = std::min(options.maximum_radius, options.radius_increase * w.normd);
      // increase based on normd = ||d||_D
      // if d is on the tr boundary, this is Delta
      // otherwise, point was within the tr, and there's no point
      // increasing the radius
    } else {
      // too successful....accept step, but don't change w.Delta
    }
    break;
  case 2: // Continuous method
    // Based on that proposed by Hans Bruun Nielsen, TR
    // IMM-REP-1999-05
    // http://www2.imm.dtu.dk/documents/ftp/tr99/tr05_99.pdf
    if (!std::isfinite(rho)) {
      w.Delta = std::max(options.radius_reduce, options.radius_reduce_max) * w.Delta;
      rho = -ONE; // set to be negative, so that the logic works....
    } else if (rho >= options.eta_too_successful) {
      // too successful....accept step, but don't change w.Delta
    } else if (rho > options.eta_successful) {
      w.Delta = w.Delta * std::min(options.radius_increase,
                                   std::max(options.radius_reduce,
                                            1 - ((options.radius_increase - 1) * (pow((1 - 2 * rho), w.tr_p)))));
      w.tr_nu = options.radius_reduce;
    } else {
      w.Delta = w.Delta * w.tr_nu;
      w.tr_nu = w.tr_nu * 0.5;
    }
    break;
  default:
    throw std::runtime_error("Bad strategy.");
  }
}

void testConvergence(double normF, double normJF, double normF0, double normJF0, const nlls_options &options,
                     nlls_inform &inform) {

  if (normF <= std::max(options.stop_g_absolute, options.stop_g_relative * normF0)) {
    inform.convergence_normf = 1;
    return;
  }

  if ((normJF / normF) <= std::max(options.stop_g_absolute, options.stop_g_relative * (normJF0 / normF0))) {
    inform.convergence_normg = 1;
  }
}

void applyScaling(const DoubleFortranMatrix &J, DoubleFortranMatrix &A, // cppcheck-suppress constParameterReference
                  DoubleFortranVector &v, DoubleFortranVector &scale,   // cppcheck-suppress constParameterReference
                  const nlls_options &options) {
  auto m = J.len1();
  auto n = J.len2();
  if (scale.len() != n) {
    scale.allocate(n);
  }

  switch (options.scale) {
  case 1:
  case 2:
    for (int ii = 1; ii <= n; ++ii) { // do ii = 1,n
      double temp = ZERO;
      if (options.scale == 1) {
        for (int jj = 1; jj <= m; ++jj) { // for_do(jj, 1,m)
          // get_element_of_matrix(J,m,jj,ii,Jij);
          temp = temp + pow(J(jj, ii), 2);
        }
      } else if (options.scale == 2) {
        for (int jj = 1; jj <= n; ++jj) { // for_do(jj, 1,n)
          temp = temp + pow(A(ii, jj), 2);
        }
      }
      if (temp < options.scale_min) {
        if (options.scale_trim_min) {
          temp = options.scale_min;
        } else {
          temp = ONE;
        }
      } else if (temp > options.scale_max) {
        if (options.scale_trim_max) {
          temp = options.scale_max;
        } else {
          temp = ONE;
        }
      }
      temp = sqrt(temp);
      if (options.scale_require_increase) {
        scale(ii) = std::max(temp, scale(ii));
      } else {
        scale(ii) = temp;
      }
    }
    break;
  default:
    throw std::runtime_error("Scaling error.");
  }

  // Now we have the diagonal scaling matrix, actually scale the
  // Hessian approximation and J^Tf
  for (int ii = 1; ii <= n; ++ii) { // for_do(ii, 1,n)
    double temp = scale(ii);
    v(ii) = v(ii) / temp;
    for (int jj = 1; jj <= n; ++jj) { // for_do(jj,1,n)
      A(ii, jj) = A(ii, jj) / temp;
      A(jj, ii) = A(jj, ii) / temp;
    }
  }
}

/// Calculate all the eigenvalues of a symmetric matrix.
void allEigSymm(const DoubleFortranMatrix &A, DoubleFortranVector &ew, DoubleFortranMatrix &ev) {
  auto M = A;
  M.eigenSystem(ew, ev);
}

// This isn't used because we don't calculate second derivatives in Mantid
// If we start using them the method should be un-commented and used here
//
// void apply_second_order_info(int n, int m, const DoubleFortranVector &X,
//                              NLLS_workspace &w, eval_hf_type evalHF,
//                              params_base_type params,
//                              const nlls_options &options, nlls_inform &inform,
//                              const DoubleFortranVector &weights) {
//
//   if (options.exact_second_derivatives) {
//     DoubleFortranVector temp = w.f;
//     temp *= weights;
//     evalHF(inform.external_return, n, m, X, temp, w.hf, params);
//     inform.h_eval = inform.h_eval + 1;
//   } else {
//     // use the rank-one approximation...
//     rankOneUpdate(w.hf, w, n);
//   }
//}

} // namespace Mantid::CurveFitting::NLLS
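
For orientation, the sketch below shows one way the free functions in this file can be combined to assess a single trial step of the trust-region iteration. It is illustrative only, not part of TrustRegion.cpp: the caller, the step computation, and all argument names (fnew, Jnew, normF0, normJF0, the workspaces) are assumptions for this sketch; only the calls follow the signatures documented below.

// Sketch only (hypothetical helper, not in the Mantid sources): gauge the quality of a
// trial step d and adapt the trust-region radius accordingly. Assumes this code lives in
// (or uses) namespace Mantid::CurveFitting::NLLS and that the caller has already produced
// the new residuals fnew and the new Jacobian Jnew at x + d.
double assessTrialStep(const DoubleFortranVector &f, const DoubleFortranVector &fnew,
                       const DoubleFortranMatrix &J, const DoubleFortranMatrix &Jnew,
                       const DoubleFortranMatrix &hf, const DoubleFortranVector &d,
                       double normF0, double normJF0, const nlls_options &options,
                       nlls_inform &inform, NLLS_workspace &w, evaluate_model_work &ew) {
  double md = evaluateModel(f, J, hf, d, options, ew);           // model value m_k(d)
  double rho = calculateRho(norm2(f), norm2(fnew), md, options); // actual vs. predicted reduction
  updateTrustRegionRadius(rho, options, w);                      // grows or shrinks w.Delta
  if (rho > options.eta_successful) {
    DoubleFortranVector gnew;
    multJt(Jnew, fnew, gnew); // gradient J^T f at the accepted point
    testConvergence(norm2(fnew), norm2(gnew), normF0, normJF0, options, inform);
  }
  return rho;
}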
#define fabs(x)
Definition Matrix.cpp:22
#define UNUSED_ARG(x)
Function arguments are sometimes unused in certain implementations but are required for documentation ...
Definition System.h:48
void eigenSystem(EigenVector &eigenValues, EigenMatrix &eigenVectors)
Calculate the eigensystem of a symmetric matrix.
map_type & mutator()
Get the map to Eigen matrix.
Definition EigenMatrix.h:56
const map_type inspector() const
Get a const copy of the Eigen matrix.
Definition EigenMatrix.h:58
vec_map_type & mutator()
Get the map of the eigen vector.
Definition EigenVector.h:51
double norm() const
Get vector norm (length)
size_t size() const
Size of the vector.
void allocate(const int iFrom, const int iTo, const int jFrom, const int jTo)
Resize the matrix.
int len2() const
Get the size along the second dimension as an int.
int len1() const
Get the size along the first dimension as an int.
void allocate(int firstIndex, int lastIndex)
Resize the vector.
int len() const
Get the length of the vector as an int.
void rankOneUpdate(DoubleFortranMatrix &hf, NLLS_workspace &w)
Update the Hessian matrix without actually evaluating it (quasi-Newton?)
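Reading the implementation above with H = hf, d = w.d, y = w.y, y# = w.y_sharp (so that w.Sks = H d), and after H is first rescaled by \alpha = \min(1, |d^T y^{\#}| / |d^T H d|), the update it applies can be written as (my notation, not from the source):
H \leftarrow H + \frac{(y^{\#} - Hd)\,y^{T} + y\,(y^{\#} - Hd)^{T}}{y^{T} d} - \frac{(y^{\#} - Hd)^{T} d}{(y^{T} d)^{2}}\, y\, y^{T},
i.e. the scaled rank-two secant update from Nocedal and Wright, Section 10.2, which the code comments cite.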
double calculateRho(double normf, double normfnew, double md, const nlls_options &options)
Calculate the quantity rho = (0.5||f||^2 - 0.5||fnew||^2) / (0.5||f||^2 - md) = actual_reduction / predicted_reduction.
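A purely illustrative example (numbers invented here, not taken from the code): with \|f\|^2 = 4, \|f_{new}\|^2 = 1 and md = 0.8,
\rho = \frac{0.5\cdot 4 - 0.5\cdot 1}{0.5\cdot 4 - 0.8} = \frac{1.5}{1.2} = 1.25,
so the step reduced the objective slightly more than the model predicted; how updateTrustRegionRadius reacts then depends on the eta_* thresholds in nlls_options.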
double evaluateModel(const DoubleFortranVector &f, const DoubleFortranMatrix &J, const DoubleFortranMatrix &hf, const DoubleFortranVector &d, const nlls_options &options, evaluate_model_work &w)
Input: f = f(x_k), J = J(x_k), hf = \sum_{i=1}^m f_i(x_k) \nabla^2 f_i(x_k) (or an approx)
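Spelled out from the code above (with HF the approximate Hessian stored in hf), the returned model value is
m_k(d) = \tfrac{1}{2}\,\|f + J d\|^{2} + \tfrac{1}{2}\, d^{T}\, HF\, d,
where the second term is skipped when options.model == 1, leaving the Gauss-Newton value stored in w.md_gn.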
void MANTID_CURVEFITTING_DLL matmultInner(const DoubleFortranMatrix &J, DoubleFortranMatrix &A)
Takes an m x n matrix J and forms the n x n matrix A given by A = J' * J.
double MANTID_CURVEFITTING_DLL norm2(const DoubleFortranVector &v)
Compute the 2-norm of a vector, i.e. the square root of the sum of the squares of its elements.
void testConvergence(double normF, double normJF, double normF0, double normJF0, const nlls_options &options, nlls_inform &inform)
Test the convergence.
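The two tests coded above, written out with F the current residual vector and a subscript 0 marking values at the starting point:
\|F\| \le \max(stop_g_absolute,\ stop_g_relative \cdot \|F_0\|) \Rightarrow convergence_normf = 1,
\|J^{T}F\| / \|F\| \le \max(stop_g_absolute,\ stop_g_relative \cdot \|J_0^{T}F_0\| / \|F_0\|) \Rightarrow convergence_normg = 1.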
void updateTrustRegionRadius(double &rho, const nlls_options &options, NLLS_workspace &w)
Update the trust region radius which is hidden in NLLS_workspace w (w.Delta).
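For tr_update_strategy == 2 (Nielsen's continuous method) the code above adjusts the radius as a smooth function of rho: on a successful but not "too successful" step it applies
\Delta \leftarrow \Delta \cdot \min\Big(radius_increase,\ \max\big(radius_reduce,\ 1 - (radius_increase - 1)(1 - 2\rho)^{p}\big)\Big), \quad p = w.tr\_p,
while an unsuccessful step sets \Delta \leftarrow \nu\,\Delta and halves \nu (stored in w.tr_nu).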
void allEigSymm(const DoubleFortranMatrix &A, DoubleFortranVector &ew, DoubleFortranMatrix &ev)
Calculate all the eigenvalues of a symmetric matrix.
void MANTID_CURVEFITTING_DLL multJ(const DoubleFortranMatrix &J, const DoubleFortranVector &x, DoubleFortranVector &Jx)
Multiply a matrix by a vector.
void MANTID_CURVEFITTING_DLL multJt(const DoubleFortranMatrix &J, const DoubleFortranVector &x, DoubleFortranVector &Jtx)
Multiply a transposed matrix by a vector.
double dotProduct(const DoubleFortranVector &x, const DoubleFortranVector &y)
Dot product of two vectors.
void MANTID_CURVEFITTING_DLL getSvdJ(const DoubleFortranMatrix &J, double &s1, double &sn)
Given an (m x n) matrix J held by columns as a vector, this routine returns the largest and smallest ...
const double EPSILON_MCH
Too small values don't work well with numerical derivatives.
void applyScaling(const DoubleFortranMatrix &J, DoubleFortranMatrix &A, DoubleFortranVector &v, DoubleFortranVector &scale, const nlls_options &options)
applyScaling: input Jacobian matrix J; output scaled Hessian H and J^T f, v.
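In matrix form (my summary of the loops above): with D = diag(scale), where scale_i is the clipped value of \sqrt{\sum_j J_{ji}^2} for options.scale == 1 or \sqrt{\sum_j A_{ij}^2} for options.scale == 2, the routine applies
A \leftarrow D^{-1} A\, D^{-1}, \qquad v \leftarrow D^{-1} v.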
all workspaces called from the top level
Definition Workspaces.h:251
workspace for subroutine evaluateModel
Definition Workspaces.h:222
inform derived type with component defaults
Definition Workspaces.h:157
int convergence_normf
test on the size of f satisfied?
Definition Workspaces.h:172
int convergence_normg
test on the size of the gradient satisfied?
Definition Workspaces.h:175
double eta_successful
a potential iterate will only be accepted if the actual decrease f - f(x_new) is larger than ....
Definition Workspaces.h:88
int model
specify the model used.
Definition Workspaces.h:46
int scale
scale the variables? 0 - no scaling 1 - use the scaling in GSL (W s.t.
Definition Workspaces.h:125
int tr_update_strategy
Trust region update strategy 1 - usual step function 2 - continuous method of Hans Bruun Nielsen (IMM...
Definition Workspaces.h:106
double stop_g_absolute
overall convergence tolerances.
Definition Workspaces.h:63
double maximum_radius
maximum permitted trust-region radius
Definition Workspaces.h:80
double radius_increase
on very successful iterations, the trust-region radius will be increased by the factor ....
Definition Workspaces.h:99