RDKit
Open-source cheminformatics and machine learning.
BFGSOpt.h
//
//  Copyright (C) 2004-2008 Greg Landrum and Rational Discovery LLC
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#include <RDGeneral/export.h>
#include <cmath>
#include <RDGeneral/Invariant.h>
#include <GraphMol/Trajectory/Snapshot.h>
#include <cstring>
#include <vector>
#include <algorithm>

namespace BFGSOpt {
RDKIT_OPTIMIZER_EXPORT extern int REALLY_A_HEADER_ONLY_LIBRARY;
RDKIT_OPTIMIZER_EXPORT extern int HEAD_ONLY_LIBRARY;
const double FUNCTOL =
    1e-4;  //!< Default tolerance for function convergence in the minimizer
const double MOVETOL =
    1e-7;  //!< Default tolerance for x changes in the minimizer
const int MAXITS = 200;   //!< Default maximum number of iterations
const double EPS = 3e-8;  //!< Default gradient tolerance in the minimizer
const double TOLX =
    4. * EPS;  //!< Default direction vector tolerance in the minimizer
const double MAXSTEP = 100.0;  //!< Default maximum step size in the minimizer

//! Do a Quasi-Newton minimization along a line.
/*!
  See Numerical Recipes in C, Section 9.7 for a description of the algorithm.

  \param dim the dimensionality of the space.
  \param oldPt the current position, as an array.
  \param oldVal the current function value.
  \param grad the value of the function gradient at oldPt
  \param dir the minimization direction
  \param newPt used to return the final position
  \param newVal used to return the final function value
  \param func the function to minimize
  \param maxStep the maximum allowable step size
  \param resCode used to return the results of the search.

  Possible values for resCode on return are:
   - 0: success
   - 1: the step size got too small. This probably indicates success.
   - -1: the direction is bad (orthogonal to the gradient)
*/
template <typename EnergyFunctor>
void linearSearch(unsigned int dim, double *oldPt, double oldVal, double *grad,
                  double *dir, double *newPt, double &newVal,
                  EnergyFunctor func, double maxStep, int &resCode) {
  PRECONDITION(oldPt, "bad input array");
  PRECONDITION(grad, "bad input array");
  PRECONDITION(dir, "bad input array");
  PRECONDITION(newPt, "bad input array");

  const unsigned int MAX_ITER_LINEAR_SEARCH = 1000;
  double sum = 0.0, slope = 0.0, test = 0.0, lambda = 0.0;
  double lambda2 = 0.0, lambdaMin = 0.0, tmpLambda = 0.0, val2 = 0.0;

  resCode = -1;

  // get the length of the direction vector:
  sum = 0.0;
  for (unsigned int i = 0; i < dim; i++) {
    sum += dir[i] * dir[i];
  }
  sum = sqrt(sum);

  // rescale if we're trying to move too far:
  if (sum > maxStep) {
    for (unsigned int i = 0; i < dim; i++) {
      dir[i] *= maxStep / sum;
    }
  }

  // make sure our direction has at least some component along
  // -grad
  slope = 0.0;
  for (unsigned int i = 0; i < dim; i++) {
    slope += dir[i] * grad[i];
  }
  if (slope >= 0.0) {
    return;
  }

  test = 0.0;
  for (unsigned int i = 0; i < dim; i++) {
    double temp = fabs(dir[i]) / std::max(fabs(oldPt[i]), 1.0);
    if (temp > test) {
      test = temp;
    }
  }

  lambdaMin = MOVETOL / test;
  lambda = 1.0;
  unsigned int it = 0;
  while (it < MAX_ITER_LINEAR_SEARCH) {
    if (lambda < lambdaMin) {
      // the position change is too small
      resCode = 1;
      break;
    }
    for (unsigned int i = 0; i < dim; i++) {
      newPt[i] = oldPt[i] + lambda * dir[i];
    }
    newVal = func(newPt);

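    // sufficient-decrease (Armijo) test: accept the step once the function
    // has dropped by at least FUNCTOL * lambda * slope; slope is the
    // directional derivative, which is negative here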
    if (newVal - oldVal <= FUNCTOL * lambda * slope) {
      // we're converged on the function:
      resCode = 0;
      return;
    }
    // if we made it this far, we need to backtrack:
    if (it == 0) {
      // it's the first step:
      tmpLambda = -slope / (2.0 * (newVal - oldVal - slope));
    } else {
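      // later steps: fit the cubic g(l) = a*l^3 + b*l^2 + slope*l + oldVal
      // through the two most recent trial points and jump to its minimizer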
      double rhs1 = newVal - oldVal - lambda * slope;
      double rhs2 = val2 - oldVal - lambda2 * slope;
      double a = (rhs1 / (lambda * lambda) - rhs2 / (lambda2 * lambda2)) /
                 (lambda - lambda2);
      double b = (-lambda2 * rhs1 / (lambda * lambda) +
                  lambda * rhs2 / (lambda2 * lambda2)) /
                 (lambda - lambda2);
      if (a == 0.0) {
        tmpLambda = -slope / (2.0 * b);
      } else {
        double disc = b * b - 3 * a * slope;
        if (disc < 0.0) {
          tmpLambda = 0.5 * lambda;
        } else if (b <= 0.0) {
          tmpLambda = (-b + sqrt(disc)) / (3.0 * a);
        } else {
          tmpLambda = -slope / (b + sqrt(disc));
        }
      }
      if (tmpLambda > 0.5 * lambda) {
        tmpLambda = 0.5 * lambda;
      }
    }
    lambda2 = lambda;
    val2 = newVal;
    lambda = std::max(tmpLambda, 0.1 * lambda);
    ++it;
  }
  // no acceptable step was found; return the starting point:
  for (unsigned int i = 0; i < dim; i++) {
    newPt[i] = oldPt[i];
  }
}
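
/*
  Illustrative sketch, not part of the original header: one call to
  linearSearch() on the 1-D quadratic f(x) = (x - 3)^2, starting from x = 0.
  The functor name Quad1D and the driver demoLinearSearch() are hypothetical.

  struct Quad1D {
    double operator()(double *p) const {
      return (p[0] - 3.0) * (p[0] - 3.0);
    }
  };

  void demoLinearSearch() {
    Quad1D f;
    double oldPt[1] = {0.0};
    double grad[1] = {-6.0};  // f'(0) = 2 * (0 - 3)
    double dir[1] = {6.0};    // steepest descent direction, -grad
    double newPt[1];
    double newVal;
    int resCode;
    BFGSOpt::linearSearch(1, oldPt, f(oldPt), grad, dir, newPt, newVal, f,
                          100.0, resCode);
    // resCode == 0; for this quadratic a single backtracking step lands
    // newPt[0] on the minimum at 3.0
  }
*/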

#define CLEANUP()        \
  {                      \
    delete[] grad;       \
    delete[] dGrad;      \
    delete[] hessDGrad;  \
    delete[] newPos;     \
    delete[] xi;         \
    delete[] invHessian; \
  }
//! Do a BFGS minimization of a function.
/*!
  See Numerical Recipes in C, Section 10.7 for a description of the algorithm.

  \param dim the dimensionality of the space.
  \param pos the starting position, as an array.
  \param gradTol tolerance for gradient convergence
  \param numIters used to return the number of iterations required
  \param funcVal used to return the final function value
  \param func the function to minimize
  \param gradFunc calculates the gradient of func
  \param snapshotFreq a snapshot of the minimization trajectory
                      will be stored every snapshotFreq steps;
                      defaults to 0 (no snapshots stored)
  \param snapshotVect pointer to a std::vector<Snapshot> object that will
  receive the coordinates and energies every snapshotFreq steps; defaults to
  NULL (no snapshots stored)
  \param funcTol tolerance for changes in the function value for convergence;
  currently unused (the compile-time constant TOLX is applied instead)
  \param maxIts maximum number of iterations allowed

  \return a flag indicating success (or type of failure). Possible values are:
   - 0: success
   - 1: too many iterations were required
*/
template <typename EnergyFunctor, typename GradientFunctor>
int minimize(unsigned int dim, double *pos, double gradTol,
             unsigned int &numIters, double &funcVal, EnergyFunctor func,
             GradientFunctor gradFunc, unsigned int snapshotFreq,
             RDKit::SnapshotVect *snapshotVect, double funcTol = TOLX,
             unsigned int maxIts = MAXITS) {
  RDUNUSED_PARAM(funcTol);
  PRECONDITION(pos, "bad input array");
  PRECONDITION(gradTol > 0, "bad tolerance");

  double sum, maxStep, fp;

  double *grad, *dGrad, *hessDGrad;
  double *newPos, *xi;
  double *invHessian;

  grad = new double[dim];
  dGrad = new double[dim];
  hessDGrad = new double[dim];
  newPos = new double[dim];
  xi = new double[dim];
  invHessian = new double[dim * dim];
  snapshotFreq = std::min(snapshotFreq, maxIts);

  // evaluate the function and gradient in our current position:
  fp = func(pos);
  gradFunc(pos, grad);

  sum = 0.0;
  memset(invHessian, 0, dim * dim * sizeof(double));
  for (unsigned int i = 0; i < dim; i++) {
    unsigned int itab = i * dim;
    // initialize the inverse hessian to be identity:
    invHessian[itab + i] = 1.0;
    // the first line dir is -grad:
    xi[i] = -grad[i];
    sum += pos[i] * pos[i];
  }
  // pick a max step size:
  maxStep = MAXSTEP * std::max(sqrt(sum), static_cast<double>(dim));

  for (unsigned int iter = 1; iter <= maxIts; iter++) {
    numIters = iter;
    int status;

    // do the line search:
    linearSearch(dim, pos, fp, grad, xi, newPos, funcVal, func, maxStep,
                 status);
    CHECK_INVARIANT(status >= 0, "bad direction in linearSearch");

    // save the function value for the next search:
    fp = funcVal;

    // set the direction of this line and save the gradient:
    double test = 0.0;
    for (unsigned int i = 0; i < dim; i++) {
      xi[i] = newPos[i] - pos[i];
      pos[i] = newPos[i];
      double temp = fabs(xi[i]) / std::max(fabs(pos[i]), 1.0);
      if (temp > test) {
        test = temp;
      }
      dGrad[i] = grad[i];
    }
    // converged on x (the position is no longer changing meaningfully)?
    if (test < TOLX) {
      if (snapshotVect && snapshotFreq) {
        RDKit::Snapshot s(boost::shared_array<double>(newPos), fp);
        snapshotVect->push_back(s);
        newPos = nullptr;
      }
      CLEANUP();
      return 0;
    }

    // update the gradient:
    double gradScale = gradFunc(pos, grad);

    // is the gradient converged?
    test = 0.0;
    double term = std::max(funcVal * gradScale, 1.0);
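    // relative gradient test: the largest |grad[i]| * max(|pos[i]|, 1),
    // scaled by max(funcVal * gradScale, 1), must drop below gradTol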
    for (unsigned int i = 0; i < dim; i++) {
      double temp = fabs(grad[i]) * std::max(fabs(pos[i]), 1.0);
      test = std::max(test, temp);
      dGrad[i] = grad[i] - dGrad[i];
    }
    test /= term;
    if (test < gradTol) {
      if (snapshotVect && snapshotFreq) {
        RDKit::Snapshot s(boost::shared_array<double>(newPos), fp);
        snapshotVect->push_back(s);
        newPos = nullptr;
      }
      CLEANUP();
      return 0;
    }

    // compute hessian*dGrad:
    double fac = 0, fae = 0, sumDGrad = 0, sumXi = 0;
    for (unsigned int i = 0; i < dim; i++) {
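      // the disabled branch below is the straightforward indexed form of
      // the product H * dGrad; the enabled branch computes the same thing
      // by walking raw pointers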
#if 0
      unsigned int itab = i * dim;
      hessDGrad[i] = 0.0;
      for (unsigned int j = 0; j < dim; j++) {
        hessDGrad[i] += invHessian[itab + j] * dGrad[j];
      }
#else
      double *ivh = &(invHessian[i * dim]);
      double &hdgradi = hessDGrad[i];
      double *dgj = dGrad;
      hdgradi = 0.0;
      for (unsigned int j = 0; j < dim; ++j, ++ivh, ++dgj) {
        hdgradi += *ivh * *dgj;
      }
#endif
      fac += dGrad[i] * xi[i];
      fae += dGrad[i] * hessDGrad[i];
      sumDGrad += dGrad[i] * dGrad[i];
      sumXi += xi[i] * xi[i];
    }
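    // BFGS update of the inverse Hessian, applied only when fac = dGrad.xi
    // is safely positive (keeps the approximation positive definite and
    // avoids dividing by ~0):
    //   H += (s s^T)/(s.y) - (H y y^T H)/(y.H y) + (y.H y) u u^T
    // with s = xi, y = dGrad, u = s/(s.y) - (H y)/(y.H y); dGrad is
    // recycled below to hold u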
    if (fac > sqrt(EPS * sumDGrad * sumXi)) {
      fac = 1.0 / fac;
      double fad = 1.0 / fae;
      for (unsigned int i = 0; i < dim; i++) {
        dGrad[i] = fac * xi[i] - fad * hessDGrad[i];
      }

      for (unsigned int i = 0; i < dim; i++) {
        unsigned int itab = i * dim;
#if 0
        for (unsigned int j = i; j < dim; j++) {
          invHessian[itab + j] += fac * xi[i] * xi[j] -
                                  fad * hessDGrad[i] * hessDGrad[j] +
                                  fae * dGrad[i] * dGrad[j];
          invHessian[j * dim + i] = invHessian[itab + j];
        }
#else
        double pxi = fac * xi[i], hdgi = fad * hessDGrad[i],
               dgi = fae * dGrad[i];
        double *pxj = &(xi[i]), *hdgj = &(hessDGrad[i]), *dgj = &(dGrad[i]);
        for (unsigned int j = i; j < dim; ++j, ++pxj, ++hdgj, ++dgj) {
          invHessian[itab + j] += pxi * *pxj - hdgi * *hdgj + dgi * *dgj;
          invHessian[j * dim + i] = invHessian[itab + j];
        }
#endif
      }
    }
    // generate the next direction to move:
    for (unsigned int i = 0; i < dim; i++) {
      unsigned int itab = i * dim;
      xi[i] = 0.0;
#if 0
      for (unsigned int j = 0; j < dim; j++) {
        xi[i] -= invHessian[itab + j] * grad[j];
      }
#else
      double &pxi = xi[i], *ivh = &(invHessian[itab]), *gj = grad;
      for (unsigned int j = 0; j < dim; ++j, ++ivh, ++gj) {
        pxi -= *ivh * *gj;
      }
#endif
    }
    if (snapshotVect && snapshotFreq && !(iter % snapshotFreq)) {
      RDKit::Snapshot s(boost::shared_array<double>(newPos), fp);
      snapshotVect->push_back(s);
      newPos = new double[dim];
    }
  }
  CLEANUP();
  return 1;
}

//! Do a BFGS minimization of a function.
/*!
  \param dim the dimensionality of the space.
  \param pos the starting position, as an array.
  \param gradTol tolerance for gradient convergence
  \param numIters used to return the number of iterations required
  \param funcVal used to return the final function value
  \param func the function to minimize
  \param gradFunc calculates the gradient of func
  \param funcTol tolerance for changes in the function value for convergence;
  currently unused (the compile-time constant TOLX is applied instead)
  \param maxIts maximum number of iterations allowed

  \return a flag indicating success (or type of failure). Possible values are:
   - 0: success
   - 1: too many iterations were required
*/
template <typename EnergyFunctor, typename GradientFunctor>
int minimize(unsigned int dim, double *pos, double gradTol,
             unsigned int &numIters, double &funcVal, EnergyFunctor func,
             GradientFunctor gradFunc, double funcTol = TOLX,
             unsigned int maxIts = MAXITS) {
  return minimize(dim, pos, gradTol, numIters, funcVal, func, gradFunc, 0,
                  nullptr, funcTol, maxIts);
}

}  // namespace BFGSOpt
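
/*
  Illustrative usage sketch, not part of the original header: minimizing
  f(x, y) = (x - 1)^2 + (y + 2)^2 with analytic gradients via the
  snapshot-free overload of BFGSOpt::minimize() above. The functor names
  QuadEnergy and QuadGradient are hypothetical.

  #include <iostream>

  struct QuadEnergy {
    double operator()(double *p) const {
      return (p[0] - 1.0) * (p[0] - 1.0) + (p[1] + 2.0) * (p[1] + 2.0);
    }
  };

  struct QuadGradient {
    // fills grad; the returned value scales the gradient convergence test
    double operator()(double *p, double *grad) const {
      grad[0] = 2.0 * (p[0] - 1.0);
      grad[1] = 2.0 * (p[1] + 2.0);
      return 1.0;
    }
  };

  int main() {
    double pos[2] = {10.0, 10.0};
    unsigned int numIters = 0;
    double funcVal = 0.0;
    int res = BFGSOpt::minimize(2, pos, 1e-4, numIters, funcVal,
                                QuadEnergy(), QuadGradient());
    // res == 0 on success; pos should now be close to (1, -2)
    std::cout << res << " " << pos[0] << " " << pos[1] << "\n";
    return res;
  }
*/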