You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

210 lines
7.2 KiB

  1. /* -*- c++ -*- (enables emacs c++ mode) */
  2. /*===========================================================================
  3. Copyright (C) 2004-2015 Yves Renard
  4. This file is a part of GETFEM++
  5. Getfem++ is free software; you can redistribute it and/or modify it
  6. under the terms of the GNU Lesser General Public License as published
  7. by the Free Software Foundation; either version 3 of the License, or
  8. (at your option) any later version along with the GCC Runtime Library
  9. Exception either version 3.1 or (at your option) any later version.
  10. This program is distributed in the hope that it will be useful, but
  11. WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  12. or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
  13. License and GCC Runtime Library Exception for more details.
  14. You should have received a copy of the GNU Lesser General Public License
  15. along with this program; if not, write to the Free Software Foundation,
  16. Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
  17. As a special exception, you may use this file as it is a part of a free
  18. software library without restriction. Specifically, if other files
  19. instantiate templates or use macros or inline functions from this file,
  20. or you compile this file and link it with other files to produce an
  21. executable, this file does not by itself cause the resulting executable
  22. to be covered by the GNU Lesser General Public License. This exception
  23. does not however invalidate any other reasons why the executable file
  24. might be covered by the GNU Lesser General Public License.
  25. ===========================================================================*/
  26. /**@file gmm_solver_bfgs.h
  27. @author Yves Renard <Yves.Renard@insa-lyon.fr>
  28. @date October 14 2004.
  29. @brief Implements BFGS (Broyden, Fletcher, Goldfarb, Shanno) algorithm.
  30. */
  31. #ifndef GMM_BFGS_H
  32. #define GMM_BFGS_H
  33. #include "gmm_kernel.h"
  34. #include "gmm_iter.h"
  35. namespace gmm {
  36. // BFGS algorithm (Broyden, Fletcher, Goldfarb, Shanno)
  37. // Quasi Newton method for optimization problems.
  38. // with Wolfe Line search.
  39. // delta[k] = x[k+1] - x[k]
  40. // gamma[k] = grad f(x[k+1]) - grad f(x[k])
  41. // H[0] = I
  42. // BFGS : zeta[k] = delta[k] - H[k] gamma[k]
  43. // DFP : zeta[k] = H[k] gamma[k]
  44. // tau[k] = gamma[k]^T zeta[k]
  45. // rho[k] = 1 / gamma[k]^T delta[k]
  46. // BFGS : H[k+1] = H[k] + rho[k](zeta[k] delta[k]^T + delta[k] zeta[k]^T)
  47. // - rho[k]^2 tau[k] delta[k] delta[k]^T
  48. // DFP : H[k+1] = H[k] + rho[k] delta[k] delta[k]^T
  49. // - (1/tau[k])zeta[k] zeta[k]^T
  50. // Object representing the inverse of the Hessian
  51. template <typename VECTOR> struct bfgs_invhessian {
  52. typedef typename linalg_traits<VECTOR>::value_type T;
  53. typedef typename number_traits<T>::magnitude_type R;
  54. std::vector<VECTOR> delta, gamma, zeta;
  55. std::vector<T> tau, rho;
  56. int version;
  57. template<typename VEC1, typename VEC2> void hmult(const VEC1 &X, VEC2 &Y) {
  58. copy(X, Y);
  59. for (size_type k = 0 ; k < delta.size(); ++k) {
  60. T xdelta = vect_sp(X, delta[k]), xzeta = vect_sp(X, zeta[k]);
  61. switch (version) {
  62. case 0 : // BFGS
  63. add(scaled(zeta[k], rho[k]*xdelta), Y);
  64. add(scaled(delta[k], rho[k]*(xzeta-rho[k]*tau[k]*xdelta)), Y);
  65. break;
  66. case 1 : // DFP
  67. add(scaled(delta[k], rho[k]*xdelta), Y);
  68. add(scaled(zeta[k], -xzeta/tau[k]), Y);
  69. break;
  70. }
  71. }
  72. }
  73. void restart(void) {
  74. delta.resize(0); gamma.resize(0); zeta.resize(0);
  75. tau.resize(0); rho.resize(0);
  76. }
  77. template<typename VECT1, typename VECT2>
  78. void update(const VECT1 &deltak, const VECT2 &gammak) {
  79. T vsp = vect_sp(deltak, gammak);
  80. if (vsp == T(0)) return;
  81. size_type N = vect_size(deltak), k = delta.size();
  82. VECTOR Y(N);
  83. hmult(gammak, Y);
  84. delta.resize(k+1); gamma.resize(k+1); zeta.resize(k+1);
  85. tau.resize(k+1); rho.resize(k+1);
  86. resize(delta[k], N); resize(gamma[k], N); resize(zeta[k], N);
  87. gmm::copy(deltak, delta[k]);
  88. gmm::copy(gammak, gamma[k]);
  89. rho[k] = R(1) / vsp;
  90. if (version == 0)
  91. add(delta[k], scaled(Y, -1), zeta[k]);
  92. else
  93. gmm::copy(Y, zeta[k]);
  94. tau[k] = vect_sp(gammak, zeta[k]);
  95. }
  96. bfgs_invhessian(int v = 0) { version = v; }
  97. };
  98. template <typename FUNCTION, typename DERIVATIVE, typename VECTOR>
  99. void bfgs(const FUNCTION &f, const DERIVATIVE &grad, VECTOR &x,
  100. int restart, iteration& iter, int version = 0,
  101. double lambda_init=0.001, double print_norm=1.0) {
  102. typedef typename linalg_traits<VECTOR>::value_type T;
  103. typedef typename number_traits<T>::magnitude_type R;
  104. bfgs_invhessian<VECTOR> invhessian(version);
  105. VECTOR r(vect_size(x)), d(vect_size(x)), y(vect_size(x)), r2(vect_size(x));
  106. grad(x, r);
  107. R lambda = lambda_init, valx = f(x), valy;
  108. int nb_restart(0);
  109. if (iter.get_noisy() >= 1) cout << "value " << valx / print_norm << " ";
  110. while (! iter.finished_vect(r)) {
  111. invhessian.hmult(r, d); gmm::scale(d, T(-1));
  112. // Wolfe Line search
  113. R derivative = gmm::vect_sp(r, d);
  114. R lambda_min(0), lambda_max(0), m1 = 0.27, m2 = 0.57;
  115. bool unbounded = true, blocked = false, grad_computed = false;
  116. for(;;) {
  117. add(x, scaled(d, lambda), y);
  118. valy = f(y);
  119. if (iter.get_noisy() >= 2) {
  120. cout.precision(15);
  121. cout << "Wolfe line search, lambda = " << lambda
  122. << " value = " << valy /print_norm << endl;
  123. // << " derivative = " << derivative
  124. // << " lambda min = " << lambda_min << " lambda max = "
  125. // << lambda_max << endl; getchar();
  126. }
  127. if (valy <= valx + m1 * lambda * derivative) {
  128. grad(y, r2); grad_computed = true;
  129. T derivative2 = gmm::vect_sp(r2, d);
  130. if (derivative2 >= m2*derivative) break;
  131. lambda_min = lambda;
  132. }
  133. else {
  134. lambda_max = lambda;
  135. unbounded = false;
  136. }
  137. if (unbounded) lambda *= R(10);
  138. else lambda = (lambda_max + lambda_min) / R(2);
  139. if (lambda == lambda_max || lambda == lambda_min) break;
  140. // valy <= R(2)*valx replaced by
  141. // valy <= valx + gmm::abs(derivative)*lambda_init
  142. // for compatibility with negative values (08.24.07).
  143. if (valy <= valx + R(2)*gmm::abs(derivative)*lambda &&
  144. (lambda < R(lambda_init*1E-8) ||
  145. (!unbounded && lambda_max-lambda_min < R(lambda_init*1E-8))))
  146. { blocked = true; lambda = lambda_init; break; }
  147. }
  148. // Rank two update
  149. ++iter;
  150. if (!grad_computed) grad(y, r2);
  151. gmm::add(scaled(r2, -1), r);
  152. if ((iter.get_iteration() % restart) == 0 || blocked) {
  153. if (iter.get_noisy() >= 1) cout << "Restart\n";
  154. invhessian.restart();
  155. if (++nb_restart > 10) {
  156. if (iter.get_noisy() >= 1) cout << "BFGS is blocked, exiting\n";
  157. return;
  158. }
  159. }
  160. else {
  161. invhessian.update(gmm::scaled(d,lambda), gmm::scaled(r,-1));
  162. nb_restart = 0;
  163. }
  164. copy(r2, r); copy(y, x); valx = valy;
  165. if (iter.get_noisy() >= 1)
  166. cout << "BFGS value " << valx/print_norm << "\t";
  167. }
  168. }
  169. template <typename FUNCTION, typename DERIVATIVE, typename VECTOR>
  170. inline void dfp(const FUNCTION &f, const DERIVATIVE &grad, VECTOR &x,
  171. int restart, iteration& iter, int version = 1) {
  172. bfgs(f, grad, x, restart, iter, version);
  173. }
  174. }
  175. #endif