You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
210 lines
7.2 KiB
210 lines
7.2 KiB
/* -*- c++ -*- (enables emacs c++ mode) */
|
|
/*===========================================================================
|
|
|
|
Copyright (C) 2004-2015 Yves Renard
|
|
|
|
This file is a part of GETFEM++
|
|
|
|
Getfem++ is free software; you can redistribute it and/or modify it
|
|
under the terms of the GNU Lesser General Public License as published
|
|
by the Free Software Foundation; either version 3 of the License, or
|
|
(at your option) any later version along with the GCC Runtime Library
|
|
Exception either version 3.1 or (at your option) any later version.
|
|
This program is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
|
License and GCC Runtime Library Exception for more details.
|
|
You should have received a copy of the GNU Lesser General Public License
|
|
along with this program; if not, write to the Free Software Foundation,
|
|
Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
As a special exception, you may use this file as it is a part of a free
|
|
software library without restriction. Specifically, if other files
|
|
instantiate templates or use macros or inline functions from this file,
|
|
or you compile this file and link it with other files to produce an
|
|
executable, this file does not by itself cause the resulting executable
|
|
to be covered by the GNU Lesser General Public License. This exception
|
|
does not however invalidate any other reasons why the executable file
|
|
might be covered by the GNU Lesser General Public License.
|
|
|
|
===========================================================================*/
|
|
|
|
/**@file gmm_solver_bfgs.h
|
|
@author Yves Renard <Yves.Renard@insa-lyon.fr>
|
|
@date October 14 2004.
|
|
@brief Implements BFGS (Broyden, Fletcher, Goldfarb, Shanno) algorithm.
|
|
*/
|
|
#ifndef GMM_BFGS_H
|
|
#define GMM_BFGS_H
|
|
|
|
#include "gmm_kernel.h"
|
|
#include "gmm_iter.h"
|
|
|
|
namespace gmm {
|
|
|
|
// BFGS algorithm (Broyden, Fletcher, Goldfarb, Shanno)
|
|
// Quasi Newton method for optimization problems.
|
|
// with Wolfe Line search.
|
|
|
|
|
|
// delta[k] = x[k+1] - x[k]
|
|
// gamma[k] = grad f(x[k+1]) - grad f(x[k])
|
|
// H[0] = I
|
|
// BFGS : zeta[k] = delta[k] - H[k] gamma[k]
|
|
// DFP : zeta[k] = H[k] gamma[k]
|
|
// tau[k] = gamma[k]^T zeta[k]
|
|
// rho[k] = 1 / gamma[k]^T delta[k]
|
|
// BFGS : H[k+1] = H[k] + rho[k](zeta[k] delta[k]^T + delta[k] zeta[k]^T)
|
|
// - rho[k]^2 tau[k] delta[k] delta[k]^T
|
|
// DFP : H[k+1] = H[k] + rho[k] delta[k] delta[k]^T
|
|
// - (1/tau[k])zeta[k] zeta[k]^T
|
|
|
|
// Object representing the inverse of the Hessian
|
|
template <typename VECTOR> struct bfgs_invhessian {
|
|
|
|
typedef typename linalg_traits<VECTOR>::value_type T;
|
|
typedef typename number_traits<T>::magnitude_type R;
|
|
|
|
std::vector<VECTOR> delta, gamma, zeta;
|
|
std::vector<T> tau, rho;
|
|
int version;
|
|
|
|
template<typename VEC1, typename VEC2> void hmult(const VEC1 &X, VEC2 &Y) {
|
|
copy(X, Y);
|
|
for (size_type k = 0 ; k < delta.size(); ++k) {
|
|
T xdelta = vect_sp(X, delta[k]), xzeta = vect_sp(X, zeta[k]);
|
|
switch (version) {
|
|
case 0 : // BFGS
|
|
add(scaled(zeta[k], rho[k]*xdelta), Y);
|
|
add(scaled(delta[k], rho[k]*(xzeta-rho[k]*tau[k]*xdelta)), Y);
|
|
break;
|
|
case 1 : // DFP
|
|
add(scaled(delta[k], rho[k]*xdelta), Y);
|
|
add(scaled(zeta[k], -xzeta/tau[k]), Y);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
void restart(void) {
|
|
delta.resize(0); gamma.resize(0); zeta.resize(0);
|
|
tau.resize(0); rho.resize(0);
|
|
}
|
|
|
|
template<typename VECT1, typename VECT2>
|
|
void update(const VECT1 &deltak, const VECT2 &gammak) {
|
|
T vsp = vect_sp(deltak, gammak);
|
|
if (vsp == T(0)) return;
|
|
size_type N = vect_size(deltak), k = delta.size();
|
|
VECTOR Y(N);
|
|
hmult(gammak, Y);
|
|
delta.resize(k+1); gamma.resize(k+1); zeta.resize(k+1);
|
|
tau.resize(k+1); rho.resize(k+1);
|
|
resize(delta[k], N); resize(gamma[k], N); resize(zeta[k], N);
|
|
gmm::copy(deltak, delta[k]);
|
|
gmm::copy(gammak, gamma[k]);
|
|
rho[k] = R(1) / vsp;
|
|
if (version == 0)
|
|
add(delta[k], scaled(Y, -1), zeta[k]);
|
|
else
|
|
gmm::copy(Y, zeta[k]);
|
|
tau[k] = vect_sp(gammak, zeta[k]);
|
|
}
|
|
|
|
bfgs_invhessian(int v = 0) { version = v; }
|
|
};
|
|
|
|
|
|
template <typename FUNCTION, typename DERIVATIVE, typename VECTOR>
|
|
void bfgs(const FUNCTION &f, const DERIVATIVE &grad, VECTOR &x,
|
|
int restart, iteration& iter, int version = 0,
|
|
double lambda_init=0.001, double print_norm=1.0) {
|
|
|
|
typedef typename linalg_traits<VECTOR>::value_type T;
|
|
typedef typename number_traits<T>::magnitude_type R;
|
|
|
|
bfgs_invhessian<VECTOR> invhessian(version);
|
|
VECTOR r(vect_size(x)), d(vect_size(x)), y(vect_size(x)), r2(vect_size(x));
|
|
grad(x, r);
|
|
R lambda = lambda_init, valx = f(x), valy;
|
|
int nb_restart(0);
|
|
|
|
if (iter.get_noisy() >= 1) cout << "value " << valx / print_norm << " ";
|
|
while (! iter.finished_vect(r)) {
|
|
|
|
invhessian.hmult(r, d); gmm::scale(d, T(-1));
|
|
|
|
// Wolfe Line search
|
|
R derivative = gmm::vect_sp(r, d);
|
|
R lambda_min(0), lambda_max(0), m1 = 0.27, m2 = 0.57;
|
|
bool unbounded = true, blocked = false, grad_computed = false;
|
|
|
|
for(;;) {
|
|
add(x, scaled(d, lambda), y);
|
|
valy = f(y);
|
|
if (iter.get_noisy() >= 2) {
|
|
cout.precision(15);
|
|
cout << "Wolfe line search, lambda = " << lambda
|
|
<< " value = " << valy /print_norm << endl;
|
|
// << " derivative = " << derivative
|
|
// << " lambda min = " << lambda_min << " lambda max = "
|
|
// << lambda_max << endl; getchar();
|
|
}
|
|
if (valy <= valx + m1 * lambda * derivative) {
|
|
grad(y, r2); grad_computed = true;
|
|
T derivative2 = gmm::vect_sp(r2, d);
|
|
if (derivative2 >= m2*derivative) break;
|
|
lambda_min = lambda;
|
|
}
|
|
else {
|
|
lambda_max = lambda;
|
|
unbounded = false;
|
|
}
|
|
if (unbounded) lambda *= R(10);
|
|
else lambda = (lambda_max + lambda_min) / R(2);
|
|
if (lambda == lambda_max || lambda == lambda_min) break;
|
|
// valy <= R(2)*valx replaced by
|
|
// valy <= valx + gmm::abs(derivative)*lambda_init
|
|
// for compatibility with negative values (08.24.07).
|
|
if (valy <= valx + R(2)*gmm::abs(derivative)*lambda &&
|
|
(lambda < R(lambda_init*1E-8) ||
|
|
(!unbounded && lambda_max-lambda_min < R(lambda_init*1E-8))))
|
|
{ blocked = true; lambda = lambda_init; break; }
|
|
}
|
|
|
|
// Rank two update
|
|
++iter;
|
|
if (!grad_computed) grad(y, r2);
|
|
gmm::add(scaled(r2, -1), r);
|
|
if ((iter.get_iteration() % restart) == 0 || blocked) {
|
|
if (iter.get_noisy() >= 1) cout << "Restart\n";
|
|
invhessian.restart();
|
|
if (++nb_restart > 10) {
|
|
if (iter.get_noisy() >= 1) cout << "BFGS is blocked, exiting\n";
|
|
return;
|
|
}
|
|
}
|
|
else {
|
|
invhessian.update(gmm::scaled(d,lambda), gmm::scaled(r,-1));
|
|
nb_restart = 0;
|
|
}
|
|
copy(r2, r); copy(y, x); valx = valy;
|
|
if (iter.get_noisy() >= 1)
|
|
cout << "BFGS value " << valx/print_norm << "\t";
|
|
}
|
|
|
|
}
|
|
|
|
|
|
template <typename FUNCTION, typename DERIVATIVE, typename VECTOR>
|
|
inline void dfp(const FUNCTION &f, const DERIVATIVE &grad, VECTOR &x,
|
|
int restart, iteration& iter, int version = 1) {
|
|
bfgs(f, grad, x, restart, iter, version);
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
#endif
|
|
|