You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							208 lines
						
					
					
						
							7.1 KiB
						
					
					
				
			
		
		
		
			
			
			
				
					
				
				
					
				
			
		
		
	
	
							208 lines
						
					
					
						
							7.1 KiB
						
					
					
				
								/* -*- c++ -*- (enables emacs c++ mode) */
							 | 
						|
								/*===========================================================================
							 | 
						|
								 
							 | 
						|
								 Copyright (C) 2004-2012 Yves Renard
							 | 
						|
								 
							 | 
						|
								 This file is a part of GETFEM++
							 | 
						|
								 
							 | 
						|
								 Getfem++  is  free software;  you  can  redistribute  it  and/or modify it
							 | 
						|
								 under  the  terms  of the  GNU  Lesser General Public License as published
							 | 
						|
								 by  the  Free Software Foundation;  either version 3 of the License,  or
							 | 
						|
								 (at your option) any later version along with the GCC Runtime Library
							 | 
						|
								 Exception either version 3.1 or (at your option) any later version.
							 | 
						|
								 This program  is  distributed  in  the  hope  that it will be useful,  but
							 | 
						|
								 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
							 | 
						|
								 or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
							 | 
						|
								 License and GCC Runtime Library Exception for more details.
							 | 
						|
								 You  should  have received a copy of the GNU Lesser General Public License
							 | 
						|
								 along  with  this program;  if not, write to the Free Software Foundation,
							 | 
						|
								 Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
							 | 
						|
								 
							 | 
						|
								 As a special exception, you  may use  this file  as it is a part of a free
							 | 
						|
								 software  library  without  restriction.  Specifically,  if   other  files
							 | 
						|
								 instantiate  templates  or  use macros or inline functions from this file,
							 | 
						|
								 or  you compile this  file  and  link  it  with other files  to produce an
							 | 
						|
								 executable, this file  does  not  by itself cause the resulting executable
							 | 
						|
								 to be covered  by the GNU Lesser General Public License.  This   exception
							 | 
						|
								 does not  however  invalidate  any  other  reasons why the executable file
							 | 
						|
								 might be covered by the GNU Lesser General Public License.
							 | 
						|
								 
							 | 
						|
								===========================================================================*/
							 | 
						|
								
							 | 
						|
								/**@file gmm_solver_bfgs.h 
							 | 
						|
								   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
							 | 
						|
								   @date October 14 2004.
							 | 
						|
								   @brief Implements BFGS (Broyden, Fletcher, Goldfarb, Shanno) algorithm.
							 | 
						|
								 */
							 | 
						|
								#ifndef GMM_BFGS_H
							 | 
						|
								#define GMM_BFGS_H
							 | 
						|
								
							 | 
						|
								#include "gmm_kernel.h"
							 | 
						|
								#include "gmm_iter.h"
							 | 
						|
								
							 | 
						|
								namespace gmm {
							 | 
						|
								
							 | 
						|
								  // BFGS algorithm (Broyden, Fletcher, Goldfarb, Shanno)
							 | 
						|
								  // Quasi Newton method for optimization problems.
							 | 
						|
								  // with Wolfe Line search.
							 | 
						|
								
							 | 
						|
								
							 | 
						|
								  // delta[k] = x[k+1] - x[k]
							 | 
						|
								  // gamma[k] = grad f(x[k+1]) - grad f(x[k])
							 | 
						|
								  // H[0] = I
							 | 
						|
								  // BFGS : zeta[k] = delta[k] - H[k] gamma[k]
							 | 
						|
								  // DFP  : zeta[k] = H[k] gamma[k]
							 | 
						|
								  // tau[k] = gamma[k]^T zeta[k]
							 | 
						|
								  // rho[k] = 1 / (gamma[k]^T delta[k])
							 | 
						|
								  // BFGS : H[k+1] = H[k] + rho[k](zeta[k] delta[k]^T + delta[k] zeta[k]^T)
							 | 
						|
								  //                 - rho[k]^2 tau[k] delta[k] delta[k]^T
							 | 
						|
								  // DFP  : H[k+1] = H[k] + rho[k] delta[k] delta[k]^T 
							 | 
						|
								  //                 - (1/tau[k])zeta[k] zeta[k]^T 
							 | 
						|
								
							 | 
						|
								  // Object representing the inverse of the Hessian
							 | 
						|
								  template <typename VECTOR> struct bfgs_invhessian {
							 | 
						|
								    
							 | 
						|
								    typedef typename linalg_traits<VECTOR>::value_type T;
							 | 
						|
								    typedef typename number_traits<T>::magnitude_type R;
							 | 
						|
								
							 | 
						|
								    std::vector<VECTOR> delta, gamma, zeta;
							 | 
						|
								    std::vector<T> tau, rho;
							 | 
						|
								    int version;
							 | 
						|
								
							 | 
						|
								    template<typename VEC1, typename VEC2> void hmult(const VEC1 &X, VEC2 &Y) {
							 | 
						|
								      copy(X, Y);
							 | 
						|
								      for (size_type k = 0 ; k < delta.size(); ++k) {
							 | 
						|
									T xdelta = vect_sp(X, delta[k]), xzeta = vect_sp(X, zeta[k]);
							 | 
						|
									switch (version) {
							 | 
						|
									case 0 : // BFGS
							 | 
						|
									  add(scaled(zeta[k], rho[k]*xdelta), Y);
							 | 
						|
									  add(scaled(delta[k], rho[k]*(xzeta-rho[k]*tau[k]*xdelta)), Y);
							 | 
						|
									  break;
							 | 
						|
									case 1 : // DFP
							 | 
						|
									  add(scaled(delta[k], rho[k]*xdelta), Y);
							 | 
						|
									  add(scaled(zeta[k], -xzeta/tau[k]), Y);
							 | 
						|
									  break;
							 | 
						|
									}
							 | 
						|
								      }
							 | 
						|
								    }
							 | 
						|
								    
							 | 
						|
								    void restart(void) {
							 | 
						|
								      delta.resize(0); gamma.resize(0); zeta.resize(0); 
							 | 
						|
								      tau.resize(0); rho.resize(0);
							 | 
						|
								    }
							 | 
						|
								    
							 | 
						|
								    template<typename VECT1, typename VECT2>
							 | 
						|
								    void update(const VECT1 &deltak, const VECT2 &gammak) {
							 | 
						|
								      size_type N = vect_size(deltak), k = delta.size();
							 | 
						|
								      VECTOR Y(N);
							 | 
						|
								      hmult(gammak, Y);
							 | 
						|
								      delta.resize(k+1); gamma.resize(k+1); zeta.resize(k+1);
							 | 
						|
								      tau.resize(k+1); rho.resize(k+1);
							 | 
						|
								      resize(delta[k], N); resize(gamma[k], N); resize(zeta[k], N); 
							 | 
						|
								      gmm::copy(deltak, delta[k]);
							 | 
						|
								      gmm::copy(gammak, gamma[k]);
							 | 
						|
								      rho[k] = R(1) / vect_sp(deltak, gammak);
							 | 
						|
								      if (version == 0)
							 | 
						|
									add(delta[k], scaled(Y, -1), zeta[k]);
							 | 
						|
								      else
							 | 
						|
									gmm::copy(Y, zeta[k]);
							 | 
						|
								      tau[k] = vect_sp(gammak,  zeta[k]);
							 | 
						|
								    }
							 | 
						|
								    
							 | 
						|
								    bfgs_invhessian(int v = 0) { version = v; }
							 | 
						|
								  };
							 | 
						|
								
							 | 
						|
								
							 | 
						|
								  template <typename FUNCTION, typename DERIVATIVE, typename VECTOR> 
							 | 
						|
								  void bfgs(FUNCTION f, DERIVATIVE grad, VECTOR &x,
							 | 
						|
									    int restart, iteration& iter, int version = 0,
							 | 
						|
									    double lambda_init=0.001, double print_norm=1.0) {
							 | 
						|
								
							 | 
						|
								    typedef typename linalg_traits<VECTOR>::value_type T;
							 | 
						|
								    typedef typename number_traits<T>::magnitude_type R;
							 | 
						|
								    
							 | 
						|
								    bfgs_invhessian<VECTOR> invhessian(version);
							 | 
						|
								    VECTOR r(vect_size(x)), d(vect_size(x)), y(vect_size(x)), r2(vect_size(x));
							 | 
						|
								    grad(x, r);
							 | 
						|
								    R lambda = lambda_init, valx = f(x), valy;
							 | 
						|
								    int nb_restart(0);
							 | 
						|
								    
							 | 
						|
								    if (iter.get_noisy() >= 1) cout << "value " << valx / print_norm << " ";
							 | 
						|
								    while (! iter.finished_vect(r)) {
							 | 
						|
								
							 | 
						|
								      invhessian.hmult(r, d); gmm::scale(d, T(-1));
							 | 
						|
								      
							 | 
						|
								      // Wolfe Line search
							 | 
						|
								      R derivative = gmm::vect_sp(r, d);    
							 | 
						|
								      R lambda_min(0), lambda_max(0), m1 = 0.27, m2 = 0.57;
							 | 
						|
								      bool unbounded = true, blocked = false, grad_computed = false;
							 | 
						|
								      
							 | 
						|
								      for(;;) {
							 | 
						|
									add(x, scaled(d, lambda), y);
							 | 
						|
									valy = f(y);
							 | 
						|
									if (iter.get_noisy() >= 2) {
							 | 
						|
									  cout.precision(15);
							 | 
						|
									  cout << "Wolfe line search, lambda = " << lambda 
							 | 
						|
								 	       << " value = " << valy /print_norm << endl;
							 | 
						|
								// 	       << " derivative = " << derivative
							 | 
						|
								// 	       << " lambda min = " << lambda_min << " lambda max = "
							 | 
						|
								// 	       << lambda_max << endl; getchar();
							 | 
						|
									}
							 | 
						|
									if (valy <= valx + m1 * lambda * derivative) {
							 | 
						|
									  grad(y, r2); grad_computed = true;
							 | 
						|
									  T derivative2 = gmm::vect_sp(r2, d);
							 | 
						|
									  if (derivative2 >= m2*derivative) break;
							 | 
						|
									  lambda_min = lambda;
							 | 
						|
									}
							 | 
						|
									else {
							 | 
						|
									  lambda_max = lambda;
							 | 
						|
									  unbounded = false;
							 | 
						|
									}
							 | 
						|
									if (unbounded) lambda *= R(10);
							 | 
						|
									else  lambda = (lambda_max + lambda_min) / R(2);
							 | 
						|
									if (lambda == lambda_max || lambda == lambda_min) break;
							 | 
						|
									// valy <= R(2)*valx replaced by
							 | 
						|
									// valy <= valx + gmm::abs(derivative)*lambda_init
							 | 
						|
									// for compatibility with negative values (08.24.07).
							 | 
						|
									if (valy <= valx + R(2)*gmm::abs(derivative)*lambda &&
							 | 
						|
									    (lambda < R(lambda_init*1E-8) ||
							 | 
						|
									     (!unbounded && lambda_max-lambda_min < R(lambda_init*1E-8))))
							 | 
						|
									{ blocked = true; lambda = lambda_init; break; }
							 | 
						|
								      }
							 | 
						|
								
							 | 
						|
								      // Rank two update
							 | 
						|
								      ++iter;
							 | 
						|
								      if (!grad_computed) grad(y, r2);
							 | 
						|
								      gmm::add(scaled(r2, -1), r);
							 | 
						|
								      if (iter.get_iteration() % restart == 0 || blocked) { 
							 | 
						|
									if (iter.get_noisy() >= 1) cout << "Restart\n";
							 | 
						|
									invhessian.restart();
							 | 
						|
									if (++nb_restart > 10) {
							 | 
						|
									  if (iter.get_noisy() >= 1) cout << "BFGS is blocked, exiting\n";
							 | 
						|
									  return;
							 | 
						|
									}
							 | 
						|
								      }
							 | 
						|
								      else {
							 | 
						|
									invhessian.update(gmm::scaled(d,lambda), gmm::scaled(r,-1));
							 | 
						|
									nb_restart = 0;
							 | 
						|
								      }
							 | 
						|
								      copy(r2, r); copy(y, x); valx = valy;
							 | 
						|
								      if (iter.get_noisy() >= 1)
							 | 
						|
									cout << "BFGS value " << valx/print_norm << "\t";
							 | 
						|
								    }
							 | 
						|
								
							 | 
						|
								  }
							 | 
						|
								
							 | 
						|
								
							 | 
						|
								  template <typename FUNCTION, typename DERIVATIVE, typename VECTOR> 
							 | 
						|
								  inline void dfp(FUNCTION f, DERIVATIVE grad, VECTOR &x,
							 | 
						|
									    int restart, iteration& iter, int version = 1) {
							 | 
						|
								    bfgs(f, grad, x, restart, iter, version);
							 | 
						|
								
							 | 
						|
								  }
							 | 
						|
								
							 | 
						|
								
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								#endif 
							 | 
						|
								
							 |