MoochoPack : Framework for Large-Scale Optimization Algorithms Version of the Day
MoochoPack_BFGSUpdate_Strategy.cpp
00001 // @HEADER
00002 // ***********************************************************************
00003 // 
00004 // Moocho: Multi-functional Object-Oriented arCHitecture for Optimization
00005 //                  Copyright (2003) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Roscoe A. Bartlett (rabartl@sandia.gov) 
00038 // 
00039 // ***********************************************************************
00040 // @HEADER
00041 
00042 #include <limits>
00043 
00044 #include "MoochoPack_BFGSUpdate_Strategy.hpp"
00045 #include "MoochoPack_Exceptions.hpp"
00046 #include "AbstractLinAlgPack_TestMatrixSymSecant.hpp"
00047 #include "AbstractLinAlgPack_MatrixSymSecant.hpp"
00048 #include "AbstractLinAlgPack_MatrixSymOp.hpp"
00049 #include "AbstractLinAlgPack_MatrixOpOut.hpp"
00050 #include "AbstractLinAlgPack_VectorSpace.hpp"
00051 #include "AbstractLinAlgPack_VectorMutable.hpp"
00052 #include "AbstractLinAlgPack_VectorOut.hpp"
00053 #include "AbstractLinAlgPack_VectorStdOps.hpp"
00054 #include "AbstractLinAlgPack_LinAlgOpPack.hpp"
00055 #include "Teuchos_dyn_cast.hpp"
00056 #include "Teuchos_Assert.hpp"
00057 
00058 namespace MoochoPack {
00059 
00060 BFGSUpdate_Strategy::BFGSUpdate_Strategy(
00061   bool               rescale_init_identity
00062   ,bool              use_dampening
00063   ,ESecantTesting    secant_testing
00064   ,value_type        secant_warning_tol
00065   ,value_type        secant_error_tol
00066   )
00067   :rescale_init_identity_(rescale_init_identity)
00068   ,use_dampening_(use_dampening)
00069   ,secant_testing_(secant_testing)
00070   ,secant_warning_tol_(secant_warning_tol)
00071   ,secant_error_tol_(secant_error_tol)
00072 {}
00073 
00074 void BFGSUpdate_Strategy::perform_update(
00075   VectorMutable            *s_bfgs
00076   ,VectorMutable           *y_bfgs
00077   ,bool                    first_update
00078   ,std::ostream            &out
00079   ,EJournalOutputLevel     olevel
00080   ,bool                    check_results
00081   ,MatrixSymOp             *B
00082   ,QuasiNewtonStats        *quasi_newton_stats 
00083   )
00084 {
00085   namespace rcp = MemMngPack;
00086   using Teuchos::dyn_cast;
00087   using AbstractLinAlgPack::dot;
00088   using AbstractLinAlgPack::Vt_S;
00089   using AbstractLinAlgPack::Vp_StV;
00090   using LinAlgOpPack::Vp_V;
00091   using LinAlgOpPack::V_StV;
00092   using LinAlgOpPack::V_VpV;
00093   using LinAlgOpPack::V_VmV;
00094   using LinAlgOpPack::V_MtV;
00095 
00096   const value_type
00097     NOT_CALCULATED = std::numeric_limits<value_type>::max();
00098   value_type
00099     sTy = NOT_CALCULATED,
00100     yTy = NOT_CALCULATED;
00101   bool used_dampening = false;
00102 
00103   // Make sure the update is defined!
00104   if( sTy == NOT_CALCULATED )
00105     sTy = dot( *s_bfgs, *y_bfgs );
00106   if( sTy == 0 ) {
00107     if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS ) {
00108       out << "\n(y'*s) == 0, skipping the update ...\n";
00109     }
00110     quasi_newton_stats->set_updated_stats( QuasiNewtonStats::INDEF_SKIPED );
00111     return;
00112   }
00113 
00114   MatrixSymSecant
00115       &B_updatable = dyn_cast<MatrixSymSecant>(*B);
00116 
00117   // /////////////////////////////////////////////////////////////
00118   // Consider rescaling the initial identity hessian before
00119   // the update.
00120   // 
00121   // This was taken from Nocedal & Wright, 1999, p. 200 
00122   // 
00123   // Bo = (y'*y)/(y'*s) * I
00124   // 
00125   if( first_update && rescale_init_identity() ) {
00126     if( sTy == NOT_CALCULATED )
00127       sTy = dot( *s_bfgs, *y_bfgs );
00128     if( yTy == NOT_CALCULATED )
00129       yTy = dot( *y_bfgs, *y_bfgs );
00130     const value_type
00131       Iscale = yTy/sTy;
00132     const value_type
00133       Iscale_too_small = 1e-5;  // ToDo: Make this adjustable
00134     if( Iscale >= Iscale_too_small ) {  
00135       if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS ) {
00136         out << "\nRescaling the initial identity matrix before the update as:\n"
00137           << "Iscale = (y'*y)/(y'*s) = ("<<yTy<<")/("<<sTy<<") = "<<Iscale<<"\n"
00138           << "B =  Iscale * eye(n-r) ...\n";
00139       }
00140       B_updatable.init_identity( y_bfgs->space(), Iscale );
00141       if( (int)olevel >= (int)PRINT_ITERATION_QUANTITIES ) {
00142         out << "\nB after rescaling = \n" << *B;
00143       }
00144     }
00145     else {
00146       if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS ) {
00147         out << "\nSkipping rescaling of the initial identity matrix since:\n"
00148           << "Iscale = (y'*y)/(y'*s) = ("<<yTy<<")/("<<sTy<<") = "<<Iscale
00149           << " < Iscale_too_small = " << Iscale_too_small << std::endl;
00150       }
00151     }
00152   }
00153 
00154   // ////////////////////////////////////////////////////
00155   // Modify the s_bfgs and y_bfgs vectors for dampening?
00156   VectorSpace::vec_mut_ptr_t
00157     Bs = Teuchos::null;
00158   if( use_dampening() ) {
00159     if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS )
00160     {
00161       out
00162         << "\nConsidering the dampened update ...\n";
00163     }
00164     // Bs = Bm1 * s_bfgs
00165     Bs = y_bfgs->space().create_member();
00166     V_MtV( Bs.get(), *B, BLAS_Cpp::no_trans, *s_bfgs );
00167     // sTy = s_bfgs' * y_bfgs
00168     if( sTy == NOT_CALCULATED )
00169       sTy = dot( *s_bfgs, *y_bfgs );
00170     // sTBs = s_bfgs' * Bs
00171     const value_type sTBs = dot( *s_bfgs, *Bs );
00172     // Select dampening parameter theta
00173     const value_type
00174       theta = ( sTy >= 0.2 * sTBs )
00175       ? 1.0
00176       : (0.8 * sTBs ) / ( sTBs - sTy );
00177     if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS )
00178     {
00179       out
00180         << "\ns_bfgs'*y_bfgs = " << sTy
00181         << ( theta == 1.0 ? " >= " : " < " )
00182         << " s_bfgs' * B * s_bfgs = " << sTBs << std::endl;
00183     }
00184     if( theta == 1.0 ) {
00185       if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS )
00186       {
00187         out
00188           << "Perform the undamped update ...\n";
00189       }
00190     }
00191     else {
00192       // y_bfgs = theta*y_bfgs + (1-theta)*B*s_bfgs
00193       Vt_S( y_bfgs, theta );
00194       Vp_StV( y_bfgs, (1.0-theta), *Bs );
00195 
00196       if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS )
00197       {
00198         out
00199           << "Dampen the update ...\n"
00200           << "theta = " << theta << std::endl
00201           << "y_bfgs = theta*y_bfgs + (1-theta)*B*s_bfgs ...\n"
00202           << "||y_bfgs||inf = " << y_bfgs->norm_inf() << std::endl;
00203       }
00204 
00205       if( (int)olevel >= (int)PRINT_VECTORS )
00206       {
00207         out
00208           << "y_bfgs =\n" << *y_bfgs;
00209       }
00210 
00211       used_dampening = true;
00212     }
00213   }
00214 
00215   // Perform the update if it is defined (s_bfgs' * y_bfgs > 0.0)
00216         
00217   VectorSpace::vec_mut_ptr_t
00218     s_bfgs_save = Teuchos::null,
00219     y_bfgs_save = Teuchos::null;
00220   if( check_results ) {
00221     // Save s and y since they may be overwritten in the update.
00222     s_bfgs_save = s_bfgs->clone();
00223     y_bfgs_save = y_bfgs->clone();
00224   }
00225   try {
00226     B_updatable.secant_update(
00227       s_bfgs
00228       ,y_bfgs
00229       ,Bs.get()
00230       );
00231   }
00232   catch( const MatrixSymSecant::UpdateFailedException& excpt ) {
00233     if( (int)olevel >= (int)PRINT_BASIC_ALGORITHM_INFO ) {
00234       out << "\nThe factorization of B failed in a major way!  Rethrow!\n";
00235     }
00236     throw;
00237   }         
00238   catch( const MatrixSymSecant::UpdateSkippedException& excpt ) {
00239     if( (int)olevel >= (int)PRINT_BASIC_ALGORITHM_INFO ) {
00240       out << excpt.what() << std::endl
00241         << "\nSkipping BFGS update.  B = B ...\n";
00242     }
00243     quasi_newton_stats->set_updated_stats(
00244       QuasiNewtonStats::INDEF_SKIPED );
00245     return;
00246   }         
00247     
00248   if( (int)olevel >= (int)PRINT_ITERATION_QUANTITIES ) {
00249     out << "\nB after the BFGS update = \n" << *B;
00250   }
00251   
00252   if( ( check_results && secant_testing() == SECANT_TEST_DEFAULT )
00253     || secant_testing() == SECANT_TEST_ALWAYS )
00254   {
00255     const bool result =
00256       AbstractLinAlgPack::TestMatrixSymSecant(
00257         *B, *s_bfgs_save, *y_bfgs_save
00258         , secant_warning_tol(), secant_error_tol()
00259         , (int)olevel >= (int)PRINT_VECTORS
00260         , (int)olevel >  (int)PRINT_NOTHING ? &out : NULL
00261         , (int)olevel >= (int)PRINT_ALGORITHM_STEPS
00262         );
00263     if( !result ) {
00264       const char
00265         msg[] = "Error, the secant property for the BFGS update failed\n"
00266         "Stopping the algorithm ...\n";
00267       out << msg;
00268       TEUCHOS_TEST_FOR_EXCEPTION(
00269         true, TestFailed
00270         ," BFGSUpdate_Strategy::perform_update(...) : " << msg );
00271     }
00272   }
00273   
00274   quasi_newton_stats->set_updated_stats(
00275     used_dampening
00276     ? QuasiNewtonStats::DAMPENED_UPDATED
00277     : QuasiNewtonStats::UPDATED );
00278 }
00279 
00280 void BFGSUpdate_Strategy::print_step( std::ostream& out, const std::string& L ) const
00281 {
00282   out
00283     << L << "if use_dampening == true then\n"
00284     << L << "    if s'*y >= 0.2 * s'*B*s then\n"
00285     << L << "        theta = 1.0\n"
00286     << L << "    else\n"
00287     << L << "        theta = 0.8*s'*B*s / (s'*B*s - s'*y)\n"
00288     << L << "    end\n"
00289     << L << "    y = theta*y + (1-theta)*B*s\n"
00290     << L << "end\n"
00291     << L << "if first_update && rescale_init_identity and y'*s is sufficently positive then\n"
00292     << L << "    B = |(y'*y)/(y'*s)| * eye(size(s))\n"
00293     << L << "end\n"
00294     << L << "if s'*y is sufficently positive then\n"
00295     << L << "    *** Peform BFGS update\n"
00296     << L << "    (B, s, y ) -> B (through MatrixSymSecantUpdate interface)\n"
00297     << L << "    if ( check_results && secant_testing == SECANT_TEST_DEFAULT )\n"
00298     << L << "    or ( secant_testing == SECANT_TEST_ALWAYS ) then\n"
00299     << L << "        if B*s != y then\n"
00300     << L << "            *** The secant condition does not check out\n"
00301     << L << "            Throw TestFailed!\n"
00302     << L << "        end\n"
00303     << L << "    end\n"
00304     << L << "end\n"
00305     ;
00306 }
00307 
00308 }  // end namespace MoochoPack
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends