conjugate_gradient_only.h
/*
    This file is part of B-cubed.

    Copyright (C) 2009, 2010, 2011, Edward Rosten and Susan Cox

    B-cubed is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 3.0 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this program.  If not, see <http://www.gnu.org/licenses/>
*/

#ifndef CONJUGATE_GRADIENT_ONLY
#define CONJUGATE_GRADIENT_ONLY
#include <TooN/TooN.h>
#include <utility>
#include <cstdlib>
#include <cmath>     //for std::sqrt, std::abs and HUGE_VAL
#include <algorithm> //for std::min

///Class for performing optimization with Conjugate Gradient, where only the derivatives are available.
///@ingroup gStorm
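///
///A minimal usage sketch; the quadratic gradient functor below is
///illustrative only, not part of the library:
///@code
/// //Gradient of f(x) = |x|^2 / 2, i.e. the gradient is x itself
/// struct QuadraticGradient
/// {
///     TooN::Vector<2> operator()(const TooN::Vector<2>& x) const
///     {
///         return x;
///     }
/// };
///
/// QuadraticGradient deriv;
/// TooN::Vector<2> start    = TooN::makeVector(1.0, -2.0);
/// TooN::Vector<2> max_move = TooN::makeVector(0.1, 0.1); //delta_max per axis
/// ConjugateGradientOnly<2> cg(start, deriv, max_move);
/// while(cg.iterate(deriv))
/// {}
/// //cg.x now holds the minimiser, approximately (0, 0)
///@endcode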
template<int Size=-1, class Precision=double> struct ConjugateGradientOnly
{
    const int size;      ///< Dimensionality of the space.
    TooN::Vector<Size> g;      ///< Gradient vector used by the next call to iterate()
    TooN::Vector<Size> new_g;  ///< The new gradient set at the end of iterate()
    TooN::Vector<Size> h;      ///< Conjugate vector to be searched along in the next call to iterate()
    TooN::Vector<Size> minus_h;///< Negative of h; stored because it must be passed to a function taking a reference (so it cannot be a temporary)
    TooN::Vector<Size> old_g;  ///< Gradient vector used to compute \f$h\f$ in the last call to iterate()
    TooN::Vector<Size> old_h;  ///< Conjugate vector searched along in the last call to iterate()
    TooN::Vector<Size> x;      ///< Current position (best known point)
    TooN::Vector<Size> old_x;  ///< Previous point (not set at construction)

    Precision tolerance; ///< Tolerance used to determine if the optimization is complete. Defaults to square root of machine precision.
    int       max_iterations; ///< Maximum number of iterations. Defaults to \c size*100.

    TooN::Vector<Size> delta_max; ///< Maximum distance to travel along each axis in the line search

    Precision linesearch_tolerance; ///< Tolerance used to determine if the linesearch is complete. Defaults to square root of machine precision.
    Precision linesearch_epsilon;   ///< Additive term in tolerance to prevent excessive iterations if \f$x_\mathrm{optimal} = 0\f$. Known as \c ZEPS in Numerical Recipes. Defaults to 1e-20.


    int iterations; ///< Number of iterations performed

    ///Initialize the ConjugateGradientOnly class with sensible values.
    ///@param start Starting point, \e x
    ///@param deriv Functor to compute \f$\nabla f(x)\f$
    ///@param d Maximum allowed movement (delta) in any dimension
    template<class Deriv> ConjugateGradientOnly(const TooN::Vector<Size>& start, const Deriv& deriv, const TooN::Vector<Size>& d)
    : size(start.size()),
      g(size),new_g(size),h(size),minus_h(size),old_g(size),old_h(size),x(start),old_x(size),delta_max(d)
    {
        init(start, deriv);
    }

    ///Initialise given a starting point and the derivative at the starting point
    ///@param start starting point
    ///@param deriv derivative at starting point
    template<int S, class P, class B> void init(const TooN::Vector<Size>& start, const TooN::Vector<S, P, B>& deriv)
    {
        using std::numeric_limits;
        using std::sqrt;
        x = start;

        //Start with the conjugate direction aligned with
        //the gradient
        g = deriv;
        h = g;
        minus_h=-h;

        tolerance = sqrt(numeric_limits<Precision>::epsilon());
        max_iterations = size * 100;

        linesearch_tolerance = sqrt(numeric_limits<Precision>::epsilon());
        linesearch_epsilon = 1e-20;

        iterations=0;
    }

    ///Initialise given a starting point and a functor for computing derivatives
    ///@param start starting point
    ///@param deriv derivative computing functor
    template<class Deriv> void init(const TooN::Vector<Size>& start, const Deriv& deriv)
    {
        init(start, deriv(start));
    }

    ///Perform a linesearch from the current point (x) along the current
    ///conjugate vector (h). The linesearch uses only derivatives, not
    ///function values. You probably do not want to call this function
    ///directly; see iterate() instead.
    ///This function updates:
    /// - x
    /// - new_g
    /// - iterations
    /// Note that the conjugate direction and gradient are not updated.
    /// If the search direction points uphill (indicating an error in the
    /// gradient computation), then essentially a zero-sized step is taken.
    /// @param deriv Functor returning the derivative value at a given point.
    template<class Deriv> void find_next_point(const Deriv& deriv)
    {
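        //The linesearch seeks a zero of the directional derivative
        //phi'(s) = deriv(x + s * minus_h) . minus_h: the step is first
        //capped so that no axis moves further than delta_max, then a sign
        //change of phi' is bracketed and refined by false position and
        //bisection.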
        iterations++;
        using std::abs;
        using std::min;
        //The conjugate search direction is minus_h.
        //f1 = g . minus_h is the directional derivative along the search
        //direction; it should be negative for a descent direction.
        new_g = g;
        Precision f1 = g * minus_h;
        //If f1 is positive, then the conjugate vector points against the
        //newly computed gradient. This can only happen if there is an error
        //in the computation of the gradient (eg we're using a stochastic
        //method): not much to do here except stop.
        if(f1 > 0)
        {
            //Return, so the caller can try searching in a direction
            //conjugate to the current one.
            return;
        }
        //What step size takes us up to the maximum allowed length?
        //A step s moves axis i by s*h[i], so s is capped at
        //min_i |delta_max[i]/h[i]|.
        Precision max_step = HUGE_VAL;
        for(int i=0; i < minus_h.size(); i++)
            max_step = min(max_step, abs(delta_max[i]/h[i]));

        //Taking the maximum step size may result in NaNs.
        //Try the maximum step size, and successively reduce it.
        Precision full_max_step = max_step;

        for(;;)
        {
            new_g = deriv(x + max_step * minus_h);

            if(!TooN::isnan(new_g))
                break;
            else
                max_step /=2;

            //If the step size gets too small then
            //return, as we can't really do anything
            if(max_step < full_max_step * linesearch_tolerance)
                return;
        }

        Precision f2 = new_g * minus_h;

        //If f2 is still negative, then the largest allowed step is not
        //large enough to reach the minimum. So, take a full step, then
        //keep going in the same direction.
        if(f2 < 0)
        {
            //Take a full step
            x += max_step * minus_h;
            return;
        }

        //Now s1 = 0 and s2 = max_step bracket a root of the directional
        //derivative (f1 < 0 <= f2). Find the root using false position
        //and bisection.
        Precision s1 = 0;
        Precision s2 = max_step;
        Precision s_new = max_step;

        int updates[2] = {0,0};
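
        //updates[] records which end of the bracket (1 = lower, 2 = upper)
        //the recorded steps moved. False position is attempted only when
        //the two recorded updates moved different ends; otherwise the
        //interval is bisected, which guarantees that the bracket shrinks.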

        while(abs(s1 - s2) > abs(s1 + s2) * linesearch_tolerance + linesearch_epsilon)
        {
            if(updates[0] != updates[1] && updates[0] != 0)
            {
                //Compute the new point using false position.
                s_new = s1 + f1 * (s2 - s1)/(f1 - f2);
                new_g = deriv(x + s_new * minus_h);
                Precision f_new = new_g*minus_h;

                updates[0] = updates[1];

                if(f_new == 0)
                    break;

                if(f_new < 0)
                {
                    s1 = s_new;
                    f1 = f_new;
                    updates[1] = 1;
                }
                else
                {
                    s2 = s_new;
                    f2 = f_new;
                    updates[1] = 2;
                }
            }
            else
            {
                //Compute the new point using bisection.
                s_new = (s1 + s2) / 2;

                new_g = deriv(x + s_new * minus_h);
                Precision f_new = new_g*minus_h;

                if(f_new < 0)
                {
                    s1 = s_new;
                    f1 = f_new;
                    updates[0] = 1;
                }
                else
                {
                    s2 = s_new;
                    f2 = f_new;
                    updates[0] = 2;
                }
            }
        }

        //Update the current position.
        //The most recent gradient computation (new_g) is at s_new.
        x += minus_h * s_new;
    }

    ///Check to see if iteration should stop. You probably do not want to use
    ///this function; see iterate() instead. This function updates nothing.
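    ///The criterion is \f$\mathrm{iterations} > \mathrm{max\_iterations}\f$
    ///or \f$\|\nabla f\|_\infty < \mathrm{tolerance}\f$.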
    bool finished()
    {
        return iterations > max_iterations || norm_inf(new_g) < tolerance;
    }

    ///After an iteration, update the gradient and conjugate vector using the
    ///Polak-Ribiere equations.
    ///This function updates:
    ///- g
    ///- old_g
    ///- h
    ///- old_h
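    ///
    ///Specifically, with \f$g\f$ the new gradient and \f$g_{old}\f$, \f$h_{old}\f$
    ///the previous gradient and conjugate vector, the update is
    ///\f[ \gamma = \frac{g\cdot g - g_{old}\cdot g}{g_{old}\cdot g_{old}}, \qquad h = g + \gamma\, h_{old}. \f]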
    ///@param grad The derivatives of the function at \e x
    void update_vectors_PR(const TooN::Vector<Size>& grad)
    {
        //Update the gradient and conjugate directions
        old_g = g;
        old_h = h;

        g = grad;
        Precision gamma = (g * g - old_g*g)/(old_g * old_g);
        h = g + gamma * old_h;
        minus_h=-h;
    }

    ///Use this function to iterate over the optimization. Note that after
    ///iterate returns false, g, h, old_g and old_h will not have been
    ///updated.
    ///This function updates:
    /// - x
    /// - new_g
    /// - iterations
    /// - g*
    /// - old_g*
    /// - h*
    /// - old_h*
    /// *'d variables are not updated on the last iteration.
    ///@param deriv Functor to compute derivatives at the specified point.
    ///@return Whether to continue.
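    ///
    ///A typical driver loop (assuming \c cg and \c deriv constructed as in
    ///the class example above):
    ///@code
    /// while(cg.iterate(deriv))
    /// {
    ///     //cg.x holds the current best point
    /// }
    ///@endcode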
    template<class Deriv> bool iterate(const Deriv& deriv)
    {
        find_next_point(deriv);

        if(!finished())
        {
            update_vectors_PR(new_g);
            return true;
        }
        else
            return false;
    }
};


#endif