/*
	This file is part of B-cubed.

	Copyright (C) 2009, 2010, 2011, Edward Rosten and Susan Cox

	B-cubed is free software; you can redistribute it and/or
	modify it under the terms of the GNU Lesser General Public
	License as published by the Free Software Foundation; either
	version 3.0 of the License, or (at your option) any later version.

	This library is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	Lesser General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program. If not, see <http://www.gnu.org/licenses/>
*/

#ifndef CONJUGATE_GRADIENT_ONLY
#define CONJUGATE_GRADIENT_ONLY
#include <TooN/TooN.h>
#include <utility>
#include <cstdlib>
#include <cmath>
#include <algorithm>
#include <limits>

///Class for performing optimization with Conjugate Gradient, where only the derivatives are available.
///@ingroup gStorm
template<int Size=-1, class Precision=double> struct ConjugateGradientOnly
{
	const int size;                 ///< Dimensionality of the space.
	TooN::Vector<Size> g;           ///< Gradient vector used by the next call to iterate()
	TooN::Vector<Size> new_g;       ///< The new gradient set at the end of iterate()
	TooN::Vector<Size> h;           ///< Conjugate vector to be searched along in the next call to iterate()
	TooN::Vector<Size> minus_h;     ///< Negative of h, which is required because it must be passed into a function taking a reference (so it can't be a temporary)
	TooN::Vector<Size> old_g;       ///< Gradient vector used to compute \f$h\f$ in the last call to iterate()
	TooN::Vector<Size> old_h;       ///< Conjugate vector searched along in the last call to iterate()
	TooN::Vector<Size> x;           ///< Current position (best known point)
	TooN::Vector<Size> old_x;       ///< Previous point (not set at construction)

	Precision tolerance;            ///< Tolerance used to determine if the optimization is complete. Defaults to the square root of machine precision.
	int max_iterations;             ///< Maximum number of iterations. Defaults to \c size\f$\times 100\f$.

	TooN::Vector<Size> delta_max;   ///< Maximum distance to travel along each axis in the line search

	Precision linesearch_tolerance; ///< Tolerance used to determine if the linesearch is complete. Defaults to the square root of machine precision.
	Precision linesearch_epsilon;   ///< Additive term in the tolerance to prevent excessive iterations if \f$x_\mathrm{optimal} = 0\f$. Known as \c ZEPS in Numerical Recipes. Defaults to 1e-20.

	int iterations;                 ///< Number of iterations performed

	///Initialize the ConjugateGradient class with sensible values.
	///@param start Starting point, \e x
	///@param deriv Function to compute \f$\nabla f(x)\f$
	///@param d Maximum allowed movement (delta) in any dimension
	template<class Deriv> ConjugateGradientOnly(const TooN::Vector<Size>& start, const Deriv& deriv, const TooN::Vector<Size>& d)
	: size(start.size()),
	  g(size), new_g(size), h(size), minus_h(size), old_g(size), old_h(size), x(start), old_x(size), delta_max(d)
	{
		init(start, deriv);
	}

	///Initialise given a starting point and the derivative at the starting point
	///@param start Starting point
	///@param deriv Derivative at the starting point
	template<int S, class P, class B> void init(const TooN::Vector<Size>& start, const TooN::Vector<S, P, B>& deriv)
	{
		using std::numeric_limits;
		using std::sqrt;
		x = start;

		//Start with the conjugate direction aligned with
		//the gradient.
		g = deriv;
		h = g;
		minus_h = -h;

		tolerance = sqrt(numeric_limits<Precision>::epsilon());
		max_iterations = size * 100;

		linesearch_tolerance = sqrt(numeric_limits<Precision>::epsilon());
		linesearch_epsilon = 1e-20;

		iterations = 0;
	}

	///Initialise given a starting point and a functor for computing derivatives
	///@param start Starting point
	///@param deriv Derivative-computing functor
	template<class Deriv> void init(const TooN::Vector<Size>& start, const Deriv& deriv)
	{
		init(start, deriv(start));
	}

	///Perform a linesearch from the current point (x) along the current
	///conjugate vector (h). The linesearch does not make use of function values,
	///only derivatives.
	///You probably do not want to call this function directly. See iterate() instead.
	///This function updates:
	/// - x
	/// - old_x
	/// - iterations
	///
	///Note that the conjugate direction and gradient are not updated.
	///If the derivative along the search direction is positive at the current
	///point (which indicates an error in the supplied gradient), then essentially
	///a zero sized step is taken.
	///@param deriv Functor returning the derivative value at a given point.
	template<class Deriv> void find_next_point(const Deriv& deriv)
	{
		iterations++;
		using std::abs;
		using std::min;

		//The search direction is minus_h, so the directional derivative at the
		//current point is f1 = g . minus_h, which should be negative if minus_h
		//is a descent direction.
		new_g = g;
		double f1 = g * minus_h;

		//If f1 is positive, then the conjugate vector points against the
		//newly computed gradient. This can only happen if there is an error
		//in the computation of the gradient (e.g. we're using a stochastic
		//method), so there is not much to do here except stop.
		if(f1 > 0)
		{
			//Return, so the next iteration can try a direction conjugate to the current one.
			return;
		}

		//Find the step size which takes us to the maximum allowed distance
		//along the most restrictive axis.
		Precision max_step = HUGE_VAL;
		for(int i=0; i < minus_h.size(); i++)
			max_step = min(max_step, abs(delta_max[i]/h[i]));

		//Taking the maximum step size may result in NaNs.
		//Try the maximum step size, and successively reduce it.
		Precision full_max_step = max_step;

		for(;;)
		{
			new_g = deriv(x + max_step * minus_h);

			if(!TooN::isnan(new_g))
			{
				// cout << "new_g is NAN free :)\n";
				break;
			}
			else
				max_step /= 2;

			//If the step size gets too small then
			//return, as we can't really do anything.
			if(max_step < full_max_step * linesearch_tolerance)
				return;
		}

		double f2 = new_g * minus_h;

		//If f2 is still negative, then even the largest allowed step does not
		//reach the minimum along this direction. So, take a full step; later
		//iterations will keep going in the same direction.
		if(f2 < 0)
		{
			//Take a full step
			x += max_step * minus_h;
			return;
		}

		//Now the points at s1=0 and s2=max_step along minus_h bracket a root of
		//the directional derivative. Find the root by shrinking the bracket,
		//mixing false position steps with bisection steps.
		double s1 = 0;
		double s2 = max_step;
		double s_new = max_step;

		//Track which end of the bracket recent updates have moved: false position
		//is only used when both ends have been moving, otherwise fall back to bisection.
		int updates[2] = {0,0};

		while(abs(s1 - s2) > abs(s1 + s2) * linesearch_tolerance + linesearch_epsilon)
		{
			if(updates[0] != updates[1] && updates[0] != 0)
			{
				//Compute the new point using false position.
				s_new = s1 + f1 * (s2 - s1)/(f1 - f2);
				new_g = deriv(x + s_new * minus_h);
				double f_new = new_g*minus_h;

				updates[0] = updates[1];

				if(f_new == 0)
					break;

				if(f_new < 0)
				{
					s1 = s_new;
					f1 = f_new;
					updates[1] = 1;
				}
				else
				{
					s2 = s_new;
					f2 = f_new;
					updates[1] = 2;
				}
			}
			else
			{
				//Compute the new point using bisection.
				s_new = (s1 + s2) / 2;

				new_g = deriv(x + s_new * minus_h);
				double f_new = new_g*minus_h;

				if(f_new < 0)
				{
					s1 = s_new;
					f1 = f_new;
					updates[0] = 1;
				}
				else
				{
					s2 = s_new;
					f2 = f_new;
					updates[0] = 2;
				}
			}
		}

		//Update the current position.
		//The most recent gradient computation is at s_new.
		x += minus_h * s_new;
	}

	///Check to see if iteration should stop. You probably do not want to use
	///this function. See iterate() instead. This function updates nothing.
	bool finished()
	{
		using std::abs;
		return iterations > max_iterations || norm_inf(new_g) < tolerance;
	}

	///After an iteration, update the gradient and conjugate direction using the
	///Polak-Ribiere equations.
	///This function updates:
	///- g
	///- old_g
	///- h
	///- old_h
	///@param grad The derivatives of the function at \e x
	void update_vectors_PR(const TooN::Vector<Size>& grad)
	{
		//Update the gradient and conjugate directions.
		old_g = g;
		old_h = h;

		g = grad;
		Precision gamma = (g * g - old_g*g)/(old_g * old_g);
		h = g + gamma * old_h;
		minus_h = -h;
	}

	///Use this function to iterate over the optimization. Note that after
	///iterate returns false, g, h, old_g and old_h will not have been
	///updated.
	///This function updates:
	/// - x
	/// - old_x
	/// - iterations
	/// - g*
	/// - old_g*
	/// - h*
	/// - old_h*
	///
	///*'d variables are not updated on the last iteration.
	///@param deriv Functor to compute derivatives at the specified point.
	///@return Whether to continue iterating.
	template<class Deriv> bool iterate(const Deriv& deriv)
	{
		find_next_point(deriv);

		if(!finished())
		{
			update_vectors_PR(new_g);
			return true;
		}
		else
			return false;
	}
};


#endif
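
//Example usage (an illustrative sketch, not part of the original B-cubed
//sources): the quadratic function, its gradient functor and all of the values
//below are hypothetical, chosen only to show the expected interface. The
//optimizer is constructed from a starting point, a gradient functor and a
//per-axis step limit, then driven by calling iterate() until it returns false.
//
//	#include <TooN/TooN.h>
//	#include <iostream>
//	//(plus this header)
//
//	//Gradient of f(x) = 0.5*|x - (1, 2)|^2, which is minimized at (1, 2)
//	struct QuadraticGradient
//	{
//		TooN::Vector<2> operator()(const TooN::Vector<2>& x) const
//		{
//			return x - TooN::makeVector(1.0, 2.0);
//		}
//	};
//
//	int main()
//	{
//		TooN::Vector<2> start = TooN::makeVector(5.0, -3.0);
//		TooN::Vector<2> max_delta = TooN::makeVector(1.0, 1.0); //Largest move along each axis per linesearch
//
//		ConjugateGradientOnly<2> cg(start, QuadraticGradient(), max_delta);
//
//		while(cg.iterate(QuadraticGradient()))
//		{}
//
//		std::cout << "Minimum found at " << cg.x << std::endl;
//	}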