The notation follows `A tutorial on hidden Markov models and selected applications in speech recognition', Rabiner, 1989. More...
Functions | |
template<int States, class Btype , class Otype > | |
std::tr1::tuple< double, TooN::Vector < Btype::NumParameters > , TooN::Matrix < Btype::NumParameters > > | forward_algorithm_hessian (TooN::Matrix< States > A, TooN::Vector< States > pi, const Btype &B, const std::vector< Otype > &O, bool compute_deriv=1, bool compute_hessian=1) |
template<int States, class Btype , class Otype > | |
double | forward_algorithm (TooN::Matrix< States > A, TooN::Vector< States > pi, const Btype &B, const std::vector< Otype > &O) |
template<int States, class Btype , class Otype > | |
std::pair< double, TooN::Vector < Btype::NumParameters > > | forward_algorithm_deriv (TooN::Matrix< States > A, TooN::Vector< States > pi, const Btype &B, const std::vector< Otype > &O) |
template<int States, class Btype , class Otype > | |
std::vector< std::tr1::array < double, States > > | forward_algorithm_delta (TooN::Matrix< States > A, TooN::Vector< States > pi, const Btype &B, const std::vector< Otype > &O) |
template<int States, class Btype , class Otype > | |
void | forward_algorithm_delta2 (TooN::Matrix< States > A, TooN::Vector< States > pi, const Btype &B, const std::vector< Otype > &O, std::vector< std::tr1::array< double, States > > &delta) |
template<int States, class Btype , class Otype > | |
std::pair< std::vector < std::tr1::array< double, States > >, std::vector < std::tr1::array< double, States > > > | forward_backward_algorithm (TooN::Matrix< States > A, TooN::Vector< States > pi, const Btype &B, const std::vector< Otype > &O) |
template<class A , class Rng > | |
int | select_random_element (const A &v, const double scale, Rng &rng) |
template<int N, class Rng > | |
int | sample_unscaled_log (std::tr1::array< double, N > a, Rng &rng) |
template<int States, class StateType , class Rng > | |
std::vector< StateType > | backward_sampling (TooN::Matrix< States > A, const std::vector< std::tr1::array< double, States > > &delta, Rng &rng) |
The notation follows `A tutorial on hidden Markov models and selected applications in speech recognition', Rabiner, 1989.
std::tr1::tuple<double, TooN::Vector<Btype::NumParameters>, TooN::Matrix<Btype::NumParameters> > forward_algorithm_hessian | ( | TooN::Matrix< States > | A, | |
TooN::Vector< States > | pi, | |||
const Btype & | B, | |||
const std::vector< Otype > & | O, | |||
bool | compute_deriv = 1 , |
|||
bool | compute_hessian = 1 | |||
) | [inline] |
The forward algorithm is defined as:
And the probability of observing the data is just:
where the state, .
All multipliers are much less than 1, to rapidly ends up as zero. Instead, store the logarithm:
and the recursion is:
including an arbitrary constant, gives:
In order to prevent a loss of scale on the addition:
so the largest exponent will be exactly 0. The final log probability is, similarly:
Z can take any value, but to keep the numbers within a convenient range:
For computing derivatives, two useful results are:
There are M parameters of B, denoted . The derivatives of P are:
Taking derivatives of and rearranging to get numerically more convenient results gives:
The derivarives of are:
with as defined in forward_algorithm.
For computing second derivatives, with yielding column vectors, two useful results are:
therefore:
and:
Define as:
so that:
The derivatives and Hessian recursion are therefore:
A | A: State transition probabilities. | |
pi | ![]() | |
O | O or I: the observed data (ie the images). | |
B | ![]() | |
compute_deriv | Whether to compute the derivative, or return zero. | |
compute_hessian | Whether to compute the Hessian, or return zero. This implies compute_deriv . |
Definition at line 153 of file forward_algorithm.h.
References ln().
Referenced by forward_algorithm_deriv(), sampled_background_spot_hessian2(), and sampled_background_spot_hessian_FAKE().
00154 { 00155 using namespace TooN; 00156 using namespace std; 00157 using namespace std::tr1; 00158 00159 if(compute_hessian == 1) 00160 compute_deriv=1; 00161 00162 static const int M = Btype::NumParameters; 00163 int states = pi.size(); 00164 00165 //delta[j][i] = delta_t(i) 00166 vector<array<double, States> > delta(O.size()); 00167 00168 //diff_delta[t][j][n] = d/de_n delta_t(j) 00169 vector<array<Vector<M>,States > > diff_delta(O.size()); 00170 00171 00172 //hess_delta[t][j][m][n] = d2/de_n de_m delta_t(j) 00173 vector<array<Matrix<M>,States > > hess_delta(O.size()); 00174 00175 //Initialization: Eqn 19, P 262 00176 //Set initial partial log probabilities: 00177 for(int i=0; i < states; i++) 00178 { 00179 delta[0][i] = ln(pi[i]) + B.log(i, O[0]); 00180 00181 if(compute_deriv) 00182 diff_delta[0][i] = B.diff_log(i, O[0]); 00183 00184 if(compute_hessian) 00185 hess_delta[0][i] = B.hess_log(i, O[0]); 00186 } 00187 00188 //Perform the recursion: Eqn 20, P262 00189 //Note, use T and T-1. Rather than T+1 and T. 00190 for(unsigned int t=1; t < O.size(); t++) 00191 { 00192 for(int j=0; j < states; j++) 00193 { 00194 double Ztj = -HUGE_VAL; //This is Z_t(j) 00195 for(int i=0; i < states; i++) 00196 Ztj = max(Ztj, delta[t-1][i] + ln(A[i][j])); 00197 00198 double sum=0; 00199 for(int i=0; i < states; i++) 00200 sum += exp(delta[t-1][i] + ln(A[i][j]) - Ztj); 00201 00202 delta[t][j] = B.log(j, O[t]) + Ztj + ln(sum); 00203 00204 if(compute_deriv) 00205 { 00206 Vector<M> sum_top = Zeros; 00207 for(int i=0; i < states; i++) 00208 sum_top += diff_delta[t-1][i] * exp(delta[t-1][i] + ln(A[i][j]) - Ztj); 00209 00210 diff_delta[t][j] = B.diff_log(j, O[t]) + (sum_top) / sum; 00211 00212 if(compute_hessian) 00213 { 00214 Matrix<M> sum_top2 = Zeros; 00215 for(int i=0; i < states; i++) 00216 sum_top2 += exp(delta[t-1][i] + ln(A[i][j]) - Ztj) * ( hess_delta[t-1][i] + diff_delta[t-1][i].as_col() * diff_delta[t-1][i].as_row()); 00217 00218 hess_delta[t][j] = B.hess_log(j, O[t]) + sum_top2 / sum - sum_top.as_col() * sum_top.as_row() / (sum*sum); 00219 } 00220 } 00221 } 00222 } 00223 00224 //Compute the log prob using normalization 00225 double Z = -HUGE_VAL; 00226 for(int i=0; i < states; i++) 00227 Z = max(Z, delta.back()[i]); 00228 00229 double sum =0; 00230 for(int i=0; i < states; i++) 00231 sum += exp(delta.back()[i] - Z); 00232 00233 double log_prob = Z + ln(sum); 00234 00235 //Compute the differential of the log 00236 Vector<M> diff_log = Zeros; 00237 //Compute the differential of the log using normalization 00238 //The convenient normalizer is ln P(O|lambda) which makes the bottom 1. 00239 for(int i=0; compute_deriv && i < states; i++) 00240 diff_log += exp(delta.back()[i] - log_prob)*diff_delta.back()[i]; 00241 00242 Matrix<M> hess_log = Zeros; 00243 //Compute the hessian of the log using normalization 00244 //The convenient normalizer is ln P(O|lambda) which makes the bottom 1. 00245 for(int i=0; compute_hessian && i < states; i++) 00246 hess_log += exp(delta.back()[i] - log_prob) * (hess_delta.back()[i] + diff_delta.back()[i].as_col() * diff_delta.back()[i].as_row()); 00247 00248 hess_log -= diff_log.as_col() * diff_log.as_row(); 00249 00250 //Compute the differential of the Hessian 00251 return make_tuple(log_prob, diff_log, hess_log); 00252 }
double forward_algorithm | ( | TooN::Matrix< States > | A, | |
TooN::Vector< States > | pi, | |||
const Btype & | B, | |||
const std::vector< Otype > & | O | |||
) | [inline] |
Run the forward algorithm and return the log probability.
A | A: State transition probabilities. | |
pi | ![]() | |
O | O or I: the observed data (ie the images). | |
B | ![]() |
Definition at line 263 of file forward_algorithm.h.
References ln().
00264 { 00265 using namespace TooN; 00266 using namespace std; 00267 using namespace std::tr1; 00268 00269 int states = pi.size(); 00270 00271 //delta[j][i] = delta_t(i) 00272 vector<array<double, States> > delta(O.size()); 00273 00274 //Initialization: Eqn 19, P 262 00275 //Set initial partial log probabilities: 00276 for(int i=0; i < states; i++) 00277 delta[0][i] = ln(pi[i]) + B.log(i, O[0]); 00278 00279 //Perform the recursion: Eqn 20, P262 00280 //Note, use T and T-1. Rather than T+1 and T. 00281 for(unsigned int t=1; t < O.size(); t++) 00282 { 00283 for(int j=0; j < states; j++) 00284 { 00285 double Ztj = -HUGE_VAL; //This is Z_t(j) 00286 for(int i=0; i < states; i++) 00287 Ztj = max(Ztj, delta[t-1][i] + ln(A[i][j])); 00288 00289 double sum=0; 00290 for(int i=0; i < states; i++) 00291 sum += exp(delta[t-1][i] + ln(A[i][j]) - Ztj); 00292 00293 delta[t][j] = B.log(j, O[t]) + Ztj + ln(sum); 00294 } 00295 } 00296 00297 //Compute the log prob using normalization 00298 double Z = -HUGE_VAL; 00299 for(int i=0; i < states; i++) 00300 Z = max(Z, delta.back()[i]); 00301 00302 double sum =0; 00303 for(int i=0; i < states; i++) 00304 sum += exp(delta.back()[i] - Z); 00305 00306 double log_prob = Z + ln(sum); 00307 00308 return log_prob; 00309 }
std::pair<double, TooN::Vector<Btype::NumParameters> > forward_algorithm_deriv | ( | TooN::Matrix< States > | A, | |
TooN::Vector< States > | pi, | |||
const Btype & | B, | |||
const std::vector< Otype > & | O | |||
) | [inline] |
Run the forward algorithm and return the log probability and its derivatives.
A | A: State transition probabilities. | |
pi | ![]() | |
O | O or I: the observed data (ie the images). | |
B | ![]() |
Definition at line 321 of file forward_algorithm.h.
References forward_algorithm_hessian().
Referenced by SpotNegProbabilityDiffWithSampledBackground::operator()().
00322 { 00323 using namespace std::tr1; 00324 double p; 00325 TooN::Vector<Btype::NumParameters> v; 00326 tie(p,v, ignore) = forward_algorithm_hessian(A, pi, B, O, 1, 0); 00327 return make_pair(p,v); 00328 }
std::vector<std::tr1::array<double, States> > forward_algorithm_delta | ( | TooN::Matrix< States > | A, | |
TooN::Vector< States > | pi, | |||
const Btype & | B, | |||
const std::vector< Otype > & | O | |||
) | [inline] |
Run the forward algorithm and return the log partials (delta).
A | A: State transition probabilities. | |
pi | ![]() | |
O | O or I: the observed data (ie the images). | |
B | ![]() |
Definition at line 342 of file forward_algorithm.h.
References ln().
Referenced by forward_backward_algorithm(), SampledMultispot::GibbsSampler::next(), and sampled_background_spot_hessian_ffbs().
00343 { 00344 using namespace TooN; 00345 using namespace std; 00346 using namespace std::tr1; 00347 00348 int states = pi.size(); 00349 00350 //delta[j][i] = delta_t(i) 00351 vector<array<double, States> > delta(O.size()); 00352 00353 //Initialization: Eqn 19, P 262 00354 //Set initial partial log probabilities: 00355 for(int i=0; i < states; i++) 00356 delta[0][i] = ln(pi[i]) + B.log(i, O[0]); 00357 00358 //Forward pass... 00359 //Perform the recursion: Eqn 20, P262 00360 //Note, use T and T-1. Rather than T+1 and T. 00361 for(unsigned int t=1; t < O.size(); t++) 00362 { 00363 for(int j=0; j < states; j++) 00364 { 00365 double Ztj = -HUGE_VAL; //This is Z_t(j) 00366 for(int i=0; i < states; i++) 00367 Ztj = max(Ztj, delta[t-1][i] + ln(A[i][j])); 00368 00369 double sum=0; 00370 for(int i=0; i < states; i++) 00371 sum += exp(delta[t-1][i] + ln(A[i][j]) - Ztj); 00372 00373 delta[t][j] = B.log(j, O[t]) + Ztj + ln(sum); 00374 } 00375 } 00376 00377 return delta; 00378 }
void forward_algorithm_delta2 | ( | TooN::Matrix< States > | A, | |
TooN::Vector< States > | pi, | |||
const Btype & | B, | |||
const std::vector< Otype > & | O, | |||
std::vector< std::tr1::array< double, States > > & | delta | |||
) | [inline] |
Run the forward algorithm and return the log partials (delta).
A | A: State transition probabilities. | |
pi | ![]() | |
O | O or I: the observed data (ie the images). | |
B | ![]() | |
delta | the ![]() |
Definition at line 390 of file forward_algorithm.h.
References ln().
00391 { 00392 using namespace TooN; 00393 using namespace std; 00394 using namespace std::tr1; 00395 00396 int states = pi.size(); 00397 00398 //delta[j][i] = delta_t(i) 00399 delta.resize(O.size()); 00400 00401 //Initialization: Eqn 19, P 262 00402 //Set initial partial log probabilities: 00403 for(int i=0; i < states; i++) 00404 delta[0][i] = ln(pi[i]) + B.log(i, O[0]); 00405 00406 Matrix<States> lA; 00407 for(int r=0; r < States; r++) 00408 for(int c=0; c < States; c++) 00409 lA[r][c] = ln(A[r][c]); 00410 00411 //Forward pass... 00412 //Perform the recursion: Eqn 20, P262 00413 //Note, use T and T-1. Rather than T+1 and T. 00414 for(unsigned int t=1; t < O.size(); t++) 00415 { 00416 for(int j=0; j < states; j++) 00417 { 00418 double Ztj = -HUGE_VAL; //This is Z_t(j) 00419 for(int i=0; i < states; i++) 00420 Ztj = max(Ztj, delta[t-1][i] + lA[i][j]); 00421 00422 double sum=0; 00423 for(int i=0; i < states; i++) 00424 sum += exp(delta[t-1][i] + lA[i][j] - Ztj); 00425 00426 delta[t][j] = B.log(j, O[t]) + Ztj + ln(sum); 00427 } 00428 } 00429 }
std::pair<std::vector<std::tr1::array<double, States> >, std::vector<std::tr1::array<double, States> > > forward_backward_algorithm | ( | TooN::Matrix< States > | A, | |
TooN::Vector< States > | pi, | |||
const Btype & | B, | |||
const std::vector< Otype > & | O | |||
) | [inline] |
Run the forward-backwards algorithm and return the log partials (delta and epsilon).
A | A: State transition probabilities. | |
pi | ![]() | |
O | O or I: the observed data (ie the images). | |
B | ![]() |
Backward pass Epsilon is log beta
Definition at line 443 of file forward_algorithm.h.
References forward_algorithm_delta(), and ln().
00444 { 00445 using namespace TooN; 00446 using namespace std; 00447 using namespace std::tr1; 00448 00449 int states = pi.size(); 00450 00451 //delta[j][i] = delta_t(i) 00452 vector<array<double, States> > delta = forward_algorithm_delta(A, pi, B, O); 00453 00454 ///Backward pass 00455 ///Epsilon is log beta 00456 vector<array<double, States> > epsilon(O.size()); 00457 00458 //Initialize beta to 1, ie epsilon to 0 00459 for(int i=0; i < states; i++) 00460 epsilon[O.size()-1][i] = 0; 00461 00462 //Perform the backwards recursion 00463 for(int t=O.size()-2; t >= 0; t--) 00464 { 00465 for(int i=0; i < states; i++) 00466 { 00467 //Find a normalizing constant 00468 double Z = -HUGE_VAL; 00469 00470 for(int j=0; j < states; j++) 00471 Z = max(Z, ln(A[i][j]) + B.log(j, O[t+1]) + epsilon[t+1][j]); 00472 00473 double sum=0; 00474 for(int j= 0; j < states; j++) 00475 sum += exp(ln(A[i][j]) + B.log(j, O[t+1]) + epsilon[t+1][j] - Z); 00476 00477 epsilon[t][i] = ln(sum) + Z; 00478 } 00479 } 00480 00481 return make_pair(delta, epsilon); 00482 }
int select_random_element | ( | const A & | v, | |
const double | scale, | |||
Rng & | rng | |||
) | [inline] |
Select an element from the container v, assuming that v is a probability distribution over elements up to some scale.
v | Uscaled probability distribution | |
scale | Scale of v | |
rng | Random number generator to use |
Definition at line 498 of file forward_algorithm.h.
Referenced by sample_unscaled_log().
int sample_unscaled_log | ( | std::tr1::array< double, N > | a, | |
Rng & | rng | |||
) | [inline] |
Select an element from the a, assuming that a stores unscaled log probabilities of the elements.
a | Uscaled probability distribution, stored as logarithms. | |
rng | Random number generator to use |
Definition at line 516 of file forward_algorithm.h.
References select_random_element().
00517 { 00518 double hi = *max_element(a.begin(), a.end()); 00519 double sum=0; 00520 00521 for(unsigned int i=0; i < a.size(); i++) 00522 { 00523 a[i] = exp(a[i] - hi); 00524 sum += a[i]; 00525 } 00526 00527 return select_random_element(a, sum, rng); 00528 }
std::vector<StateType> backward_sampling | ( | TooN::Matrix< States > | A, | |
const std::vector< std::tr1::array< double, States > > & | delta, | |||
Rng & | rng | |||
) | [inline] |
An implementation of the backwards sampling part of the forwards filtering/backwards sampling algorithm.
See `Monte Carlo smoothing for non-linear time series', Godsill and Doucet, JASA 2004
A | HMM transition matrix. | |
delta | Forward partial probabilities stored as logarithms. | |
rng | Random number generator to use |
Definition at line 538 of file forward_algorithm.h.
References ln().
00539 { 00540 //Compute the elementwise log of A 00541 for(int r=0; r < A.num_rows(); r++) 00542 for(int c=0; c < A.num_cols(); c++) 00543 A[r][c] = ln(A[r][c]); 00544 00545 std::vector<StateType> samples(delta.size()); 00546 00547 samples.back() = sample_unscaled_log<States, Rng>(delta.back(), rng); 00548 00549 //A is A[t][t+1] 00550 00551 for(int i=delta.size()-2; i >= 0; i--) 00552 { 00553 std::tr1::array<double, States> reverse_probabilities = delta[i]; 00554 00555 for(int j=0; j < States; j++) 00556 reverse_probabilities[j] += A[j][samples[i+1]]; 00557 00558 samples[i] = sample_unscaled_log<States, Rng>(reverse_probabilities, rng); 00559 } 00560 00561 return samples; 00562 }