//
// BBHMM.m
// SeqLearning
//
// Created by Roberto Esposito on 20/1/10.
// Copyright 2010 University of Turin. All rights reserved.
//
#import "BBHMM.h"
@implementation BBHMM
// Learning algorithm for HMMs. It is a specialization of the EM
// algorithm that takes into account the particular form of the
// HMM model.
// A great tutorial can be found at: http://www.isi.edu/natural-language/teaching/cs562/2009/readings/B06.pdf
// In brief:
// EM: finds the set Theta of parameters that maximizes the likelihood
// of the dataset X, taking into account a set Z of hidden variables.
// The algorithm alternates two steps until convergence.
// E-step: compute the posterior distribution over Z given the current Theta
// M-step: find the Theta that maximizes the expected (complete-data) likelihood,
//         where the expectation over Z uses the posterior found in the E-step
// More formally:
// E-step: set Q(\theta|\theta^{old}) = E_{Z|X,\theta^{old}}[\log P(X,Z|\theta)]
// M-step: \theta^{new} = argmax_\theta Q(\theta|\theta^{old})
//
// For HMMs the resulting algorithm is known as the Baum-Welch (or forward-backward) algorithm.
// In this particular case the algorithm is instantiated as follows (cf. C. Bishop,
// "Pattern Recognition and Machine Learning", Springer 2006, Chapter 13).
//
// Q(\theta|\theta^{old}) = \sum_{k=1}^K \gamma(z_{1k}) \log(\pi_k) +
// \sum_{n=2}^N \sum_{j=1}^K \sum_{k=1}^K \xi(z_{n-1,j},z_{nk})\log(A_{jk}) +
// \sum_{n=1}^N \sum_{k=1}^K \gamma(z_{nk})\log(p(x_n|\phi_k))
// where: j,k,l range over hidden-state indices (K is the number of different hidden-state values)
//        n ranges over time points (N is the length of the sequence)
//        z_{nk} is a boolean variable that is 1 when the n-th hidden state is equal to k and 0 otherwise
//        \pi_k is the parameter that governs the probability of being in state k at time 1
//        \xi(z_{n-1,j},z_{nk}) is the posterior probability (given the data and the current parameters)
//          of being in state j at time n-1 and in state k at time n; the corresponding transition parameter is A_{jk}
// p(x_n | phi_k) is the output probability of x_n in state k
// \gamma(z_{nk}) is the probability that the n-th hidden variable takes value k given the
// data and the current parameter settings
// it can be shown that the updates for the parameters that maximize the likelihood are:
//        \pi_k  = \gamma(z_{1k}) / \sum_{j=1}^K \gamma(z_{1j})
//        A_{jk} = \sum_{n=2}^N \xi(z_{n-1,j},z_{nk}) / \sum_{l=1}^K \sum_{n=2}^N \xi(z_{n-1,j},z_{nl})
// (a small plain-C sketch of these updates follows this comment)
//
// As is apparent from the above description, it is crucial to evaluate \gamma(z_{nk}) efficiently.
// It can be shown that \gamma(z_n) = \alpha(z_n)\beta(z_n) / P(X)
// where for any n: P(X) = \sum_k \alpha(z_{nk})\beta(z_{nk})
// implying that:
//        P(X) = \sum_k \alpha(z_{Nk})
// Here \alpha(z_n) and \beta(z_n) are the forward and backward variables; both can be
// computed by dynamic programming in O(NK^2) time (plain-C sketches of the forward and
// backward recursions are given next to the corresponding methods below).
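// ---------------------------------------------------------------------------
// Illustrative sketch only (not part of the BBMatrix-based code below): the
// M-step re-estimation formulas above, written out on plain C arrays.
// gamma[n][k] holds \gamma(z_{nk}) and xi[n][j][k] holds \xi(z_{n-1,j},z_{nk})
// for n >= 1; every name in this helper is hypothetical.
static void BBSketch_mStepUpdates(size_t N, size_t K,
                                  double gamma[N][K], double xi[N][K][K],
                                  double pi[K], double A[K][K]) {
    // pi_k = gamma(z_{1k}) / sum_j gamma(z_{1j})
    double gammaSum = 0.0;
    for(size_t j = 0; j < K; ++j) gammaSum += gamma[0][j];
    for(size_t k = 0; k < K; ++k) pi[k] = gamma[0][k] / gammaSum;
    // A_{jk} = sum_{n>=2} xi(z_{n-1,j},z_{nk}) / sum_l sum_{n>=2} xi(z_{n-1,j},z_{nl})
    for(size_t j = 0; j < K; ++j) {
        double rowSum = 0.0;
        for(size_t n = 1; n < N; ++n)
            for(size_t l = 0; l < K; ++l) rowSum += xi[n][j][l];
        for(size_t k = 0; k < K; ++k) {
            double num = 0.0;
            for(size_t n = 1; n < N; ++n) num += xi[n][j][k];
            A[j][k] = num / rowSum;
        }
    }
}
// ---------------------------------------------------------------------------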
-(NSObject<BBSequenceClassifying>*) learn:(BBSeqSet*) ss {
    [self initWeightsUsingDataset:ss];
    while(![self converged]) {
        // EM iteration (body still to be written):
        // E-step: compute gamma and xi via the forward/backward recursions;
        // M-step: re-estimate pi, A and the emission parameters.
    }
    return self; // assumption: the trained HMM itself serves as the learned classifier
}
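// ---------------------------------------------------------------------------
// Illustrative sketch only: the forward recursion that alphaForTime: below
// computes through the BBMatrix API, written out on plain C arrays.
// A[j][k] is the transition matrix, emission[n][k] holds p(x_n|\phi_k) and
// pi[k] the initial-state distribution; every name here is hypothetical.
static void BBSketch_forward(size_t N, size_t K, double pi[K],
                             double A[K][K], double emission[N][K],
                             double alpha[N][K]) {
    // Base case: alpha(z_{1k}) = pi_k * p(x_1|phi_k).
    for(size_t k = 0; k < K; ++k) alpha[0][k] = pi[k] * emission[0][k];
    // Recursion: alpha(z_{nk}) = p(x_n|phi_k) * sum_j alpha(z_{n-1,j}) * A_{jk}.
    for(size_t n = 1; n < N; ++n)
        for(size_t k = 0; k < K; ++k) {
            double s = 0.0;
            for(size_t j = 0; j < K; ++j) s += alpha[n-1][j] * A[j][k];
            alpha[n][k] = emission[n][k] * s;
        }
}
// ---------------------------------------------------------------------------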
-(BBMatrix*) alphaForTime:(size_t) t {
    BBMatrix* cachedResult = [alphaCache cachedObjectForTime:t];
    if( cachedResult != nil )
        return cachedResult;
    if(t==0) {
        // Base case of the forward recursion: alpha(z_{1k}) = pi_k * p(x_1|phi_k),
        // i.e. the initial-state distribution weighted by the emission probabilities
        // of the first observation. Not yet computed here.
        return nil;
    }
    // Recursive call: alpha for the previous time step.
    BBMatrix* alphaPrec = [self alphaForTime:t-1];
    // Forward recursion: alpha(z_{nk}) = p(x_n|phi_k) * sum_j alpha(z_{n-1,j}) A_{jk},
    // computed as a vector-matrix product followed by an element-wise product
    // with the emission probabilities of the current observation.
    BBMatrix* result = BBMatrix_multiply(alphaPrec, transitionMatrix);
    BBMatrix_inplace_elementwise_multiply(result, emission_probability_vector);
    [alphaCache cacheObject:result forTime:t];
    return result;
}
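// ---------------------------------------------------------------------------
// Illustrative sketch only: the backward recursion \beta and the posterior
// \gamma(z_{nk}) = \alpha(z_{nk})\beta(z_{nk}) / P(X) described in the header
// comment, written out on plain C arrays rather than the BBMatrix API.
// A[j][k] is the transition matrix and emission[n][k] holds p(x_n|\phi_k);
// every name here is hypothetical.
static void BBSketch_backwardAndGamma(size_t N, size_t K,
                                      double A[K][K], double emission[N][K],
                                      double alpha[N][K],
                                      double beta[N][K], double gamma[N][K]) {
    // Base case: beta(z_{Nk}) = 1 for every k.
    for(size_t k = 0; k < K; ++k) beta[N-1][k] = 1.0;
    // Recursion: beta(z_{nj}) = sum_k A_{jk} * p(x_{n+1}|phi_k) * beta(z_{n+1,k}).
    for(size_t n = N - 1; n-- > 0; )
        for(size_t j = 0; j < K; ++j) {
            double s = 0.0;
            for(size_t k = 0; k < K; ++k)
                s += A[j][k] * emission[n+1][k] * beta[n+1][k];
            beta[n][j] = s;
        }
    // P(X) = sum_k alpha(z_{Nk}), since beta(z_{Nk}) = 1.
    double pX = 0.0;
    for(size_t k = 0; k < K; ++k) pX += alpha[N-1][k];
    // gamma(z_{nk}) = alpha(z_{nk}) * beta(z_{nk}) / P(X).
    for(size_t n = 0; n < N; ++n)
        for(size_t k = 0; k < K; ++k)
            gamma[n][k] = alpha[n][k] * beta[n][k] / pX;
}
// ---------------------------------------------------------------------------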
@end