from __future__ import division
from __future__ import print_function
import numpy as np
class BeamEntry:
    """Holds the state of one single beam at a specific time-step."""

    def __init__(self):
        self.labeling = ()      # beam-labeling: tuple of label indices
        self.prBlank = 0        # probability of paths ending in a blank
        self.prNonBlank = 0     # probability of paths ending in a non-blank
        self.prTotal = 0        # sum of blank and non-blank probabilities
        self.prText = 1         # language-model score for this labeling
        self.lmApplied = False  # True once the LM has been applied to this beam
class BeamState:
"information about the beams at specific time-step"
def __init__(self):
self.entries = {}
def norm(self):
"length-normalise LM score"
for (k, _) in self.entries.items():
labelingLen = len(self.entries[k].labeling)
print('what is k:', k)
self.entries[k].prText = self.entries[k].prText ** (1.0 / (labelingLen if labelingLen else 1.0))
def sort(self):
"return beam-labelings, sorted by probability"
beams = [v for (_, v) in self.entries.items()]
#for beam in beams:
#print('sort func beams:',(beam.labeling,beam.prTotal,beam.prText))
#for k,v in self.entries.items():
#print(k,v)
sortedBeams = sorted(beams, reverse=True, key=lambda x: x.prTotal * x.prText)
return [x.labeling for x in sortedBeams]
def applyLM(parentBeam, childBeam, classes, lm):
    """Score the child beam with the LM via the bigram of its last two chars.

    Multiplies the parent's LM score by the (dampened) bigram probability of
    the parent's last character followed by the child's last character.
    Does nothing when no LM is given or the child was already scored.
    """
    if not lm or childBeam.lmApplied:
        return
    # first char of the bigram: parent's last label, or ' ' for the empty beam
    firstChar = classes[parentBeam.labeling[-1] if parentBeam.labeling else classes.index(' ')]
    # second char: the label just appended to form the child beam
    secondChar = classes[childBeam.labeling[-1]]
    lmFactor = 0.01  # dampens the influence of the language model
    bigramProb = lm.getCharBigram(firstChar, secondChar) ** lmFactor
    childBeam.prText = parentBeam.prText * bigramProb  # probability of char sequence
    childBeam.lmApplied = True  # only apply LM once per beam entry
def addBeam(beamState, labeling):
    """Ensure ``beamState`` has an entry for ``labeling``, creating a fresh one if absent."""
    entries = beamState.entries
    if labeling not in entries:
        entries[labeling] = BeamEntry()
def ctcBeamSearch(mat, classes, lm):
    """CTC beam-search decoder (Hwang & Sung; Graves et al.).

    Args:
        mat: numpy array of shape (T, C) holding per-time-step label
            probabilities; the last column (index ``len(classes)``) is the
            CTC blank.
        classes: string of characters; column ``i`` of ``mat`` maps to
            ``classes[i]``.
        lm: optional language model exposing ``getCharBigram(c1, c2)``,
            or None to decode without an LM.

    Returns:
        The most probable decoded string.
    """
    blankIdx = len(classes)
    maxT, maxC = mat.shape
    beamWidth = 2

    # initialise beam state with a single empty beam ending in blank
    last = BeamState()
    labeling = ()
    last.entries[labeling] = BeamEntry()
    last.entries[labeling].prBlank = 1
    last.entries[labeling].prTotal = 1

    # go over all time-steps
    for t in range(maxT):
        curr = BeamState()

        # keep only the best beams from the previous time-step
        bestLabelings = last.sort()[0:beamWidth]

        for labeling in bestLabelings:
            # --- keep the labeling unchanged at this time-step ---

            # probability of paths ending with a non-blank:
            # repeat the last char (only valid if the path did not end in blank)
            prNonBlank = 0
            if labeling:
                prNonBlank = last.entries[labeling].prNonBlank * mat[t, labeling[-1]]

            # probability of paths ending with a blank: emit blank now
            prBlank = last.entries[labeling].prTotal * mat[t, blankIdx]

            # add beam at current time-step if needed, then accumulate
            addBeam(curr, labeling)
            curr.entries[labeling].labeling = labeling
            curr.entries[labeling].prNonBlank += prNonBlank
            curr.entries[labeling].prBlank += prBlank
            curr.entries[labeling].prTotal += prBlank + prNonBlank
            # beam-labeling not changed, therefore LM score carries over
            curr.entries[labeling].prText = last.entries[labeling].prText
            # LM already applied at a previous time-step for this labeling
            curr.entries[labeling].lmApplied = True

            # --- extend the labeling with every non-blank char ---
            for c in range(maxC - 1):
                newLabeling = labeling + (c,)

                # CTC collapse rule: if the new char repeats the last one,
                # only paths that ended in a blank may extend
                if labeling and labeling[-1] == c:
                    prNonBlank = mat[t, c] * last.entries[labeling].prBlank
                else:
                    prNonBlank = mat[t, c] * last.entries[labeling].prTotal

                # add beam at current time-step if needed, then accumulate
                addBeam(curr, newLabeling)
                curr.entries[newLabeling].labeling = newLabeling
                curr.entries[newLabeling].prNonBlank += prNonBlank
                curr.entries[newLabeling].prTotal += prNonBlank

                # apply LM score once for the extended labeling
                applyLM(curr.entries[labeling], curr.entries[newLabeling], classes, lm)

        # set new beam state
        last = curr

    # normalise LM scores according to beam-labeling-length
    last.norm()

    # most probable labeling, mapped back to characters
    bestLabeling = last.sort()[0]
    return ''.join(classes[l] for l in bestLabeling)
def testBeamSearch():
    """Decode a tiny 3-time-step example and report whether the result matches."""
    classes = 'ab'
    # rows = time-steps; columns = P('a'), P('b'), P(blank)
    mat = np.array([[0.4, 0.5, 0.1],
                    [0.1, 0.3, 0.6],
                    [0.1, 0.9, 0]])
    print('Test beam search')
    expected = 'a'
    actual = ctcBeamSearch(mat, classes, None)
    print('Expected: "' + expected + '"')
    print('Actual: "' + actual + '"')
    print('OK' if expected == actual else 'ERROR')
# Run the decoder self-test when executed as a script.
if __name__ == '__main__':
    testBeamSearch()