# Pure-Keras CRF loss layer, adapted from code by Su Jianlin (bojone).
# -*- coding: utf-8 -*-
from keras.layers import Layer
import keras.backend as K


class CRF(Layer):
    """Pure-Keras CRF loss layer.

    The CRF is implemented as a layer whose only trainable parameter is the
    label-transition matrix. ``call`` passes its input through unchanged, so
    the layer exists only to compute the training loss; for inference you
    build a separate decoding model that reuses the learned transition matrix.
    """

    def __init__(self, ignore_last_label=False, **kwargs):
        """ignore_last_label: if True, the last label channel is treated as a
        padding/mask indicator and is excluded from the CRF computation.
        """
        self.ignore_last_label = 1 if ignore_last_label else 0
        super(CRF, self).__init__(**kwargs)

    def build(self, input_shape):
        # Number of real labels (excluding the optional trailing mask label).
        self.num_labels = input_shape[-1] - self.ignore_last_label
        # trans[i, j]: transition score for moving from label i to label j
        # between consecutive timesteps.
        self.trans = self.add_weight(name='crf_trans',
                                     shape=(self.num_labels, self.num_labels),
                                     initializer='glorot_uniform',
                                     trainable=True)

    def log_norm_step(self, inputs, states):
        """One step of the recursive computation of the log partition
        function (the forward algorithm, in log space, driven by K.rnn).

        Key points: 1) recursion; 2) logsumexp to avoid overflow.
        Trick: expand_dims aligns the tensors for broadcasting.

        inputs: (batch, num_labels) emission scores at the current timestep.
        states: [(batch, num_labels)] accumulated log-alpha up to the
                previous timestep (emissions included).
        """
        states = K.expand_dims(states[0], 2)   # (batch, num_labels, 1)
        trans = K.expand_dims(self.trans, 0)   # (1, num_labels, num_labels)
        # Marginalize over the previous label (axis 1), THEN add the current
        # emission score indexed by the current label.  The variant that puts
        # `inputs` inside the logsumexp expanded on axis 2 credits each
        # emission to the preceding position's label and yields a wrong Z.
        output = K.logsumexp(states + trans, 1)  # (batch, num_labels)
        output = output + inputs
        return output, [output]

    def path_score(self, inputs, labels):
        """Unnormalized log-score of the target path.

        Key point: per-label emission score plus transition score.
        Trick: multiplying predictions by the one-hot targets extracts
        exactly the scores lying on the target path.
        """
        # labels is one-hot over the last axis, so inputs*labels keeps only
        # the score of the true label at each timestep; summing over labels
        # then time gives the total emission score, shape (batch, 1).
        point_score = K.sum(K.sum(inputs * labels, 2), 1, keepdims=True)
        # Two shifted copies of labels; their outer product has a single 1
        # at the (previous label, current label) pair of each transition.
        labels1 = K.expand_dims(labels[:, :-1], 3)  # (batch, T-1, N, 1)
        labels2 = K.expand_dims(labels[:, 1:], 2)   # (batch, T-1, 1, N)
        labels = labels1 * labels2                  # (batch, T-1, N, N)
        trans = K.expand_dims(K.expand_dims(self.trans, 0), 0)
        # trans*labels is non-zero only at the transition actually taken, so
        # summing the last two axes then time gives the total transition
        # score over the T-1 steps, shape (batch, 1).
        trans_score = K.sum(K.sum(trans * labels, [2, 3]), 1, keepdims=True)
        return point_score + trans_score  # sum of the two score parts

    def call(self, inputs):
        # The CRF does not change the output; it is only a loss carrier.
        return inputs

    def loss(self, y_true, y_pred):
        """Negative log-likelihood of the target paths.

        y_true must be one-hot over the label axis (plus the optional
        trailing mask label when ignore_last_label is set).
        """
        mask = 1 - y_true[:, 1:, -1] if self.ignore_last_label else None
        y_true = y_true[:, :, :self.num_labels]
        y_pred = y_pred[:, :, :self.num_labels]
        init_states = [y_pred[:, 0]]  # log-alpha at t=0 is the first emission
        # Recursively accumulate the log partition vector over timesteps.
        log_norm, _, _ = K.rnn(self.log_norm_step, y_pred[:, 1:],
                               init_states, mask=mask)  # (batch, num_labels)
        log_norm = K.logsumexp(log_norm, 1, keepdims=True)  # log Z, (batch, 1)
        path_score = self.path_score(y_pred, y_true)  # log numerator
        return log_norm - path_score  # i.e. -log(numerator / Z)

    def accuracy(self, y_true, y_pred):
        """Per-frame accuracy for monitoring training, with masked
        (padding) positions excluded from the average."""
        mask = 1 - y_true[:, :, -1] if self.ignore_last_label else None
        y_true = y_true[:, :, :self.num_labels]
        y_pred = y_pred[:, :, :self.num_labels]
        isequal = K.equal(K.argmax(y_true, 2), K.argmax(y_pred, 2))
        isequal = K.cast(isequal, 'float32')
        if mask is None:
            return K.mean(isequal)
        else:
            return K.sum(isequal * mask) / K.sum(mask)