CRF Keras implementation.

The code is by Su Jianlin (annotated copy).

# -*- coding: utf-8 -*-

from keras.layers import Layer
import keras.backend as K

class CRF(Layer):
    """Pure Keras implementation of a (linear-chain) CRF layer.

    The CRF "layer" is essentially a loss-computation layer that carries
    trainable parameters (the tag-transition matrix ``self.trans``).  Its
    ``call`` is the identity, so it is only useful while training; for
    prediction you build a separate decoding model that reuses the learned
    transition matrix.
    """

    def __init__(self, ignore_last_label=False, **kwargs):
        """ignore_last_label: whether the last label slot is a padding/mask
        label to be ignored (it then acts as the mask channel).
        """
        # Stored as 0/1 so it can be subtracted from the label count.
        self.ignore_last_label = 1 if ignore_last_label else 0
        super(CRF, self).__init__(**kwargs)

    def build(self, input_shape):
        # Number of real tags (drop the trailing mask label if present).
        self.num_labels = input_shape[-1] - self.ignore_last_label
        # Trainable transition matrix: trans[i, j] is the score of moving
        # from tag i (previous frame) to tag j (current frame).
        self.trans = self.add_weight(name='crf_trans',
                                     shape=(self.num_labels, self.num_labels),
                                     initializer='glorot_uniform',
                                     trainable=True)

    def log_norm_step(self, inputs, states):
        """One step of the recursive computation of the normalisation
        factor (log partition function), for use with ``K.rnn``.

        Key points: 1) recursion over time steps; 2) logsumexp to avoid
        numeric overflow.  Trick: align tensors with expand_dims.
        """
        states = K.expand_dims(states[0], 2)  # (batch, num_labels, 1): previous alpha, indexed by source tag
        trans = K.expand_dims(self.trans, 0)  # (1, num_labels, num_labels)
        # logsumexp over the source-tag axis, then add the current frame's
        # emission scores (which are indexed by the target tag).
        output = K.logsumexp(states + trans, 1)  # (batch, num_labels)
        output = output + inputs
        return output, [output]

    def path_score(self, inputs, labels):
        """Unnormalised log-score of the target path.

        Key point: per-frame emission score plus transition score.
        Trick: "prediction" times one-hot "target" extracts the scores
        along the gold path.
        """
        # Emission part: labels are one-hot over tags, so inputs*labels keeps
        # only the predicted score at the gold tag of each frame; summing over
        # tags then time gives the total emission score.
        point_score = K.sum(K.sum(inputs * labels, 2), 1, keepdims=True)  # (batch, 1)
        labels1 = K.expand_dims(labels[:, :-1], 3)  # (batch, T-1, num_labels, 1)
        labels2 = K.expand_dims(labels[:, 1:], 2)   # (batch, T-1, 1, num_labels)
        # Outer product of consecutive one-hot labels: exactly one 1 in each
        # trailing (num_labels, num_labels) plane, marking the gold transition.
        labels = labels1 * labels2  # (batch, T-1, num_labels, num_labels)
        trans = K.expand_dims(K.expand_dims(self.trans, 0), 0)
        # Transition part: pick the gold transition score at each of the T-1
        # steps and sum over time.
        trans_score = K.sum(K.sum(trans * labels, [2, 3]), 1, keepdims=True)  # (batch, 1)
        return point_score + trans_score

    def call(self, inputs):
        # The CRF layer does not transform its input; it only supplies the loss.
        return inputs

    def loss(self, y_true, y_pred):
        """Negative log-likelihood of the gold path; y_true must be one-hot."""
        # Mask from the last label channel, aligned with frames 1..T-1
        # that the recursion consumes.
        mask = 1 - y_true[:, 1:, -1:] if self.ignore_last_label else None
        y_true, y_pred = y_true[:, :, :self.num_labels], y_pred[:, :, :self.num_labels]
        init_states = [y_pred[:, 0]]  # initial alpha = emission scores of frame 0
        # Recursively accumulate the log partition vector over frames 1..T-1.
        log_norm, _, _ = K.rnn(self.log_norm_step, y_pred[:, 1:], init_states, mask=mask)
        log_norm = K.logsumexp(log_norm, 1, keepdims=True)  # log Z, shape (batch, 1)
        path_score = self.path_score(y_pred, y_true)  # log numerator, shape (batch, 1)
        return log_norm - path_score  # = -log(numerator / denominator)

    def accuracy(self, y_true, y_pred):
        """Frame-wise training accuracy, excluding masked frames."""
        mask = 1 - y_true[:, :, -1] if self.ignore_last_label else None
        y_true, y_pred = y_true[:, :, :self.num_labels], y_pred[:, :, :self.num_labels]
        isequal = K.equal(K.argmax(y_true, 2), K.argmax(y_pred, 2))
        isequal = K.cast(isequal, 'float32')
        if mask is None:
            return K.mean(isequal)
        else:
            return K.sum(isequal * mask) / K.sum(mask)

 

Origin: www.cnblogs.com/callyblog/p/11289566.html