Dilated Convolutions + CRF for NER


Until now I had always used BiLSTM-CRF for NER. A couple of days ago I came across a C++ word-segmentation tool on GitHub whose author provides two model variants: BiLSTM + CRF, and dilated convolutions + CRF. The underlying idea is fairly simple and comes from a 2015 paper, https://arxiv.org/abs/1511.07122 ; a well-known follow-up later extended it to NER (the iterated dilated CNN, or IDCNN, of Strubell et al., 2017). Yesterday I spent some time writing the code and training a first version of the model. Overall, its accuracy is on par with BiLSTM-CRF, but prediction is a bit faster. Let's look at the code:
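
The appeal of dilated convolutions is that each layer skips over inputs at a growing rate, so a small stack covers a wide context without pooling. A quick back-of-the-envelope sketch (my own illustration, not from the original post; the 1, 1, 2 dilation schedule is a typical IDCNN choice):

    # Receptive-field arithmetic for stacked 1-D dilated convolutions:
    # a layer with kernel width k and dilation d adds (k - 1) * d positions.
    def receptive_field(kernel_width, dilations):
        rf = 1
        for d in dilations:
            rf += (kernel_width - 1) * d
        return rf

    # One block with kernel width 3 and dilations 1, 1, 2 sees 9 tokens;
    # iterating the same block 4 times (as the code below does) sees 33.
    print(receptive_field(3, [1, 1, 2]))      # 9
    print(receptive_field(3, [1, 1, 2] * 4))  # 33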

Part of the model's training code; on a GPU, training is also faster than the BiLSTM:


    def embedding(self, input_x):
        """Look up word embeddings for a batch of token ids."""
        with tf.name_scope("word_embedding"):
            self.w_word = tf.Variable(
                tf.random_uniform([self.word_vocab_size, self.word_embedd_dim], -1, 1),
                trainable=True, name="w_word")
            embedded_words = tf.nn.embedding_lookup(self.w_word, input_x, name="embedded_words")
            # Insert a height-1 axis, [batch, seq_len, dim] -> [batch, 1, seq_len, dim],
            # so the sequence can flow through the 2-D (atrous) convolutions below.
            word_vectors = tf.expand_dims(embedded_words, 1)

        return word_vectors
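
    # --- Not from the original post: a guessed sketch of the setup this class
    # --- assumes. The placeholder names "input_x" and "keep_prob" are confirmed
    # --- by the feed names in the C++ code below; every size, shape, and the
    # --- dilation schedule here are assumptions for illustration only.
    def __init__(self):
        self.word_vocab_size = 21000  # guessed; must exceed the largest token id
        self.word_embedd_dim = 100    # guessed embedding dimension
        self.filterWidth = 3          # guessed convolution kernel width
        self.num_filter = 100         # guessed channels per conv layer
        self.num_tags = 13            # guessed size of the tag set
        self.batch_size = 1           # matches the single-sentence C++ example
        self.layers = [{'dilation': 1}, {'dilation': 1}, {'dilation': 2}]
        self.input_x = tf.placeholder(tf.int32, [None, None], name="input_x")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="keep_prob")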



    def inference(self, X, reuse=False):
        """Compute per-token tag scores with an iterated dilated CNN (IDCNN)."""
        with tf.variable_scope("idcnn", reuse=reuse):
            # Initial convolution: project the embedded input from
            # word_embedd_dim channels to num_filter channels.
            filter_weights = tf.get_variable(
                "idcnn_filter",
                shape=[1, self.filterWidth, self.word_embedd_dim,
                       self.num_filter],
                initializer=tf.contrib.layers.xavier_initializer())
            layerInput = tf.nn.conv2d(X,
                                      filter_weights,
                                      strides=[1, 1, 1, 1],
                                      padding="SAME",
                                      name="init_layer")
            finalOutFromLayers = []
            totalWidthForLastDim = 0

            # Iterate the dilated-convolution block 4 times (set by yourself);
            # the block's weights are shared across iterations.
            for j in range(4):
                for i in range(len(self.layers)):
                    dilation = self.layers[i]['dilation']
                    isLast = (i == len(self.layers) - 1)
                    # Reuse the block's variables on every pass after the first.
                    with tf.variable_scope("atrous-conv-layer-%d" % i,
                                           reuse=(reuse or j > 0)):
                        w = tf.get_variable(
                            "filterW",
                            shape=[1, self.filterWidth, self.num_filter,
                                   self.num_filter],
                            initializer=tf.contrib.layers.xavier_initializer())
                        b = tf.get_variable("filterB", shape=[self.num_filter])
                        conv = tf.nn.atrous_conv2d(layerInput,
                                                   w,
                                                   rate=dilation,
                                                   padding="SAME")
                        conv = tf.nn.bias_add(conv, b)
                        conv = tf.nn.relu(conv)
                        if isLast:
                            # Keep the last layer's output of each iteration.
                            finalOutFromLayers.append(conv)
                            totalWidthForLastDim += self.num_filter
                        layerInput = conv
            # Concatenate the per-iteration outputs along the channel axis.
            finalOut = tf.concat(axis=3, values=finalOutFromLayers)

            finalOut = tf.nn.dropout(finalOut, self.dropout_keep_prob)

            # Drop the height-1 axis and flatten to [batch * seq_len, channels].
            finalOut = tf.squeeze(finalOut, [1])
            finalOut = tf.reshape(finalOut, [-1, totalWidthForLastDim])

            # Linear projection to per-token tag scores.
            finalW = tf.get_variable(
                "finalW",
                shape=[totalWidthForLastDim, self.num_tags],
                initializer=tf.contrib.layers.xavier_initializer())

            finalB = tf.get_variable("finalB",
                                     initializer=tf.constant(
                                         0.001, shape=[self.num_tags]))

            scores = tf.nn.xw_plus_b(finalOut, finalW, finalB, name="scores")
        # Reshape back to [batch, seq_len, num_tags]. The explicit node name on
        # the reuse branch pins the output name used by the serving graph.
        if reuse:
            scores = tf.reshape(scores, [self.batch_size, -1, self.num_tags],
                                name="Reshape_7")
        else:
            scores = tf.reshape(scores, [self.batch_size, -1, self.num_tags])
        return scores
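
The CRF layer itself isn't shown in the post, but the output node crf_pred/ReverseSequence_1 that the C++ code fetches below is what tf.contrib.crf.crf_decode leaves behind (it reverses sequences internally during Viterbi decoding). A minimal sketch, assuming TF 1.x and variable names of my own choosing, of how the scores would typically be wired into the CRF:

    def crf_layer(self, scores, labels, seq_lengths):
        """scores: [batch, seq_len, num_tags] unary potentials from inference()."""
        with tf.variable_scope("crf_pred"):
            # Training loss: negative CRF log-likelihood, which also creates
            # the tag-transition parameters.
            log_likelihood, transitions = tf.contrib.crf.crf_log_likelihood(
                scores, labels, seq_lengths)
            loss = tf.reduce_mean(-log_likelihood)
            # Viterbi decoding; crf_decode calls reverse_sequence internally,
            # which is where node names like ReverseSequence_1 come from.
            pred_ids, _ = tf.contrib.crf.crf_decode(scores, transitions, seq_lengths)
        return loss, pred_ids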

Loading the saved model from C++:

#include <iostream>
#include <vector>
#include "tensorflow/cc/saved_model/loader.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/protobuf/meta_graph.pb.h"
#include "tensorflow/cc/saved_model/tag_constants.h"

using namespace std;
using namespace tensorflow;

int main(int argc, char *argv[]) {

    string modelpath;

    if (argc < 2) {
        cout << "usage: " << argv[0] << " <saved_model_dir>" << endl;
        return 1;
    } else {
        modelpath = argv[1];
    }

    tensorflow::SessionOptions sess_options;
    tensorflow::RunOptions run_options;
    tensorflow::SavedModelBundle bundle;
    Status status;

    // Load the SavedModel exported under the "serve" tag.
    status = tensorflow::LoadSavedModel(sess_options, run_options, modelpath,
                                        {tensorflow::kSavedModelTagServe}, &bundle);

    if (!status.ok()) {
        cout << status.ToString() << endl;
        return 1;
    }

    tensorflow::MetaGraphDef graph_def = bundle.meta_graph_def;
    std::unique_ptr<tensorflow::Session>& session = bundle.session;

    // One tokenized sentence as vocabulary ids, zero-padded to length 40.
    vector<int> vec = {7997, 1945, 8471, 14127, 17565, 7340, 20224, 17529, 3796, 16033, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
    int ndim = vec.size();
    Tensor x(tensorflow::DT_INT32, tensorflow::TensorShape({1, ndim})); // shape [1, ndim]
    auto x_map = x.tensor<int, 2>();
    for (int j = 0; j < ndim; j++) {
        x_map(0, j) = vec[j];
    }
    std::vector<std::pair<string, tensorflow::Tensor>> inputs;
    inputs.push_back(std::pair<std::string, tensorflow::Tensor>("input_x", x));

    // Dropout keep probability: 1.0 disables dropout at inference time.
    Tensor keep_prob(tensorflow::DT_FLOAT, tensorflow::TensorShape({1}));
    keep_prob.vec<float>()(0) = 1.0f;

    inputs.push_back(std::pair<std::string, tensorflow::Tensor>("keep_prob", keep_prob));

    // Fetch the Viterbi-decoded tag ids from the CRF decode node.
    std::vector<tensorflow::Tensor> outputs;
    status = session->Run(inputs, {"crf_pred/ReverseSequence_1"}, {}, &outputs);
    if (!status.ok()) {
        std::cout << status.ToString() << "\n";
        return 1;
    }

    // Print the predicted tag id for each of the ndim token positions.
    for (int i = 0; i < ndim; ++i) {
        std::cout << outputs[0].matrix<int>()(0, i) << " ";
    }
    cout << endl;

    return 0;
}
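
One last note: for the C++ loader above to find the model, the Python side must have exported it as a SavedModel under the "serve" tag (kSavedModelTagServe on the C++ side). The post doesn't show that step; a minimal sketch of what it might look like in TF 1.x:

    import tensorflow as tf

    # Hypothetical export step: assumes `sess` holds the trained graph with
    # the nodes "input_x", "keep_prob" and "crf_pred/ReverseSequence_1".
    with tf.Session() as sess:
        # ... build / restore the trained IDCNN-CRF graph here ...
        builder = tf.saved_model.builder.SavedModelBuilder("./idcnn_crf_model")
        builder.add_meta_graph_and_variables(
            sess, [tf.saved_model.tag_constants.SERVING])  # the "serve" tag
        builder.save()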
