使用 TensorFlow 查看模型 checkpoint 中保存的参数

命令(需先执行 `import tensorflow as tf`;参数为 checkpoint 文件路径或其所在目录):

tf.train.list_variables("bert_model.ckpt")

输出(列表中的每个元素都是 `(变量名, 形状)` 形式的元组):

[('bert/embeddings/LayerNorm/beta', [768]), ('bert/embeddings/LayerNorm/gamma', [768]), ('bert/embeddings/position_embeddings', [512, 768]), ('bert/embeddings/token_type_embeddings', [2, 768]), ('bert/embeddings/word_embeddings', [21128, 768]), ('bert/encoder/layer_0/attention/output/LayerNorm/beta', [768]), ('bert/encoder/layer_0/attention/output/LayerNorm/gamma', [768]), ('bert/encoder/layer_0/attention/output/dense/bias', [768]), ('bert/encoder/layer_0/attention/output/dense/kernel', [768, 768]), ('bert/encoder/layer_0/attention/self/key/bias', [768]), ('bert/encoder/layer_0/attention/self/key/kernel', [768, 768]), ('bert/encoder/layer_0/attention/self/query/bias', [768]), ('bert/encoder/layer_0/attention/self/query/kernel', [768, 768]), ('bert/encoder/layer_0/attention/self/value/bias', [768]), ('bert/encoder/layer_0/attention/self/value/kernel', [768, 768]), ('bert/encoder/layer_0/intermediate/dense/bias', [3072]), ('bert/encoder/layer_0/intermediate/dense/kernel', [768, 3072]), ('bert/encoder/layer_0/output/LayerNorm/beta', [768]), ('bert/encoder/layer_0/output/LayerNorm/gamma', [768]), ('bert/encoder/layer_0/output/dense/bias', [768]), ('bert/encoder/layer_0/output/dense/kernel', [3072, 768]), ('bert/encoder/layer_1/attention/output/LayerNorm/beta', [768]), ('bert/encoder/layer_1/attention/output/LayerNorm/gamma', [768]), ('bert/encoder/layer_1/attention/output/dense/bias', [768]), ('bert/encoder/layer_1/attention/output/dense/kernel', [768, 768]), ('bert/encoder/layer_1/attention/self/key/bias', [768]), ('bert/encoder/layer_1/attention/self/key/kernel', [768, 768]), ('bert/encoder/layer_1/attention/self/query/bias', [768]), ('bert/encoder/layer_1/attention/self/query/kernel', [768, 768]), ('bert/encoder/layer_1/attention/self/value/bias', [768]), ('bert/encoder/layer_1/attention/self/value/kernel', [768, 768]), ('bert/encoder/layer_1/intermediate/dense/bias', [3072]), ('bert/encoder/layer_1/intermediate/dense/kernel', [768, 3072]), 
('bert/encoder/layer_1/output/LayerNorm/beta', [768]), ('bert/encoder/layer_1/output/LayerNorm/gamma', [768]), ('bert/encoder/layer_1/output/dense/bias', [768]), ('bert/encoder/layer_1/output/dense/kernel', [3072, 768]), ('bert/encoder/layer_10/attention/output/LayerNorm/beta', [768]), ('bert/encoder/layer_10/attention/output/LayerNorm/gamma', [768]), ('bert/encoder/layer_10/attention/output/dense/bias', [768]), ('bert/encoder/layer_10/attention/output/dense/kernel', [768, 768]), ('bert/encoder/layer_10/attention/self/key/bias', [768]), ('bert/encoder/layer_10/attention/self/key/kernel', [768, 768]), ('bert/encoder/layer_10/attention/self/query/bias', [768]), ('bert/encoder/layer_10/attention/self/query/kernel', [768, 768]), ('bert/encoder/layer_10/attention/self/value/bias', [768]), ('bert/encoder/layer_10/attention/self/value/kernel', [768, 768]), ('bert/encoder/layer_10/intermediate/dense/bias', [3072]), ('bert/encoder/layer_10/intermediate/dense/kernel', [768, 3072]), ('bert/encoder/layer_10/output/LayerNorm/beta', [768]), ('bert/encoder/layer_10/output/LayerNorm/gamma', [768]), ('bert/encoder/layer_10/output/dense/bias', [768]), ('bert/encoder/layer_10/output/dense/kernel', [3072, 768]), ('bert/encoder/layer_11/attention/output/LayerNorm/beta', [768]), ('bert/encoder/layer_11/attention/output/LayerNorm/gamma', [768]), ('bert/encoder/layer_11/attention/output/dense/bias', [768]), ('bert/encoder/layer_11/attention/output/dense/kernel', [768, 768]), ('bert/encoder/layer_11/attention/self/key/bias', [768]), ('bert/encoder/layer_11/attention/self/key/kernel', [768, 768]), ('bert/encoder/layer_11/attention/self/query/bias', [768]), ('bert/encoder/layer_11/attention/self/query/kernel', [768, 768]), ('bert/encoder/layer_11/attention/self/value/bias', [768]), ('bert/encoder/layer_11/attention/self/value/kernel', [768, 768]), ('bert/encoder/layer_11/intermediate/dense/bias', [3072]), ('bert/encoder/layer_11/intermediate/dense/kernel', [768, 3072]), 
('bert/encoder/layer_11/output/LayerNorm/beta', [768]), ('bert/encoder/layer_11/output/LayerNorm/gamma', [768]), ('bert/encoder/layer_11/output/dense/bias', [768]), ('bert/encoder/layer_11/output/dense/kernel', [3072, 768]), ('bert/encoder/layer_2/attention/output/LayerNorm/beta', [768]), ('bert/encoder/layer_2/attention/output/LayerNorm/gamma', [768]), ('bert/encoder/layer_2/attention/output/dense/bias', [768]), ('bert/encoder/layer_2/attention/output/dense/kernel', [768, 768]), ('bert/encoder/layer_2/attention/self/key/bias', [768]), ('bert/encoder/layer_2/attention/self/key/kernel', [768, 768]), ('bert/encoder/layer_2/attention/self/query/bias', [768]), ('bert/encoder/layer_2/attention/self/query/kernel', [768, 768]), ('bert/encoder/layer_2/attention/self/value/bias', [768]), ('bert/encoder/layer_2/attention/self/value/kernel', [768, 768]), ('bert/encoder/layer_2/intermediate/dense/bias', [3072]), ('bert/encoder/layer_2/intermediate/dense/kernel', [768, 3072]), ('bert/encoder/layer_2/output/LayerNorm/beta', [768]), ('bert/encoder/layer_2/output/LayerNorm/gamma', [768]), ('bert/encoder/layer_2/output/dense/bias', [768]), ('bert/encoder/layer_2/output/dense/kernel', [3072, 768]), ('bert/encoder/layer_3/attention/output/LayerNorm/beta', [768]), ('bert/encoder/layer_3/attention/output/LayerNorm/gamma', [768]), ('bert/encoder/layer_3/attention/output/dense/bias', [768]), ('bert/encoder/layer_3/attention/output/dense/kernel', [768, 768]), ('bert/encoder/layer_3/attention/self/key/bias', [768]), ('bert/encoder/layer_3/attention/self/key/kernel', [768, 768]), ('bert/encoder/layer_3/attention/self/query/bias', [768]), ('bert/encoder/layer_3/attention/self/query/kernel', [768, 768]), ('bert/encoder/layer_3/attention/self/value/bias', [768]), ('bert/encoder/layer_3/attention/self/value/kernel', [768, 768]), ('bert/encoder/layer_3/intermediate/dense/bias', [3072]), ('bert/encoder/layer_3/intermediate/dense/kernel', [768, 3072]), 
('bert/encoder/layer_3/output/LayerNorm/beta', [768]), ('bert/encoder/layer_3/output/LayerNorm/gamma', [768]), ('bert/encoder/layer_3/output/dense/bias', [768]), ('bert/encoder/layer_3/output/dense/kernel', [3072, 768]), ('bert/encoder/layer_4/attention/output/LayerNorm/beta', [768]), ('bert/encoder/layer_4/attention/output/LayerNorm/gamma', [768]), ('bert/encoder/layer_4/attention/output/dense/bias', [768]), ('bert/encoder/layer_4/attention/output/dense/kernel', [768, 768]), ('bert/encoder/layer_4/attention/self/key/bias', [768]), ('bert/encoder/layer_4/attention/self/key/kernel', [768, 768]), ('bert/encoder/layer_4/attention/self/query/bias', [768]), ('bert/encoder/layer_4/attention/self/query/kernel', [768, 768]), ('bert/encoder/layer_4/attention/self/value/bias', [768]), ('bert/encoder/layer_4/attention/self/value/kernel', [768, 768]), ('bert/encoder/layer_4/intermediate/dense/bias', [3072]), ('bert/encoder/layer_4/intermediate/dense/kernel', [768, 3072]), ('bert/encoder/layer_4/output/LayerNorm/beta', [768]), ('bert/encoder/layer_4/output/LayerNorm/gamma', [768]), ('bert/encoder/layer_4/output/dense/bias', [768]), ('bert/encoder/layer_4/output/dense/kernel', [3072, 768]), ('bert/encoder/layer_5/attention/output/LayerNorm/beta', [768]), ('bert/encoder/layer_5/attention/output/LayerNorm/gamma', [768]), ('bert/encoder/layer_5/attention/output/dense/bias', [768]), ('bert/encoder/layer_5/attention/output/dense/kernel', [768, 768]), ('bert/encoder/layer_5/attention/self/key/bias', [768]), ('bert/encoder/layer_5/attention/self/key/kernel', [768, 768]), ('bert/encoder/layer_5/attention/self/query/bias', [768]), ('bert/encoder/layer_5/attention/self/query/kernel', [768, 768]), ('bert/encoder/layer_5/attention/self/value/bias', [768]), ('bert/encoder/layer_5/attention/self/value/kernel', [768, 768]), ('bert/encoder/layer_5/intermediate/dense/bias', [3072]), ('bert/encoder/layer_5/intermediate/dense/kernel', [768, 3072]), ('bert/encoder/layer_5/output/LayerNorm/beta', 
[768]), ('bert/encoder/layer_5/output/LayerNorm/gamma', [768]), ('bert/encoder/layer_5/output/dense/bias', [768]), ('bert/encoder/layer_5/output/dense/kernel', [3072, 768]), ('bert/encoder/layer_6/attention/output/LayerNorm/beta', [768]), ('bert/encoder/layer_6/attention/output/LayerNorm/gamma', [768]), ('bert/encoder/layer_6/attention/output/dense/bias', [768]), ('bert/encoder/layer_6/attention/output/dense/kernel', [768, 768]), ('bert/encoder/layer_6/attention/self/key/bias', [768]), ('bert/encoder/layer_6/attention/self/key/kernel', [768, 768]), ('bert/encoder/layer_6/attention/self/query/bias', [768]), ('bert/encoder/layer_6/attention/self/query/kernel', [768, 768]), ('bert/encoder/layer_6/attention/self/value/bias', [768]), ('bert/encoder/layer_6/attention/self/value/kernel', [768, 768]), ('bert/encoder/layer_6/intermediate/dense/bias', [3072]), ('bert/encoder/layer_6/intermediate/dense/kernel', [768, 3072]), ('bert/encoder/layer_6/output/LayerNorm/beta', [768]), ('bert/encoder/layer_6/output/LayerNorm/gamma', [768]), ('bert/encoder/layer_6/output/dense/bias', [768]), ('bert/encoder/layer_6/output/dense/kernel', [3072, 768]), ('bert/encoder/layer_7/attention/output/LayerNorm/beta', [768]), ('bert/encoder/layer_7/attention/output/LayerNorm/gamma', [768]), ('bert/encoder/layer_7/attention/output/dense/bias', [768]), ('bert/encoder/layer_7/attention/output/dense/kernel', [768, 768]), ('bert/encoder/layer_7/attention/self/key/bias', [768]), ('bert/encoder/layer_7/attention/self/key/kernel', [768, 768]), ('bert/encoder/layer_7/attention/self/query/bias', [768]), ('bert/encoder/layer_7/attention/self/query/kernel', [768, 768]), ('bert/encoder/layer_7/attention/self/value/bias', [768]), ('bert/encoder/layer_7/attention/self/value/kernel', [768, 768]), ('bert/encoder/layer_7/intermediate/dense/bias', [3072]), ('bert/encoder/layer_7/intermediate/dense/kernel', [768, 3072]), ('bert/encoder/layer_7/output/LayerNorm/beta', [768]), 
('bert/encoder/layer_7/output/LayerNorm/gamma', [768]), ('bert/encoder/layer_7/output/dense/bias', [768]), ('bert/encoder/layer_7/output/dense/kernel', [3072, 768]), ('bert/encoder/layer_8/attention/output/LayerNorm/beta', [768]), ('bert/encoder/layer_8/attention/output/LayerNorm/gamma', [768]), ('bert/encoder/layer_8/attention/output/dense/bias', [768]), ('bert/encoder/layer_8/attention/output/dense/kernel', [768, 768]), ('bert/encoder/layer_8/attention/self/key/bias', [768]), ('bert/encoder/layer_8/attention/self/key/kernel', [768, 768]), ('bert/encoder/layer_8/attention/self/query/bias', [768]), ('bert/encoder/layer_8/attention/self/query/kernel', [768, 768]), ('bert/encoder/layer_8/attention/self/value/bias', [768]), ('bert/encoder/layer_8/attention/self/value/kernel', [768, 768]), ('bert/encoder/layer_8/intermediate/dense/bias', [3072]), ('bert/encoder/layer_8/intermediate/dense/kernel', [768, 3072]), ('bert/encoder/layer_8/output/LayerNorm/beta', [768]), ('bert/encoder/layer_8/output/LayerNorm/gamma', [768]), ('bert/encoder/layer_8/output/dense/bias', [768]), ('bert/encoder/layer_8/output/dense/kernel', [3072, 768]), ('bert/encoder/layer_9/attention/output/LayerNorm/beta', [768]), ('bert/encoder/layer_9/attention/output/LayerNorm/gamma', [768]), ('bert/encoder/layer_9/attention/output/dense/bias', [768]), ('bert/encoder/layer_9/attention/output/dense/kernel', [768, 768]), ('bert/encoder/layer_9/attention/self/key/bias', [768]), ('bert/encoder/layer_9/attention/self/key/kernel', [768, 768]), ('bert/encoder/layer_9/attention/self/query/bias', [768]), ('bert/encoder/layer_9/attention/self/query/kernel', [768, 768]), ('bert/encoder/layer_9/attention/self/value/bias', [768]), ('bert/encoder/layer_9/attention/self/value/kernel', [768, 768]), ('bert/encoder/layer_9/intermediate/dense/bias', [3072]), ('bert/encoder/layer_9/intermediate/dense/kernel', [768, 3072]), ('bert/encoder/layer_9/output/LayerNorm/beta', [768]), ('bert/encoder/layer_9/output/LayerNorm/gamma', 
[768]), ('bert/encoder/layer_9/output/dense/bias', [768]), ('bert/encoder/layer_9/output/dense/kernel', [3072, 768]), ('bert/pooler/dense/bias', [768]), ('bert/pooler/dense/kernel', [768, 768]), ('cls/predictions/output_bias', [21128]), ('cls/predictions/transform/LayerNorm/beta', [768]), ('cls/predictions/transform/LayerNorm/gamma', [768]), ('cls/predictions/transform/dense/bias', [768]), ('cls/predictions/transform/dense/kernel', [768, 768]), ('cls/seq_relationship/output_bias', [2]), ('cls/seq_relationship/output_weights', [2, 768]), ('global_step', [])]

猜你喜欢

转载自blog.csdn.net/u013069552/article/details/114490364