pytorch_model_to_tensorflow
Convert a PyTorch model (.pt file) to TensorFlow (.pb file)
need:
convert a model from pytorch1.x to tensorflow2.x,
then deploy it with tf-serving
notes
Purpose: run experiments in PyTorch, then deploy the final model with tf-serving;
Requirement: convert a PyTorch 1.x model (.pt file) to a TensorFlow 1.x/2.x model (.pb file in SavedModel format);
Approach:
a. first convert the PyTorch model (.pt) to an ONNX model (using torch.onnx.export; works for common architectures without framework-specific custom ops);
b. then convert the ONNX model to a TensorFlow SavedModel (using onnx_tf; variables.data may end up nearly empty, but this does not affect deployment);
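Once converted, the SavedModel can be served with tf-serving and queried over REST. Below is a minimal sketch; the served model name tc_model, the port 8501, and the "predictions" response key follow TF-Serving's standard REST API but are assumptions about your deployment, not part of this repo:

# Sketch: query a tf-serving REST endpoint (model name/port are assumptions).
import requests
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained("../ernie-tiny")
tokens = tokenizer.encode_plus("macropodus", max_length=128, truncation=True)
# one instance maps the signature's input names to plain int lists
payload = {"instances": [dict(tokens)]}
resp = requests.post("http://localhost:8501/v1/models/tc_model:predict", json=payload)
print(resp.json()["predictions"])  # -> [[p_0, ..., p_6]] sigmoid scores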
environment
python==3.8
tensorflow==2.8.0
tensorflow-addons==0.16.1
tensorflow-probability==0.16.0
keras==2.8.0
torch==1.8.0
transformers==4.15.0
onnx==1.8.1
onnx-tf==1.8.0
protobuf==3.19.2
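The conversion chain is sensitive to version mismatches between torch, onnx, onnx-tf, and tensorflow, so it is worth confirming the pinned versions at runtime first; a minimal sketch using only the standard __version__ attributes:

# Sketch: print the versions that matter for the conversion chain.
import onnx
import torch
import tensorflow
import transformers
print("torch:", torch.__version__)
print("onnx:", onnx.__version__)
print("tensorflow:", tensorflow.__version__)
print("transformers:", transformers.__version__)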
test
1. configure the pretrained-model path, e.g. pretrained_model_name_or_path = "../ernie-tiny"
2. python t11_pytorch_to_onnx_to_tensorflow.py
result
Some weights of the model checkpoint at E:/DATA/bert-model/00_pytorch/ernie-tiny were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
pytorch-model-predict:
[[0.5206347107887268, 0.5481777787208557, 0.7332082986831665, 0.5, 0.5749790668487549, 0.5696589946746826, 0.5643221139907837]]
model_save_path\onnx\tc_model.onnx
onnx-model-predict:
[array([[0.52063483, 0.54817796, 0.73320824, 0.5 , 0.5749791 ,
0.569659 , 0.564322 ]], dtype=float32)]
2022-12-12 09:39:15.005585: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX AVX2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-12 09:39:15.390812: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 2497 MB memory: -> device: 0, name: NVIDIA GeForce RTX 2060, pci bus id: 0000:01:00.0, compute capability: 7.5
2022-12-12 09:39:23.588536: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.
WARNING:absl:Found untraced functions such as gen_tensor_dict while saving (showing 1 of 1). These functions will not be directly callable after loading.
tensorflow_model_predict:
WARNING:tensorflow:SavedModel saved prior to TF 2.5 detected when loading Keras model. Please ensure that you are saving the model with model.save() or tf.keras.models.save_model(), *NOT* tf.saved_model.save(). To confirm, there should be a file named "keras_metadata.pb" in the SavedModel directory.
WARNING:tensorflow:SavedModel saved prior to TF 2.5 detected when loading Keras model. Please ensure that you are saving the model with model.save() or tf.keras.models.save_model(), *NOT* tf.saved_model.save(). To confirm, there should be a file named "keras_metadata.pb" in the SavedModel directory.
['serving_default']
{'output_0': TensorSpec(shape=(None, 7), dtype=tf.float32, name='output_0')}
[<tf.Tensor: shape=(1, 7), dtype=float32, numpy=
array([[0.52063483, 0.54817796, 0.73320824, 0.5 , 0.5749791 ,
0.569659 , 0.56432205]], dtype=float32)>]
reference
- onnx/onnx-tensorflow: https://github.com/onnx/onnx-tensorflow
- onnx/onnx: https://github.com/onnx/onnx
- yongzhuo/Pytorch-NLU: https://github.com/yongzhuo/Pytorch-NLU
code-detail
# !/usr/bin/python
# -*- coding: utf-8 -*-
# @time : 2022/12/7 21:00
# @author : Mo
# @function: pytorch to onnx
import json
import os
from transformers import BertConfig, BertTokenizer, BertModel
from argparse import Namespace
from torch import nn
import numpy as np
import torch
class FCLayer(nn.Module):
def __init__(self, input_dim, output_dim, dropout_rate=0.1, is_active=True,
is_dropout=True, active_type="mish"):
"""
FC-Layer, mostly last output of model
args:
input_dim: input dimension, 输入维度, eg. 768
output_dim: output dimension, 输出维度, eg. 32
dropout_rate: dropout rate, 随机失活, eg. 0.1
is_dropout: use dropout or not, 是否使用随机失活dropout, eg. True
is_active: use activation or not, 是否使用激活函数如tanh, eg. True
active_type: type of activate function, 激活函数类型, eg. "tanh", "relu"
Returns:
Tensor of batch.
"""
super(FCLayer, self).__init__()
self.linear = nn.Linear(input_dim, output_dim)
self.dropout = nn.Dropout(dropout_rate) # probability of an element to be zeroed
self.is_dropout = is_dropout
self.active_type = active_type
self.is_active = is_active
self.softmax = nn.Softmax(1)
self.sigmoid = nn.Sigmoid()
self.relu = nn.ReLU(inplace=True)
self.tanh = nn.Tanh()
self.gelu = nn.GELU()
def forward(self, x):
if self.is_dropout:
x = self.dropout(x)
x = self.linear(x)
if self.is_active:
if self.active_type.upper() == "MISH":
x = x * torch.tanh(nn.functional.softplus(x))
elif self.active_type.upper() == "SWISH":
x = x * torch.sigmoid(x)
elif self.active_type.upper() == "TANH":
x = self.tanh(x)
elif self.active_type.upper() == "GELU":
x = self.gelu(x)
elif self.active_type.upper() == "RELU":
x = self.relu(x)
else:
x = self.relu(x)
return x
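# Usage sketch for FCLayer (illustrative, not part of the pipeline):
#   fc = FCLayer(input_dim=768, output_dim=7)  # mish activation by default
#   logits = fc(torch.randn(2, 768))           # -> shape [2, 7]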
class TCGraph(nn.Module):
    def __init__(self, graph_config, tokenizer):
        super(TCGraph, self).__init__()
        self.graph_config = graph_config
        # load the pretrained language model
        pretrained_config, pretrained_model = BertConfig, BertModel
        self.pretrained_config = pretrained_config.from_pretrained(graph_config.pretrained_model_name_or_path, output_hidden_states=graph_config.output_hidden_states)
        self.pretrained_config.update({"gradient_checkpointing": True})
        if self.graph_config.is_train:
            self.pretrain_model = pretrained_model.from_pretrained(graph_config.pretrained_model_name_or_path, config=self.pretrained_config)
        else:
            self.pretrain_model = pretrained_model(self.pretrained_config)
        self.pretrain_model.resize_token_embeddings(len(tokenizer))
        # if using hidden-layer outputs
if self.graph_config.output_hidden_states:
self.dense = FCLayer(
int(self.pretrained_config.hidden_size * len(self.graph_config.output_hidden_states)),
self.graph_config.num_labels,
is_dropout=self.graph_config.is_dropout, is_active=self.graph_config.is_active,
active_type=self.graph_config.active_type)
else:
self.dense = FCLayer(self.pretrained_config.hidden_size, self.graph_config.num_labels, is_dropout=self.graph_config.is_dropout,
is_active=self.graph_config.is_active, active_type=self.graph_config.active_type)
        # loss function
        self.loss_bce = torch.nn.BCELoss()
        # activation
        self.sigmoid = torch.nn.Sigmoid()
def forward(self, input_ids, attention_mask, token_type_ids, labels=None):
output = self.pretrain_model(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
if self.graph_config.output_hidden_states:
x = output[2]
hidden_states_idx = [i for i in range(len(x))]
# cls-concat
cls = torch.cat([x[i][:, 0, :] for i in self.graph_config.output_hidden_states if i in hidden_states_idx], dim=-1)
else: # CLS
cls = output[0][:, 0, :] # CLS
        logits = self.dense(cls)  # fully-connected layer: FCLayer
if labels is not None: # loss
logits_sigmoid = self.sigmoid(logits)
loss = self.loss_bce(logits_sigmoid.view(-1), labels.view(-1))
return loss, logits
else:
logits = self.sigmoid(logits)
return logits
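# Shape contract of TCGraph.forward (for reference): input_ids, attention_mask
# and token_type_ids are int64 tensors of shape [batch, seq_len]; without
# labels it returns sigmoid probabilities of shape [batch, num_labels], and
# with labels it returns (loss, raw logits).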
def save_json(lines, path: str, encoding: str = "utf-8", indent: int = 4):
    """
    Write a JSON-serializable object to file
    Args:
        lines: object which needs saving, e.g. list or dict
        path: path of the save file, such as "json.txt"
        encoding: type of encoding, such as "utf-8", "gbk"
        indent: indentation width of the saved json, e.g. 4
    """
    with open(path, "w", encoding=encoding) as fj:
        fj.write(json.dumps(lines, ensure_ascii=False, indent=indent))
def t11_pytorch_model_to_onnx():
""" pytorch 模型 转 onnx 格式 """
model_save_path = "model_save_path"
num_labels = 7
path_onnx = os.path.join(model_save_path, "onnx", "tc_model.onnx")
path_onnx_dir = os.path.split(path_onnx)[0]
if not os.path.exists(path_onnx_dir):
os.makedirs(path_onnx_dir)
model_config["pretrained_model_name_or_path"] = pretrained_model_name_or_path
model_config["path_onnx"] = path_onnx
model_config["num_labels"] = num_labels
tokenizer = BertTokenizer.from_pretrained(pretrained_model_name_or_path)
tc_config = Namespace(**model_config)
tc_model = TCGraph(graph_config=tc_config, tokenizer=tokenizer)
device = "cuda:{}".format(tc_config.CUDA_VISIBLE_DEVICES) if (torch.cuda.is_available() \
and tc_config.is_cuda and tc_config.CUDA_VISIBLE_DEVICES != "-1") else "cpu"
text = "macropodus"
tokens = tokenizer.encode_plus(text, max_length=128, truncation=True)
batch_data = {name: np.atleast_2d(value).astype(np.int64) for name, value in tokens.items()}
tc_model.to(device)
tc_model.eval()
with torch.no_grad():
inputs = {"input_ids": torch.tensor(batch_data.get("input_ids")).to(device),
"attention_mask": torch.tensor(batch_data.get("attention_mask")).to(device),
"token_type_ids": torch.tensor(batch_data.get("token_type_ids")).to(device),
}
output = tc_model(**inputs)
print("\npytorch-model-predict:")
print(output.detach().cpu().numpy().tolist())
input_names = ["input_ids", "attention_mask", "token_type_ids"]
output_names = ["outputs"]
torch.onnx.export(model=tc_model, args=(inputs["input_ids"], inputs["attention_mask"], inputs["token_type_ids"]),
f=path_onnx,
input_names=input_names,
                      output_names=output_names,  # be careful: these names must stay consistent
opset_version=10, # 9, 10, 11, 12
do_constant_folding=True,
use_external_data_format=True,
dynamic_axes={
"input_ids": {0: "batch", 1: "sequence"},
"attention_mask": {0: "batch", 1: "sequence"},
"token_type_ids": {0: "batch", 1: "sequence"},
output_names[0]: {0: "batch"}
}
)
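# Optional sanity check (sketch): validate the exported graph before the
# onnx->tensorflow step, e.g.
#   import onnx
#   onnx.checker.check_model(onnx.load(model_config["path_onnx"]))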
def t111_tet_onnx():
""" 测试onnx模型 """
from onnxruntime import ExecutionMode, InferenceSession, SessionOptions
from transformers import BertTokenizer
import numpy as np
pretrained_model_name_or_path = model_config["pretrained_model_name_or_path"]
path_onnx = model_config["path_onnx"]
print(path_onnx)
# Create the tokenizer, InferenceSession
tokenizer = BertTokenizer.from_pretrained(pretrained_model_name_or_path)
options = SessionOptions()
options.intra_op_num_threads = 1
options.execution_mode = ExecutionMode.ORT_SEQUENTIAL
sess = InferenceSession(path_onnx, options,
providers=['CPUExecutionProvider'], # ['CUDAExecutionProvider'], #
)
text = "macropodus"
tokens = tokenizer.encode_plus(text, max_length=128, truncation=True)
tokens = {name: np.atleast_2d(value).astype(np.int64) for name, value in tokens.items()}
output = sess.run(None, tokens)
print("\nonnx-model-predict:")
print(output)
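# Optional (sketch): the ONNX output should match the PyTorch output up to
# float precision; a comparison such as
#   np.allclose(output_onnx[0], output_pytorch, atol=1e-4)
# (with hypothetical names for the two results) can catch export errors early.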
def t12_onnx_to_tensorflow():
""" onnx模型 转 tensorflow """
from onnx_tf.backend import prepare
import onnx
model_save_path = model_config["model_save_path"]
path_tensorflow = os.path.join(model_save_path, "tensorflow")
path_onnx = model_config["path_onnx"]
model_config["path_tensorflow"] = path_tensorflow
model_onnx = onnx.load(path_onnx)
tf_rep = prepare(model_onnx, device="CPU")
tf_rep.export_graph(path_tensorflow)
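# Alternative loading (sketch): the exported SavedModel can also be loaded
# without Keras, which is closer to what tf-serving does, e.g.
#   import tensorflow as tf
#   loaded = tf.saved_model.load(model_config["path_tensorflow"])
#   infer = loaded.signatures["serving_default"]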
def t121_tet_tensorflow():
"""加载tensorflow模型测试"""
from transformers import BertTokenizerFast
import numpy as np
import keras
pretrained_model_name_or_path = model_config["pretrained_model_name_or_path"]
path_tensorflow = model_config["path_tensorflow"]
print("\ntensorflow_model_predict: ")
new_model = keras.models.load_model(path_tensorflow)
print(list(new_model.signatures.keys()))
infer = new_model.signatures["serving_default"]
print(infer.structured_outputs)
text = "macropodus"
tokenizer = BertTokenizerFast.from_pretrained(pretrained_model_name_or_path)
tokens = tokenizer.encode_plus(text, max_length=128, truncation=True)
tokens = {name: np.atleast_2d(value).astype(np.int64) for name, value in tokens.items()}
output = new_model(**tokens)
print(output)
model_config = {
    "path_finetune": "",
    "path_onnx": "",
    "path_tensorflow": "",
    "CUDA_VISIBLE_DEVICES": "0",  # device, GPU/CPU, "-1"/"0"/"1"/"2"...
    "USE_TORCH": "1",  # make transformers use torch, since this script is written in torch
    "output_hidden_states": None,  # e.g. [6, 11]; which transformer hidden layers to take outputs from, list
    "pretrained_model_name_or_path": "",  # path to the pretrained model
    "model_save_path": "model_save_path",  # directory where the trained model is saved
    "config_name": "tc.config",  # file name of the saved hyperparameters
    "model_name": "tc.model",  # file name of the saved full model
    "path_train": None,  # path of the training corpus, required, string
    "path_dev": None,  # path of the validation corpus, required, may be None
    "path_tet": None,  # path of the test corpus, required, may be None
    "task_type": "TC-MULTI-CLASS",
    # task type, updated automatically from the data: "TC-MULTI-CLASS", "TC-MULTI-LABEL"; TC is short for text-classification
    "model_type": "BERT",  # pretrained model type, e.g. bert, roberta, ernie
    "loss_type": "BCE",  # loss function type,
    # multi-class: one of None(BCE), BCE, BCE_LOGITS, MSE, FOCAL_LOSS, DICE_LOSS, LABEL_SMOOTH, MIX;
    # multi-label: SOFT_MARGIN_LOSS, PRIOR_MARGIN_LOSS, FOCAL_LOSS, CIRCLE_LOSS, DICE_LOSS, MIX, etc.
    "batch_size": 32,  # batch size
    "num_labels": 0,  # number of labels, updated automatically
    "max_len": 0,  # max text length; -1 picks the length covering 95% of the data, 0 uses the longest text as max_len
    "epochs": 21,  # number of training epochs
    "lr": 1e-5,  # learning rate
    "grad_accum_steps": 1,  # gradient accumulation steps
    "max_grad_norm": 1.0,  # max norm for gradient clipping
    "weight_decay": 5e-4,  # L2 weight decay of model parameters
    "dropout_rate": 0.1,  # dropout probability
    "adam_eps": 1e-8,  # epsilon of the Adam optimizer
    "seed": 2021,  # random seed, 3407, 2021
    "stop_epochs": 4,  # early-stopping patience in epochs
    "evaluate_steps": 320,  # evaluate every N steps
    "save_steps": 320,  # save a checkpoint every N steps
    "warmup_steps": -1,  # warmup steps
    "ignore_index": 0,  # index to ignore
    "max_steps": -1,  # max steps; -1 means run all epochs
    "is_train": True,  # True for training, False for prediction
    "is_cuda": True,  # True for GPU, False for CPU
    "is_adv": False,  # whether to use adversarial training (FGM by default)
    "is_dropout": True,  # whether the final output layers use dropout
    "is_active": True,  # whether the final output layers use an activation, e.g. FCLayer/SpanLayer
    "active_type": "RELU",  # activation of the final output layers: RELU/SIGMOID/TANH/MISH/SWISH/GELU
    "save_best_mertics_key": ["micro_avg", "f1-score"],
    # metric used to select the best checkpoint; first element: [micro_avg, macro_avg, weighted_avg],
    # second element: [precision, recall, f1-score]
    "multi_label_threshold": 0.5,  # used for multi-label classification; scores above this threshold count as positive
    "xy_keys": ["text", "label"],  # keys of text/label in the data file
    "label_sep": "|myz|",  # separator for multi-label data, used in multi-label classification corpora
    "len_rate": 1,  # fraction of the training/validation data used, float, closed interval [0, 1]
    "adv_emb_name": "word_embeddings.",  # replace with the embedding parameter name of your model, e.g. model.embeddings.word_embeddings.weight
    "adv_eps": 1.0,  # epsilon of the gradient perturbation
    "ADDITIONAL_SPECIAL_TOKENS": ["[macropodus]", "[macadam]"],  # extra special tokens
    "prior": None,  # class prior distribution, set automatically; a list with num_labels elements (JSON cannot store np.array)
    "l2i": None,
    "i2l": None,
    "len_corpus": None,  # size of the training corpus
    "prior_count": None,  # sample count per class
}
if __name__ == '__main__':
    pretrained_model_name_or_path = "E:/DATA/bert-model/00_pytorch/ernie-tiny"  # a bert-like model
    model_config["pretrained_model_name_or_path"] = pretrained_model_name_or_path
    ### convert the pytorch model to onnx format
    t11_pytorch_model_to_onnx()
    ### test the onnx model
    t111_tet_onnx()
    ### convert the onnx model to tensorflow
    t12_onnx_to_tensorflow()
    ### test the saved_model
    t121_tet_tensorflow()
    save_json(model_config, os.path.join(model_config["model_save_path"], "model_config.json"))