tensorflow提供了一个优化工具tensorflow_model_optimization,专门针对keras进行模型优化
主要可以进行剪枝、量化和权重聚类
这里主要使用前面两个
数据集使用以前的文章:mnn模型从训练-转换-预测
具体训练代码如下
注意:使用之前需要手动安装tensorflow_model_optimization,使用pip install tensorflow_model_optimization就行
import tempfile
import os
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
import tensorflow_model_optimization as tfmot
# --- Training configuration ---
batch_size = 2  # very small batch; chosen by the original author
img_height = 180  # target image height fed to the network
img_width = 180  # target image width fed to the network
num_classes = 5  # flower_photos contains 5 flower categories
epochs = 50  # epochs for the baseline training run
validation_split=0.2  # fraction of images held out for validation
data_dir='flower_photos'  # dataset root: one sub-directory per class
# --- Dataset preparation ---
# Both splits share the same directory, split ratio, seed and image size;
# only the `subset` argument differs, so factor the common kwargs out.
_split_kwargs = dict(
    validation_split=validation_split,
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir, subset="training", **_split_kwargs)
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir, subset="validation", **_split_kwargs)

# Cache decoded images and overlap preprocessing with training.
AUTOTUNE = tf.data.experimental.AUTOTUNE
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)
# --- Baseline model: build, train, and export ---
# FIX: the input/reshape shapes previously used img_height in BOTH spatial
# slots ((img_height, img_height, 3)); the correct order is
# (height, width, channels). It only worked because the images are square.
model = keras.Sequential([
    keras.layers.InputLayer(input_shape=(img_height, img_width, 3)),
    # Reshaping to the identical shape is a no-op; kept so the layer list
    # (and any saved-model layer indices) matches the original architecture.
    keras.layers.Reshape(target_shape=(img_height, img_width, 3)),
    layers.Conv2D(16, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(32, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Dropout(0.2),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes)  # logits; loss below uses from_logits=True
])
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
print(model.summary())
model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs
)
# Save the Keras model (no optimizer state) and export a float TFLite model.
tf.keras.models.save_model(model, 'baseline_model.h5', include_optimizer=False)
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
# FIX: use a context manager instead of a bare open().write() so the file
# handle is closed deterministically.
with open("baseline_model.tflite", "wb") as f:
    f.write(tflite_model)
# --- Pruning (magnitude-based) ---
print("start pruning")
prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude
num_images = 3670  # total images in flower_photos; TODO: derive from dataset cardinality
pruning_epochs = 5  # length of the pruning fine-tune run (see fit() below)
# FIX: end_step was previously ceil(num_images / batch_size) * epochs with
# epochs=50, but the pruning fit() below only runs 5 epochs — so the
# PolynomialDecay schedule never got anywhere near final_sparsity=0.80.
# Compute the step budget from the ACTUAL fine-tune length, and only count
# the training split (the validation images never produce training steps).
steps_per_epoch = np.ceil(num_images * (1 - validation_split) / batch_size).astype(np.int32)
end_step = steps_per_epoch * pruning_epochs
pruning_params = {
    'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.50,
                                                             final_sparsity=0.80,
                                                             begin_step=0,
                                                             end_step=end_step)
}
model_for_pruning = prune_low_magnitude(model, **pruning_params)
# Re-compile: wrapping the layers produces a new model object.
model_for_pruning.compile(optimizer='adam',
                          loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                          metrics=['accuracy'])
model_for_pruning.summary()
logdir = tempfile.mkdtemp()
callbacks = [
    tfmot.sparsity.keras.UpdatePruningStep(),       # required: advances the pruning step counter
    tfmot.sparsity.keras.PruningSummaries(log_dir=logdir),
]
# FIX: dropped batch_size= — Keras rejects/ignores it when the input is a
# tf.data.Dataset (the dataset is already batched).
model_for_pruning.fit(train_ds,
                      epochs=pruning_epochs, validation_data=val_ds,
                      callbacks=callbacks)
# Remove the pruning wrappers so the exported model is a plain Keras model.
model_for_export = tfmot.sparsity.keras.strip_pruning(model_for_pruning)
# --- Quantization-aware training on the stripped (pruned) model ---
print("start quantize")
quantize_model = tfmot.quantization.keras.quantize_model
q_aware_model = quantize_model(model_for_export)
# Re-compile: quantize_model wraps every layer and returns a new model.
q_aware_model.compile(optimizer='adam',
                      loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                      metrics=['accuracy'])
q_aware_model.summary()
# FIX: dropped batch_size= — invalid/ignored when fitting on a
# tf.data.Dataset that is already batched.
q_aware_model.fit(train_ds,
                  epochs=5, validation_data=val_ds)
# Convert with Optimize.DEFAULT so the QAT fake-quant ops become a real
# int8 TFLite model.
converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
quantized_and_pruned_tflite_model = converter.convert()
quantized_and_pruned_tflite_file = 'pruned_and_quantized.tflite'
with open(quantized_and_pruned_tflite_file, 'wb') as f:
    f.write(quantized_and_pruned_tflite_model)
运行结束后,我们看一下模型文件:
可以看到文件确实压缩了不少了,4倍左右
接下来试一下,推理速度
优化过的模型文件推理如下
import tensorflow as tf
import cv2
import numpy as np
import time

# --- Inference benchmark: pruned + quantized TFLite model ---
image = cv2.imread('397.jpg')
# FIX: cv2.imread returns BGR, but image_dataset_from_directory trains on
# RGB — convert so channel order matches training.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = cv2.resize(image, (180, 180))
image = image[np.newaxis, :, :, :].astype(np.float32)
print(image.shape)

interpreter = tf.lite.Interpreter(model_path='pruned_and_quantized.tflite')
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# FIX: the timer previously started BEFORE the image load, interpreter
# construction and allocate_tensors(), so all one-time setup cost was
# averaged into the "inference time" — that skews the comparison between
# models. Warm up once, then time only the invoke() loop.
interpreter.set_tensor(input_details[0]['index'], image)
interpreter.invoke()  # warm-up: first invoke pays one-time init costs

runs = 10
start = time.time()
for _ in range(runs):
    interpreter.set_tensor(input_details[0]['index'], image)
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details[0]['index'])
print(output_data)
print('avg infer time is %.6f s' % ((time.time() - start) / runs))
运行结果:
原始模型推理:
import tensorflow as tf
import cv2
import numpy as np
import time

# --- Inference benchmark: original (float) TFLite model ---
image = cv2.imread('397.jpg')
# FIX: cv2.imread returns BGR, but image_dataset_from_directory trains on
# RGB — convert so channel order matches training.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = cv2.resize(image, (180, 180))
image = image[np.newaxis, :, :, :].astype(np.float32)
print(image.shape)

interpreter = tf.lite.Interpreter(model_path='baseline_model.tflite')
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# FIX: start timing only after model load / allocation, and after a warm-up
# invoke, so the two scripts measure pure per-inference cost on equal terms.
interpreter.set_tensor(input_details[0]['index'], image)
interpreter.invoke()  # warm-up: first invoke pays one-time init costs

runs = 10
start = time.time()
for _ in range(runs):
    interpreter.set_tensor(input_details[0]['index'], image)
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details[0]['index'])
print(output_data)
print('avg infer time is %.6f s' % ((time.time() - start) / runs))
运行结果:
出乎意料的是,模型文件虽然变小了,但是速度居然还慢了,还慢了几十倍
这里使用的是ubuntu,也试过win10,在win10上推理差距更大,慢更多倍。(注:TFLite的量化kernel主要针对ARM移动端优化,在x86桌面CPU上量化模型反而可能比浮点模型慢,这是已知现象。)
反正就是一句话,优化后居然慢了。。。。
只是没有用tflite官方的benchmark工具(benchmark_model),只能使用这样的方式进行测试,而且计时里包含了模型加载时间,所以测出的时间也许不靠谱