AIMET API documentation (10)
1.3 PyTorch model visualization API for compression
1.3.1 Top-level API compression
class aimet_torch.visualize_serialized_data.VisualizeCompression(visualization_url)
Updates the Bokeh server session document and publishes graphs/tables to the server with the session ID "compression".
VisualizeCompression.display_eval_scores(saved_eval_scores_dict_path)
Publishes the evaluation scores table to the server.
Parameters
saved_eval_scores_dict_path – file path to the evaluation scores for each layer
Returns
None
VisualizeCompression.display_comp_ratio_plot(comp_ratio_list_path)
Publishes the optimal compression ratios to the server.
Parameters
comp_ratio_list_path – path to the pkl file containing the compression ratios of each layer
Returns
None
1.3.2 Code examples
Required imports
from decimal import Decimal
import torch
from torchvision import models
import aimet_common.defs
import aimet_torch.defs
import aimet_torch.utils
from aimet_common.utils import start_bokeh_server_session
from aimet_torch.compress import ModelCompressor
from aimet_torch.visualize_serialized_data import VisualizeCompression
Model compression with visualization parameters
def model_compression_with_visualization(eval_func):
    """
    Code example for compressing a model and visualizing the saved results.

    Starts a Bokeh server session, compresses a pretrained ResNet-18 with Spatial SVD in auto mode,
    and then publishes the evaluation scores and compression ratios from their pickle files.

    :param eval_func: evaluation callback handed to ModelCompressor.compress_model
    """
    # One definition for the eval-scores pickle path: it is passed to the greedy selection
    # parameters and is also the file displayed after compression.
    eval_scores_path = '../data/resnet18_eval_scores.pkl'
    # NOTE(review): this path starts with './data' while the eval scores use '../data' - confirm.
    comp_ratios_file_path = './data/greedy_selection_comp_ratios_list.pkl'

    process = None
    try:
        visualization_url, process = start_bokeh_server_session()

        input_shape = (1, 3, 224, 224)
        model = models.resnet18(pretrained=True).to(torch.device('cuda'))

        modules_to_ignore = [model.conv1]

        # A string literal keeps the ratio exactly 0.65; Decimal(0.65) would carry the
        # binary rounding error of the float.
        greedy_params = aimet_common.defs.GreedySelectionParameters(target_comp_ratio=Decimal('0.65'),
                                                                    num_comp_ratio_candidates=10,
                                                                    saved_eval_scores_dict=eval_scores_path)

        auto_params = aimet_torch.defs.SpatialSvdParameters.AutoModeParams(greedy_params,
                                                                           modules_to_ignore=modules_to_ignore)

        params = aimet_torch.defs.SpatialSvdParameters(aimet_torch.defs.SpatialSvdParameters.Mode.auto, auto_params,
                                                       multiplicity=8)

        # If no visualization URL is provided, during model compression execution no visualizations will be published.
        ModelCompressor.compress_model(model=model, eval_callback=eval_func, eval_iterations=5,
                                       input_shape=input_shape,
                                       compress_scheme=aimet_common.defs.CompressionScheme.spatial_svd,
                                       cost_metric=aimet_common.defs.CostMetric.mac, parameters=params,
                                       visualization_url=None)

        # A user can visualize the eval scores dictionary and optimal compression ratios by executing the following code.
        compression_visualizations = VisualizeCompression(visualization_url)
        compression_visualizations.display_eval_scores(eval_scores_path)
        compression_visualizations.display_comp_ratio_plot(comp_ratios_file_path)
    finally:
        # process is still None when the Bokeh server session could not be started
        if process is not None:
            process.terminate()
            process.join()
1.4 PyTorch model visualization API for quantization
1.4.1 Top-level API for quantization
aimet_torch.visualize_model.visualize_relative_weight_ranges_to_identify_problematic_layers(model, results_dir, selected_layers=None)
For each selected layer, publishes a line plot showing the weight range for each layer, summary statistics of the relative weight range, and a histogram showing the weight range of the output channels relative to the minimum weight range.
Parameters
-
model (Module) – PyTorch model
-
results_dir (str) – directory where the Bokeh plots are saved
-
selected_layers (Optional[List[~T]]) – a list of layers the user can choose to have visualized. If selected_layers is None, all linear and convolutional layers will be visualized.
Return type
List[Figure]
Returns
A list of Bokeh plots
aimet_torch.visualize_model.visualize_weight_ranges(model, results_dir, selected_layers=None)
Visualizes the weight ranges of each layer with a scatter plot showing the mean plotted against the standard deviation and the minimum plotted against the maximum, and a line plot showing the minimum, maximum, and mean values for each output channel.
Parameters
-
model (Module) – PyTorch model
-
selected_layers (Optional[List[~T]]) – a list of layers the user can choose to have visualized. If selected_layers is None, all linear and convolutional layers will be visualized.
-
results_dir (str) – directory where the Bokeh plots are saved
Return type
List[Figure]
Returns
A list of Bokeh plots
aimet_torch.visualize_model.visualize_changes_after_optimization(old_model ,new_model , results_dir , selected_layers=None )
Visualize changes before and after applying certain optimizations to the model.
Parameters
-
old_model (Module) – PyTorch model before optimization
-
new_model (Module) – PyTorch model after optimization
-
results_dir (str) – directory where the Bokeh plots are saved
-
selected_layers (Optional[List[~T]]) – a list of layers the user can choose to have visualized. If selected_layers is None, all linear and convolutional layers will be visualized.
Return type
List[Figure]
Returns
A list of Bokeh plots
Code examples
Required imports
import copy
import torch
from torchvision import models
from aimet_torch.cross_layer_equalization import equalize_model
from aimet_torch import batch_norm_fold
from aimet_torch import visualize_model
Comparing the model before and after optimization
def visualize_changes_in_model_after_and_before_cle():
    """
    Code example for visualizing a model before and after Cross Layer Equalization optimization
    """
    # Input shape shared by batch norm folding and cross layer equalization
    input_shape = (1, 3, 224, 224)

    model = models.resnet18(pretrained=True).to(torch.device('cpu'))
    model = model.eval()
    # Create a copy of the model to visualize the before and after optimization changes
    model_copy = copy.deepcopy(model)

    # Specify a folder in which the plots will be saved
    results_dir = './visualization'

    # The copy only gets its batch norms folded; the original model is equalized
    batch_norm_fold.fold_all_batch_norms(model_copy, input_shape)
    equalize_model(model, input_shape)

    # model_copy is passed as the old model and the equalized model as the new model
    visualize_model.visualize_changes_after_optimization(model_copy, model, results_dir)
Visualize weight ranges in the model
def visualize_weight_ranges_model():
    """
    Code example for visualizing the weight ranges of a model after batch norm folding
    """
    model = models.resnet18(pretrained=True).to(torch.device('cpu'))
    model = model.eval()

    # Specify a folder in which the plots will be saved
    results_dir = './visualization'

    batch_norm_fold.fold_all_batch_norms(model, (1, 3, 224, 224))

    # Usually it is observed that if we do BatchNorm fold the layer's weight range increases.
    # This helps in visualizing layer's weight
    visualize_model.visualize_weight_ranges(model, results_dir)
Visualize relative weight ranges in the model
def visualize_relative_weight_ranges_model():
    """
    Code example for visualizing the relative weight ranges of a model after batch norm folding
    """
    model = models.resnet18(pretrained=True).to(torch.device('cpu'))
    model = model.eval()

    # Specify a folder in which the plots will be saved
    results_dir = './visualization'

    batch_norm_fold.fold_all_batch_norms(model, (1, 3, 224, 224))

    # Usually it is observed that if we do BatchNorm fold the layer's weight range increases.
    # This helps in finding layers which can be equalized to get better performance on hardware
    visualize_model.visualize_relative_weight_ranges_to_identify_problematic_layers(model, results_dir)
1.4 PyTorch layer output generation API
This API captures and saves the model's intermediate layer output. The model can be original (FP32) or Quantsim. Layer outputs are named according to PyTorch/ONNX/TorchScript models exported by the Quantsim export API. This allows layer output comparisons between the FP32 model, the quantized simulation model, and the actual quantized model on the target device to debug accuracy mismatch issues.
1.4.1 Top-level API
class aimet_torch.layer_output_utils.LayerOutputUtil(model, dir_path, naming_scheme=<NamingScheme.PYTORCH: 1>, dummy_input=None, onnx_export_args=None)
Implementation to capture and save the outputs of the intermediate layers of a model (fp32/quantsim).
Constructor of LayerOutputUtil.
Parameters
-
model (Module) – The model whose layer output is required.
-
dir_path (str) – The directory where the layer output will be saved.
-
naming_scheme (NamingScheme) – The naming scheme to follow when naming layer output. There are several options depending on the exported model (pytorch, onnx or torchscript). See the NamingScheme enumeration definition.
-
dummy_input (Union[Tensor, Tuple, List[~T], None]) – Dummy input for the model. Required if naming_scheme is "NamingScheme.ONNX" or "NamingScheme.TORCHSCRIPT".
-
onnx_export_args (Union[OnnxExportApiArgs, Dict[~KT, ~VT], None]) – Should be the same as the arguments passed to the Quantsim export API, so that the layer output names in the exported ONNX model are consistent with the generated layer output names. Required if naming_scheme is "NamingScheme.ONNX".
The following APIs can be used to generate layer output
LayerOutputUtil.generate_layer_outputs(input_batch )
This method captures the output of each layer of the model and saves the input and corresponding layer output to disk.
Parameters
input_batch (Union[Tensor, List[Tensor], Tuple[Tensor]]) – A batch of inputs for which we want to get the output of the layer.
Returns
None
1.4.2 Enumeration definition
Naming scheme enumeration
aimet_torch.layer_output_utils.NamingScheme
An enumeration of layer output naming schemes.
-
ONNX = 2
Names the outputs according to the exported ONNX model. Layer output names are usually numbers.
-
PYTORCH = 1
Names the outputs according to the exported PyTorch model. Layer names are used.
-
TORCHSCRIPT = 3
Names the outputs according to the exported TorchScript model. Layer output names are usually numbers.
1.4.3 Code examples
Imports
import torch
from torchvision import models
from aimet_torch.onnx_utils import OnnxExportApiArgs
from aimet_torch.model_preparer import prepare_model
from aimet_torch.quantsim import QuantizationSimModel
from aimet_torch.layer_output_utils import LayerOutputUtil, NamingScheme
Get original or QuantSim model
# Build the original (FP32) model: a ResNet-18 switched to eval mode and run through prepare_model
original_model = prepare_model(models.resnet18().eval())

# Dummy input used when obtaining the quantsim model
dummy_input = torch.rand(1, 3, 224, 224)
def forward_pass(model: torch.nn.Module, input_batch: torch.Tensor):
    """
    Run a single forward pass of the model in eval mode without tracking gradients.

    :param model: model to run; it is switched to eval mode
    :param input_batch: input passed to the model
    """
    model.eval()
    with torch.no_grad():
        # The output is discarded; only the forward pass itself is needed
        _ = model(input_batch)
# Create the quantization simulation model from the original model
quantsim = QuantizationSimModel(
    model=original_model,
    quant_scheme='tf_enhanced',
    dummy_input=dummy_input,
    rounding_mode='nearest',
    default_output_bw=8,
    default_param_bw=8,
    in_place=False,
)

# Compute encodings using forward_pass as the callback, with dummy_input as its argument
quantsim.compute_encodings(
    forward_pass_callback=forward_pass,
    forward_pass_callback_args=dummy_input,
)
Get preprocessed input
# Get the inputs, pre-processed in the same manner as the inputs used while computing quantsim encodings
# NOTE(review): get_pre_processed_inputs is not defined in this example - presumably a user-supplied helper
input_batches = get_pre_processed_inputs()
Generate layer output
# Generate layer-outputs
# dummy_input and onnx_export_args are supplied because the ONNX naming scheme is selected
layer_output_util = LayerOutputUtil(model=quantsim.model, dir_path='./layer_output_dump',
                                    naming_scheme=NamingScheme.ONNX, dummy_input=dummy_input,
                                    onnx_export_args=OnnxExportApiArgs())

# Capture and save the layer outputs for every input batch
for input_batch in input_batches:
    layer_output_util.generate_layer_outputs(input_batch)