Documentación API AIMET (9)
1.2.8 Definición de configuración
Consulte aimet_common.defs.CostMetric
Enumeración de métricas para medir el costo del modelo/capa
- mac = 1: modelado de costos según los requisitos computacionales (operaciones MAC)
- memory = 2: modelado de costos según los requisitos de espacio (memoria)
Consulte aimet_common.defs.CompressionScheme
Enumeración de esquemas de compresión soportados por AIMET
- channel_pruning = 3: poda de canales
- spatial_svd = 2: descomposición espacial en valores singulares
- weight_svd = 1: descomposición de pesos en valores singulares
Clase aimet_torch.defs.ModuleCompRatioPair(módulo, comp_ratio)
Un par de módulos de antorcha y una relación de compresión.
Variables
- module: un módulo de tipo torch.nn.Module
- comp_ratio: relación de compresión. La relación de compresión es el cociente entre el costo del modelo comprimido y el costo del modelo original.
1.2.9 Ejemplos de código
Importaciones requeridas
import os
from decimal import Decimal
import torch
# Compression-related imports
from aimet_common.defs import CostMetric, CompressionScheme, GreedySelectionParameters, RankSelectScheme
from aimet_torch.defs import WeightSvdParameters, SpatialSvdParameters, ChannelPruningParameters, \
ModuleCompRatioPair
from aimet_torch.compress import ModelCompressor
Función de evaluación
def evaluate_model(model: torch.nn.Module, eval_iterations: int, use_cuda: bool = False) -> float:
    """
    Stand-in for a user-defined model evaluation function.

    AIMET requires exactly this signature, so wrap your own evaluation
    routine if it differs. Honoring ``eval_iterations`` is optional, but
    running a full validation epoch on every call makes AIMET compression
    noticeably slower.

    :param model: Model to evaluate
    :param eval_iterations: Number of iterations to use for evaluation.
                            None for entire epoch.
    :param use_cuda: If true, evaluate using gpu acceleration
    :return: single float number (accuracy) representing model's performance
    """
    # Placeholder accuracy — a real implementation would run inference here.
    dummy_accuracy = 0.5
    return dummy_accuracy
Compresión usando SVD espacial en modo automático, multiplicidad = 8 para redondeo de rango
def spatial_svd_auto_mode():
    """Compress a trained MNIST model with Spatial SVD in auto mode,
    using multiplicity=8 for rank rounding."""
    # Load the trained MNIST model from disk.
    model = torch.load(os.path.join('../', 'data', 'mnist_trained_on_GPU.pth'))

    # Greedy ratio selection: target 80% of the original cost, evaluating
    # 10 candidate ratios per layer.
    selection = GreedySelectionParameters(target_comp_ratio=Decimal(0.8),
                                          num_comp_ratio_candidates=10)
    # Leave the first conv layer uncompressed.
    auto_cfg = SpatialSvdParameters.AutoModeParams(selection,
                                                   modules_to_ignore=[model.conv1])
    svd_params = SpatialSvdParameters(mode=SpatialSvdParameters.Mode.auto,
                                      params=auto_cfg, multiplicity=8)

    # A single call performs the whole compression.
    compressed_model, stats = ModelCompressor.compress_model(
        model,
        eval_callback=evaluate_model,
        eval_iterations=1000,
        input_shape=(1, 1, 28, 28),
        compress_scheme=CompressionScheme.spatial_svd,
        cost_metric=CostMetric.mac,
        parameters=svd_params)

    print(compressed_model)
    print(stats)  # Stats object can be pretty-printed easily
Compresión usando Spatial SVD en modo manual
def spatial_svd_manual_mode():
    """Compress a trained MNIST model with Spatial SVD in manual mode."""
    # Load the trained MNIST model from disk.
    model = torch.load(os.path.join('../', 'data', 'mnist_trained_on_GPU.pth'))

    # Explicit per-layer compression ratios instead of automatic selection.
    layer_ratios = [ModuleCompRatioPair(model.conv1, 0.5),
                    ModuleCompRatioPair(model.conv2, 0.4)]
    svd_params = SpatialSvdParameters(
        mode=SpatialSvdParameters.Mode.manual,
        params=SpatialSvdParameters.ManualModeParams(layer_ratios))

    # A single call performs the whole compression.
    compressed_model, stats = ModelCompressor.compress_model(
        model,
        eval_callback=evaluate_model,
        eval_iterations=1000,
        input_shape=(1, 1, 28, 28),
        compress_scheme=CompressionScheme.spatial_svd,
        cost_metric=CostMetric.mac,
        parameters=svd_params)

    print(compressed_model)
    print(stats)  # Stats object can be pretty-printed easily
Compresión usando SVD ponderado en modo automático
def weight_svd_auto_mode():
    """Compress a trained MNIST model with Weight SVD in auto mode."""
    # Load the trained MNIST model from disk.
    model = torch.load(os.path.join('../', 'data', 'mnist_trained_on_GPU.pth'))

    # Greedy search over 10 candidate ratios targeting 80% of original cost.
    selection = GreedySelectionParameters(target_comp_ratio=Decimal(0.8),
                                          num_comp_ratio_candidates=10)
    # Leave the first conv layer uncompressed.
    auto_cfg = WeightSvdParameters.AutoModeParams(
        rank_select_scheme=RankSelectScheme.greedy,
        select_params=selection,
        modules_to_ignore=[model.conv1])
    svd_params = WeightSvdParameters(mode=WeightSvdParameters.Mode.auto,
                                     params=auto_cfg)

    # A single call performs the whole compression.
    compressed_model, stats = ModelCompressor.compress_model(
        model,
        eval_callback=evaluate_model,
        eval_iterations=1000,
        input_shape=(1, 1, 28, 28),
        compress_scheme=CompressionScheme.weight_svd,
        cost_metric=CostMetric.mac,
        parameters=svd_params)

    print(compressed_model)
    print(stats)  # Stats object can be pretty-printed easily
Utilice SVD ponderado para compresión en modo manual, multiplicidad = 8 para redondeo de rango
def weight_svd_manual_mode():
    """Compress a trained MNIST model with Weight SVD in manual mode,
    using multiplicity=8 for rank rounding."""
    # Load the trained MNIST model from disk.
    model = torch.load(os.path.join('../', 'data', 'mnist_trained_on_GPU.pth'))

    # Explicit per-layer compression ratios instead of automatic selection.
    layer_ratios = [ModuleCompRatioPair(model.conv1, 0.5),
                    ModuleCompRatioPair(model.conv2, 0.4)]
    svd_params = WeightSvdParameters(
        mode=WeightSvdParameters.Mode.manual,
        params=WeightSvdParameters.ManualModeParams(layer_ratios),
        multiplicity=8)

    # A single call performs the whole compression.
    compressed_model, stats = ModelCompressor.compress_model(
        model,
        eval_callback=evaluate_model,
        eval_iterations=1000,
        input_shape=(1, 1, 28, 28),
        compress_scheme=CompressionScheme.weight_svd,
        cost_metric=CostMetric.mac,
        parameters=svd_params)

    print(compressed_model)
    print(stats)  # Stats object can be pretty-printed easily
Compresión mediante recorte de canales en modo automático
def channel_pruning_auto_mode():
    """Compress a trained MNIST model with channel pruning in auto mode."""
    # Load the trained MNIST model from disk.
    model = torch.load(os.path.join('../', 'data', 'mnist_trained_on_GPU.pth'))

    # Greedy search over 10 candidate ratios targeting 80% of original cost.
    selection = GreedySelectionParameters(target_comp_ratio=Decimal(0.8),
                                          num_comp_ratio_candidates=10)
    # Leave the first conv layer uncompressed.
    auto_cfg = ChannelPruningParameters.AutoModeParams(selection,
                                                       modules_to_ignore=[model.conv1])

    # NOTE(review): `mnist_torch_model` is not imported anywhere in this file —
    # presumably a project-local example module; confirm before running.
    data_loader = mnist_torch_model.DataLoaderMnist(cuda=True, seed=1, shuffle=True)
    pruning_params = ChannelPruningParameters(
        data_loader=data_loader.train_loader,
        num_reconstruction_samples=500,
        allow_custom_downsample_ops=True,
        mode=ChannelPruningParameters.Mode.auto,
        params=auto_cfg)

    # A single call performs the whole compression.
    compressed_model, stats = ModelCompressor.compress_model(
        model,
        eval_callback=evaluate_model,
        eval_iterations=1000,
        input_shape=(1, 1, 28, 28),
        compress_scheme=CompressionScheme.channel_pruning,
        cost_metric=CostMetric.mac,
        parameters=pruning_params)

    print(compressed_model)
    print(stats)  # Stats object can be pretty-printed easily
Compresión usando recorte de canal en modo manual
def channel_pruning_manual_mode():
    """Compress a trained MNIST model with channel pruning in manual mode."""
    # Load the trained MNIST model from disk.
    model = torch.load(os.path.join('../', 'data', 'mnist_trained_on_GPU.pth'))

    # Explicit compression ratio for conv2 only.
    layer_ratios = [ModuleCompRatioPair(model.conv2, 0.4)]

    # NOTE(review): `mnist_torch_model` is not imported anywhere in this file —
    # presumably a project-local example module; confirm before running.
    data_loader = mnist_torch_model.DataLoaderMnist(cuda=True, seed=1, shuffle=True)
    pruning_params = ChannelPruningParameters(
        data_loader=data_loader.train_loader,
        num_reconstruction_samples=500,
        allow_custom_downsample_ops=True,
        mode=ChannelPruningParameters.Mode.manual,
        params=ChannelPruningParameters.ManualModeParams(layer_ratios))

    # A single call performs the whole compression.
    compressed_model, stats = ModelCompressor.compress_model(
        model,
        eval_callback=evaluate_model,
        eval_iterations=1000,
        input_shape=(1, 1, 28, 28),
        compress_scheme=CompressionScheme.channel_pruning,
        cost_metric=CostMetric.mac,
        parameters=pruning_params)

    print(compressed_model)
    print(stats)  # Stats object can be pretty-printed easily
Ejemplo de objeto de entrenamiento
class Trainer:
    """ Example trainer class """
    def __init__(self):
        # Record of layers that have been fine-tuned so far.
        self._layer_db = []
    def train_model(self, model, layer, train_flag=True):
        """
        Trains a model
        :param model: Model to be trained
        :param layer: layer which has to be fine tuned
        :param train_flag: Default: True. If true the model gets trained
        :return:
        """
        if train_flag:
            # NOTE(review): `mnist_torch_model` is not imported anywhere in this
            # file — presumably a project-local example module; confirm before use.
            mnist_torch_model.train(model, epochs=1, use_cuda=True, batch_size=50, batch_callback=None)
        # Track the layer regardless of whether training actually ran.
        self._layer_db.append(layer)
Utilice Spatial SVD en modo automático para compresión y ajuste fino capa por capa
def spatial_svd_auto_mode_with_layerwise_finetuning():
    """Compress a trained MNIST model with Spatial SVD in auto mode,
    fine-tuning layer by layer via a Trainer instance."""
    # Load the trained MNIST model from disk.
    model = torch.load(os.path.join('../', 'data', 'mnist_trained_on_GPU.pth'))

    # Greedy search over 10 candidate ratios targeting 80% of original cost.
    selection = GreedySelectionParameters(target_comp_ratio=Decimal(0.8),
                                          num_comp_ratio_candidates=10)
    # Leave the first conv layer uncompressed.
    auto_cfg = SpatialSvdParameters.AutoModeParams(selection,
                                                   modules_to_ignore=[model.conv1])
    svd_params = SpatialSvdParameters(mode=SpatialSvdParameters.Mode.auto,
                                      params=auto_cfg)

    # Passing a trainer enables layer-wise fine-tuning during compression.
    compressed_model, stats = ModelCompressor.compress_model(
        model,
        eval_callback=evaluate_model,
        eval_iterations=1000,
        input_shape=(1, 1, 28, 28),
        compress_scheme=CompressionScheme.spatial_svd,
        cost_metric=CostMetric.mac,
        parameters=svd_params, trainer=Trainer())

    print(compressed_model)
    print(stats)  # Stats object can be pretty-printed easily