Neural Network Acceleration Basics
Implementation process
instruction execution time
processor
Code for measuring the computational bottleneck
import torch
import torchvision
from tqdm import tqdm
# Throughput benchmark: run MobileNetV2 forward passes at two batch sizes
# (1 and 128) and let tqdm report the iteration rate of each run.

# Use the first GPU when one is present; otherwise fall back to the CPU so
# the script still runs (slowly) instead of failing on the first `.to()`.
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
_ON_CUDA = torch.device(DEVICE).type == "cuda"

# NOTE(review): recent torchvision releases prefer the `weights=` argument
# over `pretrained=True`; kept as-is for compatibility with older versions.
model = torchvision.models.mobilenet_v2(pretrained=True)
model = model.to(DEVICE)
# Inference benchmark: put layers such as dropout / batch norm into
# evaluation behaviour instead of the default training behaviour.
model.eval()

with torch.no_grad():
    # (batch size, number of forward passes) for each measured run.
    for batch_size, iterations in ((1, 1024), (128, 128)):
        data = torch.rand(size=[batch_size, 3, 224, 224])
        for i in tqdm(range(iterations)):
            # The host-to-device copy stays inside the loop on purpose: it
            # is part of the per-iteration cost being measured.
            o = model(data.to(DEVICE))
            if _ON_CUDA:
                # CUDA kernels are launched asynchronously; wait for them
                # so each progress tick corresponds to a finished pass.
                torch.cuda.synchronize()