【ディープラーニング可視化シリーズ】——CAM可視化(セマンティックセグメンテーションネットワークを例として、Vitシリーズのバックボーンネットワークのセグメンテーションモデルをサポートし、GradCAM、GradCAMPlusPlus、LayerCAMおよびその他のカム可視化メソッドをサポートします)

Cam 関連の基礎知識については、リンクを参照してください: https://zhuanlan.zhihu.com/p/269702192

import warnings
import torch
import requests
import torchvision
import torch.functional as F
import numpy as np
from PIL import Image
from torchvision.models.segmentation import deeplabv3_resnet50, DeepLabV3_ResNet50_Weights
from pytorch_grad_cam.utils.image import show_cam_on_image, preprocess_image


image_file = ".\images"
image_file_path = os.path.join(image_file, str("15") + (".jpg"))
image = Image.open(image_file_path)
rgb_img = np.float32(image) / 255
input_tensor = preprocess_image(rgb_img,
                                mean=[0.5835, 0.5820, 0.5841],
                                std=[0.1149, 0.1111, 0.1064])

model = deeplabv3_resnet50(weights=DeepLabV3_ResNet50_Weights.DEFAULT)
model = model.eval()

device = torch.device('cuda:0') if torch.cuda.is_available()
model.to(device=device)
input_tensor = input_tensor.to(device)  

class SegmentationModelOutputWrapper(torch.nn.Module):
    def __init__(self, model): 
        super(SegmentationModelOutputWrapper, self).__init__()
        self.model = model
        
    def forward(self, x):
        return self.model(x)["out"]
    
model = SegmentationModelOutputWrapper(model)
output = model(input_tensor)

normalized_masks = torch.nn.functional.softmax(output, dim=1).cpu()
sem_classes = [
    'background',
    'crack',
]
sem_class_to_idx = {
    
    cls: idx for (idx, cls) in enumerate(sem_classes)}

crack_category = sem_class_to_idx["crack"]
crack_mask = normalized_masks[0, :, :, :].argmax(axis=0).detach().cpu().numpy()
crack_mask_uint8 = 255 * np.uint8(crack_mask == crack_category)
crack_mask_float = np.float32(crack_mask == car_category)

both_images = np.hstack((image, np.repeat(crack_mask_uint8[:, :, None], 3, axis=-1)))
img = Image.fromarray(both_images)
# img.show()

from pytorch_grad_cam import GradCAM, GradCAMPlusPlus, LayerCAM
from pytorch_grad_cam.utils.image import show_cam_on_image, preprocess_image

class SemanticSegmentationTarget:
    def __init__(self, category, mask):
        self.category = category
        self.mask = torch.from_numpy(mask)
        if torch.cuda.is_available():
            self.mask = self.mask.cuda()
        
    def __call__(self, model_output):
        return (model_output[self.category - 1, :, : ] * self.mask).sum()


target_layers = [model.encoder.norm4]

def reshape_transform(in_tensor):
    result = in_tensor.reshape(in_tensor.size(0),
        int(np.sqrt(in_tensor.size(1))), int(np.sqrt(in_tensor.size(1))), in_tensor.size(2))

    result = result.transpose(2, 3).transpose(1, 2)
    return result

targets = [SemanticSegmentationTarget(crack_category, crack_mask_float)]
with GradCAM(model=model,
             target_layers=target_layers,
             use_cuda=torch.cuda.is_available(),
             # reshape_transform=reshape_transform # 该部分是针对Vit系列模型的相关变换参数,cnn模型可不设置.
             ) as cam:
    grayscale_cam = cam(input_tensor=augmented_img[0].unsqueeze(0), targets=targets)[0, :]
    cam_image = show_cam_on_image(np.float32(img) /255, grayscale_cam, use_rgb=True)

cam_img = Image.fromarray(cam_image)
cam_img.show()

GradCAM手法による可視化結果は以下の通りです。
ここに画像の説明を挿入

おすすめ

転載: blog.csdn.net/qq_43456016/article/details/132169012
おすすめ