cnn 图像 cnn 图像特征 transformer k v

转载

话不是这么说的 2024-05-24 20:59:01

文章标签 cnn 图像 cnn transformer 加载 ci 文章分类 机器学习人工智能

热力图是一张与原始图片大小相同的图，图上每个位置的取值范围为0到1，一般用0到255的灰度图表示。它可以理解为各区域对预测输出的贡献分布：分数越高的地方，表示原始图片对应区域对网络的响应越高、贡献越大。可视化方法主要有两类：一类利用GAP（全局平均池化）层，另一类基于梯度传导，具体可参考文档

万字长文：特征可视化技术(CAM)

https://mp.weixin.qq.com/s/WKImrtpjQBziz6Wr5uOGNw

这里主要介绍如何使用GitHub上提供的工具包pytorch-grad-cam（Class Activation Map methods implemented in PyTorch）进行可视化操作

https://github.com/jacobgil/pytorch-grad-cam

一.CNN可视化

一共提供了8种可视化方法可以选择

# Selects the CAM algorithm to run. argparse rejects any value that is not
# listed in `choices`; the default is plain Grad-CAM.
parser.add_argument('--method', type=str, default='gradcam',
                        choices=['gradcam', 'gradcam++',
                                 'scorecam', 'xgradcam',
                                 'ablationcam', 'eigencam',
                                 'eigengradcam', 'layercam'],
                        help='Can be gradcam/gradcam++/scorecam/xgradcam'
                             '/ablationcam/eigencam/eigengradcam/layercam')

1.1 首先需要安装 grad_cam的包

pip install grad-cam

1.2 添加加载图片的路径

# Path of the image to visualise. The default is a path on the author's
# machine; replace it or pass --image-path on the command line.
parser.add_argument('--image-path', type=str, default="/dataset/Dset_Jerry/ChestXray14/images/00011355_027.png",
                        help='Input image path')

1.3 加载本地训练后的模型及权重

这里需要注意，如果模型是在多卡上（使用DataParallel）训练并保存的，权重的键名会带有“module.”前缀，加载前需要将该前缀去掉

# DenseNet-121 from the project-local model module (14 is presumably the
# number of output classes). NOTE: this rebinds the name `model` from the
# imported module to the network instance.
model = model.DenseNet121(14)

# The checkpoint was saved from a DataParallel-wrapped model, so its keys
# start with "module.". Map the tensors to the CPU while loading so the
# checkpoint can be read on a machine without a GPU.
state_dict = torch.load("/datasets/Dset_Jerry/CXR14/Densenet121_BCE_32/Densenet_8.pkl",
                        map_location="cpu")  # .pth and .pt files work the same way

# Build a new OrderedDict whose keys do not contain the "module." prefix.
# Only keys that really start with the prefix are shortened; slicing every
# key blindly would corrupt the names of a checkpoint saved without it.
prefix = "module."
new_state_dict = OrderedDict(
    (k[len(prefix):] if k.startswith(prefix) else k, v)
    for k, v in state_dict.items()
)

# Load the renamed parameters into the bare model.
model.load_state_dict(new_state_dict)
# Inference mode; move to the GPU only when one was requested and is available.
model.eval()
if args.use_cuda:
    model.cuda()

1.4 设置需要可视化的目标层

关于目标层的选择：可以通过print(model)打印模型结构，查找需要可视化的层的具体名称；一般选择模型最后输出特征图的那一层

# Uncomment to print the model and look up the attribute path of the layer
# that should be visualised.
#print(model)
    # Last entry of the `features` container of the wrapped densenet121.
    target_layer=model.densenet121.features[-1]

1.5 设置保存图片的路径及名称

# Save the CAM overlay; the chosen CAM method name is embedded in the file
# name. The directory is specific to the author's machine.
cv2.imwrite(f'/home/cai/project/Catheter/DenseNet_{args.method}_cam_00011355_027.jpg', cam_image)

完整代码如下

import argparse
import cv2
import numpy as np
import torch
import timm
from torchvision import models
import os
import torch.nn as nn
import Model.model as model
from collections import OrderedDict
from pytorch_grad_cam import GradCAM, \
                             ScoreCAM, \
                             GradCAMPlusPlus, \
                             AblationCAM, \
                             XGradCAM, \
                             EigenCAM, \
                             EigenGradCAM, \
                             LayerCAM

from pytorch_grad_cam import GuidedBackpropReLUModel
from pytorch_grad_cam.utils.image import show_cam_on_image, \
                                         deprocess_image, \
                                         preprocess_image


def get_args(argv=None):
    """Parse the command-line options of the CNN CAM demo.

    Args:
        argv: Optional list of argument strings. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with ``use_cuda``, ``image_path``, ``aug_smooth``,
        ``eigen_smooth`` and ``method``. ``use_cuda`` is True only when GPU
        use was not disabled and ``torch.cuda.is_available()`` is True.
    """
    parser = argparse.ArgumentParser()
    # GPU use is on by default, so `--use-cuda` (store_true) alone can never
    # switch it off. `--no-cuda` writes False into the same destination.
    parser.add_argument('--use-cuda', action='store_true', default=True,
                        help='Use NVIDIA GPU acceleration (default)')
    parser.add_argument('--no-cuda', dest='use_cuda', action='store_false',
                        help='Force CPU computation')
    parser.add_argument('--image-path', type=str, default="/dataset/Dset_Jerry/ChestXray14/images/00011355_027.png",
                        help='Input image path')
    parser.add_argument('--aug_smooth', action='store_true',
                        help='Apply test time augmentation to smooth the CAM')
    parser.add_argument('--eigen_smooth', action='store_true',
                        help='Reduce noise by taking the first principal component '
                             'of cam_weights*activations')
    parser.add_argument('--method', type=str, default='gradcam',
                        choices=['gradcam', 'gradcam++',
                                 'scorecam', 'xgradcam',
                                 'ablationcam', 'eigencam',
                                 'eigengradcam', 'layercam'],
                        help='Can be gradcam/gradcam++/scorecam/xgradcam'
                             '/ablationcam/eigencam/eigengradcam/layercam')

    args = parser.parse_args(argv)
    # Fall back to the CPU when no CUDA device is available.
    args.use_cuda = args.use_cuda and torch.cuda.is_available()
    if args.use_cuda:
        print('Using GPU for acceleration')
    else:
        print('Using CPU for computation')

    return args


if __name__ == '__main__':
    # Usage: python cam.py --image-path <path_to_image>
    # Loads an image and computes:
    #   1. a class activation map (CAM)
    #   2. guided back-propagation
    #   3. the combination of both

    args = get_args()
    methods = {
        "gradcam": GradCAM,
        "scorecam": ScoreCAM,
        "gradcam++": GradCAMPlusPlus,
        "ablationcam": AblationCAM,
        "xgradcam": XGradCAM,
        "eigencam": EigenCAM,
        "eigengradcam": EigenGradCAM,
        "layercam": LayerCAM,
    }

    # Torchvision alternatives to the project model below:
    #   model = models.resnet50(pretrained=True);    target_layer = model.layer4[-1]
    #   model = models.densenet121(pretrained=True); target_layer = model.features[-1]

    # DenseNet-121 from the project-local model module (14 is presumably the
    # number of output classes). NOTE: this rebinds the name `model` from the
    # imported module to the network instance.
    model = model.DenseNet121(14)

    # The checkpoint was saved from a DataParallel-wrapped model, so its keys
    # start with "module.". Map the tensors to the CPU while loading so the
    # checkpoint can be read on a machine without a GPU.
    state_dict = torch.load("/datasets/Dset_Jerry/CXR14/Densenet121_BCE_32/Densenet_8.pkl",
                            map_location="cpu")  # .pth and .pt files work the same way

    # Strip the prefix only from keys that actually carry it; slicing every
    # key blindly would corrupt a checkpoint saved without DataParallel.
    prefix = "module."
    new_state_dict = OrderedDict(
        (k[len(prefix):] if k.startswith(prefix) else k, v)
        for k, v in state_dict.items()
    )
    model.load_state_dict(new_state_dict)

    # Inference mode; honour the --use-cuda / availability decision instead
    # of calling .cuda() unconditionally.
    model.eval()
    if args.use_cuda:
        model.cuda()

    # Choose the target layer you want to compute the visualization for.
    # Usually this will be the last convolutional layer in the model.
    # Some common choices can be:
    # Resnet18 and 50: model.layer4[-1]
    # VGG, densenet161: model.features[-1]
    # mnasnet1_0: model.layers[-1]
    # You can print the model to help choose the layer.
    # print(model)
    target_layer = model.densenet121.features[-1]

    # NOTE(review): `target_layer=`, `use_cuda=` and `target_category=` are the
    # keyword names this script was written against; confirm that the
    # installed grad-cam release still accepts them.
    cam = methods[args.method](model=model,
                               target_layer=target_layer,
                               use_cuda=args.use_cuda)

    bgr_img = cv2.imread(args.image_path, 1)
    if bgr_img is None:
        # cv2.imread signals an unreadable file by returning None.
        raise FileNotFoundError(f"Cannot read image: {args.image_path}")
    # OpenCV loads images as BGR. Everything below (use_rgb=True and the
    # RGB->BGR conversion before writing) expects RGB, so convert here.
    # For single-channel X-ray images stored as grey this is a no-op.
    rgb_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)
    rgb_img = np.float32(cv2.resize(rgb_img, (384, 384))) / 255
    input_tensor = preprocess_image(rgb_img)

    # If None, returns the map for the highest scoring category.
    # Otherwise, targets the requested category.
    target_category = None

    # AblationCAM and ScoreCAM have batched implementations.
    # You can override the internal batch size for faster computation.
    cam.batch_size = 32

    grayscale_cam = cam(input_tensor=input_tensor,
                        target_category=target_category,
                        aug_smooth=args.aug_smooth,
                        eigen_smooth=args.eigen_smooth)

    # Here grayscale_cam has only one image in the batch.
    grayscale_cam = grayscale_cam[0, :]

    cam_image = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True)

    # cam_image is RGB encoded whereas "cv2.imwrite" requires BGR encoding.
    cam_image = cv2.cvtColor(cam_image, cv2.COLOR_RGB2BGR)

    # Guided back-propagation and its combination with the CAM mask. These
    # results are only written when the optional imwrite calls at the end
    # are enabled.
    gb_model = GuidedBackpropReLUModel(model=model, use_cuda=args.use_cuda)
    gb = gb_model(input_tensor, target_category=target_category)

    cam_mask = cv2.merge([grayscale_cam, grayscale_cam, grayscale_cam])
    cam_gb = deprocess_image(cam_mask * gb)
    gb = deprocess_image(gb)

    cv2.imwrite(f'/home/cai/project/Catheter/DenseNet_{args.method}_cam_00011355_027.jpg', cam_image)
    # Optional extra outputs:
    # cv2.imwrite(f'/home/cai/project/Catheter/{args.method}_gb.jpg', gb)
    # cv2.imwrite(f'/home/cai/project/Catheter/{args.method}_cam_gb.jpg', cam_gb)

二.Transformer特征图可视化

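关于Transformer特征图可视化的方法，基本步骤和CNN的差不多，关键在于目标层如何选取。另外，Transformer层的输出是(batch, token数, 通道数)形式的序列，需要通过reshape_transform将其还原成(batch, 通道数, 高, 宽)的二维特征图后才能计算CAM。以下给出目标层选取的样例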

# ViT: first norm layer of the last transformer block.
target_layer = model.blocks[-1].norm1

# Swin: first norm layer of the last block in the last stage.
target_layer = model.layers[-1].blocks[-1].norm1

# PVT v2: first norm layer of the last block in stage 4
# (the `block4` attribute belongs to PVT v2, not to Swin).
target_layer = model.block4[-1].norm1

具体代码如下：

import argparse
import cv2
import numpy as np
import torch
import torch.nn as nn
import timm
from Model.pvt_v2 import pvt_v2_b3
from collections import OrderedDict
from pytorch_grad_cam import GradCAM, \
                             ScoreCAM, \
                             GradCAMPlusPlus, \
                             AblationCAM, \
                             XGradCAM, \
                             EigenCAM, \
                             EigenGradCAM

from pytorch_grad_cam import GuidedBackpropReLUModel
from pytorch_grad_cam.utils.image import show_cam_on_image, \
                                         deprocess_image, \
                                         preprocess_image

def get_args(argv=None):
    """Parse the command-line options of the Transformer CAM demo.

    Args:
        argv: Optional list of argument strings. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with ``use_cuda``, ``image_path``, ``aug_smooth``,
        ``eigen_smooth`` and ``method``. ``use_cuda`` is True only when GPU
        use was not disabled and ``torch.cuda.is_available()`` is True.
        ``method`` is not validated here; the caller checks it.
    """
    parser = argparse.ArgumentParser()
    # GPU use is on by default, so `--use-cuda` (store_true) alone can never
    # switch it off. `--no-cuda` writes False into the same destination.
    parser.add_argument('--use-cuda', action='store_true', default=True,
                        help='Use NVIDIA GPU acceleration (default)')
    parser.add_argument('--no-cuda', dest='use_cuda', action='store_false',
                        help='Force CPU computation')
    parser.add_argument('--image-path', type=str, default="/dataset/Dset_Jerry/ChestXray14/images/00011355_027.png",
                        help='Input image path')
    parser.add_argument('--aug_smooth', action='store_true',
                        help='Apply test time augmentation to smooth the CAM')
    parser.add_argument('--eigen_smooth', action='store_true',
                        help='Reduce noise by taking the first principal component '
                             'of cam_weights*activations')

    parser.add_argument('--method', type=str, default='gradcam++',
                        help='Can be gradcam/gradcam++/scorecam/xgradcam'
                             '/ablationcam/eigencam/eigengradcam')

    args = parser.parse_args(argv)
    # Fall back to the CPU when no CUDA device is available.
    args.use_cuda = args.use_cuda and torch.cuda.is_available()
    if args.use_cuda:
        print('Using GPU for acceleration')
    else:
        print('Using CPU for computation')

    return args

def reshape_transform(tensor, height=12, width=12):
    """Rearrange a token sequence into a channels-first 2-D feature map.

    Args:
        tensor: Tensor indexed as (batch, tokens, channels); the token axis
            is split into ``height * width`` positions.
        height: Number of rows of the token grid.
        width: Number of columns of the token grid.

    Returns:
        A tensor indexed as (batch, channels, height, width).
    """
    batch, channels = tensor.shape[0], tensor.shape[2]
    grid = tensor.reshape(batch, height, width, channels)
    # Move the channel axis in front of the spatial axes, as in CNNs.
    return grid.permute(0, 3, 1, 2)

if __name__ == '__main__':
    # Usage: python swinT_example.py --image-path <path_to_image>
    # Example of using the CAM methods on a Transformer network (PVT v2 here).

    args = get_args()
    methods = {
        "gradcam": GradCAM,
        "scorecam": ScoreCAM,
        "gradcam++": GradCAMPlusPlus,
        "ablationcam": AblationCAM,
        "xgradcam": XGradCAM,
        "eigencam": EigenCAM,
        "eigengradcam": EigenGradCAM,
    }

    # --method has no argparse `choices`, so validate it here (once).
    if args.method not in methods:
        raise Exception(f"method should be one of {list(methods.keys())}")

    # Swin alternative (timm):
    #   model = timm.create_model('swin_base_patch4_window7_224', pretrained=False)
    #   model.eval()
    #   if args.use_cuda:
    #       model = model.cuda()
    #   target_layer = model.layers[-1].blocks[-2].norm1

    model = pvt_v2_b3()

    # The checkpoint was saved from a DataParallel-wrapped model, so its keys
    # start with "module.". Map the tensors to the CPU while loading so the
    # checkpoint can be read on a machine without a GPU.
    state_dict = torch.load("/datasets/Dset_Jerry/CXR14/PVT_BCE_32/PVT_2.pkl",
                            map_location="cpu")  # .pth and .pt files work the same way

    # Strip the prefix only from keys that actually carry it; slicing every
    # key blindly would corrupt a checkpoint saved without DataParallel.
    prefix = "module."
    new_state_dict = OrderedDict(
        (k[len(prefix):] if k.startswith(prefix) else k, v)
        for k, v in state_dict.items()
    )
    model.load_state_dict(new_state_dict)

    # Always switch to inference mode. The original only did so on the GPU
    # path, which left the model in training mode when running on the CPU.
    model.eval()
    if args.use_cuda:
        model = model.cuda()

    # print(model)
    # Alternative target: model.block4[-1].norm1
    target_layer = model.norm4

    # NOTE(review): `target_layer=`, `use_cuda=` and `target_category=` are the
    # keyword names this script was written against; confirm that the
    # installed grad-cam release still accepts them.
    cam = methods[args.method](model=model,
                               target_layer=target_layer,
                               use_cuda=args.use_cuda,
                               reshape_transform=reshape_transform)

    img = cv2.imread(args.image_path, 1)
    if img is None:
        # cv2.imread signals an unreadable file by returning None.
        raise FileNotFoundError(f"Cannot read image: {args.image_path}")
    # The image stays in OpenCV's channel order from here to cv2.imwrite.
    # NOTE(review): the network therefore receives that same channel order;
    # confirm it matches the preprocessing used during training.
    img = cv2.resize(img, (384, 384))
    img = np.float32(img) / 255
    input_tensor = preprocess_image(img, mean=[0.5, 0.5, 0.5],
                                    std=[0.5, 0.5, 0.5])

    # If None, returns the map for the highest scoring category.
    # Otherwise, targets the requested category.
    target_category = 1
    print(target_category)

    # AblationCAM and ScoreCAM have batched implementations.
    # You can override the internal batch size for faster computation.
    cam.batch_size = 32

    grayscale_cam = cam(input_tensor=input_tensor,
                        target_category=target_category,
                        eigen_smooth=args.eigen_smooth,
                        aug_smooth=args.aug_smooth)

    # Here grayscale_cam has only one image in the batch.
    grayscale_cam = grayscale_cam[0, :]

    cam_image = show_cam_on_image(img, grayscale_cam)
    cv2.imwrite(f'PVT_{args.method}_cam_00011355_027.jpg', cam_image)

本文章为转载内容，我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题，欢迎原作者联系我们进行内容更正或删除文章。