以下の内容はhttps://touch-sp.hatenablog.com/entry/2025/07/15/075437より取得しました。


6年ぶりにGrad-CAMをやってみる

以前の記事はこちら。
touch-sp.hatenablog.com
MXNetやGluonCVを使っています。懐かしいです。

今回はこちらを使わせてもらいました。
github.com

結果

=== Top 5 予測(ラベル付き) ===
Rank 1: water bottle (0.9660)
Rank 2: pop bottle (0.0174)
Rank 3: refrigerator (0.0013)
Rank 4: beer bottle (0.0011)
Rank 5: bottlecap (0.0007)

Pythonスクリプト

'''
pip install torch==2.6.0+cu126 torchvision==0.21.0+cu126 --index-url https://download.pytorch.org/whl/cu126
pip install ttach
pip install matplotlib
pip install opencv-python
pip install tqdm
pip install scikit-learn
pip install requests
'''

from pytorch_grad_cam import GradCAM
from pytorch_grad_cam import GuidedBackpropReLUModel
from pytorch_grad_cam.utils.image import show_cam_on_image, preprocess_image, deprocess_image
from torchvision.models import resnet50, ResNet50_Weights
from torchvision.datasets.utils import download_url
import numpy as np
import cv2
import torch
import torch.nn.functional as F
import requests

def get_imagenet_labels():
    """Fetch the 1000 ImageNet class labels, with a safe offline fallback.

    Returns:
        list[str]: 1000 label strings; ``class_{i}`` placeholders when the
        download fails (no network, timeout, decode error, ...).
    """
    # Stdlib urllib (instead of third-party `requests`) with an explicit
    # timeout, so the script cannot hang forever on a dead connection.
    import urllib.request

    url = "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt"
    try:
        with urllib.request.urlopen(url, timeout=10) as resp:
            return resp.read().decode("utf-8").strip().split('\n')
    except Exception:
        # Narrowed from a bare `except:` (which also swallowed
        # KeyboardInterrupt/SystemExit). Any download failure falls back to
        # dummy labels so the rest of the script still runs.
        return [f"class_{i}" for i in range(1000)]

# ---------------------------------------------------------------------------
# Model setup: ResNet-50 with manually downloaded weights.
# ---------------------------------------------------------------------------
# resnet50(weights=None) + manual checkpoint load is used instead of
# ResNet50_Weights.IMAGENET1K_V1 so the higher-accuracy "a1" recipe weights
# from the pytorch-image-models release are applied.
model = resnet50(weights=None).to("cuda").eval()

# Download the checkpoint into the current directory (torchvision skips the
# download when the file already exists).
url = 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/resnet50_a1_0-14fe96d1.pth'
download_url(url, root='.', filename="weight.pth")

# weights_only=True restricts unpickling to tensors/containers, guarding
# against arbitrary code execution from a tampered checkpoint file.
state_dict = torch.load("weight.pth", map_location="cuda", weights_only=True)
model.load_state_dict(state_dict)

# Grad-CAM target: the last bottleneck block of layer4 — the deepest conv
# feature maps, the conventional choice for ResNet CAM visualizations.
target_layers = [model.layer4[-1]]

image_path = "1.jpg"

# cv2.imread returns None (not an exception) for a missing/unreadable file;
# fail fast with a clear message instead of a cryptic slicing TypeError.
bgr_img = cv2.imread(image_path, cv2.IMREAD_COLOR)
if bgr_img is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

rgb_img = bgr_img[:, :, ::-1]  # OpenCV loads BGR; convert to RGB
rgb_img = cv2.resize(rgb_img, (224, 224))
rgb_img = np.float32(rgb_img) / 255  # [0, 1] floats, as show_cam_on_image expects
input_tensor = preprocess_image(
    rgb_img,
    mean=[0.485, 0.456, 0.406],  # standard ImageNet normalization
    std=[0.229, 0.224, 0.225]
).to("cuda")

targets = None  # None -> explain the model's highest-scoring class

# Run Grad-CAM as a context manager so the forward/backward hooks it
# registers on the model are removed on exit.
with GradCAM(model=model, target_layers=target_layers) as cam:
    # aug_smooth (test-time augmentation) and eigen_smooth (first principal
    # component of activations*grads) both reduce noise in the heatmap.
    grayscale_cam = cam(
        input_tensor=input_tensor,
        targets=targets,
        aug_smooth=True,
        eigen_smooth=True
    )
    # grayscale_cam is batched; this batch holds a single image.
    grayscale_cam = grayscale_cam[0, :]
    cam_image = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True)
    # show_cam_on_image returned RGB; convert to BGR for cv2.imwrite below.
    cam_image = cv2.cvtColor(cam_image, cv2.COLOR_RGB2BGR)
    # The CAM object caches the model outputs from its forward pass, so the
    # class scores are available without re-running inference.
    model_outputs = cam.outputs

# Convert logits to class probabilities and report the five best guesses.
probs = F.softmax(model_outputs, dim=1)
top_probs, top_indices = torch.topk(probs, 5)

# Human-readable class names (dummy placeholders if the download failed).
class_names = get_imagenet_labels()
print("\n=== Top 5 予測(ラベル付き) ===")
for rank, (idx_t, prob_t) in enumerate(zip(top_indices[0], top_probs[0]), start=1):
    idx = idx_t.item()
    # Guard against an index past the label list (short/dummy label sets).
    name = class_names[idx] if idx < len(class_names) else f"class_{idx}"
    print(f"Rank {rank}: {name} ({prob_t.item():.4f})")

# Guided backpropagation: gradient of the class score w.r.t. the input
# image, with negative gradients suppressed at each ReLU.
gb_model = GuidedBackpropReLUModel(model=model, device="cuda")
gb = gb_model(input_tensor, target_category=None)  # None -> top predicted class

# Element-wise product of the CAM heatmap (replicated to 3 channels) and the
# guided-backprop map — the "Guided Grad-CAM" combination from the library's
# examples, keeping only class-discriminative fine detail.
cam_mask = cv2.merge([grayscale_cam, grayscale_cam, grayscale_cam])
cam_gb = deprocess_image(cam_mask * gb)
gb = deprocess_image(gb)

# Write all three visualizations (cam_image was converted to BGR above).
cv2.imwrite("gradcam_cam.jpg", cam_image)
cv2.imwrite("gradcam_gb.jpg", gb)
cv2.imwrite("gradcam_cam_gb.jpg", cam_gb)



以上の内容はhttps://touch-sp.hatenablog.com/entry/2025/07/15/075437より取得しました。
このページはhttp://font.textar.tv/のウェブフォントを使用してます

不具合報告/要望等はこちらへお願いします。
モバイルやる夫Viewer Ver0.14