
python - How to implement Grad-CAM on a trained network


I have trained a network and saved it as mynetwork.model. I want to apply gradcam using my own model, rather than a pre-trained one such as VGG16 or ResNet.
apply_gradcam.py

# import the necessary packages
from Grad_CAM.gradcam import GradCAM
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.applications import imagenet_utils
from tensorflow.keras.models import load_model
import numpy as np
import argparse
import imutils
import cv2


# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
help="path to the input image")
ap.add_argument("-m", "--model", type=str, default="vgg",
#choices=("vgg", "resnet"),
help="model to be used")
args = vars(ap.parse_args())


# initialize the model to be VGG16
Model = VGG16
# check to see if we are using ResNet
if args["model"] == "resnet":
Model = ResNet50
# load the pre-trained CNN from disk
print("[INFO] loading model...")
model = Model(weights="imagenet")

# load the original image from disk (in OpenCV format) and then
# resize the image to its target dimensions
orig = cv2.imread(args["image"])
resized = cv2.resize(orig, (224, 224))
# load the input image from disk (in Keras/TensorFlow format) and
# preprocess it
image = load_img(args["image"], target_size=(224, 224))
image = img_to_array(image)
image = np.expand_dims(image, axis=0)
image = imagenet_utils.preprocess_input(image)

# use the network to make predictions on the input image and find
# the class label index with the largest corresponding probability
preds = model.predict(image)
i = np.argmax(preds[0])
# decode the ImageNet predictions to obtain the human-readable label
decoded = imagenet_utils.decode_predictions(preds)
(imagenetID, label, prob) = decoded[0][0]
label = "{}: {:.2f}%".format(label, prob * 100)
print("[INFO] {}".format(label))

# initialize our gradient class activation map and build the heatmap
cam = GradCAM(model, i)
heatmap = cam.compute_heatmap(image)
# resize the resulting heatmap to the original input image dimensions
# and then overlay heatmap on top of the image
heatmap = cv2.resize(heatmap, (orig.shape[1], orig.shape[0]))
(heatmap, output) = cam.overlay_heatmap(heatmap, orig, alpha=0.5)

cv2.rectangle(output, (0, 0), (340, 40), (0, 0, 0), -1)
cv2.putText(output, label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX,
    0.8, (255, 255, 255), 2)
# display the original image and resulting heatmap and output image
# to our screen
output = np.vstack([orig, heatmap, output])
output = imutils.resize(output, height=700)
cv2.imshow("Output", output)
cv2.waitKey(0)
gradcam.py
from tensorflow.keras.models import Model
import tensorflow as tf
import numpy as np
import cv2


class GradCAM:
    def __init__(self, model, classIdx, layerName=None):
        # store the model, the class index used to measure the class
        # activation map, and the layer to be used when visualizing
        # the class activation map
        self.model = model
        self.classIdx = classIdx
        self.layerName = layerName
        # if the layer name is None, attempt to automatically find
        # the target output layer
        if self.layerName is None:
            self.layerName = self.find_target_layer()

    def find_target_layer(self):
        # attempt to find the final convolutional layer in the network
        # by looping over the layers of the network in reverse order
        for layer in reversed(self.model.layers):
            # check to see if the layer has a 4D output
            if len(layer.output_shape) == 4:
                return layer.name
        # otherwise, we could not find a 4D layer so the GradCAM
        # algorithm cannot be applied
        raise ValueError("Could not find 4D layer. Cannot apply GradCAM.")

    def compute_heatmap(self, image, eps=1e-8):
        # construct our gradient model by supplying (1) the inputs
        # to our pre-trained model, (2) the output of the (presumably)
        # final 4D layer in the network, and (3) the output of the
        # softmax activations from the model
        gradModel = Model(
            inputs=[self.model.inputs],
            outputs=[self.model.get_layer(self.layerName).output,
                self.model.output])

        # record operations for automatic differentiation
        with tf.GradientTape() as tape:
            # cast the image tensor to a float-32 data type, pass the
            # image through the gradient model, and grab the loss
            # associated with the specific class index
            inputs = tf.cast(image, tf.float32)
            (convOutputs, predictions) = gradModel(inputs)
            loss = predictions[:, self.classIdx]
        # use automatic differentiation to compute the gradients
        grads = tape.gradient(loss, convOutputs)

        # compute the guided gradients
        castConvOutputs = tf.cast(convOutputs > 0, "float32")
        castGrads = tf.cast(grads > 0, "float32")
        guidedGrads = castConvOutputs * castGrads * grads
        # the convolution and guided gradients have a batch dimension
        # (which we don't need) so let's grab the volume itself and
        # discard the batch
        convOutputs = convOutputs[0]
        guidedGrads = guidedGrads[0]

        # average the gradient values spatially and use them as weights
        # to compute a weighted sum of the filter maps
        weights = tf.reduce_mean(guidedGrads, axis=(0, 1))
        cam = tf.reduce_sum(tf.multiply(weights, convOutputs), axis=-1)

        # grab the spatial dimensions of the input image and resize
        # the output class activation map to match the input image
        # dimensions
        (w, h) = (image.shape[2], image.shape[1])
        heatmap = cv2.resize(cam.numpy(), (w, h))
        # normalize the heatmap such that all values lie in the range
        # [0, 1], scale the resulting values to the range [0, 255],
        # and then convert to an unsigned 8-bit integer
        numer = heatmap - np.min(heatmap)
        denom = (heatmap.max() - heatmap.min()) + eps
        heatmap = numer / denom
        heatmap = (heatmap * 255).astype("uint8")
        # return the resulting heatmap to the calling function
        return heatmap

    def overlay_heatmap(self, heatmap, image, alpha=0.5,
            colormap=cv2.COLORMAP_VIRIDIS):
        # apply the supplied color map to the heatmap and then
        # overlay the heatmap on the input image
        heatmap = cv2.applyColorMap(heatmap, colormap)
        output = cv2.addWeighted(image, alpha, heatmap, 1 - alpha, 0)
        # return a 2-tuple of the color mapped heatmap and the output,
        # overlaid image
        return (heatmap, output)
As you can see in apply_gradcam.py, the VGG16 or ResNet pre-trained models are used. I want to perform gradcam using my own trained model, so I commented out the following lines:
# initialize the model to be VGG16
Model = VGG16
# check to see if we are using ResNet
if args["model"] == "resnet":
    Model = ResNet50
# load the pre-trained CNN from disk
print("[INFO] loading model...")
model = Model(weights="imagenet")
and used:
model = load_model(args["model"]) 
in order to use my own model. Then I ran:
 python apply_gradcam.py --image /home/antonis/IM0001.jpeg --model /home/antonis/mynetwork.model
However, I get the following error:
ValueError: `decode_predictions` expects a batch of predictions (i.e.
a 2D array of shape (samples, 1000)). Found array with shape: (1, 3)
This is expected, since those models output the 1000 ImageNet classes, while my own model returns predictions over just a few classes (here 3, per the (1, 3) shape).
I am wondering how to fix this and apply gradcam with my own model.

Best Answer

One thing I don't get: if you have your own classifier (2 classes), why use imagenet_utils.decode_predictions at all? I'm not sure whether my answer below will satisfy you, but here are some pointers.
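For a custom classifier you can simply drop decode_predictions and index into your own label list. A minimal sketch, assuming a hypothetical class_names list that matches the label order used in training:

# hypothetical label list -- must match the order used when training
class_names = ["class_0", "class_1", "class_2"]

preds = model.predict(image)        # shape: (1, num_classes)
i = np.argmax(preds[0])             # index of the top class
label = "{}: {:.2f}%".format(class_names[i], preds[0][i] * 100)
print("[INFO] {}".format(label))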
Dataset

import tensorflow as tf
import numpy as np

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

# train set / data
x_train = x_train.astype('float32') / 255
# train set / target
y_train = tf.keras.utils.to_categorical(y_train , num_classes=10)

# validation set / data
x_test = x_test.astype('float32') / 255
# validation set / target
y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)

print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)
# (50000, 32, 32, 3) (50000, 10)
# (10000, 32, 32, 3) (10000, 10)
Model
input = tf.keras.Input(shape=(32,32,3))
efnet = tf.keras.applications.EfficientNetB0(weights='imagenet',
    include_top = False,
    input_tensor = input)
# Now that we apply global max pooling.
gap = tf.keras.layers.GlobalMaxPooling2D()(efnet.output)

# Finally, we add a classification layer.
output = tf.keras.layers.Dense(10, activation='softmax')(gap)

# bind all
func_model = tf.keras.Model(efnet.input, output)
Compile and Run
func_model.compile(
    loss = tf.keras.losses.CategoricalCrossentropy(),
    metrics = tf.keras.metrics.CategoricalAccuracy(),
    optimizer = tf.keras.optimizers.Adam())
# fit
func_model.fit(x_train, y_train, batch_size=128, epochs=15, verbose = 2)

Epoch 14/15
391/391 - 13s - loss: 0.1479 - categorical_accuracy: 0.9491
Epoch 15/15
391/391 - 13s - loss: 0.1505 - categorical_accuracy: 0.9481
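Optionally, sanity-check the trained model on the held-out set before visualizing anything (standard Keras evaluate; with a single metric it returns loss and accuracy):

# evaluate on the held-out set; returns [loss, categorical_accuracy]
loss, acc = func_model.evaluate(x_test, y_test, verbose=0)
print('test accuracy: {:.4f}'.format(acc))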
Grad-CAM
Same as your setup.
from tensorflow.keras.models import Model
import tensorflow as tf
import numpy as np
import cv2

class GradCAM:
    def __init__(self, model, classIdx, layerName=None):
        # store the model, the class index used to measure the class
        # activation map, and the layer to be used when visualizing
        # the class activation map
        self.model = model
        self.classIdx = classIdx
        self.layerName = layerName
        # if the layer name is None, attempt to automatically find
        # the target output layer
        if self.layerName is None:
            self.layerName = self.find_target_layer()

    def find_target_layer(self):
        # attempt to find the final convolutional layer in the network
        # by looping over the layers of the network in reverse order
        for layer in reversed(self.model.layers):
            # check to see if the layer has a 4D output
            if len(layer.output_shape) == 4:
                return layer.name
        # otherwise, we could not find a 4D layer so the GradCAM
        # algorithm cannot be applied
        raise ValueError("Could not find 4D layer. Cannot apply GradCAM.")

    def compute_heatmap(self, image, eps=1e-8):
        # construct our gradient model by supplying (1) the inputs
        # to our pre-trained model, (2) the output of the (presumably)
        # final 4D layer in the network, and (3) the output of the
        # softmax activations from the model
        gradModel = Model(
            inputs=[self.model.inputs],
            outputs=[self.model.get_layer(self.layerName).output, self.model.output])

        # record operations for automatic differentiation
        with tf.GradientTape() as tape:
            # cast the image tensor to a float-32 data type, pass the
            # image through the gradient model, and grab the loss
            # associated with the specific class index
            inputs = tf.cast(image, tf.float32)
            (convOutputs, predictions) = gradModel(inputs)

            # note: unlike the original, the loss is taken at the
            # model's own top-1 prediction rather than a fixed classIdx
            loss = predictions[:, tf.argmax(predictions[0])]

        # use automatic differentiation to compute the gradients
        grads = tape.gradient(loss, convOutputs)

        # compute the guided gradients
        castConvOutputs = tf.cast(convOutputs > 0, "float32")
        castGrads = tf.cast(grads > 0, "float32")
        guidedGrads = castConvOutputs * castGrads * grads
        # the convolution and guided gradients have a batch dimension
        # (which we don't need) so let's grab the volume itself and
        # discard the batch
        convOutputs = convOutputs[0]
        guidedGrads = guidedGrads[0]

        # average the gradient values spatially and use them as weights
        # to compute a weighted sum of the filter maps
        weights = tf.reduce_mean(guidedGrads, axis=(0, 1))
        cam = tf.reduce_sum(tf.multiply(weights, convOutputs), axis=-1)

        # grab the spatial dimensions of the input image and resize
        # the output class activation map to match the input image
        # dimensions
        (w, h) = (image.shape[2], image.shape[1])
        heatmap = cv2.resize(cam.numpy(), (w, h))
        # normalize the heatmap such that all values lie in the range
        # [0, 1], scale the resulting values to the range [0, 255],
        # and then convert to an unsigned 8-bit integer
        numer = heatmap - np.min(heatmap)
        denom = (heatmap.max() - heatmap.min()) + eps
        heatmap = numer / denom
        heatmap = (heatmap * 255).astype("uint8")
        # return the resulting heatmap to the calling function
        return heatmap

    def overlay_heatmap(self, heatmap, image, alpha=0.5,
            colormap=cv2.COLORMAP_VIRIDIS):
        # apply the supplied color map to the heatmap and then
        # overlay the heatmap on the input image
        heatmap = cv2.applyColorMap(heatmap, colormap)
        output = cv2.addWeighted(image, alpha, heatmap, 1 - alpha, 0)
        # return a 2-tuple of the color mapped heatmap and the output,
        # overlaid image
        return (heatmap, output)
Prediction
image = cv2.imread('/content/dog.jpg')
image = cv2.resize(image, (32, 32))
image = image.astype('float32') / 255
image = np.expand_dims(image, axis=0)

preds = func_model.predict(image)
i = np.argmax(preds[0])
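Since the model was trained on CIFAR-10, the index maps to a name with a plain list (this is the dataset's fixed label order; no decode_predictions needed):

# CIFAR-10 class names, in label order
cifar10_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
                 'dog', 'frog', 'horse', 'ship', 'truck']
print('predicted: {} ({:.2f}%)'.format(cifar10_names[i], preds[0][i] * 100))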
Get the layer names of the model
for idx in range(len(func_model.layers)):
    print(func_model.get_layer(index = idx).name)

# we picked the `block5c_project_conv` layer
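Alternatively, you can skip the manual inspection and omit the layer name; the class's find_target_layer will then pick the last layer with a 4D output automatically:

# layerName=None triggers find_target_layer inside the class
icam = GradCAM(func_model, i)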
Pass to the GradCAM class
icam = GradCAM(func_model, i, 'block5c_project_conv') 
heatmap = icam.compute_heatmap(image)
heatmap = cv2.resize(heatmap, (32, 32))

image = cv2.imread('/content/dog.jpg')
image = cv2.resize(image, (32, 32))
print(heatmap.shape, image.shape)

(heatmap, output) = icam.overlay_heatmap(heatmap, image, alpha=0.5)
Visualization
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 3)

ax[0].imshow(heatmap)
ax[1].imshow(image)
ax[2].imshow(output)
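One caveat: cv2.imread and cv2.applyColorMap produce BGR images, while matplotlib assumes RGB, so the colors may look off. A minimal fix is to convert before plotting:

# OpenCV images are BGR; matplotlib expects RGB
ax[0].imshow(cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB))
ax[1].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
ax[2].imshow(cv2.cvtColor(output, cv2.COLOR_BGR2RGB))
plt.show()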
[Image: heatmap, input image, and Grad-CAM overlay side by side]
Reference: Grad-CAM class activation visualization

Regarding python - How to implement Grad-CAM on a trained network, we found a similar question on Stack Overflow: https://stackoverflow.com/questions/66182884/
