python - Duplicate layers when reusing a PyTorch model


I am trying to reuse some resnet layers for a custom architecture and ran into an issue I can't figure out. Here is a simplified example; when I run:

import torch
import torch.nn as nn
from torchvision import models
from torchsummary import summary

def convrelu(in_channels, out_channels, kernel, padding):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel, padding=padding),
        nn.ReLU(inplace=True),
    )


class ResNetUNet(nn.Module):
    def __init__(self):
        super().__init__()

        self.base_model = models.resnet18(pretrained=False)
        self.base_layers = list(self.base_model.children())

        self.layer0 = nn.Sequential(*self.base_layers[:3])

    def forward(self, x):
        print(x.shape)

        output = self.layer0(x)

        return output

base_model = ResNetUNet().cuda()
summary(base_model, (3, 224, 224))

it gives me:
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1         [-1, 64, 112, 112]           9,408
            Conv2d-2         [-1, 64, 112, 112]           9,408
       BatchNorm2d-3         [-1, 64, 112, 112]             128
       BatchNorm2d-4         [-1, 64, 112, 112]             128
              ReLU-5         [-1, 64, 112, 112]               0
              ReLU-6         [-1, 64, 112, 112]               0
================================================================
Total params: 19,072
Trainable params: 19,072
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 36.75
Params size (MB): 0.07
Estimated Total Size (MB): 37.40
----------------------------------------------------------------

This duplicates each layer (there are 2 Conv2d, 2 BatchNorm2d and 2 ReLU) instead of listing each layer once. If I print out self.base_layers[:3] I get:
[Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False), BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), ReLU(inplace=True)]
which shows only three layers, with no duplicates. Why is it duplicating my layers?

I am using PyTorch version 1.4.0.

Best Answer

Your layers aren't actually being invoked twice. This is an artifact of how summary is implemented.
The reason is simply that summary recursively traverses all the child modules of your model and registers a forward hook for each one. Since you have duplicate children (shared between base_model and layer0), those duplicated modules get more than one hook registered. When summary runs the forward pass, both hooks on each of those modules fire, which produces the duplicated rows in the report.
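You can see where the duplicates come from by counting how often Module.apply (which is what summary uses to register its hooks) visits each submodule. A minimal sketch, assuming the ResNetUNet class from the question:

from collections import Counter

model = ResNetUNet()  # the class from the question, with base_model as an attribute

# Module.apply recurses through children() without de-duplicating, so a submodule
# reachable through two parents (base_model and layer0) is visited twice.
visits = Counter()
model.apply(lambda m: visits.update([id(m)]))

shared = [m for m in model.modules() if visits[id(m)] > 1]
print(shared)  # the resnet's first Conv2d, BatchNorm2d and ReLU show up here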

For your toy example, the solution is simply not to assign base_model as an attribute, since it isn't used during forward anyway. That keeps base_model from ever being added as a child:

class ResNetUNet(nn.Module):
    def __init__(self):
        super().__init__()
        base_model = models.resnet18(pretrained=False)
        base_layers = list(base_model.children())
        self.layer0 = nn.Sequential(*base_layers[:3])
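As a quick check (a sketch, reusing the summary call from the question), the unchanged torchsummary now reports each module once, because layer0 is the model's only child:

model = ResNetUNet().cuda()
summary(model, (3, 224, 224))
# Expected: a single Conv2d, BatchNorm2d and ReLU row instead of two of each,
# since each submodule is now reachable through only one parent.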

Another solution is to create a modified version of summary that does not register more than one hook for the same module. Below is an augmented summary; it uses a set named already_registered to keep track of modules that already have a hook, in order to avoid registering duplicates.
from collections import OrderedDict
import torch
import torch.nn as nn
import numpy as np

def summary(model, input_size, batch_size=-1, device="cuda"):

    # keep track of registered modules so that we don't add multiple hooks
    already_registered = set()

    def register_hook(module):

        def hook(module, input, output):
            class_name = str(module.__class__).split(".")[-1].split("'")[0]
            module_idx = len(summary)

            m_key = "%s-%i" % (class_name, module_idx + 1)
            summary[m_key] = OrderedDict()
            summary[m_key]["input_shape"] = list(input[0].size())
            summary[m_key]["input_shape"][0] = batch_size
            if isinstance(output, (list, tuple)):
                summary[m_key]["output_shape"] = [
                    [-1] + list(o.size())[1:] for o in output
                ]
            else:
                summary[m_key]["output_shape"] = list(output.size())
                summary[m_key]["output_shape"][0] = batch_size

            params = 0
            if hasattr(module, "weight") and hasattr(module.weight, "size"):
                params += torch.prod(torch.LongTensor(list(module.weight.size())))
                summary[m_key]["trainable"] = module.weight.requires_grad
            if hasattr(module, "bias") and hasattr(module.bias, "size"):
                params += torch.prod(torch.LongTensor(list(module.bias.size())))
            summary[m_key]["nb_params"] = params

        if (
            not isinstance(module, nn.Sequential)
            and not isinstance(module, nn.ModuleList)
            and not (module == model)
            and module not in already_registered
        ):
            already_registered.add(module)
            hooks.append(module.register_forward_hook(hook))

    device = device.lower()
    assert device in [
        "cuda",
        "cpu",
    ], "Input device is not valid, please specify 'cuda' or 'cpu'"

    if device == "cuda" and torch.cuda.is_available():
        dtype = torch.cuda.FloatTensor
    else:
        dtype = torch.FloatTensor

    # multiple inputs to the network
    if isinstance(input_size, tuple):
        input_size = [input_size]

    # batch_size of 2 for batchnorm
    x = [torch.rand(2, *in_size).type(dtype) for in_size in input_size]
    # print(type(x[0]))

    # create properties
    summary = OrderedDict()
    hooks = []

    # register hook
    model.apply(register_hook)

    # make a forward pass
    # print(x.shape)
    model(*x)

    # remove these hooks
    for h in hooks:
        h.remove()

    print("----------------------------------------------------------------")
    line_new = "{:>20} {:>25} {:>15}".format("Layer (type)", "Output Shape", "Param #")
    print(line_new)
    print("================================================================")
    total_params = 0
    total_output = 0
    trainable_params = 0
    for layer in summary:
        # input_shape, output_shape, trainable, nb_params
        line_new = "{:>20} {:>25} {:>15}".format(
            layer,
            str(summary[layer]["output_shape"]),
            "{0:,}".format(summary[layer]["nb_params"]),
        )
        total_params += summary[layer]["nb_params"]
        total_output += np.prod(summary[layer]["output_shape"])
        if "trainable" in summary[layer]:
            if summary[layer]["trainable"] == True:
                trainable_params += summary[layer]["nb_params"]
        print(line_new)

    # assume 4 bytes/number (float on cuda).
    total_input_size = abs(np.prod(input_size) * batch_size * 4. / (1024 ** 2.))
    total_output_size = abs(2. * total_output * 4. / (1024 ** 2.))  # x2 for gradients
    total_params_size = abs(total_params.numpy() * 4. / (1024 ** 2.))
    total_size = total_params_size + total_output_size + total_input_size

    print("================================================================")
    print("Total params: {0:,}".format(total_params))
    print("Trainable params: {0:,}".format(trainable_params))
    print("Non-trainable params: {0:,}".format(total_params - trainable_params))
    print("----------------------------------------------------------------")
    print("Input size (MB): %0.2f" % total_input_size)
    print("Forward/backward pass size (MB): %0.2f" % total_output_size)
    print("Params size (MB): %0.2f" % total_params_size)
    print("Estimated Total Size (MB): %0.2f" % total_size)
    print("----------------------------------------------------------------")
    # return summary
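With that change in place, the modified summary can be used as a drop-in replacement for the torchsummary call from the question (a sketch, keeping the original ResNetUNet with base_model as an attribute):

model = ResNetUNet().cuda()   # the original class, base_model still an attribute
summary(model, (3, 224, 224))
# already_registered allows at most one hook per module, so the first conv/bn/relu
# of the resnet should now each be reported only once.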

Regarding python - Duplicate layers when reusing a PyTorch model, a similar question can be found on Stack Overflow: https://stackoverflow.com/questions/61668501/
