gpt4 book ai didi

python - 在一小部分图像上测试我的 CNN,但训练没有效果

转载 作者:行者123 更新时间:2023-12-02 02:21:57 24 4
gpt4 key购买 nike

我构建了一个 CNN 来识别 224x224x3 图像中的 9 类手势。我尝试通过在 16 张图像上训练它来测试它的功能,看看它是否会过拟合到 100% 的准确率。这是我的网络

    import torch.nn as nn
class learn_gesture(nn.Module):
def __init__(self):
super(learn_gesture, self).__init__()
self.name = "gesture_learner"
self.conv1 = nn.Conv2d(in_channels=3, out_channels=20, kernel_size=5, stride=1, padding=2)
self.conv2 = nn.Conv2d(in_channels=20, out_channels=50, kernel_size=5, stride=1, padding=2)
self.conv3 = nn.Conv2d(in_channels=50, out_channels=100, kernel_size=5, stride=1, padding=2)
self.conv4 = nn.Conv2d(in_channels=100, out_channels=200, kernel_size=5, stride=1, padding=2)
self.conv5 = nn.Conv2d(in_channels=200, out_channels=400, kernel_size=5, stride=1, padding=2)
self.pool1 = nn.MaxPool2d(2,2)
self.pool2 = nn.MaxPool2d(2,2)
self.pool3 = nn.MaxPool2d(2,2)
self.pool4 = nn.MaxPool2d(2,2)
self.pool5 = nn.MaxPool2d(2,2)
self.fc1 = nn.Linear(7*7*400, 10000)
self.fc2 = nn.Linear(10000, 3000)
self.fc3 = nn.Linear(3000, 9)

def forward(self, x):
x = self.pool1(F.relu(self.conv1(x))) # gives 112*20
x = self.pool2(F.relu(self.conv2(x))) # gives 56*50
x = self.pool3(F.relu(self.conv3(x))) # gives 28*100
x = self.pool4(F.relu(self.conv4(x))) # gives 14*200
x = self.pool5(F.relu(self.conv5(x))) # gives 7*400
x = x.view(-1, 7*7*400)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
return F.softmax(self.fc3(x), dim=1)

这是训练代码:

    overfit_model = learn_gesture()
num_epochs = 200 #set it high so that it will converge
## loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(over_model.parameters(), lr=0.001, momentum=0.9) #optimizer is SGD with momentum

## set up some empty np arrays to store our result for plotting later
train_err = np.zeros(num_epochs)
train_loss = np.zeros(num_epochs)
################################################ train the network
for epoch in range(num_epochs):
total_train_loss = 0
total_train_err = 0
total_epoch = 0
for i, data in enumerate(smallLoader, 0):
inputs, labels = data
outputs = over_model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
optimizer.zero_grad()
corr = (determine_corr(outputs, labels)) # get a list of bool representing right or wrong predictions in the batch
total_train_err += corr.count(False)
total_train_loss += loss.item()
total_epoch += len(labels)
train_err[epoch] = float(total_train_err) / total_epoch
train_loss[epoch] = float(total_train_loss) / (i+1)
print(("Epoch {}: Train err: {}, Train loss: {}").format(
enter code hereepoch + 1,
train_err[epoch],
train_loss[epoch]))

训练没有效果,准确率和损失也没有任何提升。我只是完全无法弄清楚错误在哪里。非常感谢任何帮助!

##############更新##############

我去掉了前向函数中的softmax。令人惊讶的是,模型的性能并没有太大变化。我注意到输出中的某些元素现在为负数,并且所有类中的元素相加不会等于 1。这是应该发生的吗?输出:

tensor([[ 0.0165, -0.0041,  0.0043,  0.0017,  0.0238,  0.0329, -0.0265, -0.0224,
-0.0187],
[ 0.0163, -0.0044, 0.0036, 0.0028, 0.0248, 0.0334, -0.0268, -0.0218,
-0.0194],
[ 0.0161, -0.0046, 0.0041, 0.0019, 0.0240, 0.0333, -0.0266, -0.0223,
-0.0192],
[ 0.0190, -0.0044, 0.0035, 0.0015, 0.0244, 0.0322, -0.0267, -0.0223,
-0.0187],
[ 0.0174, -0.0048, 0.0033, 0.0021, 0.0251, 0.0328, -0.0257, -0.0225,
-0.0190],
[ 0.0175, -0.0041, 0.0033, 0.0031, 0.0241, 0.0329, -0.0264, -0.0222,
-0.0192],
[ 0.0168, -0.0042, 0.0033, 0.0022, 0.0251, 0.0335, -0.0269, -0.0225,
-0.0195],
[ 0.0163, -0.0047, 0.0037, 0.0030, 0.0243, 0.0336, -0.0265, -0.0227,
-0.0192],
[ 0.0165, -0.0043, 0.0038, 0.0026, 0.0242, 0.0337, -0.0264, -0.0222,
-0.0191],
[ 0.0163, -0.0051, 0.0038, 0.0016, 0.0236, 0.0338, -0.0258, -0.0223,
-0.0195],
[ 0.0173, -0.0037, 0.0038, 0.0018, 0.0236, 0.0322, -0.0269, -0.0225,
-0.0191],
[ 0.0174, -0.0044, 0.0031, 0.0019, 0.0241, 0.0334, -0.0266, -0.0224,
-0.0200],
[ 0.0164, -0.0038, 0.0034, 0.0029, 0.0245, 0.0342, -0.0269, -0.0225,
-0.0200],
[ 0.0173, -0.0046, 0.0036, 0.0021, 0.0245, 0.0328, -0.0264, -0.0221,
-0.0192],
[ 0.0168, -0.0046, 0.0034, 0.0025, 0.0248, 0.0336, -0.0262, -0.0222,
-0.0194],
[ 0.0166, -0.0051, 0.0033, 0.0015, 0.0234, 0.0331, -0.0270, -0.0218,
-0.0186]], grad_fn=<AddmmBackward>)
Epoch 199: Train err: 0.8125, Train loss: 2.1874701976776123

最佳答案

  1. 您似乎正在使用名为 overfit_model 的模型,其中将 over_model.parameters() 传递给优化器:

    optimizer = optim.SGD(over_model.parameters(), lr=0.001, momentum=0.9)

    其中的 over_model.parameters() 应替换为 overfit_model.parameters()

  2. 您在反向传播之后才将梯度清零,这应该在反向传播之前完成。因此,以下几行:

         loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    应替换为:

         optimizer.zero_grad()
    loss.backward()
    optimizer.step()
  3. 无需调用F.softmax

    return F.softmax(self.fc3(x), dim=1)

    因为您正在使用 nn.CrossEntropyLoss,它在内部调用 F.cross_entropy,后者会先应用 log_softmax,再调用 nll_loss:

    return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)

关于python - 在一小部分图像上测试我的 CNN,但训练没有效果,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/66286991/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com