gpt4 book ai didi

python - Pytorch:deepcopy后无训练效果

转载 作者:行者123 更新时间:2023-12-01 23:38:07 27 4
gpt4 key购买 nike

我尝试在 pytorch 中复制一个神经网络,然后训练复制的网络,但训练似乎并没有在复制后改变网络中的权重。 This post建议 deepcopy 是制作神经网络副本的便捷方式,因此我尝试在我的代码中使用它。

下面的代码运行良好,表明训练后网络的权重和准确度与训练前不同。但是,当我切换 network_cp=deepcopy(network)optimizer_cp=deepcopy(optimizer) 时,训练前后的准确度和权重完全相同。

# torch settings
torch.backends.cudnn.enabled = True
device = torch.device("cpu")

# training settings
learning_rate = 0.01
momentum = 0.5
batch_size_train = 64
batch_size_test = 1000

# get MNIST data set
train_loader, test_loader = load_mnist(batch_size_train=batch_size_train,
batch_size_test=batch_size_test)

# make a network
network = Net()
optimizer = optim.SGD(network.parameters(), lr=learning_rate,
momentum=momentum)
network.to(device)

# train network
train(network, optimizer, train_loader, device)

# copy network
network_cp = network
#network_cp = deepcopy(network)
optimizer_cp = optimizer
#optimizer_cp = deepcopy(optimizer)

# get edge weights and accuracy of the copied network
acc1 = float(test(network_cp, optimizer_cp, test_loader, device))
weights1 = np.array(get_edge_weights(network_cp))

# train copied network
train(network_cp, optimizer_cp, train_loader, device)

# get edge weights and accuracy of the copied network after training
acc2 = float(test(network_cp, optimizer_cp, test_loader, device))
weights2 = np.array(get_edge_weights(network_cp))

# compare edge weights and accuracy of copied network before and after training
print('accuracy', acc1, acc2)
print('abs diff of weights for net1 and net2', np.sum(np.abs(weights1-weights2)))

要运行上面的代码,请包含这些导入和函数定义:

import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn as tnn
import torch.nn.functional as tnf
from copy import deepcopy
import numpy as np

def load_mnist(batch_size_train = 64, batch_size_test = 1000):
train_loader = torch.utils.data.DataLoader(
torchvision.datasets.MNIST('temp/', #'/data/users/alice/pytorch_training_files/',
train=True, download=True,
transform=torchvision.transforms.Compose([
torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize(
(0.1307,), (0.3081,))
])),
batch_size=batch_size_train, shuffle=True)

test_loader = torch.utils.data.DataLoader(
torchvision.datasets.MNIST('temp/', #'/data/users/alice/pytorch_training_files/',
train=False, download=True,
transform=torchvision.transforms.Compose([
torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize(
(0.1307,), (0.3081,))
])),
batch_size=batch_size_test, shuffle=True)

return(train_loader, test_loader)

def train(network, optimizer, train_loader, device, n_epochs=5):
network.train()
for epoch in range(1, n_epochs + 1):
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = network(data)
loss = tnf.nll_loss(output, target)
loss.backward()
optimizer.step()

def test(network, optimizer, test_loader, device):
network.eval()
test_loss, correct = 0, 0
with torch.no_grad():
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = network(data)
test_loss += tnf.nll_loss(output, target, size_average=False).item()
pred = output.data.max(1, keepdim=True)[1]
correct += pred.eq(target.data.view_as(pred)).sum()
test_loss /= len(test_loader.dataset)
print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))

return(float(correct)/float(len(test_loader.dataset)))

def get_edge_weights(network):
layers = [module for module in network.modules()][1:]
output = np.zeros(1)
for j, layer in enumerate(layers):
weights = list(layer.parameters())[0]
weights_arr = weights.detach().numpy()
weights_arr = weights_arr.flatten()
output = np.concatenate((output,weights_arr))
return output[1:]

class Net(tnn.Module):
def __init__(self):
super(Net, self).__init__()
self.fc1 =tnn.Linear(784,264)
self.fc2 = tnn.Linear(264,10)

def forward(self, x):
x = tnf.relu(self.fc1(x.view(-1,784)))
x = tnf.relu(self.fc2(x))
return tnf.log_softmax(x)

最佳答案

optimizer_cp = deepcopy(optimizer) 之后,optimizer_cp 仍然想要优化旧模型的参数(由 optimizer = optim.SGD(network. parameters(), lr=learning_rate, momentum=momentum)).

深度复制模型后,需要告诉优化器优化这个新模型的参数:

optimizer_cp = optim.SGD(network_cp.parameters(), lr=learning_rate, momentum=momentum)

关于python - Pytorch:deepcopy后无训练效果,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/65298796/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com