
torch : Expected all tensors on same device


I have moved both my model and my inputs to the same device, but I still get a runtime error:

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument mat1 in method wrapper_addmm)

Here is my code. First, my model implementation:

import torch
import torch.nn.functional as F


class Net(torch.nn.Module):
    def __init__(self, n_hiddens, n_feature=2, n_output=1):
        super().__init__()
        self.hiddens = []
        n_hidden_in = n_feature
        for n_hidden in n_hiddens:
            self.hiddens.append(torch.nn.Linear(n_hidden_in, n_hidden))  # hidden layer
            n_hidden_in = n_hidden

        self.predict = torch.nn.Linear(n_hidden, n_output)  # output layer

    def forward(self, x):
        for hidden in self.hiddens:
            x = F.relu(hidden(x))  # activation function for hidden layer
        x = self.predict(x)  # linear output
        return x

Then I define my data loaders. Here, X and y are numpy arrays:

from torch.utils.data import TensorDataset, DataLoader
# Split training/test
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state= 42)

X_train_tensor = torch.from_numpy(X_train)
y_train_tensor = torch.from_numpy(y_train)

X_test_tensor = torch.from_numpy(X_test)
y_test_tensor = torch.from_numpy(y_test)


train_dataset = TensorDataset(X_train_tensor, y_train_tensor)  # create your dataset
train_dataloader = DataLoader(train_dataset, batch_size=1000)  # create your dataloader

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)  # create your dataset
test_dataloader = DataLoader(test_dataset, batch_size=1000)  # create your dataloader

This is where I train my model. The error occurs on the line "outputs = regressor(inputs)":

NUM_EPOCHS = 2000
BATCH_SIZE = 1000
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(f"Device used : {device}")

# 1 hidden layer
total_num_nodes = 256
regressor = Net(n_hiddens=[total_num_nodes]).to(device)
optimizer = torch.optim.SGD(regressor.parameters(), lr=0.2, momentum=0.1, nesterov=True)
loss_func = torch.nn.MSELoss()  # this is for regression mean squared loss


for epoch in range(NUM_EPOCHS):
    running_loss = 0.0

    for i, data in enumerate(train_dataloader, 0):
        inputs, values = data
        inputs = inputs.float().to(device)
        values = values.float().to(device)
        optimizer.zero_grad()  # clear gradients for next train
        print(f"Input device is : cuda:{inputs.get_device()}")
        print(f"Target value device is : cuda:{values.get_device()}")
        print(f"Is model on cuda ? : {next(regressor.parameters()).is_cuda}")
        outputs = regressor(inputs)  # <-- This is where I have the error
        loss = loss_func(outputs, values)
        loss.backward()  # backpropagation, compute gradients
        optimizer.step()  # apply gradients

Here is the output of my print statements:

Device used : cuda:0
Input device is : cuda:0
Target value device is : cuda:0
Is model on cuda ? :True

This should mean that my model and my tensors are all on the same device, so why am I still getting this error?

The full error log is:

---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-6-5234b830bebc> in <module>()
24 print(f"Target value device is : cuda:{values.get_device()}")
25 print(f"Is model on cuda ? : {next(regressor.parameters()).is_cuda}")
---> 26 outputs = regressor(inputs)
27 loss = loss_func(outputs, values)
28 loss.backward() # backpropagation, compute gradients

4 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []

<ipython-input-4-56c54b30b771> in forward(self, x)
16 def forward(self, x):
17 for hidden in self.hiddens :
---> 18 x = F.relu(hidden(x)) # activation function for hidden layer
19 x = self.predict(x) # linear output
20 return x

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/linear.py in forward(self, input)
101
102 def forward(self, input: Tensor) -> Tensor:
--> 103 return F.linear(input, self.weight, self.bias)
104
105 def extra_repr(self) -> str:

/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
1846 if has_torch_function_variadic(input, weight, bias):
1847 return handle_torch_function(linear, (input, weight, bias), input, weight, bias=bias)
-> 1848 return torch._C._nn.linear(input, weight, bias)
1849
1850

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument mat1 in method wrapper_addmm)

Thanks a lot.

Best Answer

TL;DR: Use nn.ModuleList instead of a plain Python list to store the hidden layers in Net.

All the hidden layers are stored in a plain Python list, self.hiddens, inside Net. When you move your model to the GPU with .to(device), PyTorch has no way of knowing that the elements of this plain list should also be moved to the same device. However, if you make self.hiddens = nn.ModuleList(), PyTorch treats every element of this special list as an nn.Module and recursively moves them to the same device as Net.
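A minimal sketch of the fix (only the container for the hidden layers changes; everything else stays as in the original model):

import torch
import torch.nn.functional as F


class Net(torch.nn.Module):
    def __init__(self, n_hiddens, n_feature=2, n_output=1):
        super().__init__()
        self.hiddens = torch.nn.ModuleList()  # registered container instead of a plain list
        n_hidden_in = n_feature
        for n_hidden in n_hiddens:
            self.hiddens.append(torch.nn.Linear(n_hidden_in, n_hidden))  # hidden layer
            n_hidden_in = n_hidden

        self.predict = torch.nn.Linear(n_hidden_in, n_output)  # output layer

    def forward(self, x):
        for hidden in self.hiddens:
            x = F.relu(hidden(x))  # activation function for each hidden layer
        x = self.predict(x)  # linear output
        return x

Because the layers are now registered submodules, regressor = Net(...).to(device) moves all of their weights and biases to the GPU along with self.predict.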

See these answers 1, 2, 3 for more details.
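As a side note on why the original print statements were misleading (an illustrative sketch, not part of the original answer): parameters of layers kept in a plain Python list are never registered with the module, so they do not appear in regressor.parameters() and are not moved by .to(device). The check next(regressor.parameters()) therefore only looked at self.predict, which had been moved, while the hidden layers were still on the CPU:

regressor = Net(n_hiddens=[256]).to(device)

# Only the registered output layer shows up: ['predict.weight', 'predict.bias']
print([name for name, _ in regressor.named_parameters()])

# The unregistered hidden layers were never moved and still live on the CPU
print([layer.weight.device for layer in regressor.hiddens])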

Regarding "torch : Expected all tensors on same device", a similar question can be found on Stack Overflow: https://stackoverflow.com/questions/71278607/
