
python - Adapting PyTorch "NLP from Scratch" for a bidirectional GRU


I have taken the code from the tutorial and tried to modify it to support bidirectionality and an arbitrary number of GRU layers.

Link to the tutorial, which uses a unidirectional, single-layer GRU:
https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html

The model works fine, but as soon as I set bidirectional=True I get a size mismatch error (shown below). Any idea why this happens?

Encoder:

import torch
import torch.nn as nn
import torch.nn.init as init

class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, n_layers=1, bidirectional=False):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.hidden_var = hidden_size // 2 if bidirectional else hidden_size
        self.n_layers = n_layers
        self.bidirectional = bidirectional
        self.n_directions = 2 if bidirectional else 1

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size,
                          self.hidden_var,
                          num_layers=self.n_layers,
                          bidirectional=self.bidirectional)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        # output = (output[:, :, :self.hidden_size] +
        #           output[:, :, self.hidden_size:])
        return output, hidden

    def initHidden(self):
        return torch.zeros(self.n_layers*self.n_directions, 1, self.hidden_var, device=device)
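
For context, here is a minimal shape check (my own sketch, using a toy vocabulary of 10 tokens; device is the tutorial's global) of what this encoder returns when hidden_size=256 and bidirectional=True:

# Sketch only: push a single token through the bidirectional encoder.
enc = EncoderRNN(input_size=10, hidden_size=256, n_layers=1, bidirectional=True).to(device)
hidden = enc.initHidden()                    # [n_layers * 2, 1, hidden_var] = [2, 1, 128]
output, hidden = enc(torch.tensor([[3]], device=device), hidden)
print(output.shape)                          # torch.Size([1, 1, 256]), both directions concatenated
print(hidden.shape)                          # torch.Size([2, 1, 128]), one row per direction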

AttnDecoder:
class AttnDecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, n_layers=1, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length
        self.n_layers = n_layers

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)

        self.gru = nn.GRU(self.hidden_size,
                          self.hidden_size,
                          num_layers=self.n_layers)

        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        attn_weights = F.softmax(
            self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = F.relu(output)
        output, hidden = self.gru(output, hidden)

        output = F.log_softmax(self.out(output[0]), dim=1)

        return output, hidden, attn_weights

    def initHidden(self):
        return torch.zeros(1*self.n_layers, 1, self.hidden_size, device=device)

Everything else in the tutorial stays exactly the same, apart from this block (to account for the new parameters):
n_layers=1
bidirectional = True
hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size, n_layers=n_layers, bidirectional=bidirectional).to(device)
attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1, n_layers=n_layers).to(device)
trainIters(encoder1, attn_decoder1, 75000, print_every=5000)
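
As a quick diagnostic, the shapes that reach the decoder's attention layer can be checked directly (a sketch of mine under exactly these settings; it reproduces the 384 that appears in the error below):

# Sketch only: the first concatenation inside AttnDecoderRNN.forward().
enc_hidden = encoder1.initHidden()
print(enc_hidden.shape)                                  # torch.Size([2, 1, 128])
dec_emb = attn_decoder1.embedding(torch.tensor([[0]], device=device)).view(1, 1, -1)
print(torch.cat((dec_emb[0], enc_hidden[0]), 1).shape)   # torch.Size([1, 384]), while self.attn expects 512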

Error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-133-37084c93a197> in <module>
5 attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1, n_layers=n_layers).to(device)
6
----> 7 trainIters(encoder1, attn_decoder1, 75000, print_every=5000)

<ipython-input-131-774ce8edefa6> in trainIters(encoder, decoder, n_iters, print_every, plot_every, learning_rate)
16
17 loss = train(input_tensor, target_tensor, encoder,
---> 18 decoder, encoder_optimizer, decoder_optimizer, criterion)
19 print_loss_total += loss
20 plot_loss_total += loss

<ipython-input-130-67be7e8c2a58> in train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length)
39 for di in range(target_length):
40 decoder_output, decoder_hidden, decoder_attention = decoder(
---> 41 decoder_input, decoder_hidden, encoder_outputs)
42 topv, topi = decoder_output.topk(1)
43 decoder_input = topi.squeeze().detach() # detach from history as input

~/miniconda3/envs/pytorch/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
545 result = self._slow_forward(*input, **kwargs)
546 else:
--> 547 result = self.forward(*input, **kwargs)
548 for hook in self._forward_hooks.values():
549 hook_result = hook(self, input, result)

<ipython-input-129-6dd1d30fe28f> in forward(self, input, hidden, encoder_outputs)
24
25 attn_weights = F.softmax(
---> 26 self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
27 attn_applied = torch.bmm(attn_weights.unsqueeze(0),
28 encoder_outputs.unsqueeze(0))

~/miniconda3/envs/pytorch/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
545 result = self._slow_forward(*input, **kwargs)
546 else:
--> 547 result = self.forward(*input, **kwargs)
548 for hook in self._forward_hooks.values():
549 hook_result = hook(self, input, result)

~/miniconda3/envs/pytorch/lib/python3.7/site-packages/torch/nn/modules/linear.py in forward(self, input)
85
86 def forward(self, input):
---> 87 return F.linear(input, self.weight, self.bias)
88
89 def extra_repr(self):

~/miniconda3/envs/pytorch/lib/python3.7/site-packages/torch/nn/functional.py in linear(input, weight, bias)
1367 if input.dim() == 2 and bias is not None:
1368 # fused op is marginally faster
-> 1369 ret = torch.addmm(bias, input, weight.t())
1370 else:
1371 output = input.matmul(weight.t())

RuntimeError: size mismatch, m1: [1 x 384], m2: [512 x 10] at /tmp/pip-req-build-58y_cjjl/aten/src/TH/generic/THTensorMath.cpp:752

Any help would be appreciated!

Update based on user3923920's comment (the encoder/decoder also includes an LSTM option and now works with bidirectionality):

New, working, adapted Encoder:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, n_layers=1, bidirectional=False, method='GRU'):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.hidden_var = hidden_size // 2 if bidirectional else hidden_size
        self.n_layers = n_layers
        self.bidirectional = bidirectional
        self.n_directions = 2 if bidirectional else 1
        self.method = method

        self.embedding = nn.Embedding(input_size, hidden_size)
        if self.method == 'GRU':
            self.net = nn.GRU(hidden_size,
                              self.hidden_var,
                              num_layers=self.n_layers,
                              bidirectional=self.bidirectional)
        elif self.method == 'LSTM':
            self.net = nn.LSTM(hidden_size,
                               self.hidden_var,
                               num_layers=self.n_layers,
                               bidirectional=self.bidirectional)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.net(output, hidden)
        # output = (output[:, :, :self.hidden_size] +
        #           output[:, :, self.hidden_size:])
        return output, hidden, embedded

    def initHidden(self):
        if self.method == 'GRU':
            return torch.zeros(self.n_layers * self.n_directions, 1, self.hidden_var, device=device)
        elif self.method == 'LSTM':
            h_state = torch.zeros(self.n_layers * self.n_directions, 1, self.hidden_var)
            c_state = torch.zeros(self.n_layers * self.n_directions, 1, self.hidden_var)
            hidden = (h_state, c_state)
            return hidden
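
A quick check of the adapted encoder with the LSTM option (again a sketch with assumed toy sizes, run on CPU; note that forward now returns a third value, embedded, so the tutorial's train() needs to unpack it as well):

# Sketch only: two-layer bidirectional LSTM encoder, single token.
enc = EncoderRNN(input_size=10, hidden_size=256, n_layers=2, bidirectional=True, method='LSTM')
h0 = enc.initHidden()                        # tuple (h_state, c_state), each [n_layers * 2, 1, 128]
output, hidden, embedded = enc(torch.tensor([[3]]), h0)
print(output.shape)                          # torch.Size([1, 1, 256])
print(hidden[0].shape, hidden[1].shape)      # torch.Size([4, 1, 128]) torch.Size([4, 1, 128])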

New, working, adapted Decoder:
class AttnDecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, n_layers=1, dropout_p=0.1,
                 max_length=MAX_LENGTH, method='GRU', bidirectional=False):

        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length
        self.n_layers = n_layers
        self.method = method
        self.bidirectional = bidirectional

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)

        if self.method == 'GRU':
            self.net = nn.GRU(self.hidden_size,
                              self.hidden_size,
                              num_layers=self.n_layers)
        elif self.method == 'LSTM':
            self.net = nn.LSTM(self.hidden_size,
                               self.hidden_size,
                               num_layers=self.n_layers)

        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):

        # Embed
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)
        self.hidden = hidden

        # Concatenate all of the layers
        hidden_h_rows = ()
        hidden_c_rows = ()

        if self.method == 'LSTM':
            # hidden is a tuple of h_state and c_state
            decoder_h, decoder_c = hidden
            print(decoder_h.shape)
            hidden_shape = decoder_h.shape[0]

            # h_state
            for x in range(0, hidden_shape):
                hidden_h_rows += (decoder_h[x],)

            # c_state
            for x in range(0, hidden_shape):
                hidden_c_rows += (decoder_c[x],)

        elif self.method == "GRU":
            # hidden is not a tuple (GRU)
            decoder_h = hidden
            hidden_shape = decoder_h.shape[0]

            # h_state
            for x in range(0, hidden_shape):
                hidden_h_rows += (decoder_h[x],)

        if self.bidirectional:
            decoder_h_cat = torch.cat(hidden_h_rows, 1)
            # Make sure the h_dim size is compatible with num_layers after concatenation.
            decoder_h = decoder_h_cat.view((self.n_layers, 1, self.hidden_size))  # hidden_size=256

        if self.method == "LSTM":
            decoder_c_cat = torch.cat(hidden_c_rows, 1)
            decoder_c = decoder_c_cat.view((self.n_layers, 1, self.hidden_size))  # hidden_size=256
            hidden_lstm = (decoder_h, decoder_c)

        elif self.method == "GRU":
            hidden_gru = decoder_h

        # Attention Block
        attn_weights = F.softmax(
            self.attn(torch.cat((embedded[0], hidden_lstm[0][0] if self.method == "LSTM" else
                                 hidden_gru[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0), encoder_outputs.unsqueeze(0))
        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = F.relu(output)
        output, hidden = self.net(output,
                                  hidden_lstm if self.method == "LSTM" else hidden_gru)  # I am not sure about this!
        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        # The decoder itself stays unidirectional, so its state width is hidden_size.
        if self.method == 'GRU':
            return torch.zeros(self.n_layers * 1, 1, self.hidden_size, device=device)
        elif self.method == 'LSTM':
            h_state = torch.zeros(self.n_layers * 1, 1, self.hidden_size)
            c_state = torch.zeros(self.n_layers * 1, 1, self.hidden_size)
            hidden = (h_state, c_state)
            return hidden
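
Putting the two adapted modules together for a single GRU decoding step (a sketch with assumed toy vocabulary sizes; MAX_LENGTH and device are the tutorial's globals):

# Sketch only: the decoder folds the [2, 1, 128] bidirectional encoder hidden
# back into [1, 1, 256] before running its own unidirectional GRU.
enc = EncoderRNN(10, 256, n_layers=1, bidirectional=True, method='GRU').to(device)
dec = AttnDecoderRNN(256, 12, n_layers=1, method='GRU', bidirectional=True).to(device)

encoder_outputs = torch.zeros(MAX_LENGTH, 256, device=device)
enc_out, enc_hidden, _ = enc(torch.tensor([[3]], device=device), enc.initHidden())
encoder_outputs[0] = enc_out[0, 0]

dec_out, dec_hidden, attn = dec(torch.tensor([[0]], device=device), enc_hidden, encoder_outputs)
print(dec_out.shape)     # torch.Size([1, 12])
print(dec_hidden.shape)  # torch.Size([1, 1, 256])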

Best Answer

So I'm not sure whether this is 100% correct, since I'm only just learning how to program RNNs myself, but I changed my code in a couple of additional places.

For one, you will notice the error says m1: [1 x 384], so the result of

torch.cat((embedded[0], hidden[0]), 1)

does not end with 512 in its last dimension when it goes through the attn weight layer, which is the input size that layer expects. That is because hidden is a tensor of shape [2, 1, 256] rather than something of shape [1, 1, 512]. Since your dimensions don't exactly match mine, I'm not sure what is different, but in train(...), where the tutorial simply sets

decoder_hidden = encoder_hidden

I do

decoder_hidden = torch.cat((encoder_hidden[0], encoder_hidden[1]), 1)
decoder_hidden = decoder_hidden.view((1, 1, 512))
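
If the encoder has more than one layer, the same idea can be written generically. This is my own sketch, not part of the answer above; it relies on the documented PyTorch layout in which h_n can be viewed as [num_layers, num_directions, batch, hidden]:

# Sketch: fold h_n of shape [n_layers * 2, 1, h] from a bidirectional RNN
# into [n_layers, 1, 2 * h] so it matches a unidirectional decoder of width 2 * h.
n_layers = encoder_hidden.shape[0] // 2
decoder_hidden = (encoder_hidden
                  .view(n_layers, 2, 1, -1)    # [layers, directions, batch, h]
                  .transpose(1, 2)             # [layers, batch, directions, h]
                  .reshape(n_layers, 1, -1))   # [layers, batch, 2 * h]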

Hope this helps in some way.

Regarding python - Adapting PyTorch "NLP from Scratch" for a bidirectional GRU, a similar question can be found on Stack Overflow: https://stackoverflow.com/questions/58996451/
