gpt4 book ai didi

python - 自定义tensorflow解码器TypeError : __call__() missing 1 required positional argument: 'inputs'

转载 作者:行者123 更新时间:2023-12-04 10:05:00 29 4
gpt4 key购买 nike

我正在使用 tensorflow 2.0 来训练我自己的注意力模型,
但是我在构建解码器类时遇到了一个大问题,
像这样

TypeError                                 Traceback (most recent call last)
<ipython-input-19-3042369c4295> in <module>
9 enc_hidden_h=fw_sample_state_h,
10 enc_hidden_c=fw_sample_state_c,
---> 11 enc_output=sample_output)
12
13 print ('Decoder output shape: (batch_size, vocab size) {}'.format(sample_decoder_output.shape))

TypeError: __call__() missing 1 required positional argument: 'inputs'

我的编码器-解码器注意力模型如下所示
编码器:这是一个自定义的 pBLSTM
class Encoder(tf.keras.Model):
def __init__(self, lstm_units, final_units, batch_sz, conv_filters, mfcc_dims):
super(Encoder, self).__init__()
self.lstm_units = lstm_units
self.final_units = final_units
self.batch_sz = batch_sz
self.conv_filters = conv_filters
self.mfcc_dims = mfcc_dims

# Convolution layer to extract feature after MFCC
self.conv_feat = tf.keras.layers.Conv1D(filters=self.conv_filters,
kernel_size=self.mfcc_dims,
padding='valid',
activation='relu',
strides=self.mfcc_dims)

def call(self, x):
'''
build a pyramidal LSTM neural network encoder
'''
# Convolution Feature Extraction
x = self.conv_feat(x)

# initialize states for forward and backward
initial_state_fw = None
initial_state_bw = None

counter = 0
while(x.shape[1] > self.final_units):
counter += 1
# forward LSTM
fw_output, fw_state_h, fw_state_c = self.build_lstm(True)(x, initial_state=initial_state_fw)

# backward LSTM
bw_output, bw_state_h, bw_state_c = self.build_lstm(False)(x, initial_state=initial_state_bw)

x = tf.concat([fw_output, bw_output], -1)
x = self.reshape_pyramidal(x)

initial_state_fw = [fw_state_h, fw_state_c]
initial_state_bw = [bw_state_h, bw_state_c]

print(f"Encoder pyramid layer number: {counter}\n")
return x, (fw_state_h, fw_state_c), (bw_state_h, bw_state_c)

def build_lstm(self, back=True):
'''
build LSTM layer for forward and backward
'''
return tf.keras.layers.LSTM(units=self.lstm_units,
return_sequences=True,
return_state=True,
go_backwards=back)

def reshape_pyramidal(self, outputs):
'''
After concatenating forward and backward outputs
return the reshaped output
'''
batch_size, time_steps, num_units = outputs.shape

return tf.reshape(outputs, (batch_size, -1, num_units * 2))

注意力模型:根据本文构建:
https://arxiv.org/abs/1508.04025v5
class BahdanauAttention(tf.keras.layers.Layer):
def __init__(self, units):
super(BahdanauAttention, self).__init__()
self.W1 = tf.keras.layers.Dense(units)
self.W2 = tf.keras.layers.Dense(units)
self.V = tf.keras.layers.Dense(1)

def call(self, query, values):
# query hidden state shape == (batch_size, hidden size)
# query_with_time_axis shape == (batch_size, 1, hidden size)
# values shape == (batch_size, max_len, hidden size)
# we are doing this to broadcast addition along the time axis to calculate the score
query_with_time_axis = tf.expand_dims(query, 1)

# score shape == (batch_size, max_length, 1)
# we get 1 at the last axis because we are applying score to self.V
# the shape of the tensor before applying self.V is (batch_size, max_length, units)
score = self.V(tf.nn.tanh(
self.W1(query_with_time_axis) + self.W2(values)))

# attention_weights shape == (batch_size, max_length, 1)
attention_weights = tf.nn.softmax(score, axis=1)

# context_vector shape after sum == (batch_size, hidden_size)
context_vector = attention_weights * values
context_vector = tf.reduce_sum(context_vector, axis=1)

return context_vector, attention_weights

解码器:一个 1 层 LSTM 解码器
class Decoder(tf.keras.Model):
def __init__(self, target_sz, embedding_dim, decoder_units, batch_sz, **kwargs):
super(Decoder, self).__init__(**kwargs)
self.batch_sz = batch_sz
self.decoder_units = decoder_units
self.embedding = tf.keras.layers.Embedding(target_sz, embedding_dim)
self.attention = BahdanauAttention(self.decoder_units)
self.lstm = tf.keras.layers.LSTM(units=self.decoder_units, return_sequences=True, return_state=True)
self.fc = tf.keras.layers.Dense(target_sz)


def call(self, x, enc_hidden_h, enc_hidden_c, enc_output):
'''
build LSTM decoder
'''
# enc_output shape == (batch_size, max_length, hidden_size)
context_vector, attention_weights = self.attention(enc_hidden_h, enc_output)

# x shape after passing through embedding == (batch_size, 1, embedding_dim)
x = self.embedding(x)

# x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)
x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

# passing the concatenated vector to the LSTM
output, state_h, state_c = self.lstm(x)

# output shape == (batch_size * 1, hidden_size)
output = tf.reshape(output, (-1, output.shape[-1]))

# output shape == (batch_size, vocab)
x = self.fc(output)

return x, (state_h, state_c), attention_weights

我在使用我的示例输入进行测试时遇到了这个错误,如下所示
example_input_batch, example_target_batch = next(iter(dataset))
sample_output, (fw_sample_state_h, fw_sample_state_c), bw_sample_state = encoder(example_input_batch)
decoder = Decoder(target_sz=PHONEME_SIZE,
embedding_dim=EMBEDDING_DIM,
decoder_units=LSTM_UNITS,
batch_sz=BATCH_SIZE)

sample_target_size = tf.random.uniform((BATCH_SIZE, 1))
sample_decoder_output, sample_decoder_hidden, attention_weights = decoder(
x=sample_target_size,
enc_hidden_h=fw_sample_state_h,
enc_hidden_c=fw_sample_state_c,
enc_output=sample_output)

最佳答案

正如评论中所讨论的,问题是海报继承自 tf.keras.Model在创建 Decoder() 时类(class)。这个父类(super class)期待一个 inputs __call__() 中的参数运算符(operator)。

因此,可以通过更改 x 来解决此错误至 inputsDecoder.call()像这样的方法:

def call(self, inputs, enc_hidden_h, enc_hidden_c, enc_output):
'''
build LSTM decoder
'''
# enc_output shape == (batch_size, max_length, hidden_size)
context_vector, attention_weights = self.attention(enc_hidden_h, enc_output)

# x shape after passing through embedding == (batch_size, 1, embedding_dim)
x = self.embedding(inputs)

# x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)
x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

# passing the concatenated vector to the LSTM
output, state_h, state_c = self.lstm(x)

# output shape == (batch_size * 1, hidden_size)
output = tf.reshape(output, (-1, output.shape[-1]))

# output shape == (batch_size, vocab)
x = self.fc(output)

return x, (state_h, state_c), attention_weights

关于python - 自定义tensorflow解码器TypeError : __call__() missing 1 required positional argument: 'inputs' ,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/61631360/

29 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com