- android - 多次调用 OnPrimaryClipChangedListener
- android - 无法更新 RecyclerView 中的 TextView 字段
- android.database.CursorIndexOutOfBoundsException : Index 0 requested, 光标大小为 0
- android - 使用 AppCompat 时,我们是否需要明确指定其 UI 组件(Spinner、EditText)颜色
我一直在尝试使用 Udemy 类(class) DeepLearning_NLP_Chatbot 中的 Seq2Seq 模型构建 RNN,并且我一步步跟着他,但是在训练时遇到错误:InvalidArgumentError:收到的标签值 8825 超出了 [0, 8825) 的有效范围。数据集here 。
这是数据处理数据集
# Building a Chatbot with Deep NLP.
# Importing the libraries.
import numpy as np
import tensorflow as tf
import re
import time
# ---Data Processing---#
#------------------------#
# Importing the dataset.
lines = open('movie_lines.txt', encoding = 'utf-8', errors = 'ignore').read().split('\n')
conversations = open('movie_conversations.txt', encoding = 'utf-8', errors = 'ignore').read().split('\n')
# Creating a dictionary that map each line and its id
id2line = {}
for line in lines:
_line = line.split(' +++$+++ ')
if len(_line) == 5:
id2line[_line[0]]= _line[4]
# Creating a list of all conversations
conversations_ids = []
for conversation in conversations[:-1]:
_conversation = conversation.split(' +++$+++ ')[-1][1:-1].replace("'","").replace(" ","")
conversations_ids.append(_conversation.split(','))
# Getting seperately the question and the answer
questions = []
answers = []
for conversation in conversations_ids:
for i in range( len(conversation) - 1):
questions.append(id2line[conversation[i]])
answers.append(id2line[conversation[i+1]])
# Doing a first cleaning of the text
def clean_text(text):
text = text.lower()
text = re.sub(r"i'm", "i am", text)
text = re.sub(r"he's", "he is", text)
text = re.sub(r"she's", "she is", text)
text = re.sub(r"that's", "that is", text)
text = re.sub(r"what's", "what is", text)
text = re.sub(r"where's", "where is", text)
text = re.sub(r"\'ll", " will", text)
text = re.sub(r"\'ve", " have", text)
text = re.sub(r"\'re", " are", text)
text = re.sub(r"\'d", " would", text)
text = re.sub(r"won't", "will not", text)
text = re.sub(r"can't", "cannot", text)
text = re.sub(r"[-()\"#/@;:<>{}+=~|.?,]", "", text)
return text
# Cleaning the questions
clean_questions = []
for question in questions:
clean_questions.append(clean_text(question))
# Cleaning the answers
clean_answers = []
for answer in answers:
clean_answers.append(clean_text(answer))
# Creating a dictionary that maps each word with its occurences.
word2count = {}
for question in clean_questions:
for word in question.split():
if word not in word2count:
word2count[word] = 1
else:
word2count[word] += 1
for answer in clean_answers:
for word in answer.split():
if word not in word2count:
word2count[word] = 1
else:
word2count[word] += 1
# Creating two dictionaries that map questions and answers word to a
unique integer.
threshold = 20
questionsword2int = {}
word_number = 0
for word, count in word2count.items():
if count >= threshold:
questionsword2int[word] = word_number
word_number += 1
answersword2int = {}
word_number = 0
for word, count in word2count.items():
if count >= threshold:
answersword2int[word] = word_number
word_number += 1
# Adding the last tokens to these two dictionaries.
tokens = ['<PAD>', '<EOS>', '<SOS>', '<OUT>']
for token in tokens:
questionsword2int[token] = len(questionsword2int) + 1
for token in tokens:
answersword2int[token] = len(answersword2int) + 1
# Creating inverse dictionary to answerswords2int dictionary.
answersint2word = {w_i:w for w,w_i in answersword2int.items() }
# Adding End Of String token in the end of every answer.
for i in range(len(clean_answers)):
clean_answers[i] += ' <EOS>'
# Translating all the questions and the answers into integers.
# and Replacing all the words that were filtered out to <OUT> token.
questions_into_int = []
for question in clean_questions:
ints = []
for word in question.split():
if word not in questionsword2int:
ints.append(questionsword2int['<OUT>'])
else:
ints.append(questionsword2int[word])
questions_into_int.append(ints)
answers_into_int = []
for answer in clean_answers:
ints = []
for word in answer.split():
if word not in answersword2int:
ints.append(answersword2int['<OUT>'])
else:
ints.append(answersword2int[word])
answers_into_int.append(ints)
# Sorting questions and answers by the length of the questions
sorted_clean_questions = []
sorted_clean_answers = []
for length in range(1, 25 + 1):
for i in enumerate(questions_into_int):
if length == len(i[1]):
sorted_clean_questions.append(questions_into_int[i[0]])
sorted_clean_answers.append(answers_into_int[i[0]])
这是构建 seq2seq 模型:
# --- Building SEQ2SEQ Model---#
#------------------------------#
# Creating placeholder for the inputs and the targets:
def model_inputs():
inputs = tf.placeholder(tf.int32, [None, None], name = 'input')
targets = tf.placeholder(tf.int32, [None, None], name = 'target')
lr = tf.placeholder(tf.float32, name = 'learning_rate')
keep_prob = tf.placeholder(tf.float32, name = 'keep_prob')
return inputs, targets, lr, keep_prob
# Preprocessing targets:
def preprocess_targets(targets, word2int, batch_size):
left_side = tf.fill([batch_size, 1], word2int['<SOS>'])
right_side = tf.strided_slice(targets, [0,0], [batch_size, -1], [1,1])
preprcessed_targets = tf.concat([left_side, right_side], 1)
return preprcessed_targets
# Creating the Encoder RNN Layer:
def encoder_rnn_layer(rnn_inputs, rnn_size, num_layers, keep_prob, sequence_length):
lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)
lstm_dropout = tf.contrib.rnn.DropoutWrapper(lstm, input_keep_prob = keep_prob)
encoder_cell = tf.contrib.rnn.MultiRNNCell([lstm_dropout] * num_layers)
_, encoder_state = tf.nn.bidirectional_dynamic_rnn(cell_fw= encoder_cell,
cell_bw= encoder_cell,
sequence_length= sequence_length,
inputs= rnn_inputs,
dtype= tf.float32)
return encoder_state
# Decoding the Training Set:
def decode_training_set(encoder_state, decoder_cell, decoder_embedded_input, sequence_length, decoding_scope, output_function, keep_prob, batch_size):
attention_states = tf.zeros([batch_size, 1, decoder_cell.output_size])
attention_keys, attention_values, attention_score_function, attention_construct_function = tf.contrib.seq2seq.prepare_attention(attention_states, attention_option='bahdanau', num_units=decoder_cell.output_size)
training_decoder_function = tf.contrib.seq2seq.attention_decoder_fn_train(encoder_state[0],
attention_keys,
attention_values,
attention_score_function,
attention_construct_function,
name= "attn_dec_train")
decoder_output, decoder_final_state, decoder_final_context_state = tf.contrib.seq2seq.dynamic_rnn_decoder(decoder_cell,
training_decoder_function,
decoder_embedded_input,
sequence_length,
scope=decoding_scope)
decoder_output_dropout = tf.nn.dropout(decoder_output, keep_prob)
return output_function(decoder_output_dropout)
# Decoding the Test/Validation Set:
def decode_test_set(encoder_state, decoder_cell, decoder_embeddings_matrix,sos_id,eso_id,maximum_length, num_words, decoding_scope, output_function, keep_prob, batch_size):
attention_states = tf.zeros([batch_size, 1, decoder_cell.output_size])
attention_keys, attention_values, attention_score_function, attention_construct_function = tf.contrib.seq2seq.prepare_attention(attention_states, attention_option='bahdanau', num_units=decoder_cell.output_size)
test_decoder_function = tf.contrib.seq2seq.attention_decoder_fn_inference(output_function,
encoder_state[0],
attention_keys,
attention_values,
attention_score_function,
attention_construct_function,
decoder_embeddings_matrix,
sos_id,
eso_id,
maximum_length,
num_words,
name= "attn_dec_inf")
test_predictions, decoder_final_state, decoder_final_context_state = tf.contrib.seq2seq.dynamic_rnn_decoder(decoder_cell, test_decoder_function, scope=decoding_scope)
return test_predictions
# Creating the Decoder RNN:
def decoder_rnn(decoder_embedded_input, decoder_embeddings_matrix, encoder_state, num_words,sequence_length,rnn_size, num_layers, word2int, keep_prob, batch_size):
with tf.variable_scope("decoding") as decoding_scope:
lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)
lstm_dropout = tf.contrib.rnn.DropoutWrapper(lstm, input_keep_prob=keep_prob)
decoder_cell = tf.contrib.rnn.MultiRNNCell([lstm_dropout] * num_layers)
weights = tf.truncated_normal_initializer(stddev= 0.1)
biases = tf.zeros_initializer()
output_function = lambda x : tf.contrib.layers.fully_connected(x,
num_words,
None,
scope=decoding_scope,
weights_initializer= weights,
biases_initializer= biases)
training_predictions = decode_training_set(encoder_state,
decoder_cell,
decoder_embedded_input,
sequence_length,
decoding_scope,
output_function,
keep_prob,
batch_size)
decoding_scope.reuse_variables()
test_predictions = decode_test_set(encoder_state,
decoder_cell,
decoder_embeddings_matrix,
word2int['<SOS>'],
word2int['<EOS>'],
sequence_length - 1,
num_words,
decoding_scope,
output_function,
keep_prob,
batch_size)
return training_predictions, test_predictions
# Building SEQ2SEQ Model:
def seq2seq_model(inputs, targets, keep_prob, batch_size, sequence_length, answers_num_words, questions_num_words, encoder_embedding_size, decoder_embedding_size, rnn_size, num_layers, questionswords2int):
encoder_embedded_input = tf.contrib.layers.embed_sequence(inputs,
answers_num_words + 1,
encoder_embedding_size,
initializer=tf.random_uniform_initializer(0,1))
encoder_state = encoder_rnn_layer(encoder_embedded_input,
rnn_size,
num_layers,
keep_prob,
sequence_length)
preprocessed_targets = preprocess_targets(targets, questionsword2int, batch_size)
decoder_embeddings_matrix = tf.Variable(tf.random_uniform([questions_num_words + 1, decoder_embedding_size], 0, 1))
decoder_embedded_input = tf.nn.embedding_lookup(decoder_embeddings_matrix, preprocessed_targets)
training_predictions, test_predictions = decoder_rnn(decoder_embedded_input,
decoder_embeddings_matrix,
encoder_state,
questions_num_words,
sequence_length,
rnn_size,
num_layers,
questionsword2int,
keep_prob,
batch_size)
return training_predictions, test_predictions
这是训练:
# --- Training SEQ2SEQ Model---#
#------------------------------#
# Setting the Hyperparameters:
epochs = 100
batch_size = 64
rnn_size = 512
num_layers = 3
encoding_embedding_size = 512
decoding_embedding_size = 512
learning_rate = 0.01
min_learning_rate = 0.0001
learning_rate_decay = 0.9
keep_probability = 0.5
# Defining a Session:
tf.reset_default_graph()
session = tf.InteractiveSession()
# Loading Model Input Function:
inputs, targets, lr, keep_prob = model_inputs()
# Setting the Sequence Length:
sequence_length = tf.placeholder_with_default(25,None, name='sequence_length')
# Getting the Shape of on Input Tensors:
input_shape = tf.shape(inputs)
# Getting the Test and Training Predections:
traning_predictions, test_predictions = seq2seq_model(tf.reverse(inputs, [-1]),
targets,
keep_prob,
batch_size,
sequence_length,
len(answersword2int),
len(questionsword2int),
encoding_embedding_size,
decoding_embedding_size,
rnn_size,
num_layers,
questionsword2int)
# Setting Up the Loss Error, The Optimizer and Gradient Clipping.
with tf.name_scope("optimization"):
loss_error = tf.contrib.seq2seq.sequence_loss(traning_predictions,
targets,
tf.ones([input_shape[0], sequence_length]))
optimizer = tf.train.AdamOptimizer(learning_rate)
gradients = optimizer.compute_gradients(loss_error)
clipped_gradients = [(tf.clip_by_value(grad_tensor, -5., 5.), grad_variable) for grad_tensor, grad_variable in gradients if grad_tensor is not None]
optimizer_gradient_clipping = optimizer.apply_gradients(clipped_gradients)
# Padding the Sequences With the <PAD> Token:
def apply_padding(batch_of_sequences, word2int):
max_sequence_length = max([len(sequence) for sequence in batch_of_sequences])
return [sequence + [word2int['<PAD>']] * (max_sequence_length - len(sequence)) for sequence in batch_of_sequences]
# Splitting The Data Into Batches of Questions and Answers:
def split_into_batches (questions, answers, batch_size):
for batch_index in range(0, len(questions) // batch_size):
start_index = batch_index * batch_size
questions_in_batch = questions[start_index: start_index + batch_size]
answers_in_batch = answers[start_index: start_index + batch_size]
padded_questions_in_batch = np.array(apply_padding(questions_in_batch, questionsword2int))
padded_answers_in_batch = np.array(apply_padding(answers_in_batch, answersword2int))
yield padded_questions_in_batch, padded_answers_in_batch
# Splitting the Questions and Answers into Training and Validation Set:
training_validation_split = int (len(sorted_clean_questions) * 0.15)
training_questions = sorted_clean_questions[training_validation_split:]
training_answers = sorted_clean_answers[training_validation_split:]
validation_questions = sorted_clean_questions[:training_validation_split]
validation_answers = sorted_clean_answers[:training_validation_split]
# Training:
batch_index_check_learning_loss = 100
batch_index_check_validation_loss = ((len(training_questions)) // batch_size // 2) - 1
total_training_loss_error = 0
list_validation_loss_error = []
early_stopping_check = 0
early_stopping_stop = 1000
checkpoint = 'chatbot_weights.ckpt'
session.run(tf.global_variables_initializer())
for epoch in range(1, epochs + 1):
for batch_index, (padded_questions_in_batch, padded_answers_in_batch) in enumerate(split_into_batches(training_questions, training_answers, batch_size)):
starting_time = time.time()
_, batch_training_loss_error = session.run([optimizer_gradient_clipping, loss_error], {inputs: padded_questions_in_batch, targets: padded_answers_in_batch, lr: learning_rate, sequence_length: padded_answers_in_batch.shape[1], keep_prob: keep_probability})
total_training_loss_error += batch_training_loss_error
ending_time = time.time
batch_time = ending_time - starting_time
if batch_index % batch_index_check_learning_loss == 0:
print('Epoch: {:>3}/{}, Batch: {:>4}/{}, Traing Loss Error: {:>6.3f}, Traing Time on 100 Batches: {:d} seconds'.format(epoch, batch_index,len(training_questions) // batch_size, total_training_loss_error / batch_index_check_learning_loss, int(batch_time * 100)))
total_training_loss_error = 0
if batch_index % batch_index_check_validation_loss == 0 and batch_index > 0:
total_validation_loss_error = 0
starting_time = time.time()
for batch_index_validation, (padded_questions_in_batch, padded_answers_in_batch) in enumerate(split_into_batches(validation_questions, validation_answers, batch_size)):
_, batch_validation_loss_error = session.run(loss_error, {inputs: padded_questions_in_batch, targets: padded_answers_in_batch, lr: learning_rate, sequence_length: padded_answers_in_batch.shape[1], keep_prob: 1})
total_validation_loss_error += batch_validation_loss_error
ending_time = time.time
batch_time = ending_time - starting_time
average_validation_loss_error = total_validation_loss_error / len(validation_questions) / batch_size
print('Validation Loss Error: {:>6.3f}, Batch Validation Time: {:d} seconds'.format(average_validation_loss_error, int(batch_time)))
learning_rate *= learning_rate_decay
if learning_rate < min_learning_rate:
learning_rate = min_learning_rate
list_validation_loss_error.append(average_validation_loss_error)
if average_validation_loss_error < min(list_validation_loss_error):
print('I speak better now :)')
early_stopping_check = 0
saver = tf.train.Saver()
saver.save(session, checkpoint)
else:
print('Sorry! I do not speak better, I need to practice more.')
early_stopping_check += 1
if early_stopping_check == early_stopping_stop:
break
if early_stopping_check == early_stopping_stop:
print('My apologies, I cannot speak better anymore, this is best I can do')
break
print('Game over!')
如果您对此错误有解决方案,我们将不胜感激。 :)
最佳答案
在最后一层,例如您使用了 model.add(Dense(1,activation='softmax'))。这里1限制它的值从[0, 1)改变它的形状到最大输出标签。例如,您的输出来自标签 [0,7),然后使用 model.add(Dense(7,activation='softmax'))
input_text = Input(shape=(max_len,), dtype=tf.string)
embedding = Lambda(ElmoEmbedding, output_shape=(max_len, 1024))(input_text)
x = Bidirectional(LSTM(units=512, return_sequences=True,
recurrent_dropout=0.2, dropout=0.2))(embedding)
x_rnn = Bidirectional(LSTM(units=512, return_sequences=True,
recurrent_dropout=0.2, dropout=0.2))(x)
x = add([x, x_rnn]) # residual connection to the first biLSTM
out = TimeDistributed(Dense(n_tags, activation="softmax"))(x)
这里,在 TimeDistributed 层中,n_tags 是我要分类的标签的长度。
如果我预测其他数量,例如 q_tag,其长度与 n_tags 不同,即假设 10 且 n_tags 的长度为 7,并且我收到 8 作为输出标签,它将给出无效参数错误 Received a label value of 8 which is Outside有效范围[0, 7)。
关于python - InvalidArgumentError : Received a label value of 8825 which is outside the valid range of [0, 8825) SEQ2SEQ 模型,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/52324322/
如果数据是从另一台计算机(首先)“发送”的,我如何设置我的套接字例程以“发送”(首先)或(切换)“接收”? 谢谢 通用代码: -(void) TcpClient{ char buffer[12
我正在尝试在代码中使用 Java 8 方法引用。有四种类型的方法引用可用。 静态方法引用。 实例方法(绑定(bind)接收者)。 实例方法(未绑定(bind)接收者)。 构造函数引用。 使用静态方法引
我正在尝试在我的代码中使用 Java 8 方法引用。有四种类型的方法引用可用。 静态方法引用。 实例方法(绑定(bind)接收器)。 实例方法(UnBound 接收器)。 构造函数引用。 使用静态方法
这个问题在这里已经有了答案: X does not implement Y (... method has a pointer receiver) (4 个答案) 关闭 3 年前。 最近在研究Iri
我把这个问题/错误发布到 GIT 官方 channel ,但没有得到任何回应。希望这里有人可以帮助我。 当 receive.denyCurrentBranch 设置为 updateInstead 并且
我正在开发一个新的监控系统,该系统可以测量 Celery 队列吞吐量并在队列备份时帮助提醒团队。在我的工作过程中,我遇到了一些我不理解的奇怪行为(并且在 Celery 规范中没有详细记录)。 出于测试
我正在开发一个新的监控系统,该系统可以测量 Celery 队列吞吐量并在队列备份时帮助提醒团队。在我的工作过程中,我遇到了一些我不理解的奇怪行为(并且在 Celery 规范中没有详细记录)。 出于测试
这个问题在这里已经有了答案: What does this Google Play APK publish error message mean? (17 个答案) 关闭 3 年前。 我为我的应用程
我正在寻找一种解决方案来从我的 child “药物”中获取数据,并使用 ID 从“medication_plan”节点接收特定数据并将它们显示在 Recyclerview 中。 数据库结构: 目前我正
我正在构建 DNN 来预测对象是否存在于图像中。我的网络有两个隐藏层,最后一层看起来像这样: # Output layer W_fc2 = weight_variable([2048, 1])
我有一个模拟销售漏斗的 WF4 服务。它的工作原理是从“注册”接听电话开始。之后,有 10 个类似的阶段(每个阶段包含 2 个接收)。在当前阶段验证接收到的数据之前,您不能前进到一个阶段。但我不确定的
我有一个用 NSubstitute 伪造的对象,它有一个被调用两次的方法。我想验证该方法实际上已被调用两次(且仅调用两次)。我浏览了文档和谷歌,但没有运气。任何帮助,将不胜感激。谢谢。 最佳答案 NS
我在 Windows 上使用 D 编写了一个套接字服务器,现在我想将它移植到 Linux 上。这是代码摘要: /* * this.rawsocks - SocketSet * this.serve
我有一个在 AndroidManifest.xml 中定义了 Receiver 的应用程序,它似乎随机地被禁用,这导致应用程序强制关闭,直到重新安装应用程序。在发生这种情况之前,应用可能会在一天、一周
我正在尝试使用 android 注释库通过两种方式进行广播接收器,但 ide 无法识别此代码中的 @Receiver 或 @ReceiverAction import android.content.
我正在试验 Android 的 LiveData .我只是试图将大量通知推送给观察 LiveData 对象的观察者。我让一个线程在后台运行,在一个 while 循环中,我不断地通过 LiveData
当我运行以下代码时: [Test] public async Task Can_Test_Update() { var response = await _controller.UpdateA
查看 header 时,似乎第二台接收邮件的服务器直到最终 header 中报告的送达日期之后才转发它。 在 c9mailgw11.amadis.com,报告的时间是 22:47:49 -0800
我在这里搜索了几个问题都没有得到答案,所以我会根据我的具体情况询问。 真正简单的接收后 Hook ,它只是 curl 到 Redmine 以强制 Redmine 在提交时更新 repo 的 View
我目前正在尝试 Elixir。我对 Ruby 或函数式编程的经验很少,所以我不太熟悉语法。我在读Learn Elixir in Y minutes其中一个例子让我有点困惑。起初,指南显示了 case
我是一名优秀的程序员,十分优秀!