
python - ValueError due to wrong input size in AdamOptimizer.compute_gradients


I don't know why, but I get the following error when calling compute_gradients on an AdamOptimizer with the learning rate set to 1e-4:

ValueError: input has 102144 elements, which isn't divisible by 91008

Here is the relevant snippet:

optimizer = tf.train.AdamOptimizer(1e-4)
print(dcnn.loss)
grads_and_vars = optimizer.compute_gradients(dcnn.loss)

The print statement outputs:

Tensor("loss/Mean:0", shape=(), dtype=float32)

I understand what the error means (the loss is being computed from an input of the wrong size), but how do I fix it?
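For reference, a minimal way to narrow down where the element counts diverge would be to print the statically known shape of each intermediate tensor while the graph is being built (a hypothetical debugging sketch; the names refer to the TextDCNN code further down):

# Hypothetical debugging lines inside TextDCNN.__init__, after h_pool_flat is built.
# get_shape() reports the statically inferred shape, so a reshape whose target
# does not divide the incoming element count is easy to spot.
print("pooled shapes:", [p.get_shape() for p in pooled_outputs])
print("h_pool shape:", self.h_pool.get_shape())
print("h_pool_flat shape:", self.h_pool_flat.get_shape())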

Here is my full code (I am trying to build a dynamic convolutional neural network):

# train.py

import datetime
import time

import numpy as np
import os
import tensorflow as tf
from env.src.sentiment_analysis.dcnn.text_dcnn import TextDCNN
from env.src.sentiment_analysis.cnn import data_helpers as data_helpers
from tensorflow.contrib import learn

# Model Hyperparameters
tf.flags.DEFINE_integer("embedding_dim", 128, "Dimensionality of character embedding (default: 128)")
tf.flags.DEFINE_string("filter_sizes", "3,4,5", "Comma-separated filter sizes (default: '3,4,5')")
tf.flags.DEFINE_integer("num_filters", 128, "Number of filters per filter size (default: 128)")
tf.flags.DEFINE_float("dropout_keep_prob", 0.5, "Dropout keep probability (default: 0.5)")
tf.flags.DEFINE_float("l2_reg_lambda", 0.0, "L2 regularizaion lambda (default: 0.0)")

# Training parameters
tf.flags.DEFINE_integer("batch_size", 64, "Batch Size (default: 64)")
tf.flags.DEFINE_integer("num_epochs", 200, "Number of training epochs (default: 200)")
tf.flags.DEFINE_integer("evaluate_every", 100, "Evaluate model on dev set after this many steps (default: 100)")
tf.flags.DEFINE_integer("checkpoint_every", 100, "Save model after this many steps (default: 100)")

# Misc Parameters
tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")
tf.flags.DEFINE_string("positive_file", "../rotten_tomatoes/rt-polarity.pos", "Location of the rt-polarity.pos file")
tf.flags.DEFINE_string("negative_file", "../rotten_tomatoes/rt-polarity.neg", "Location of the rt-polarity.neg file")

FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()

print("\nParameters:")

for attr, value in sorted(FLAGS.__flags.items()):
    print("{} = {}".format(attr.upper(), value))

print("")


# Data Preparation

# Load data
print("Loading data...")
x_text, y = data_helpers.load_data_and_labels(FLAGS.positive_file, FLAGS.negative_file)

# Build vocabulary
max_document_length = max([len(x.split(" ")) for x in x_text])
vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
x = np.array(list(vocab_processor.fit_transform(x_text)))

x_arr = np.array(x_text)

seq_lens = []

for s in x_arr:
    seq_lens.append(len(s))

# Randomly shuffle data
np.random.seed(10)
shuffle_indices = np.random.permutation(np.arange(len(y)))
x_shuffled = x[shuffle_indices]
y_shuffled = y[shuffle_indices]

# Split train/test set
x_train, x_dev = x_shuffled[:-1000], x_shuffled[-1000:]
y_train, y_dev = y_shuffled[:-1000], y_shuffled[-1000:]

print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

# Training
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement
    )
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        dcnn = TextDCNN(
            sequence_lengths=seq_lens,
            num_classes=y_train.shape[1],
            vocab_size=len(vocab_processor.vocabulary_),
            embedding_size=FLAGS.embedding_dim,
            filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
            num_filters=FLAGS.num_filters,
        )

        # The training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-4)
        grads_and_vars = optimizer.compute_gradients(dcnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        # Output directory for models and summaries
        timestamp = str(int(time.time()))
        out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
        print("Writing to {}\n".format(out_dir))

        # Summaries for loss and accuracy
        loss_summary = tf.scalar_summary("loss", dcnn.loss)
        acc_summary = tf.scalar_summary("accuracy", dcnn.accuracy)

        # Summaries for training
        train_summary_op = tf.merge_summary([loss_summary, acc_summary])
        train_summary_dir = os.path.join(out_dir, "summaries", "train")
        train_summary_writer = tf.train.SummaryWriter(train_summary_dir, sess.graph)

        # Summaries for dev
        dev_summary_op = tf.merge_summary([loss_summary, acc_summary])
        dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
        dev_summary_writer = tf.train.SummaryWriter(dev_summary_dir, sess.graph)

        # Checkpointing
        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")

        # TensorFlow assumes this directory already exists so we need to create it
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver(tf.all_variables())

        # Write vocabulary
        vocab_processor.save(os.path.join(out_dir, "vocab"))

        # Initialize all variables
        sess.run(tf.initialize_all_variables())

        def train_step(x_batch, y_batch):
            """
            A single training step.
            Args:
                x_batch: A batch of X training values.
                y_batch: A batch of Y training values.

            Returns: void
            """
            print(dcnn.input_x)
            print(x_batch)
            print(dcnn.input_y)
            print(y_batch)

            feed_dict = {
                dcnn.input_x: x_batch,
                dcnn.input_y: y_batch,
                dcnn.dropout_keep_prob: FLAGS.dropout_keep_prob
            }

            # Execute train_op
            _, step, summaries, loss, accuracy = sess.run(
                [train_op, global_step, train_summary_op, dcnn.loss, dcnn.accuracy],
                feed_dict
            )

            # Print and save to disk loss and accuracy of the current training batch
            time_str = datetime.datetime.now().isoformat()
            print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
            train_summary_writer.add_summary(summaries, step)

        def dev_step(x_batch, y_batch, writer=None):
            """
            Evaluates a model on a dev set.
            Args:
                x_batch: A batch of X training values.
                y_batch: A batch of Y training values.
                writer: The writer to use to record the loss and accuracy

            Returns: void
            """
            feed_dict = {
                dcnn.input_x: x_batch,
                dcnn.input_y: y_batch,
                dcnn.dropout_keep_prob: 1.0
            }

            step, summaries, loss, accuracy = sess.run(
                [global_step, dev_summary_op, dcnn.loss, dcnn.accuracy],
                feed_dict
            )

            time_str = datetime.datetime.now().isoformat()
            print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
            if writer:
                writer.add_summary(summaries, step)

        # Generate batches
        batches = data_helpers.batch_iter(list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs)

        # Training loop. For each batch...
        for batch in batches:
            x_batch, y_batch = zip(*batch)
            train_step(x_batch, y_batch)
            current_step = tf.train.global_step(sess, global_step)
            if current_step % FLAGS.evaluate_every == 0:
                print("\nEvaluation:")
                dev_step(x_dev, y_dev, writer=dev_summary_writer)
                print("")
            if current_step % FLAGS.checkpoint_every == 0:
                path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                print("Saved model checkpoint to {}\n".format(path))

Here is the second file:

# text_dcnn.py

import tensorflow as tf


class TextDCNN(object):
    """
    A CNN for NLP tasks. Architecture is as follows:
    Embedding layer, conv layer, max-pooling and softmax layer
    """

    def __init__(self, sequence_lengths, num_classes, vocab_size, embedding_size, filter_sizes, num_filters):
        """
        Makes a new CNNClassifier
        Args:
            sequence_lengths: The length of each sentence
            num_classes: Number of classes in the output layer (positive and negative would be 2 classes)
            vocab_size: The size of the vocabulary, needed to define the size of the embedding layer
            embedding_size: Dimensionality of the embeddings
            filter_sizes: Number of words the convolutional filters will cover; there will be num_filters for
                each size specified.
            num_filters: The number of filters per filter size.

        Returns: A new CNNClassifier with the given parameters.
        """
        # Define the inputs and the dropout
        self.max_length = max([l for l in sequence_lengths])

        self.input_x = tf.placeholder(tf.int32, [None, self.max_length], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        # Runs the operations on the CPU and organizes them into an embedding scope
        with tf.device("/cpu:0"), tf.name_scope("embedding"):
            W = tf.Variable(  # Make a 4D tensor to store batch, width, height, and channel
                tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
                name="W"
            )

            self.embedded_chars = tf.nn.embedding_lookup(W, self.input_x)
            self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)

        pooled_outputs = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                # Conv layer
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                # W is the filter matrix
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
                conv = tf.nn.conv2d(
                    self.embedded_chars_expanded,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv"
                )

                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")

                # Max-pooling layer over the outputs
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, sequence_lengths[i] - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="pool"
                )
                pooled_outputs.append(pooled)

        # Combine all of the pooled features
        num_filters_total = num_filters * len(filter_sizes)

        pooled_outputs = [tf.reshape(out, [-1, 94, 1, self.max_length]) for out in pooled_outputs]

        self.h_pool = tf.concat(3, pooled_outputs)

        # self.h_pool = tf.concat(3, pooled_outputs)
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

        # Add dropout
        with tf.name_scope("dropout"):
            # casted = tf.cast(self.dropout_keep_prob, tf.int32)
            self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

        # Do raw predictions (no softmax)
        with tf.name_scope("output"):
            W = tf.Variable(tf.truncated_normal([num_filters_total, num_classes], stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            # xw_plus_b(...) is just a Wx + b matmul alias
            self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            # softmax_cross_entropy_with_logits(...) calculates cross-entropy loss
            losses = tf.nn.softmax_cross_entropy_with_logits(self.scores, self.input_y)
            self.loss = tf.reduce_mean(losses)

        # Calculate accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")

The training data I am using is the Rotten Tomatoes dataset of labeled positive and negative movie reviews.

Best Answer

It sounds like the problem is here: self.embedded_chars = tf.nn.embedding_lookup(W, self.input_x)

The input from the Rotten Tomatoes dataset probably doesn't fit one of your placeholder variables. I would double-check that.
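One way to do that double-check (a minimal sketch, not part of the original answer) is to compare the shape of the padded feature matrix against the placeholder the graph was built with, right after the TextDCNN is constructed in train.py:

# Hypothetical sanity check: the second dimension of x_train must equal the
# width of dcnn.input_x, otherwise the embedding lookup and the downstream
# reshapes see a different number of elements than the graph expects.
print("x_train shape:", x_train.shape)             # (num_examples, max_document_length)
print("input_x shape:", dcnn.input_x.get_shape())  # (?, max_length derived from seq_lens)

In the posted train.py, seq_lens appears to be built from the character length of each raw sentence string, while x is padded to max_document_length (a word count), so the two widths will generally not match.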

Regarding "python - ValueError due to wrong input size in AdamOptimizer.compute_gradients", there is a similar question on Stack Overflow: https://stackoverflow.com/questions/41915775/
