
python - My model's loss is 0, but it just classifies all inputs into the same class - what's going on?


I trained this model to classify images from the Fashion-MNIST dataset. Before the weights have been trained, the loss value looks normal, but after the first epoch the loss drops to 0 and every input image is classified as class 0.

If I add regularization, the weights update more slowly, but the end result is the same: every image is classified as class 0 and the loss is 0.

import tensorflow as tf
from tensorflow import keras
import numpy as np

EPOCH = 10
BATCH_SIZE = 30
DATA_SIZE = 60000
REGULARIZER = 0.001


def main():
    fashion_mnist = keras.datasets.fashion_mnist
    (train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

    train_images = train_images / 255.0
    test_images = test_images / 255.0

    train_labels = train_labels.reshape((60000, 1))
    train_images = train_images.reshape((60000, 784))

    test_images = test_images.reshape((10000, 784))
    judge_labels = test_labels.reshape((10000, 1))

    x = tf.placeholder(tf.float32, (None, 784))
    y_ = tf.placeholder(tf.float32, (None, 1))

    w1 = tf.Variable(np.random.rand(784 * 24).reshape([784, 24]) * 10, dtype=tf.float32)
    # tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(REGULARIZER)(w1))
    w2 = tf.Variable(np.random.rand(24 * 24).reshape([24, 24]) * 10, dtype=tf.float32)
    # tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(REGULARIZER)(w2))
    w3 = tf.Variable(np.random.rand(24 * 10).reshape([24, 10]) * 10, dtype=tf.float32)
    # tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(REGULARIZER)(w3))

    bias1 = tf.constant(1, shape=(24,), dtype=tf.float32)
    bias2 = tf.constant(1, shape=(24,), dtype=tf.float32)

    y1 = tf.nn.relu(tf.matmul(x, w1) + bias1)
    y2 = tf.nn.relu(tf.matmul(y1, w2) + bias2)

    y = tf.matmul(y2, w3)

    predict = tf.argmax(y, axis=1)

    y_spy = tf.nn.softmax(y, axis=1)

    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.argmax(y_, 1), logits=y)
    # loss = tf.reduce_mean(ce) + tf.add_n(tf.get_collection('losses'))
    loss = tf.reduce_mean(ce)
    train_step = tf.train.GradientDescentOptimizer(0.001).minimize(loss)

    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        print('current out loss: ', end='')
        print(sess.run(loss, feed_dict={x: test_images, y_: judge_labels}))
        pre = sess.run(predict, feed_dict={x: test_images})
        miss = pre - test_labels
        print('right number: ', end='')
        print((np.sum(miss == 0)))

        for epoch in range(EPOCH):
            for i in range(DATA_SIZE // BATCH_SIZE):
                start = i * BATCH_SIZE
                end = (i + 1) * BATCH_SIZE
                _ = sess.run([train_step], feed_dict={x: train_images[start:end],
                                                      y_: train_labels[start:end]})
            print('epochs %d :' % epoch)
            print('current in loss: ', end='')
            print(sess.run(loss, feed_dict={x: train_images[start:end],
                                            y_: train_labels[start:end]}))
            print('current out loss: ', end='')
            print(sess.run(loss, feed_dict={x: test_images, y_: judge_labels}))
            miss = sess.run(predict, feed_dict={x: test_images}) - test_labels
            print('right number: ', end='')
            print((np.sum(miss == 0)))


if __name__ == "__main__":
    main()

Best Answer

Mistake 1: the loss function should be

ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.reshape(y_, [-1]), logits=y)

because this loss expects the labels as a flat vector of integer class indices (so the y_ placeholder must also be changed to int32). In the original code, tf.argmax(y_, 1) takes the argmax across a (batch, 1) column of labels, which is always index 0, so the network was being trained to predict class 0 for every image and could drive the loss all the way to 0.
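To see this concretely, here is a tiny sketch (my illustration, not from the original answer) of what an argmax along axis 1 does to a (batch, 1) label column:

import numpy as np

labels = np.array([[3], [7], [1]])  # shape (3, 1), the shape fed to y_
print(labels.argmax(axis=1))        # [0 0 0] -- the only column wins in every row
print(labels.reshape(-1))           # [3 7 1] -- the flat class indices the loss needs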

Mistake 2: the weights are initialized to very large values (uniform noise scaled by a factor of 10), which destabilizes training; the fixed code drops the factor of 10.
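A common further refinement (my suggestion, not part of the accepted answer) is He initialization, which scales Gaussian noise by the layer's fan-in and pairs well with the ReLU activations used here:

import numpy as np
import tensorflow as tf

def he_init(fan_in, fan_out):
    # Gaussian weights with variance 2 / fan_in, the standard He scheme for ReLU
    return np.random.randn(fan_in, fan_out) * np.sqrt(2.0 / fan_in)

w1 = tf.Variable(he_init(784, 24), dtype=tf.float32)
w2 = tf.Variable(he_init(24, 24), dtype=tf.float32)
w3 = tf.Variable(he_init(24, 10), dtype=tf.float32)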

In addition, GradientDescentOptimizer converges very slowly here; use AdamOptimizer instead.

Fixed code:

import tensorflow as tf
from tensorflow import keras
import numpy as np

EPOCH = 10
BATCH_SIZE = 64
DATA_SIZE = 60000
REGULARIZER = 0.001


def main():
    fashion_mnist = keras.datasets.fashion_mnist
    (train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

    train_images = train_images / 255.0
    test_images = test_images / 255.0

    train_labels = train_labels.reshape((60000, 1))
    train_images = train_images.reshape((60000, 784))

    test_images = test_images.reshape((10000, 784))
    judge_labels = test_labels.reshape((10000, 1))

    x = tf.placeholder(tf.float32, (None, 784))
    y_ = tf.placeholder(tf.int32, (None, 1))  # fix 1: labels are integer class indices

    # fix 2: no "* 10" scaling, so the initial weights stay small
    w1 = tf.Variable(np.random.rand(784 * 24).reshape([784, 24]), dtype=tf.float32)
    # tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(REGULARIZER)(w1))
    w2 = tf.Variable(np.random.rand(24 * 24).reshape([24, 24]), dtype=tf.float32)
    # tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(REGULARIZER)(w2))
    w3 = tf.Variable(np.random.rand(24 * 10).reshape([24, 10]), dtype=tf.float32)
    # tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(REGULARIZER)(w3))

    bias1 = tf.constant(1, shape=(24,), dtype=tf.float32)
    bias2 = tf.constant(1, shape=(24,), dtype=tf.float32)

    y1 = tf.nn.relu(tf.matmul(x, w1) + bias1)
    y2 = tf.nn.relu(tf.matmul(y1, w2) + bias2)

    y = tf.matmul(y2, w3)

    predict = tf.argmax(y, axis=1)

    y_spy = tf.nn.softmax(y, axis=1)

    # fix 1: flatten the (batch, 1) label column to shape [batch]
    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.reshape(y_, [-1]), logits=y)
    # loss = tf.reduce_mean(ce) + tf.add_n(tf.get_collection('losses'))
    loss = tf.reduce_mean(ce)
    train_step = tf.train.AdamOptimizer(0.001).minimize(loss)  # Adam instead of plain SGD

    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        print('current out loss: ', end='')
        print(sess.run(loss, feed_dict={x: test_images, y_: judge_labels}))
        pre = sess.run(predict, feed_dict={x: test_images})
        miss = pre - test_labels
        print('right number: ', end='')
        print((np.sum(miss == 0)))

        for epoch in range(EPOCH):
            for i in range(DATA_SIZE // BATCH_SIZE):
                start = i * BATCH_SIZE
                end = (i + 1) * BATCH_SIZE
                _ = sess.run([train_step], feed_dict={x: train_images[start:end],
                                                      y_: train_labels[start:end]})
            print('epochs %d :' % epoch)
            print('current in loss: ', end='')
            print(sess.run(loss, feed_dict={x: train_images[start:end],
                                            y_: train_labels[start:end]}))
            print('current out loss: ', end='')
            print(sess.run(loss, feed_dict={x: test_images, y_: judge_labels}))
            miss = sess.run(predict, feed_dict={x: test_images}) - test_labels
            print('right number: ', end='')
            print((np.sum(miss == 0)))

        miss = sess.run(predict, feed_dict={x: test_images})
        print(miss[0:10], test_labels[0:10])


if __name__ == "__main__":
    main()

Output (excerpts):

...
Sample predictions: [9 2 4 3 2 4 4 4 7 7], Actual: [9 2 1 1 6 1 4 6 5 7]
...
Sample predictions: [9 2 1 1 6 1 4 6 1 7], Actual: [9 2 1 1 6 1 4 6 5 7]
...
Sample predictions: [7 2 1 1 6 1 4 6 1 7], Actual: [9 2 1 1 6 1 4 6 5 7]
...
Sample predictions: [9 2 1 1 6 1 4 6 1 7], Actual: [9 2 1 1 6 1 4 6 5 7]
...

Here is a version that also reports training and validation loss and accuracy, and shuffles the training data every epoch:

import tensorflow as tf
from tensorflow import keras
import numpy as np
from sklearn.metrics import classification_report, accuracy_score

EPOCH = 30
BATCH_SIZE = 64
DATA_SIZE = 60000
REGULARIZER = 0.001


def main():
    fashion_mnist = keras.datasets.fashion_mnist
    (train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

    train_images = train_images / 255.0
    test_images = test_images / 255.0

    train_labels = train_labels.reshape((60000, 1))
    train_images = train_images.reshape((60000, 784))

    test_images = test_images.reshape((10000, 784))
    judge_labels = test_labels.reshape((10000, 1))

    x = tf.placeholder(tf.float32, (None, 784))
    y_ = tf.placeholder(tf.int32, (None, 1))

    # note: the 'losses' collection is populated here but not added into the loss below
    w1 = tf.Variable(np.random.rand(784 * 24).reshape([784, 24]), dtype=tf.float32)
    tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(REGULARIZER)(w1))
    w2 = tf.Variable(np.random.rand(24 * 24).reshape([24, 24]), dtype=tf.float32)
    tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(REGULARIZER)(w2))
    w3 = tf.Variable(np.random.rand(24 * 10).reshape([24, 10]), dtype=tf.float32)
    tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(REGULARIZER)(w3))

    bias1 = tf.constant(1, shape=(24,), dtype=tf.float32)
    bias2 = tf.constant(1, shape=(24,), dtype=tf.float32)

    y1 = tf.nn.relu(tf.matmul(x, w1) + bias1)
    y2 = tf.nn.relu(tf.matmul(y1, w2) + bias2)

    y = tf.matmul(y2, w3)

    predict = tf.argmax(y, axis=1)

    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.reshape(y_, [-1]), logits=y)
    loss = tf.reduce_mean(ce)
    train_step = tf.train.AdamOptimizer(0.001).minimize(loss)

    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        idx = np.arange(DATA_SIZE)

        for epoch in range(EPOCH):
            train_loss = list()
            train_predict = list()

            # reshuffle the training data at the start of every epoch
            np.random.shuffle(idx)
            train_images = train_images[idx]
            train_labels = train_labels[idx]

            for i in range(DATA_SIZE // BATCH_SIZE):
                start = i * BATCH_SIZE
                end = (i + 1) * BATCH_SIZE
                _, loss_, p_ = sess.run([train_step, loss, predict],
                                        feed_dict={x: train_images[start:end],
                                                   y_: train_labels[start:end]})
                train_loss.append(loss_)
                train_predict.extend(p_)

            test_loss, test_predict = sess.run([loss, predict], feed_dict={x: test_images,
                                                                           y_: judge_labels})

            print("Epoch: {}, Train Loss: {:.3f}, Test Loss: {:.3f},"
                  "Train Acc: {:.3f}, Test Acc: {:.3f}".format(
                      epoch + 1, np.mean(train_loss), test_loss,
                      accuracy_score(train_labels[0:len(train_predict)], train_predict),
                      accuracy_score(judge_labels, test_predict)))


if __name__ == "__main__":
    main()

Output:

....
Epoch: 27, Train Loss: 0.842, Test Loss: 1.015,Train Acc: 0.816, Test Acc: 0.798
Epoch: 28, Train Loss: 0.832, Test Loss: 0.880,Train Acc: 0.816, Test Acc: 0.806
Epoch: 29, Train Loss: 0.788, Test Loss: 0.886,Train Acc: 0.820, Test Acc: 0.805
Epoch: 30, Train Loss: 0.704, Test Loss: 0.742,Train Acc: 0.826, Test Acc: 0.815
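
For reference, the same architecture can be written far more compactly with the tf.keras API, which handles weight initialization, shuffling, and accuracy reporting automatically. A minimal sketch (my addition, not part of the original answer, assuming a recent version of tf.keras):

import tensorflow as tf
from tensorflow import keras

# Load and scale the data exactly as in the answer's code
(train_images, train_labels), (test_images, test_labels) = \
    keras.datasets.fashion_mnist.load_data()
train_images, test_images = train_images / 255.0, test_images / 255.0

# Same 784-24-24-10 architecture as above
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(24, activation='relu'),
    keras.layers.Dense(24, activation='relu'),
    keras.layers.Dense(10),  # raw logits; softmax is folded into the loss below
])
model.compile(optimizer='adam',
              loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
model.fit(train_images, train_labels, epochs=10, batch_size=64,
          validation_data=(test_images, test_labels))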

A similar question about "python - My model's loss is 0, but it just classifies all inputs into the same class - what's going on?" can be found on Stack Overflow: https://stackoverflow.com/questions/55665888/
