gpt4 book ai didi

machine-learning - keras 神经网络为每个手写数字预测相同的数字

转载 作者:行者123 更新时间:2023-11-30 08:40:19 25 4
gpt4 key购买 nike

我是机器学习新手,所以作为第一个项目,我尝试基于 mnist 数据集构建一个手写数字识别神经网络。当我使用数据集本身提供的测试图像对其进行测试时,它似乎工作得非常好(这就是函数 test_predict 的用途)。现在我想更进一步,让网络识别我自己拍摄的一些真实手写数字。函数 partial_img_rec 接受一张包含多个数字的图像,并且将由 multiple_digits 调用。我知道我在这里使用递归可能看起来很奇怪,并且我确信有更高效的方法可以做到这一点,但这不是本问题的重点。为了测试 partial_img_rec,我提供了一些存储在文件夹 .\individual_test 中的单个数字的照片,它们看起来都像这样:
1_digit.jpg

问题是:我的神经网络对每一张测试图像的预测都是“5”。无论实际显示的数字是什么,概率始终约为 22%。我完全理解为什么结果不如在 mnist 数据集的测试图像上取得的结果那么好,但我确实没有预料到会是这种情况。您知道为什么会发生这种情况吗?欢迎任何建议。预先感谢您。

这是我的代码(已编辑,现在可以运行):

# import keras and the MNIST dataset
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from keras.utils import np_utils
# numpy is necessary since keras uses numpy arrays
import numpy as np

# imports for pictures
from PIL import Image
from PIL import ImageOps

# imports for tests
import random
import os

class mnist_network():
    """Dense MNIST digit classifier plus helpers to try it on photographed digits.

    Creating an instance loads the MNIST data, builds a two-layer dense
    network and trains it right away. The flattened, 0-1 normalised images and
    the one-hot encoded labels are kept on the instance as ``train_img``,
    ``train_res``, ``test_img`` and ``test_res``.
    """

    def __init__(self):
        """Load data, create and train the model."""
        # load data
        (X_train, y_train), (X_test, y_test) = mnist.load_data()
        # flatten 28*28 images to a 784 vector for each image
        num_pixels = X_train.shape[1] * X_train.shape[2]
        X_train = X_train.reshape((X_train.shape[0], num_pixels)).astype('float32')
        X_test = X_test.reshape((X_test.shape[0], num_pixels)).astype('float32')
        # normalize inputs from 0-255 to 0-1
        X_train = X_train / 255
        X_test = X_test / 255
        # one hot encode outputs
        y_train = np_utils.to_categorical(y_train)
        y_test = np_utils.to_categorical(y_test)
        num_classes = y_test.shape[1]

        # create model: one hidden layer as wide as the input, softmax output
        self.model = Sequential()
        self.model.add(Dense(num_pixels, input_dim=num_pixels, kernel_initializer='normal', activation='relu'))
        self.model.add(Dense(num_classes, kernel_initializer='normal', activation='softmax'))
        # Compile model
        self.model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

        # train the model
        self.model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=200, verbose=2)

        self.train_img = X_train
        self.train_res = y_train
        self.test_img = X_test
        self.test_res = y_test

    def test_all(self):
        """Evaluate the model on all the test data and print the error rate."""
        scores = self.model.evaluate(self.test_img, self.test_res, verbose=0)
        print("Baseline Error: %.2f%%" % (100-scores[1]*100))

    def predict_result(self, img, num_pixels = None, show=False):
        """Predict the digit in a picture given as a flat vector.

        Args:
            img: numpy array of shape (784,), i.e. a flattened 28x28 picture.
            num_pixels: ignored; the vector length is always taken from
                ``img``. Kept so existing callers keep working.
            show: currently unused.

        Returns:
            A tuple ``(digit, probabilities)``: the index of the highest model
            output and the model outputs for this picture as a plain list.
        """
        assert isinstance(img, np.ndarray) and img.shape == (784,)

        num_pixels = img.shape[0]
        # run the model once and derive both the class and the probabilities
        # from the same output
        res_probabilities = self.model.predict(img.reshape(-1, num_pixels))
        res_number = np.argmax(res_probabilities, axis=1)

        # we only need the first element since the batch holds one picture
        return (res_number[0], res_probabilities.tolist()[0])

    def test_predict(self, amount_test = 100):
        """Test some random pictures from the test part of the data set.

        Args:
            amount_test: how many randomly drawn test pictures to classify;
                must be an int not larger than the number of test pictures.

        Prints every misclassification and a final success rate.
        """
        total_images = len(self.test_img)
        assert isinstance(amount_test, int) and amount_test <= total_images
        cnt_right = 0
        cnt_wrong = 0

        for _ in range(amount_test):
            ind = random.randrange(total_images)
            # the label is stored one-hot encoded, so the position of the
            # largest entry is the actual digit
            correct_res = int(np.argmax(self.test_res[ind]))

            predicted_res = self.predict_result(self.test_img[ind])[0]

            if correct_res != predicted_res:
                cnt_wrong += 1
                print("Error in predict ! index = ", ind, " predicted result = ", predicted_res, " correct result = ", correct_res)
            else:
                cnt_right += 1

        # avoid a ZeroDivisionError when no picture was requested
        success_rate = (cnt_right/amount_test)*100 if amount_test else 0.0
        print("The machine predicted correctly ",cnt_right," out of ",amount_test," examples. That is a success rate of ", success_rate,"%.")

    def partial_img_rec(self, image, upper_left, lower_right, results=None):
        """Scan ``image`` from left to right and collect the recognised digits.

        The window described by ``upper_left``/``lower_right`` is cropped,
        scaled to 28x28, inverted, normalised and classified. A prediction is
        kept when the model is at least 50% sure; the window is then moved to
        the right and the process repeats until the window's right edge is
        beyond the image width.

        Args:
            image: PIL image to scan.
            upper_left: ``(x, y)`` of the window's upper left corner.
            lower_right: ``(x, y)`` of the window's lower right corner.
            results: list the recognised digits are appended to. A new list
                is created for every call when omitted.

        Returns:
            The ``results`` list.
        """
        # a fresh list per call: a shared default list would carry digits
        # over from one scan to the next
        if results is None:
            results = []
        width, height = image.size

        # implemented as a loop so that wide images cannot hit the
        # interpreter's recursion limit
        while True:
            left_x, left_y = upper_left
            right_x, right_y = lower_right

            print("current test part: ", upper_left, lower_right)
            print("results: ", results)
            # condition to stop: we've reached the full width of the picture
            if right_x > width:
                return results

            partial = image.crop((left_x, left_y, right_x, right_y))
            # rescale image to 28 * 28 dimension; LANCZOS is the filter that
            # used to be called ANTIALIAS (that alias is gone in Pillow 10)
            partial = partial.resize((28, 28), Image.LANCZOS)

            partial.show()
            # single grey channel, so the array below is two-dimensional
            partial = partial.convert("L")
            # transform to vector
            partial = ImageOps.invert(partial)
            partial = np.asarray(partial, "float32")
            partial = partial / 255.
            partial[partial < 0.5] = 0.
            # flatten image to 28*28 = 784 vector
            num_pixels = partial.shape[0] * partial.shape[1]
            partial = partial.reshape(num_pixels)

            # is there a number in this part of the image?
            res, prop = self.predict_result(partial)
            print("result: ", res, ". probabilities: ", prop)
            # only count this result if the network is >= 50% sure
            if prop[res] >= 0.5:
                results.append(res)
                # step is 80% of the original image's height
                step = int(height * 0.8)
                print("found valid result")
            else:
                # if there is no number found we take smaller steps
                step = height // 20
            # very small images would give a step of 0 and never terminate
            step = max(step, 1)
            print("step: ", step)
            # move the window on by step
            upper_left = (left_x + step, left_y)
            lower_right = (right_x + step, right_y)

    def test_individual_digits(self):
        """Test partial_img_rec with individual digits (square shaped images).

        The images are read from the folder 'individual_test' in the current
        working directory and must follow the pattern 'number_digit.jpg',
        i.e. the first character of the file name is the expected digit.
        Prints the outcome per image and the overall success rate.
        """
        cnt_right, cnt_wrong = 0, 0
        folder = os.path.join(".", "individual_test")

        for imageName in os.listdir(folder):
            # image file must be a jpg or png
            assert imageName.endswith((".jpg", ".png"))
            correct_res = int(imageName[0])
            # the with-block closes the file once the grey copy exists
            with Image.open(os.path.join(folder, imageName)) as source:
                image = source.convert("L")
            # only square images in this test
            if image.size[0] != image.size[1]:
                print(imageName, " has the wrong proportions: ", image.size,". It has to be a square.")
                continue
            predicted_res = self.partial_img_rec(image, (0,0), (image.size[0], image.size[1]), results=[])

            if predicted_res == []:
                print("No prediction possible for ", imageName)
            else:
                predicted_res = predicted_res[0]

            if predicted_res != correct_res:
                print("error in partial_img-rec! Predicted ", predicted_res, ". The correct result would have been ", correct_res)
                cnt_wrong += 1
            else:
                cnt_right += 1
                print("correctly predicted ",imageName)

        total = cnt_right + cnt_wrong
        # an empty folder (or only skipped images) must not divide by zero
        success_rate = (cnt_right / total) * 100 if total else 0.0
        print(cnt_right, " out of ", total," digits were correctly recognised. The success rate is therefore ", success_rate," %.")

    def multiple_digits(self, img):
        """Recognise several digits in one image.

        Args:
            img: PIL image without unnecessary whitespace surrounding the
                digits.

        Returns:
            The recognised digits concatenated into a string.
        """
        width, height = img.size
        # start with the first quadratic part of the image
        res_list = self.partial_img_rec(img, (0,0),(height ,height))
        return "".join(str(elem) for elem in res_list)




# Script entry: constructing the network loads MNIST and trains the model
# immediately (see mnist_network.__init__), so this line takes a while.
network = mnist_network()
# Classify the photographed digits stored in the 'individual_test' folder
# and print the success rate.
network.test_individual_digits()

编辑

@Geecode 的回答非常有帮助,网络现在可以正确预测一些图片,包括上面显示的图片。但总体成功率低于50%。您有什么改进的想法吗?

返回不良结果的图像示例:

6 9

最佳答案

您的图像本身没有问题,您的模型可以正确地对其进行分类。

问题在于您对 partial 使用了整除(Floor Division):

# Faulty line quoted from the question: floor division drops the fractional
# part, so every pixel value below 255 ends up as 0.
partial = partial // 255

结果总是0。因此你总是得到黑色图像。

您必须使用“普通”除法并做一些预处理,因为您的模型是在黑色背景(即像素值为 0.)的负片图像上进行训练的:

# transform to vector
# Invert the picture: per the answer, the model was trained on negative
# images with a black (0.) background.
partial = ImageOps.invert(partial)
partial = np.asarray(partial, "float32")
# true division (not //) keeps the fractional grey levels
partial = partial / 255.
# set every pixel below 0.5 to 0.
partial[partial < 0.5] = 0.

之后您的模型将正确分类:

输出:

result:  1 . probabilities:  [0.000431705528171733, 0.7594985961914062, 0.0011404436081647873, 0.00018972357793245465, 0.03162384033203125, 0.008697531186044216, 0.0014472954208031297, 0.18429973721504211, 0.006838776171207428, 0.005832481198012829]
found valid result

请注意,您当然可以进一步调整图像预处理,但这不是本答案的目的。

更新:关于如何在此任务中获得更好的性能,请参阅我的详细回答:here 。

关于machine-learning - keras 神经网络为每个手写数字预测相同的数字,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/59515746/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com