
python - Unable to improve AlexNet accuracy on Oxford-102 (TensorFlow)


Hi, I am trying to implement AlexNet without using pretrained weights. I tried training the network on the Oxford-102 dataset, but I keep getting 0.9% accuracy throughout the whole run, and changing the parameters does not help. The code is below; can anyone help?

I am following this tutorial.

I swapped the given test set (the larger one) to use as my training set, and use the given training set as my test set. I use gradient descent as the optimizer.

I built AlexNet almost exactly as in the article it is based on; could there be a problem with the way I compute accuracy?

Here is how I load the data:

import os
import sys
import warnings

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

from skimage.io import imread
from skimage.transform import resize

from scipy.io import loadmat

import tensorflow as tf

warnings.filterwarnings('ignore', category=UserWarning, module='skimage')

set_ids = loadmat('setid.mat')

set_ids

test_ids = set_ids['trnid'].tolist()[0]
train_ids = set_ids['tstid'].tolist()[0]

def indexes_processing(int_list):
    returned_list = []
    for index, element in enumerate(int_list):
        returned_list.append(str(element))
    for index, element in enumerate(returned_list):
        if int(element) < 10:
            returned_list[index] = '0000' + element
        elif int(element) < 100:
            returned_list[index] = '000' + element
        elif int(element) < 1000:
            returned_list[index] = '00' + element
        else:
            returned_list[index] = '0' + element
    return returned_list

raw_train_ids = indexes_processing(train_ids)
raw_test_ids = indexes_processing(test_ids)

train_images = []
test_images = []
train_labels = []
test_labels = []

image_labels = (loadmat('imagelabels.mat')['labels'] - 1).tolist()[0]

labels = ['pink primrose', 'hard-leaved pocket orchid', 'canterbury bells', 'sweet pea', 'english marigold', 'tiger lily', 'moon orchid', 'bird of paradise', 'monkshood', 'globe thistle', 'snapdragon', "colt's foot", 'king protea', 'spear thistle', 'yellow iris', 'globe-flower', 'purple coneflower', 'peruvian lily', 'balloon flower', 'giant white arum lily', 'fire lily', 'pincushion flower', 'fritillary', 'red ginger', 'grape hyacinth', 'corn poppy', 'prince of wales feathers', 'stemless gentian', 'artichoke', 'sweet william', 'carnation', 'garden phlox', 'love in the mist', 'mexican aster', 'alpine sea holly', 'ruby-lipped cattleya', 'cape flower', 'great masterwort', 'siam tulip', 'lenten rose', 'barbeton daisy', 'daffodil', 'sword lily', 'poinsettia', 'bolero deep blue', 'wallflower', 'marigold', 'buttercup', 'oxeye daisy', 'common dandelion', 'petunia', 'wild pansy', 'primula', 'sunflower', 'pelargonium', 'bishop of llandaff', 'gaura', 'geranium', 'orange dahlia', 'pink-yellow dahlia?', 'cautleya spicata', 'japanese anemone', 'black-eyed susan', 'silverbush', 'californian poppy', 'osteospermum', 'spring crocus', 'bearded iris', 'windflower', 'tree poppy', 'gazania', 'azalea', 'water lily', 'rose', 'thorn apple', 'morning glory', 'passion flower', 'lotus', 'toad lily', 'anthurium', 'frangipani', 'clematis', 'hibiscus', 'columbine', 'desert-rose', 'tree mallow', 'magnolia', 'cyclamen ', 'watercress', 'canna lily', 'hippeastrum ', 'bee balm', 'ball moss', 'foxglove', 'bougainvillea', 'camellia', 'mallow', 'mexican petunia', 'bromelia', 'blanket flower', 'trumpet creeper', 'blackberry lily']

labels[16]

def one_hot_encode(labels):
    '''
    One hot encode the output labels to be numpy arrays of 0s and 1s
    '''
    out = np.zeros((len(labels), 102))
    for index, element in enumerate(labels):
        out[index, element] = 1
    return out

class ProcessImage():

    def __init__(self):
        self.i = 0

        self.training_images = np.zeros((6149, 227, 227, 3))
        self.training_labels = None

        self.testing_images = np.zeros((1020, 227, 227, 3))
        self.testing_labels = None

    def set_up_images(self):
        print('Processing Training Images...')
        i = 0
        for element in raw_train_ids:
            img = imread('jpg/image_{}.jpg'.format(element))
            img = resize(img, (227, 227))
            self.training_images[i] = img
            i += 1
        print('Done!')

        i = 0
        print('Processing Testing Images...')
        for element in raw_test_ids:
            img = imread('jpg/image_{}.jpg'.format(element))
            img = resize(img, (227, 227))
            self.testing_images[i] = img
            i += 1
        print('Done!')

        print('Processing Training and Testing Labels...')
        encoded_labels = one_hot_encode(image_labels)
        for train_id in train_ids:
            train_labels.append(encoded_labels[train_id - 1])
        for test_id in test_ids:
            test_labels.append(encoded_labels[test_id - 1])
        self.training_labels = train_labels
        self.testing_labels = test_labels
        print('Done!')

    def next_batch(self, batch_size):
        x = self.training_images[self.i:self.i + batch_size]
        y = self.training_labels[self.i:self.i + batch_size]
        self.i = (self.i + batch_size) % len(self.training_images)
        return x, y

image_processor = ProcessImage()

image_processor.set_up_images()
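
As a side note (not part of the original post), the zero-padding in indexes_processing above always produces five-digit IDs, so the same result can be written more compactly with str.zfill; a minimal equivalent sketch:

def indexes_processing(int_list):
    # Pad each image ID to five digits (e.g. 42 -> '00042') to match
    # the image_XXXXX.jpg file names used by the Oxford-102 dataset.
    return [str(element).zfill(5) for element in int_list]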

My graph:

# Helper Functions for AlexNet
def init_weights(filter_height, filter_width, num_channels, num_filters):
    init_random_dist = tf.truncated_normal([filter_height, filter_width, num_channels, num_filters], stddev=0.1)
    return tf.Variable(init_random_dist)

def init_bias(shape):
    init_bias_vals = tf.constant(0.1, shape=shape)
    return tf.Variable(init_bias_vals)

def conv2d(x, W, stride_y, stride_x, padding='SAME'):
    return tf.nn.conv2d(x, W, strides=[1, stride_y, stride_x, 1], padding=padding)

def max_pool(x, filter_height, filter_width, stride_y, stride_x, padding='SAME'):
    return tf.nn.max_pool(x, ksize=[1, filter_height, filter_width, 1], strides=[1, stride_y, stride_x, 1], padding=padding)

def conv_layer(input_x, filter_height, filter_width, num_channels, num_filters, stride_y, stride_x, padding='SAME', groups=1):
    W = init_weights(filter_height, filter_width, int(num_channels / groups), num_filters)
    b = init_bias([num_filters])
    convolve = lambda i, k: tf.nn.conv2d(i, k, strides=[1, stride_y, stride_x, 1], padding=padding)
    if groups == 1:
        conv = convolve(input_x, W)
    else:
        input_groups = tf.split(axis=3, num_or_size_splits=groups, value=input_x)
        weight_groups = tf.split(axis=3, num_or_size_splits=groups, value=W)
        output_groups = [convolve(i, k) for i, k in zip(input_groups, weight_groups)]
        conv = tf.concat(axis=3, values=output_groups)
    bias = tf.reshape(tf.nn.bias_add(conv, b), tf.shape(conv))
    return tf.nn.relu(bias)

def lrn(x, radius, alpha, beta, bias=1.0):
    return tf.nn.local_response_normalization(x, depth_radius=radius, alpha=alpha, beta=beta, bias=bias)

def fully_connected(input_layer, num_in, num_out, relu=True):
    W = tf.truncated_normal([num_in, num_out], stddev=0.1)
    W = tf.Variable(W)
    b = init_bias([num_out])
    out = tf.nn.xw_plus_b(input_layer, W, b)
    if relu:
        return tf.nn.relu(out)
    else:
        return out

def drop_out(x, keep_prob):
    return tf.nn.dropout(x, keep_prob=keep_prob)

x = tf.placeholder(tf.float32, shape=[None, 227, 227, 3])
y_true = tf.placeholder(tf.float32, shape=[None, 102])
keep_prob = tf.placeholder(tf.float32)

# Create the graph

# 1st Layer: Conv (w ReLu) -> Lrn -> Pool
conv_1 = conv_layer(x, filter_height=11, filter_width=11, num_channels=3, num_filters=96, stride_y=4, stride_x=4, padding='VALID')
norm_1 = lrn(conv_1, radius=2, alpha=1e-05, beta=0.75)
pool_1 = max_pool(norm_1, filter_height=3, filter_width=3, stride_y=2, stride_x=2, padding='VALID')
pool_1.get_shape()

# 2nd Layer: Conv (w ReLu) -> Lrn -> Pool
conv_2 = conv_layer(pool_1, filter_height=5, filter_width=5, num_channels=96, num_filters=256, stride_y=1, stride_x=1, groups=2)
norm_2 = lrn(conv_2, radius=2, alpha=1e-05, beta=0.75)
pool_2 = max_pool(norm_2, filter_height=3, filter_width=3, stride_y=2, stride_x=2, padding='VALID')

# 3rd Layer: Conv (w ReLu)
conv_3 = conv_layer(pool_2, filter_height=3, filter_width=3, num_channels=256, num_filters=384, stride_y=1, stride_x=1)

# 4th Layer: Conv (w ReLu)
conv_4 = conv_layer(conv_3, filter_height=3, filter_width=3, num_channels=384, num_filters=384, stride_y=1, stride_x=1, groups=2)

# 5th Layer: Conv (w ReLu) -> Pool
conv_5 = conv_layer(conv_4, filter_height=3, filter_width=3, num_channels=384, num_filters=256, stride_y=1, stride_x=1, groups=2)
pool_5 = max_pool(conv_5, filter_height=3, filter_width=3, stride_y=2, stride_x=2, padding='VALID')

# 6th Layer: Flatten -> FC (w ReLu) -> Dropout
pool_6_flat = tf.reshape(pool_5, [-1, 6*6*256])
full_6 = fully_connected(pool_6_flat, 6*6*256, 4096)
full_6_dropout = drop_out(full_6, keep_prob)

# 7th Layer: FC (w ReLu) -> Dropout
full_7 = fully_connected(full_6_dropout, 4096, 4096)
full_7_dropout = drop_out(full_7, keep_prob)

# 8th Layer: FC and return unscaled activations
y_pred = fully_connected(full_7_dropout, 4096, 102, relu=False)
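
Purely as a sanity check (not in the original post), the layer shapes can be printed the same way pool_1.get_shape() is used above, to confirm the 6*6*256 size assumed by the flatten step; with 227x227 inputs and the strides/padding shown, the expected shapes are:

# Expected shapes for 227x227x3 inputs with the strides/padding above.
print(pool_1.get_shape())   # (?, 27, 27, 96)
print(pool_2.get_shape())   # (?, 13, 13, 256)
print(pool_5.get_shape())   # (?, 6, 6, 256) -> flattens to 6*6*256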

Loss function and optimizer:

cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_true,logits=y_pred))

optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
train = optimizer.minimize(cross_entropy)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

Running the session:

with tf.Session() as sess:
    sess.run(init)
    for i in range(15000):
        batches = image_processor.next_batch(128)
        sess.run(train, feed_dict={x: batches[0], y_true: batches[1], keep_prob: 0.5})

        if (i % 1000 == 0):
            print('On Step {}'.format(i))
            print('Accuracy is: ')
            matches = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))
            acc = tf.reduce_mean(tf.cast(matches, tf.float32))

            print(sess.run(acc, feed_dict={x: image_processor.testing_images, y_true: image_processor.testing_labels, keep_prob: 1.0}))

            print('Saving model...')
            saver.save(sess, 'models/model_iter.ckpt', global_step=i)
            print('Saved at step: {}'.format(i))
            print('\n')

    print('Saving final model...')
    saver.save(sess, 'models/model_final.ckpt')
    print('Saved')

I keep getting the same accuracy of 0.00903922 over and over (throughout all 15000 training steps), no matter how hard I try to change the parameters. I even tried changing the image size from 224 to 227, but it still gives me the same 0.00903922 accuracy.

Best answer

Your accuracy computation looks fine to me, although defining it inside the loop every time is a bit odd.
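
For what it is worth, here is a minimal sketch (reusing y_pred and y_true from the graph above) of building the evaluation ops once, outside the training loop, so the graph is not extended on every logging step:

# Define the accuracy ops a single time, before tf.Session() is opened.
matches = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))
acc = tf.reduce_mean(tf.cast(matches, tf.float32))

# Inside the training loop, only sess.run(acc, feed_dict=...) is then needed.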

What bothers me is that you only train for ten steps. Your training set appears to consist of 6149 images, and you are training in batches of 128. After doing this ten times, you have looked at 1280 of the roughly 6000 images, which is far too few to see any effect on the accuracy.

Instead, you want to look at all of the training data (roughly 48 training steps, i.e. one epoch), and ideally do that several times over. The exact number of epochs depends on many factors such as the data and the network, but you should use at least 10 epochs, that is, 480 training steps.
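
Concretely, with the numbers from the question (6149 training images in batches of 128), the step counts work out as in this small sketch:

num_train = 6149                            # images in the (swapped) training set
batch_size = 128
steps_per_epoch = num_train // batch_size   # about 48 steps to see the data once
num_epochs = 10                             # at least 10 passes, as suggested above
total_steps = steps_per_epoch * num_epochs  # about 480 training steps in total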

The corresponding question can be found on Stack Overflow: https://stackoverflow.com/questions/48612987/
