gpt4 book ai didi

python - CNN 给出有偏见的结果

转载 作者:太空狗 更新时间:2023-10-29 18:33:56 24 4
gpt4 key购买 nike

我在 CNN 上使用二元分类器。我有两个类别“我”和“其他”。我有大约 250 张我自己的图像和 500 张其他人的图像(随机面孔数据库)。我目前的图层实现非常简单

    self.model.add(Conv2D(128, (2, 2), padding='same', 
input_shape=dataset.X_train.shape[1:]))
self.model.add(Activation('relu'))
self.model.add(MaxPooling2D(pool_size=(2, 2)))
self.model.add(Dropout(0.25))

self.model.add(Conv2D(64, (2, 2), padding='same'))
self.model.add(Activation('relu'))
self.model.add(MaxPooling2D(pool_size=(2, 2)))
self.model.add(Dropout(0.25))

self.model.add(Conv2D(32, (1, 1), padding='same'))
self.model.add(Activation('relu'))
self.model.add(MaxPooling2D(pool_size=(2, 2)))
self.model.add(Dropout(0.5))
self.model.add(Dense(512))
self.model.add(Activation('relu'))
self.model.add(Dropout(0.25))
self.model.add(Dense(2)) # for two classes
self.model.add(Activation('softmax'))

我的网络准确率达到 93% enter image description here

enter image description here我的问题是,当我使用这个网络预测人脸时,它总是将任何人脸识别为我的。我裁剪了脸部,应用了 gabor 过滤器,但没有任何效果。任何建议将不胜感激。

随机人脸预测结果:【KK代表我的脸】概率总是超过 97%:

KK identified!
1/1 [==============================] - 0s
[[ 0.9741978 0.0258022]]
1/1 [==============================] - 0s

KK identified!
1/1 [==============================] - 0s
[[ 0.9897241 0.01027592]]
1/1 [==============================] - 0s

我的图像预测结果:[KK 代表我的脸]概率总是超过 99%:

KK identified!
1/1 [==============================] - 0s
[[ 0.99639165 0.00360837]]
1/1 [==============================] - 0s
KK identified!
1/1 [==============================] - 0s
[[ 0.99527925 0.00472075]]
1/1 [==============================] - 0s

训练代码

   def get_data(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE, img_channels=3, nb_classes=2):

images, labels = fetch_data('./data/')
labels = np.reshape(labels, [-1])

X_train, X_test, y_train, y_test = \
train_test_split(images, labels, test_size=0.3, random_state=random.randint(0, 100))
X_valid, X_test, y_valid, y_test = \
train_test_split(images, labels, test_size=0.3, random_state=random.randint(0, 100))
#train_test_split(images, labels, test_size=0.3, random_state=np.random.seed(15))

if K.image_dim_ordering() == 'th':
X_train = X_train.reshape(X_train.shape[0], 3, img_rows, img_cols)
X_valid = X_valid.reshape(X_valid.shape[0], 3, img_rows, img_cols)
X_test = X_test.reshape(X_test.shape[0], 3, img_rows, img_cols)
# input_shape = (3, img_rows, img_cols)
else:
X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 3)
X_valid = X_valid.reshape(X_valid.shape[0], img_rows, img_cols, 3)
X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 3)
# input_shape = (img_rows, img_cols, 3)

Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_valid = np_utils.to_categorical(y_valid, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

X_train = X_train.astype('float32')
X_valid = X_valid.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_valid /= 255
X_test /= 255

self.X_train = X_train
self.X_valid = X_valid
self.X_test = X_test
self.Y_train = Y_train
self.Y_valid = Y_valid
self.Y_test = Y_test



def train_network(self, dataset, batch_size=32, nb_epoch=40, data_augmentation=True):
    """Compile the model and train it, optionally with data augmentation.

    Fixes over the original:
    - NameError: the generator was created as ``datagenerator`` but used
      as ``datagen`` in the fit_generator call.
    - Loss: the model ends in a 2-unit softmax with one-hot targets, so
      'categorical_crossentropy' is the matching loss; pairing softmax with
      'binary_crossentropy' mis-reports accuracy in this Keras version.
    """
    sgd = SGD(lr=0.003, decay=0.0000001, momentum=0.9, nesterov=True)

    # adam = Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0001)
    self.model.compile(loss='categorical_crossentropy',
                       optimizer=sgd,
                       metrics=['accuracy'])
    if not data_augmentation:
        processed_data = self.model.fit(dataset.X_train, dataset.Y_train,
                                        batch_size=batch_size,
                                        nb_epoch=nb_epoch,
                                        validation_data=(dataset.X_valid, dataset.Y_valid),
                                        shuffle=True)
    else:
        # Real-time augmentation to compensate for the small dataset
        # (~250 positive samples).
        datagenerator = ImageDataGenerator(
            featurewise_center=False,
            samplewise_center=False,
            featurewise_std_normalization=False,
            samplewise_std_normalization=False,
            zca_whitening=False,
            rotation_range=20,
            width_shift_range=0.2,
            height_shift_range=0.2,
            horizontal_flip=True,
            vertical_flip=False)

        datagenerator.fit(dataset.X_train)

        processed_data = self.model.fit_generator(
            datagenerator.flow(dataset.X_train, dataset.Y_train,
                               batch_size=batch_size, shuffle=True),
            samples_per_epoch=dataset.X_train.shape[0],
            nb_epoch=nb_epoch,
            validation_data=(dataset.X_valid, dataset.Y_valid))

谢谢

[更新:6 月 11 日]

图层

def build_model(self, dataset, nb_classes=2):
    """Assemble the sequential CNN: two conv stages (32 then 16 filters,
    each conv-conv-pool-dropout) followed by a dense softmax classifier.
    """
    self.model = Sequential()
    add = self.model.add  # shorthand for the repeated layer additions

    # Two identical conv stages, differing only in filter count; the very
    # first conv layer additionally declares the input shape.
    for stage, n_filters in enumerate((32, 16)):
        if stage == 0:
            add(Conv2D(n_filters, (3, 3), padding='same',
                       input_shape=dataset.X_train.shape[1:]))
        else:
            add(Conv2D(n_filters, (3, 3), padding='same'))
        add(Activation('relu'))
        add(Conv2D(n_filters, (3, 3)))
        add(Activation('relu'))
        add(MaxPooling2D(pool_size=(2, 2)))
        add(Dropout(0.5))

    # Classifier head: flatten, one hidden dense layer, softmax output.
    add(Flatten())
    add(Dense(512))
    add(Activation('relu'))
    add(Dropout(0.5))
    add(Dense(nb_classes))
    add(Activation('softmax'))

    self.model.summary()

数据扩充

    # Real-time preprocessing and data augmentation for training batches.
    # NOTE(review): featurewise_center=True subtracts the training-set mean
    # (computed by datagen.fit below) from *augmented training* batches only;
    # validation_data is fed raw, so train and validation inputs are on
    # different scales — confirm this is intended.
datagen = ImageDataGenerator(
featurewise_center=True, # set input mean to 0 over the dataset
samplewise_center=False, # set each sample mean to 0
featurewise_std_normalization=False, # divide inputs by std of the dataset
samplewise_std_normalization=False, # divide each input by its std
zca_whitening=False, # apply ZCA whitening
rotation_range=20, # randomly rotate images in the range (degrees, 0 to 180)
width_shift_range=0.2, # randomly shift images horizontally (fraction of total width)
height_shift_range=0.2, # randomly shift images vertically (fraction of total height)
# rescale=1. / 255,
# shear_range=0.2,
# zoom_range=0.2,
horizontal_flip=True, # randomly flip images
vertical_flip=False) # randomly flip images

# Compute dataset statistics (needed because featurewise_center=True).
datagen.fit(dataset.X_train)

# Keep only the best-so-far weights, judged by validation accuracy.
checkpoint = ModelCheckpoint(self.FILE_PATH, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callback_list = [checkpoint]

# fit the model on the batches generated by datagen.flow()
train_generator = datagen.flow(dataset.X_train, dataset.Y_train, batch_size=batch_size, shuffle=True)

# One "epoch" = as many augmented samples as the original training set.
history = self.model.fit_generator(train_generator,
samples_per_epoch=dataset.X_train.shape[0],
nb_epoch=nb_epoch,
validation_data=(dataset.X_valid, dataset.Y_valid),
callbacks=callback_list)

数据集

class DataSet(object):
    """Holds the train/validation/test splits of the face dataset.

    All attributes are None until get_data() is called.
    """

    def __init__(self):
        # Populated by get_data(): image arrays ...
        self.X_train = None
        self.X_valid = None
        self.X_test = None
        # ... and their one-hot label matrices.
        self.Y_train = None
        self.Y_valid = None
        self.Y_test = None

    # support only binary classification for now, thus 2 class limit
    def get_data(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE, img_channels=3, nb_classes=2):
        """Load images, split into disjoint sets, reshape and normalize.

        FIX (data leakage): the original drew the validation/test split
        from the *full* dataset a second time, so validation and test
        samples overlapped the training set.  Now 20% is held out once and
        then split evenly into validation (10%) and test (10%).
        """
        images, labels = fetch_data('./data/')
        labels = np.reshape(labels, [-1])  # 1-D label vector

        # 80% train, 20% held out ...
        X_train, X_hold, y_train, y_hold = \
            train_test_split(images, labels, test_size=0.2,
                             random_state=random.randint(0, 100))
        # ... held-out part divided evenly into valid and test.
        X_valid, X_test, y_valid, y_test = \
            train_test_split(X_hold, y_hold, test_size=0.5,
                             random_state=random.randint(0, 100))

        # Reshape for the backend's channel ordering.
        if K.image_dim_ordering() == 'th':
            # Theano: (channels, rows, cols)
            X_train = X_train.reshape(X_train.shape[0], img_channels, img_rows, img_cols)
            X_valid = X_valid.reshape(X_valid.shape[0], img_channels, img_rows, img_cols)
            X_test = X_test.reshape(X_test.shape[0], img_channels, img_rows, img_cols)
        else:
            # TensorFlow: (rows, cols, channels)
            X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, img_channels)
            X_valid = X_valid.reshape(X_valid.shape[0], img_rows, img_cols, img_channels)
            X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, img_channels)

        # convert class vectors to binary class matrices (one-hot)
        Y_train = np_utils.to_categorical(y_train, nb_classes)
        Y_valid = np_utils.to_categorical(y_valid, nb_classes)
        Y_test = np_utils.to_categorical(y_test, nb_classes)

        # Scale pixel values into [0, 1].
        X_train = X_train.astype('float32')
        X_valid = X_valid.astype('float32')
        X_test = X_test.astype('float32')
        X_train /= 255
        X_valid /= 255
        X_test /= 255

        self.X_train = X_train
        self.X_valid = X_valid
        self.X_test = X_test
        self.Y_train = Y_train
        self.Y_valid = Y_valid
        self.Y_test = Y_test

最佳答案

结果一点也不奇怪。网络从未了解是什么让你的脸与众不同,而只是记住是什么让 500 集与你的不同。一旦你呈现一张新面孔,它就没有它的“内存”,因此将它解释为你的,只是因为 500 张面孔中存在的特征都没有出现在第 501 张面孔中。

解决这个问题的一些想法:

  • 使用 ImageDataGenerator 扩充您的数据,正如 petezurich 所提议的那样。
  • 增加内核大小。 2*2 太小,无法捕捉面部特征。考虑3*3,甚至在第一个隐藏层堆叠两个3*3。
  • 考虑使用批量归一化和正则化。将 Dropout 增加到 0.5。
  • 考虑用 dilated convolutions（空洞卷积，Keras 中可用）替换池化层。
  • 确保规范化输入数据。
  • 减少第一层中特征映射(过滤器)的数量。考虑使用例如32 个 3*3 map 而不是 128 个微小元素(如果我猜的话,这些是你的主要问题)。通过这种方式,您将迫使网络进行概括,而不是学习一些细微差别。

对我最后一点的假设的一个很好的测试是可视化隐藏层中的激活,尤其是第一个隐藏层。我有一种感觉,你的网络会激活一些不相关的特征(或者更确切地说 - 噪音),而不是“人类特征”(比如眼睛、发型)。

[添加更多代码后进行编辑]

  • 将您的输入数据以零为中心。
  • 降低批量大小。由于样本太少,您不希望在一个批处理中进行太多平均。

我仍然认为使用例如第一个隐藏层中的 16 或 32 个过滤器应该是第一个要检查的东西。看看你的脸。你能找出 128 个“特征”吗?除非你有一些严重的粉刺,否则我不这么认为。

关于python - CNN 给出有偏见的结果,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/44464219/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com