
python - Keras 3D convolution: Error when checking model input: expected convolution3d_input_1 to have 5 dimensions, but got array with shape (1, 90, 100, 100)


The goal of the system is to classify video input according to the spoken word. Each sample is a set of 90 grayscale frames of 100x100 pixels (one color channel, giving dimensions (1, 90, 100, 100)). Previously, the training data was loaded directly into memory for training. This worked, but it was inefficient and made it impossible to add more training samples later. To solve this, the system was modified to preprocess the training data and save it to an HDF5 file, then fit the model using a generator that loads the training data on demand. However, since this modification, the following error is generated:

Exception: Error when checking model input: expected convolution3d_input_1 to have 5 dimensions, but got array with shape (1, 90, 100, 100)
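To make the mismatch concrete, here is a minimal sketch (with hypothetical placeholder arrays, not the real data) of the shapes involved: under the Theano dimension ordering used below, the model expects a 5-D batch, while the generator yields single 4-D samples.

import numpy as np

single_sample = np.zeros((1, 90, 100, 100))    # (channels, frames, rows, columns): 4-D
batch_of_one = np.zeros((1, 1, 90, 100, 100))  # (batch_size, channels, frames, rows, columns): 5-D
print single_sample.ndim, batch_of_one.ndim    # prints "4 5"; the model checks for 5 dimensions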

The system code is as follows:

from keras import backend as K
from keras.callbacks import Callback
from keras.constraints import maxnorm
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Convolution3D
from keras.layers.convolutional import MaxPooling3D
from keras.optimizers import SGD
from keras.utils import np_utils
from keras.utils.io_utils import HDF5Matrix
from pprint import pprint
from sklearn.utils import shuffle
K.set_image_dim_ordering("th")

import cv2
import h5py
import json
import os
import sys
import numpy as np

class OpticalSpeechRecognizer(object):
    def __init__(self, rows, columns, frames_per_sequence):
        self.rows = rows
        self.columns = columns
        self.frames_per_sequence = frames_per_sequence
        self.osr = None

    def train_osr_model(self, training_save_fn):
        """ Train the optical speech recognizer
        """
        print "\nTraining OSR"
        validation_ratio = 0.3
        training_sequence_generator = self.generate_training_sequences(training_save_fn)
        validation_sequence_generator = self.generate_training_sequences(training_save_fn, validation_ratio=validation_ratio)
        training_save_file = h5py.File(training_save_fn, "r")
        sample_count = training_save_file.attrs["sample_count"]
        pbi = PrintBatchInfo()
        self.osr.fit_generator(generator=training_sequence_generator,
                               validation_data=validation_sequence_generator,
                               samples_per_epoch=sample_count,
                               nb_val_samples=int(round(validation_ratio*sample_count)),
                               nb_epoch=10,
                               verbose=2,
                               callbacks=[pbi],
                               class_weight=None,
                               nb_worker=1)

    def generate_osr_model(self, training_save_fn):
        """ Builds the optical speech recognizer model
        """
        print "".join(["Generating OSR model\n",
                       "-"*40])
        training_save_file = h5py.File(training_save_fn, "r")
        osr = Sequential()
        print " - Adding convolution layers"
        osr.add(Convolution3D(nb_filter=32,
                              kernel_dim1=3,
                              kernel_dim2=3,
                              kernel_dim3=3,
                              border_mode="same",
                              input_shape=(1, self.frames_per_sequence, self.rows, self.columns),
                              activation="relu"))
        osr.add(Dropout(0.2))
        osr.add(Convolution3D(nb_filter=32,
                              kernel_dim1=3,
                              kernel_dim2=3,
                              kernel_dim3=3,
                              border_mode="same",
                              activation="relu"))
        osr.add(MaxPooling3D(pool_size=(3, 3, 3)))
        osr.add(Convolution3D(nb_filter=64,
                              kernel_dim1=3,
                              kernel_dim2=3,
                              kernel_dim3=3,
                              border_mode="same",
                              activation="relu"))
        osr.add(Dropout(0.2))
        osr.add(Convolution3D(nb_filter=64,
                              kernel_dim1=3,
                              kernel_dim2=3,
                              kernel_dim3=3,
                              border_mode="same",
                              activation="relu"))
        osr.add(MaxPooling3D(pool_size=(3, 3, 3)))
        osr.add(Convolution3D(nb_filter=128,
                              kernel_dim1=3,
                              kernel_dim2=3,
                              kernel_dim3=3,
                              border_mode="same",
                              activation="relu"))
        osr.add(Dropout(0.2))
        osr.add(Convolution3D(nb_filter=128,
                              kernel_dim1=3,
                              kernel_dim2=3,
                              kernel_dim3=3,
                              border_mode="same",
                              activation="relu"))
        osr.add(MaxPooling3D(pool_size=(3, 3, 3)))
        osr.add(Flatten())
        osr.add(Dropout(0.2))
        print " - Adding fully connected layers"
        osr.add(Dense(output_dim=128,
                      init="normal",
                      activation="relu"))
        osr.add(Dropout(0.2))
        osr.add(Dense(output_dim=64,
                      init="normal",
                      activation="relu"))
        osr.add(Dropout(0.2))
        osr.add(Dense(output_dim=32,
                      init="normal",
                      activation="relu"))
        osr.add(Dropout(0.2))
        osr.add(Dense(output_dim=len(training_save_file.attrs["training_classes"].split(",")),
                      init="normal",
                      activation="softmax"))
        print " - Compiling model"
        sgd = SGD(lr=0.01,
                  decay=1e-6,
                  momentum=0.9,
                  nesterov=True)
        osr.compile(loss="categorical_crossentropy",
                    optimizer=sgd,
                    metrics=["accuracy"])
        self.osr = osr
        print " * OSR MODEL GENERATED * "

    def generate_training_sequences(self, training_save_fn, validation_ratio=0):
        while True:
            training_save_file = h5py.File(training_save_fn, "r")
            sample_count = int(training_save_file.attrs["sample_count"])
            # generate sequences for validation
            if validation_ratio:
                validation_sample_count = int(round(validation_ratio*sample_count))
                validation_sample_idxs = np.random.randint(low=0, high=sample_count, size=validation_sample_count)
                for idx in validation_sample_idxs:
                    X = training_save_file["X"][idx]
                    Y = training_save_file["Y"][idx]
                    yield (X, Y)
            # generate sequences for training
            else:
                for idx in range(0, sample_count):
                    X = training_save_file["X"][idx]
                    Y = training_save_file["Y"][idx]
                    yield (X, Y)

    def process_training_data(self, config_file, training_save_fn):
        """ Preprocesses training data and saves them into an HDF5 file
        """
        # load training metadata from config file
        training_metadata = {}
        training_classes = []
        with open(config_file) as training_config:
            training_metadata = json.load(training_config)
            training_classes = sorted(list(training_metadata.keys()))

        print "".join(["\n",
                       "Found {0} training classes!\n".format(len(training_classes)),
                       "-"*40])
        for class_label, training_class in enumerate(training_classes):
            print "{0:<4d} {1:<10s} {2:<30s}".format(class_label, training_class, training_metadata[training_class])
        print ""

        # count number of samples
        sample_count = 0
        sample_count_by_class = [0]*len(training_classes)
        for class_label, training_class in enumerate(training_classes):
            # get training class sequence paths
            training_class_data_path = training_metadata[training_class]
            training_class_sequence_paths = [os.path.join(training_class_data_path, file_name)
                                             for file_name in os.listdir(training_class_data_path)
                                             if (os.path.isfile(os.path.join(training_class_data_path, file_name))
                                                 and ".mov" in file_name)]
            # update sample count
            sample_count += len(training_class_sequence_paths)
            sample_count_by_class[class_label] = len(training_class_sequence_paths)

        print "".join(["\n",
                       "Found {0} training samples!\n".format(sample_count),
                       "-"*40])
        for class_label, training_class in enumerate(training_classes):
            print "{0:<4d} {1:<10s} {2:<6d}".format(class_label, training_class, sample_count_by_class[class_label])
        print ""

        # initialize HDF5 save file, but clear older duplicate first if it exists
        try:
            print "Saved file \"{0}\" already exists! Overwriting previous saved file.\n".format(training_save_fn)
            os.remove(training_save_fn)
        except OSError:
            pass
        training_save_file = h5py.File(training_save_fn, "w")
        training_save_file.attrs["training_classes"] = np.string_(",".join(training_classes))
        training_save_file.attrs["sample_count"] = sample_count
        x_training_dataset = training_save_file.create_dataset("X",
                                                               shape=(sample_count, 1, self.frames_per_sequence, self.rows, self.columns),
                                                               dtype="f")
        y_training_dataset = training_save_file.create_dataset("Y",
                                                               shape=(sample_count, len(training_classes)),
                                                               dtype="i")

        # iterate through each class data
        sample_idx = 0
        for class_label, training_class in enumerate(training_classes):
            # get training class sequence paths
            training_class_data_path = training_metadata[training_class]
            training_class_sequence_paths = [os.path.join(training_class_data_path, file_name)
                                             for file_name in os.listdir(training_class_data_path)
                                             if (os.path.isfile(os.path.join(training_class_data_path, file_name))
                                                 and ".mov" in file_name)]
            # iterate through each sequence
            for idx, training_class_sequence_path in enumerate(training_class_sequence_paths):
                sys.stdout.write("Processing training data for class \"{0}\": {1}/{2} sequences\r"
                                 .format(training_class, idx+1, len(training_class_sequence_paths)))
                sys.stdout.flush()

                # append grayscale, normalized sample frames
                frames = self.process_frames(training_class_sequence_path)
                x_training_dataset[sample_idx] = [frames]

                # append one-hot encoded sample label
                label = [0]*len(training_classes)
                label[class_label] = 1
                y_training_dataset[sample_idx] = label

                # update sample index
                sample_idx += 1

            print "\n"

        training_save_file.close()

        print "Training data processed and saved to {0}".format(training_save_fn)

    def process_frames(self, video_file_path):
        """ Splits frames, resizes frames, converts RGB frames to greyscale, and normalizes frames
        """
        video = cv2.VideoCapture(video_file_path)
        success, frame = video.read()

        frames = []
        success = True

        # resize, convert to grayscale, normalize, and collect valid frames
        while success:
            success, frame = video.read()
            if success:
                frame = cv2.resize(frame, (self.rows, self.columns))
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                frame = frame.astype('float32') / 255.0
                frames.append(frame)

        # pre-pad short sequences and equalize frame lengths
        if len(frames) < self.frames_per_sequence:
            frames = [frames[0]]*(self.frames_per_sequence - len(frames)) + frames
        frames = frames[0:self.frames_per_sequence]

        return frames

class PrintBatchInfo(Callback):
    def on_batch_end(self, epoch, logs={}):
        print logs

if __name__ == "__main__":
    osr = OpticalSpeechRecognizer(100, 100, 90)
    osr.process_training_data("training_config.json", "training_data.h5")
    osr.generate_osr_model("training_data.h5")
    osr.train_osr_model("training_data.h5")

What confuses me is that the reported input dimensions are exactly the dimensions I expected the input to have, yet the error complains about a missing 5th dimension. Should the generator yield a batch of samples per iteration, rather than a single sample, in order to produce 5-dimensional output?

Best Answer

If you are returning a single example, you need to make sure the output is 5-dimensional, with shape (batch_size, channels, frames, height, width). This is simply because the dimensionality of every layer must be fixed. The simplest way to make this work is:

X = training_save_file["X"][[idx]]

With this fix, your output should match the expected shape.
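For concreteness, here is a minimal sketch of the training branch of the generator with that fix applied. It assumes the labels also need the leading batch axis, so Y is indexed the same way, which the answer above does not spell out:

def generate_training_sequences(self, training_save_fn, validation_ratio=0):
    while True:
        training_save_file = h5py.File(training_save_fn, "r")
        sample_count = int(training_save_file.attrs["sample_count"])
        for idx in range(0, sample_count):
            # indexing with a one-element list keeps the leading axis, so X has
            # shape (1, 1, 90, 100, 100) and Y has shape (1, number_of_classes)
            X = training_save_file["X"][[idx]]
            Y = training_save_file["Y"][[idx]]
            yield (X, Y)

With this change, fit_generator receives arrays whose first axis is the batch dimension, matching the 5 dimensions the first Convolution3D layer expects.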

Regarding python - Keras 3D convolution: Error when checking model input: expected convolution3d_input_1 to have 5 dimensions, but got array with shape (1, 90, 100, 100), we found a similar question on Stack Overflow: https://stackoverflow.com/questions/42032250/
