- android - 多次调用 OnPrimaryClipChangedListener
- android - 无法更新 RecyclerView 中的 TextView 字段
- android.database.CursorIndexOutOfBoundsException : Index 0 requested, 光标大小为 0
- android - 使用 AppCompat 时,我们是否需要明确指定其 UI 组件(Spinner、EditText)颜色
系统的目标是根据单词的发音对视频输入进行分类。每个样本是一组 90 帧、尺寸为 100x100 的灰度帧(1 个颜色通道),维度为 (1, 90, 100, 100)
)。之前,训练数据直接加载到内存中并进行训练,它有效,但效率不高,并且以后不可能有更多的训练样本。为了解决这个问题,系统被修改为预处理训练数据并将其保存到 HDF5
文件中,然后拟合使用生成器将训练数据按需加载到模型中。但是,由于此修改,现在会生成以下错误:
Exception: Error when checking model input: expected convolution3d_input_1 to have 5 dimensions, but got array with shape (1, 90, 100, 100)
系统代码如下:
from keras import backend as K
from keras.callbacks import Callback
from keras.constraints import maxnorm
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Convolution3D
from keras.layers.convolutional import MaxPooling3D
from keras.optimizers import SGD
from keras.utils import np_utils
from keras.utils.io_utils import HDF5Matrix
from pprint import pprint
from sklearn.utils import shuffle
K.set_image_dim_ordering("th")
import cv2
import h5py
import json
import os
import sys
import numpy as np
class OpticalSpeechRecognizer(object):
    """ Trains a 3D-CNN to classify video sequences of spoken words.

    Each sample is a sequence of `frames_per_sequence` grayscale frames of
    size `rows` x `columns` (Theano dim ordering, channels first). Samples
    are preprocessed into a single HDF5 file so training data can be
    streamed from disk by a generator instead of being held in memory.
    """

    def __init__(self, rows, columns, frames_per_sequence):
        self.rows = rows                                # frame height in pixels
        self.columns = columns                          # frame width in pixels
        self.frames_per_sequence = frames_per_sequence  # frames per video sample
        self.osr = None                                 # compiled Keras model, set by generate_osr_model()

    def train_osr_model(self, training_save_fn):
        """ Train the optical speech recognizer

        training_save_fn -- path of the HDF5 file written by
        process_training_data(); generate_osr_model() must have run first.
        """
        print("\nTraining OSR")
        validation_ratio = 0.3
        training_sequence_generator = self.generate_training_sequences(training_save_fn)
        validation_sequence_generator = self.generate_training_sequences(training_save_fn,
                                                                         validation_ratio=validation_ratio)
        # read only the sample count, then release the handle; the
        # generators open the HDF5 file for themselves
        training_save_file = h5py.File(training_save_fn, "r")
        sample_count = int(training_save_file.attrs["sample_count"])
        training_save_file.close()
        pbi = PrintBatchInfo()
        self.osr.fit_generator(generator=training_sequence_generator,
                               validation_data=validation_sequence_generator,
                               samples_per_epoch=sample_count,
                               nb_val_samples=int(round(validation_ratio*sample_count)),
                               nb_epoch=10,
                               verbose=2,
                               callbacks=[pbi],
                               class_weight=None,
                               nb_worker=1)

    def generate_osr_model(self, training_save_fn):
        """ Builds the optical speech recognizer model

        Reads the class list from the HDF5 save file to size the softmax
        output layer, then compiles the 3D-CNN into self.osr.
        """
        print("".join(["Generating OSR model\n",
                       "-"*40]))
        # only the class count is needed from the save file
        training_save_file = h5py.File(training_save_fn, "r")
        class_count = len(training_save_file.attrs["training_classes"].split(","))
        training_save_file.close()
        osr = Sequential()
        print(" - Adding convolution layers")
        # three conv->conv->pool stages with 32/64/128 filters
        osr.add(Convolution3D(nb_filter=32,
                              kernel_dim1=3,
                              kernel_dim2=3,
                              kernel_dim3=3,
                              border_mode="same",
                              input_shape=(1, self.frames_per_sequence, self.rows, self.columns),
                              activation="relu"))
        osr.add(Dropout(0.2))
        osr.add(Convolution3D(nb_filter=32,
                              kernel_dim1=3,
                              kernel_dim2=3,
                              kernel_dim3=3,
                              border_mode="same",
                              activation="relu"))
        osr.add(MaxPooling3D(pool_size=(3, 3, 3)))
        osr.add(Convolution3D(nb_filter=64,
                              kernel_dim1=3,
                              kernel_dim2=3,
                              kernel_dim3=3,
                              border_mode="same",
                              activation="relu"))
        osr.add(Dropout(0.2))
        osr.add(Convolution3D(nb_filter=64,
                              kernel_dim1=3,
                              kernel_dim2=3,
                              kernel_dim3=3,
                              border_mode="same",
                              activation="relu"))
        osr.add(MaxPooling3D(pool_size=(3, 3, 3)))
        osr.add(Convolution3D(nb_filter=128,
                              kernel_dim1=3,
                              kernel_dim2=3,
                              kernel_dim3=3,
                              border_mode="same",
                              activation="relu"))
        osr.add(Dropout(0.2))
        osr.add(Convolution3D(nb_filter=128,
                              kernel_dim1=3,
                              kernel_dim2=3,
                              kernel_dim3=3,
                              border_mode="same",
                              activation="relu"))
        osr.add(MaxPooling3D(pool_size=(3, 3, 3)))
        osr.add(Flatten())
        osr.add(Dropout(0.2))
        print(" - Adding fully connected layers")
        # funnel 128 -> 64 -> 32 -> softmax over the training classes
        osr.add(Dense(output_dim=128,
                      init="normal",
                      activation="relu"))
        osr.add(Dropout(0.2))
        osr.add(Dense(output_dim=64,
                      init="normal",
                      activation="relu"))
        osr.add(Dropout(0.2))
        osr.add(Dense(output_dim=32,
                      init="normal",
                      activation="relu"))
        osr.add(Dropout(0.2))
        osr.add(Dense(output_dim=class_count,
                      init="normal",
                      activation="softmax"))
        print(" - Compiling model")
        sgd = SGD(lr=0.01,
                  decay=1e-6,
                  momentum=0.9,
                  nesterov=True)
        osr.compile(loss="categorical_crossentropy",
                    optimizer=sgd,
                    metrics=["accuracy"])
        self.osr = osr
        print(" * OSR MODEL GENERATED * ")

    def generate_training_sequences(self, training_save_fn, validation_ratio=0):
        """ Infinite generator of (X, Y) pairs read on demand from the HDF5 file.

        fit_generator expects every yield to carry a leading batch axis, so
        samples are selected with the fancy index [[idx]], giving X shape
        (1, 1, frames, rows, columns) and Y shape (1, class_count). A plain
        [idx] drops the batch axis and triggers the "expected
        convolution3d_input_1 to have 5 dimensions" error.

        With validation_ratio > 0, random indices are yielded instead of the
        full in-order pass used for training.
        """
        # open once and keep open; reopening each epoch without closing
        # (as the original did) leaks file handles
        training_save_file = h5py.File(training_save_fn, "r")
        sample_count = int(training_save_file.attrs["sample_count"])
        while True:
            if validation_ratio:
                # NOTE(review): validation indices are drawn with replacement
                # from ALL samples, so they may repeat and may overlap the
                # training data -- confirm this is intended
                validation_sample_count = int(round(validation_ratio*sample_count))
                validation_sample_idxs = np.random.randint(low=0, high=sample_count, size=validation_sample_count)
                for idx in validation_sample_idxs:
                    X = training_save_file["X"][[idx]]  # (1, 1, frames, rows, columns)
                    Y = training_save_file["Y"][[idx]]  # (1, class_count)
                    yield (X, Y)
            else:
                for idx in range(0, sample_count):
                    X = training_save_file["X"][[idx]]
                    Y = training_save_file["Y"][[idx]]
                    yield (X, Y)

    def _training_class_sequence_paths(self, training_class_data_path):
        """ Return the paths of all .mov sequence files in a class data directory. """
        return [os.path.join(training_class_data_path, file_name)
                for file_name in os.listdir(training_class_data_path)
                if (os.path.isfile(os.path.join(training_class_data_path, file_name))
                    and ".mov" in file_name)]

    def process_training_data(self, config_file, training_save_fn):
        """ Preprocesses training data and saves them into an HDF5 file

        config_file -- JSON mapping of class name -> directory of .mov files.
        Writes dataset "X" of shape (samples, 1, frames, rows, columns),
        one-hot dataset "Y" of shape (samples, classes), and the attributes
        "training_classes" (comma-joined names) and "sample_count".
        """
        # load training metadata from config file
        with open(config_file) as training_config:
            training_metadata = json.load(training_config)
        training_classes = sorted(list(training_metadata.keys()))
        print("".join(["\n",
                       "Found {0} training classes!\n".format(len(training_classes)),
                       "-"*40]))
        for class_label, training_class in enumerate(training_classes):
            print("{0:<4d} {1:<10s} {2:<30s}".format(class_label, training_class, training_metadata[training_class]))
        print("")
        # count number of samples per class
        sample_count = 0
        sample_count_by_class = [0]*len(training_classes)
        for class_label, training_class in enumerate(training_classes):
            training_class_sequence_paths = self._training_class_sequence_paths(training_metadata[training_class])
            sample_count += len(training_class_sequence_paths)
            sample_count_by_class[class_label] = len(training_class_sequence_paths)
        print("".join(["\n",
                       "Found {0} training samples!\n".format(sample_count),
                       "-"*40]))
        for class_label, training_class in enumerate(training_classes):
            print("{0:<4d} {1:<10s} {2:<6d}".format(class_label, training_class, sample_count_by_class[class_label]))
        print("")
        # clear an older duplicate save file first if it exists; the original
        # printed the "already exists" message even when no file was there
        if os.path.isfile(training_save_fn):
            print("Saved file \"{0}\" already exists! Overwriting previous saved file.\n".format(training_save_fn))
            os.remove(training_save_fn)
        training_save_file = h5py.File(training_save_fn, "w")
        training_save_file.attrs["training_classes"] = np.string_(",".join(training_classes))
        training_save_file.attrs["sample_count"] = sample_count
        x_training_dataset = training_save_file.create_dataset("X",
                                                               shape=(sample_count, 1, self.frames_per_sequence, self.rows, self.columns),
                                                               dtype="f")
        y_training_dataset = training_save_file.create_dataset("Y",
                                                               shape=(sample_count, len(training_classes)),
                                                               dtype="i")
        # iterate through each class's sequences
        sample_idx = 0
        for class_label, training_class in enumerate(training_classes):
            training_class_sequence_paths = self._training_class_sequence_paths(training_metadata[training_class])
            for idx, training_class_sequence_path in enumerate(training_class_sequence_paths):
                sys.stdout.write("Processing training data for class \"{0}\": {1}/{2} sequences\r"
                                 .format(training_class, idx+1, len(training_class_sequence_paths)))
                sys.stdout.flush()
                # grayscale, normalized frames wrapped in a channel axis
                frames = self.process_frames(training_class_sequence_path)
                x_training_dataset[sample_idx] = [frames]
                # one-hot encoded sample label
                label = [0]*len(training_classes)
                label[class_label] = 1
                y_training_dataset[sample_idx] = label
                sample_idx += 1
        print("\n")
        training_save_file.close()
        print("Training data processed and saved to {0}".format(training_save_fn))

    def process_frames(self, video_file_path):
        """ Splits frames, resizes frames, converts RGB frames to greyscale, and normalizes frames

        Returns exactly frames_per_sequence float32 frames in [0, 1]; short
        sequences are pre-padded by repeating the first frame, long ones are
        truncated. Raises IndexError on a video with no readable frames.
        """
        video = cv2.VideoCapture(video_file_path)
        frames = []
        success = True
        # resize, convert to grayscale, normalize, and collect valid frames.
        # The original issued an extra read() before the loop, silently
        # discarding the first frame of every video.
        while success:
            success, frame = video.read()
            if success:
                # cv2.resize takes (width, height) = (columns, rows); the
                # original passed (rows, columns), which only worked because
                # both are 100 here
                frame = cv2.resize(frame, (self.columns, self.rows))
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                frame = frame.astype('float32') / 255.0
                frames.append(frame)
        video.release()  # free the capture handle
        # pre-pad short sequences and equalize frame lengths
        if len(frames) < self.frames_per_sequence:
            frames = [frames[0]]*(self.frames_per_sequence - len(frames)) + frames
        frames = frames[0:self.frames_per_sequence]
        return frames
class PrintBatchInfo(Callback):
    """ Keras callback that dumps the metric logs dict after every batch. """

    def on_batch_end(self, epoch, logs={}):
        # NOTE(review): the first positional argument looks like the batch
        # index despite being named `epoch` -- confirm against the Keras
        # Callback API in use.
        print(logs)
if __name__ == "__main__":
    # 100x100-pixel frames, 90 frames per sample sequence
    recognizer = OpticalSpeechRecognizer(100, 100, 90)
    # preprocess the raw videos into an HDF5 file, then build and train
    recognizer.process_training_data("training_config.json", "training_data.h5")
    recognizer.generate_osr_model("training_data.h5")
    recognizer.train_osr_model("training_data.h5")
让我感到困惑的是,报告的输入维度是预期的输入维度,但它提示缺少第 5 个维度。生成器是否应该为每次迭代生成一批样本而不是单个样本以生成 5 维输出?
最佳答案
如果您每次返回单个样本,您需要确保输出是 5 维的,形状为:(batch_size, channels, frames, height, width)
。这仅仅是因为每一层的维度应该是固定的。使这项工作最简单的方法是:
X = training_save_file["X"][[idx]]
通过此修复,您的输出应该符合预期的形状。
关于python - Keras 3D 卷积 : Error when checking model input: expected convolution3d_input_1 to have 5 dimensions, 但得到数组形状 (1, 90, 100, 100),我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/42032250/
我的代码如下所示: unemp n)) stop("wrong embedding dimension") : argument is of length zero Called from: emb
Macports 更新后,我认为更新了 numpy,我收到警告: VisibleDeprecationWarning: boolean index did not match indexed arra
我试图使用 MNIST 数据集实现简单的神经网络,但我不断收到此错误 将 matplotlib.pyplot 导入为 plt import torch from torchvision import
我编写了自己的相机 Activity ,我在FrameLayout中显示照片的实时预览,但是实时图片看起来不自然,有点高,我认为这是因为尺寸FrameLayout 的尺寸与相机的尺寸不匹配。我应该做什
tf.reduce_mean() 函数以 axis 参数中引用的索引的方式对数组元素求和。 在下面的代码中: import tensorflow as tf x = tf.Variable([1, 2
我有一个航类延误数据电子表格,我正在处理一个显示每个机场总延误时间的工作表。我想过滤维度“机场”,即根据每个机场的起飞次数创建机场类别,“小型”、“中型”和“大型”,这是通过计算维度“航类号”计算得出
如何在 3 列 X_train 数据上使用以下代码。错误 "ValueError: query data dimension must match training data dimension" 在
JavaScript:给定一个字符串数组: ['properties.dimensions.length', 'properties.name'] 在散列中使用这些来验证或访问它们(实际上只是想验证
JavaScript:给定一个字符串数组: ['properties.dimensions.length', 'properties.name'] 在散列中使用这些来验证或访问它们(实际上只是想验证
我有以下代码用于整数 vector 的 vector (即整数矩阵..) vector > scores (3, vector(2,0)); cout<
尽管已经有很多关于这个主题的答案,但在下面的例子中没有看到(摘自 https://gist.github.com/lirnli/c16ef186c75588e705d9864fb816a13c on
我有一堆保证有的图片: 最小宽度 = 200 像素 最大宽度 = 250 像素 最小高度 = 150 像素 最大高度 = 175 像素 我想要做的是显示一个由 200 像素 x 150 像素组成的图像
转tensorrt时报错: input: kMAX dimensions in profile 0 are [2,3,128,128] but input has static dimensions
我正在尝试对在 UCI 机器学习数据库中找到的一些文本识别数据进行 k 最近邻预测。 (https://archive.ics.uci.edu/ml/datasets/Letter+Recogniti
如何有效地在更高维空间上追踪等值面 最佳答案 你有一个 N 维的标量成本函数, f(y0, y1, .., yN) ∊ ℝ, y ∊ ℝ 但仅在规则的矩形网格中采样, yk = Ψk + ψk x
我正在尝试根据《Doing Bayesian Data Analysis: A Tutorial with R, JAGS, and Stan (2015)》一书来学习贝叶斯分析。 这本书里有例子。所
LEt x_t = F(x_{t-1}) 是 chaotic regime. 中的一个时间离散动力系统 从初始条件x_0开始,我们可以生成一个时间序列=x_t,其中t =1,2,...,T 表示时间索
当我尝试使用: const {width, height} = Dimensions.get('window'); 在 React Native 组件上,我收到一个以前从未见过的奇怪错误: 找不到变量
关闭。这个问题是opinion-based .它目前不接受答案。 想要改进这个问题? 更新问题,以便 editing this post 可以用事实和引用来回答它. 关闭 2 年前。 Improve
已关闭。此问题不符合Stack Overflow guidelines 。目前不接受答案。 这个问题似乎与 help center 中定义的范围内的编程无关。 . 已关闭 9 年前。 Improve
我是一名优秀的程序员,十分优秀!