gpt4 book ai didi

python - 从 tf.keras.utils.Sequence 构建的自定义数据生成器不适用于 tensorflow 模型的 fit api

转载 作者:行者123 更新时间:2023-12-04 17:20:04 25 4
gpt4 key购买 nike

我根据这个 link 中的指南实现了一个 Sequence 生成器对象。

import tensorflow as tf
from cv2 import imread, resize
from sklearn.utils import shuffle
from cv2 import imread, resize
import numpy as np
from tensorflow.keras import utils
import math
import keras as ks

class reader(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that loads batches of image clips from disk.

    ``x`` is sliced per batch; every sample in the slice is iterated as a
    collection of image paths that are read with ``cv2.imread``.  ``y`` holds
    the matching class labels, which are one-hot encoded with
    ``to_categorical`` using ``n_class`` classes.
    """

    def __init__(self, x, y, batch_size, n_class):
        self.x, self.y = x, y
        self.batch_size = batch_size
        self.n_class = n_class

    def __len__(self):
        # Number of batches per epoch; the final batch may be shorter.
        return math.ceil(len(self.x) / self.batch_size)

    def __getitem__(self, idx):
        """Return batch ``idx`` as a ``(data_x, data_y)`` tuple."""
        print('getitem', idx)
        start = idx * self.batch_size
        stop = (idx + 1) * self.batch_size
        batch_x = self.x[start:stop]
        batch_y = self.y[start:stop]

        data_x = list()
        for batch in batch_x:
            tmp = list()
            for img_path in batch:
                try:
                    img = imread(img_path)
                    tmp.append(img)
                except Exception as e:
                    print(e)
                    print('failed to find path {}'.format(img_path))
            data_x.append(tmp)

        # NOTE: dtype='object' is deliberately left as posted -- it is the
        # subject of this question.  The answer further down this page
        # reports that switching to dtype='float32' makes tf.keras fit() work.
        data_x = np.array(data_x, dtype='object')
        data_y = np.array(batch_y)
        data_y = utils.to_categorical(data_y, self.n_class)
        print('return item')
        print(data_x.shape)
        return (data_x, data_y)

    def on_epoch_end(self):
        # Optional hook run at the end of each epoch: reshuffle the samples.
        print('on epoch end')
        # np.random.randint() with no arguments raises TypeError.  Passing
        # both sequences to a single shuffle() call applies one permutation
        # to both, so samples and labels stay aligned without a shared seed.
        self.x, self.y = shuffle(self.x, self.y)
但是,它不适用于 tensorflow 模型的 fit api。下面是我用来复制这个问题的简单模型架构。
# tf.keras 3D-convolution classifier used to reproduce the problem.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Conv3D(10, input_shape=(TEMPORAL_LENGTH,HEIGHT,WIDTH,CHANNEL), kernel_size=(2,2,2), strides=2))
model.add(tf.keras.layers.Conv3D(10, kernel_size=(2,3,3), strides=2))
model.add(tf.keras.layers.Conv3D(10, kernel_size=(2,3,3), strides=2))
model.add(tf.keras.layers.Conv3D(10, kernel_size=(2,3,3), strides=2))
model.add(tf.keras.layers.Flatten())
# 'categorical_crossentropy' treats the model output as probabilities by
# default (from_logits=False), so the classification layer needs softmax.
model.add(tf.keras.layers.Dense(10, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])
model.summary()
接下来创建一个读取器 r1 = reader(x_train, y_train, 20, 10)，然后调用 model.fit API。
# Train straight from the Sequence object for 3 epochs, with each epoch
# limited to 5 batches by steps_per_epoch.
train_history = model.fit(r1, epochs=3, steps_per_epoch=5, verbose=1)
### output ###
getitem 0
return item
(20, 16, 192, 256, 3)
WARNING:tensorflow:sample_weight modes were coerced from
...
to
['...']
Train for 5 steps
Epoch 1/3
如果我不手动中断，它会一直卡在这里。出于好奇，我用 Keras api 创建的模型尝试了这种方法，令我惊讶的是它确实有效！
# Same architecture built with the standalone Keras API for comparison.
model = ks.models.Sequential()
model.add(ks.layers.Conv3D(10, input_shape=(TEMPORAL_LENGTH,HEIGHT,WIDTH,CHANNEL), kernel_size=(2,2,2), strides=2))
model.add(ks.layers.Conv3D(10, kernel_size=(2,3,3), strides=2))
model.add(ks.layers.Conv3D(10, kernel_size=(2,3,3), strides=2))
model.add(ks.layers.Conv3D(10, kernel_size=(2,3,3), strides=2))
model.add(ks.layers.Flatten())
# 'categorical_crossentropy' treats the model output as probabilities by
# default (from_logits=False), so the classification layer needs softmax.
model.add(ks.layers.Dense(10, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
# Train straight from the Sequence object, 5 batches per epoch.
train_history = model.fit(r1, epochs=3, steps_per_epoch=5, verbose=1)
### output ###
Epoch 1/3
getitem 586
return item
(20, 16, 192, 256, 3)
getitem 169
1/5 [=====>........................] - ETA: 22s - loss: 11.0373 - accuracy: 0.0000e+00return item
(20, 16, 192, 256, 3)
getitem 601
2/5 [===========>..................] - ETA: 12s - loss: 7.9983 - accuracy: 0.0250 return item
(20, 16, 192, 256, 3)
getitem 426
3/5 [=================>............] - ETA: 8s - loss: 10.7049 - accuracy: 0.2500return item
(20, 16, 192, 256, 3)
getitem 243
4/5 [=======================>......] - ETA: 3s - loss: 8.5093 - accuracy: 0.1875
依赖
  • tensorflow-gpu: 2.1
  • keras-gpu:2.3.1
  • 最佳答案

    各位前辈好。很抱歉回复晚了，我已经找到了解决此问题的方法。
    我唯一需要更改的，就是在 self.__getitem__() 函数中将 data_x 转换为 dtype='float32'。要复现该问题，只需将 dtype 改为 'object'。
    除此之外，请允许我说明：ActionDataGenerator 类是根据 Anujshah 的教程修改而来的。

    import tensorflow as tf
    from sklearn.utils import shuffle
    import cv2
    from cv2 import imread, resize
    from tensorflow.keras import utils
    import math
    import keras as ks
    import pandas as pd
    import numpy as np
    import os
    from collections import deque
    import copy

    class reader(tf.keras.utils.Sequence):
        """Keras ``Sequence`` that loads batches of image clips as float32 arrays.

        Args:
            x: Sequence of samples; each sample is iterated as a collection of
                image paths read with ``cv2.imread``.
            y: Class labels aligned with ``x``; one-hot encoded per batch.
            batch_size: Number of samples per batch.
            n_class: Number of classes passed to ``to_categorical``.
            img_size: ``(height, width)`` every frame is resized to when it
                does not already match.  Defaults to ``(192, 256)``.
        """

        def __init__(self, x, y, batch_size, n_class, img_size=(192, 256)):
            super().__init__()
            self.x, self.y = x, y
            self.batch_size = batch_size
            self.n_class = n_class
            self.img_size = tuple(img_size)

        def __len__(self):
            # Number of batches per epoch; the final batch may be shorter.
            return math.ceil(len(self.x) / self.batch_size)

        def __getitem__(self, idx):
            """Return batch ``idx`` as ``(data_x, data_y)``.

            ``data_x`` is a float32 array built from the loaded frames and
            ``data_y`` is the one-hot encoding of the batch labels.
            """
            start = idx * self.batch_size
            stop = start + self.batch_size
            batch_x = self.x[start:stop]
            batch_y = self.y[start:stop]
            height, width = self.img_size

            data_x = []
            for clip in batch_x:
                frames = []
                for img_path in clip:
                    try:
                        img = imread(img_path)
                        if img is None:
                            # cv2.imread reports an unreadable file by
                            # returning None instead of raising.
                            raise FileNotFoundError(img_path)
                        if img.shape[:2] != (height, width):
                            # cv2.resize expects the target as (width, height).
                            img = cv2.resize(img, (width, height))
                        frames.append(img)
                    except Exception as e:
                        print(e)
                        print('failed to find path {}'.format(img_path))
                data_x.append(frames)

            # A numeric dtype is required here: an object array makes
            # tf.keras fit() stall, which is the issue this answer resolves.
            data_x = np.array(data_x, dtype='float32')
            data_y = utils.to_categorical(np.array(batch_y), self.n_class)
            return data_x, data_y

        def on_epoch_end(self):
            # Optional hook run at the end of each epoch: reshuffle the samples.
            # np.random.randint() with no arguments raises TypeError.  Passing
            # both sequences to a single shuffle() call applies one permutation
            # to both, so samples and labels stay aligned without a shared seed.
            self.x, self.y = shuffle(self.x, self.y)

    class ActionDataGenerator(object):
        """Builds fixed-length windows of frame paths from per-video CSV files.

        Each CSV file is read for a ``FileName`` column (frame paths, in row
        order) and a ``Label`` column.  The label on the first row is used for
        every window produced from that file.
        """

        def __init__(self, root_data_path, temporal_stride=1, temporal_length=16, resize=224, max_sample=20):
            self.root_data_path = root_data_path
            self.temporal_length = temporal_length
            self.temporal_stride = temporal_stride
            self.resize = resize
            self.max_sample = max_sample

        def file_generator(self, data_path, data_files):
            '''
            Yield ``(window, label)`` pairs for every CSV file.

            data_path - directory containing the csv files.
            data_files - list of csv files to be read.

            ``window`` is a deque of ``temporal_length`` consecutive file names;
            successive windows start ``temporal_stride`` rows apart and at most
            ``max_sample`` windows are produced per file.  Files with fewer
            than ``temporal_length`` rows are reported and skipped.
            '''
            for f in data_files:
                tmp_df = pd.read_csv(os.path.join(data_path, f))
                label_list = list(tmp_df['Label'])
                total_images = len(label_list)
                if total_images < self.temporal_length:
                    print ('num of frames is less than temporal length; hence discarding this file-{}'.format(f))
                    continue

                img_list = list(tmp_df['FileName'])
                num_samples = (total_images - self.temporal_length) // self.temporal_stride + 1
                # A None or negative max_sample never matched the original
                # equality check, i.e. it meant "no cap"; keep that meaning.
                if self.max_sample is not None and self.max_sample >= 0:
                    num_samples = min(num_samples, self.max_sample)

                # Index-based windows stay correct when the stride is larger
                # than the window length (popping from a shared deque would
                # run out of elements in that case).
                for k in range(num_samples):
                    start = k * self.temporal_stride
                    window = img_list[start:start + self.temporal_length]
                    yield deque(window), label_list[0]

        def load_samples(self, data_cat='train', test_ratio=0.1):
            """Collect every ``[frame_paths, label]`` sample under ``data_cat``.

            Args:
                data_cat: Sub-directory of ``root_data_path`` holding the CSVs.
                test_ratio: Accepted for backward compatibility; not used.

            Returns:
                List of ``[list_of_file_names, label]`` entries, unshuffled.
                If reading fails part-way, the error is printed and the
                samples gathered so far are returned.
            """
            data_path = os.path.join(self.root_data_path, data_cat)
            # os.listdir order is arbitrary; sort for a reproducible sample order.
            csv_data_files = sorted(os.listdir(data_path))
            data_list = []
            try:
                for x, y in self.file_generator(data_path, csv_data_files):
                    data_list.append([list(x), y])
            except Exception as e:
                print ('the exception: ', e)
            print ('end of data generator')
            return data_list

        def train_validation_split(self, data_list, target_column, val_size=0.1, ks_sequence=False):
            """Split samples into train/validation sets, stratified by label.

            For each distinct label (in sorted order) the first
            ``int(count * val_size)`` samples go to validation and the rest to
            training.  No shuffling is performed.

            Args:
                data_list: List of ``[feature, label]`` entries.
                target_column: Name given to the label column.
                val_size: Fraction of each label's samples used for validation.
                ks_sequence: When true, return four separate lists
                    ``(x_train, y_train, x_val, y_val)``; otherwise return
                    ``(train, validation)`` lists of ``[feature, label]`` rows.
            """
            dataframe = pd.DataFrame(data_list, columns=['Feature', target_column])

            train_parts, val_parts = [], []
            # Group by the actual label values rather than assuming they are
            # exactly 0..n-1.
            for _, group in dataframe.groupby(target_column, sort=True):
                cut = int(group.shape[0] * val_size)
                val_parts.append(group.iloc[:cut])
                train_parts.append(group.iloc[cut:])

            # DataFrame.append was removed in pandas 2.0; concatenate once.
            empty = dataframe.iloc[:0]
            train = pd.concat(train_parts, ignore_index=True) if train_parts else empty
            validation = pd.concat(val_parts, ignore_index=True) if val_parts else empty

            if ks_sequence:
                # Use target_column, not a hard-coded 'Label', for the labels.
                return train['Feature'].values.tolist(), train[target_column].values.tolist(), \
                    validation['Feature'].values.tolist(), validation[target_column].values.tolist() # without shuffle
            return train.values.tolist(), validation.values.tolist() # without shuffle

    # Experiment configuration.
    root_data_path = 'C:\\Users\\AI-lab\\Documents\\activity_file\\UCF101\\csv_files\\' # machine specific
    CLASSES = 101
    BATCH_SIZE = 10
    # Matches the epoch count the fit() call below has always used; the
    # constant was previously declared but never referenced.
    EPOCHS = 3
    TEMPORAL_STRIDE = 8
    TEMPORAL_LENGTH = 16
    MAX_SAMPLE = 20
    HEIGHT = 192
    WIDTH = 256
    CHANNEL = 3

    # Build the sample lists and wrap them in Sequence readers.
    data_gen_obj = ActionDataGenerator(root_data_path, temporal_stride=TEMPORAL_STRIDE,
                                       temporal_length=TEMPORAL_LENGTH, max_sample=MAX_SAMPLE)
    train_data = data_gen_obj.load_samples(data_cat='train')
    x_train, y_train, x_val, y_val = data_gen_obj.train_validation_split(train_data, 'Label', 0.1, True)
    r1 = reader(x_train, y_train, BATCH_SIZE, CLASSES)
    r2 = reader(x_val, y_val, BATCH_SIZE, CLASSES)
    print(type(r1), type(r2))

    # 3D-convolution classifier over (frames, height, width, channels) clips.
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Conv3D(10, input_shape=(TEMPORAL_LENGTH,HEIGHT,WIDTH,CHANNEL), kernel_size=(2,2,2), strides=2))
    model.add(tf.keras.layers.Conv3D(10, kernel_size=(2,3,3), strides=2))
    model.add(tf.keras.layers.Conv3D(10, kernel_size=(2,3,3), strides=2))
    model.add(tf.keras.layers.Conv3D(10, kernel_size=(2,3,3), strides=2))
    model.add(tf.keras.layers.Flatten())
    # Output width follows CLASSES so it cannot drift from the one-hot labels
    # the readers produce.
    model.add(tf.keras.layers.Dense(CLASSES, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.summary()

    # One epoch covers every batch of the training reader.
    train_history = model.fit(r1, epochs=EPOCHS, steps_per_epoch=len(r1), verbose=1)
    score = model.evaluate(r2, steps=5)
    print(score)
    输出
    the exception:  
    end of data generator
    <class '__main__.reader'> <class '__main__.reader'>
    Model: "sequential"
    _________________________________________________________________
    Layer (type) Output Shape Param #
    =================================================================
    conv3d (Conv3D) (None, 8, 96, 128, 10) 250
    _________________________________________________________________
    conv3d_1 (Conv3D) (None, 4, 47, 63, 10) 1810
    _________________________________________________________________
    conv3d_2 (Conv3D) (None, 2, 23, 31, 10) 1810
    _________________________________________________________________
    conv3d_3 (Conv3D) (None, 1, 11, 15, 10) 1810
    _________________________________________________________________
    flatten (Flatten) (None, 1650) 0
    _________________________________________________________________
    dense (Dense) (None, 101) 166751
    =================================================================
    Total params: 172,431
    Trainable params: 172,431
    Non-trainable params: 0
    _________________________________________________________________
    WARNING:tensorflow:sample_weight modes were coerced from
    ...
    to
    ['...']
    Train for 17562 steps
    Epoch 1/3
    77/17562 [..............................] - ETA: 1:35:53 - loss: 67.0937 - accuracy: 0.0156

    关于python - 从 tf.keras.utils.Sequence 构建的自定义数据生成器不适用于 tensorflow 模型的 fit api,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/66705131/

    25 4 0
    Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
    广告合作:1813099741@qq.com 6ren.com