gpt4 book ai didi

python - Estimator中的TensorFlow图错误(ValueError:Tensor(...)必须与Tensor(...)来自同一图)

转载 作者:行者123 更新时间:2023-12-01 07:15:06 24 4
gpt4 key购买 nike

更新:使用tensorflow-gpu 1.13.1测试相同的代码在我的PC和Google Cloud上均可使用。



使用TensorFlow Estimator并运行train_and_evaluate给我以下错误消息:

“ ValueError:Tensor(” Const:0“,shape =(3,),dtype = float32)必须与Tensor(” ParallelMapDataset:0“,shape =(),dtype = variant,device = / device来自同一张图:CPU:0)。” (请参阅底部附近的完整错误输出)

当使用GPU(GeForge RTX 2070)在我的PC上训练CNN时,会发生这种情况。我正在将python 3.7和tensorflow-gpu / tensorflow 1.14.0,Keras 2.2.4一起使用,并在Conda环境中运行。

在以下日志消息“ ...将2716的检查点保存到C:/EstimatorOutput/10/model.ckpt”之后发生。并且似乎是在处理评估步骤的输入功能时。

现在的代码以前没有问题,但是由于我不清楚的原因,它突然改变了。

我在Google Cloud上运行了类似的代码(之前也运行良好),并且发生了相同的问题(请参见底部错误输出;在GPU上运行(BASIC_GPU); TensorFlow 1.14; Keras 2.2.4)

由于某种原因,新图表不兼容时,错误似乎与创建图表时的评估步骤有关。

这是我的代码->

我的任务模块:

import tensorflow as tf
from train_model import model #("train_model" is local folder)
from train_model.model import create_estimator

if __name__ == '__main__':

model_num = 10

# Throw properties into params dict to pass to other functions
params = {}
params['train csv'] = "train_set_local.csv"
params['eval csv'] = "eval_set_local.csv"
params['output path'] = "C:/EstimatorOutput/" + str(model_num) + "/"
params['data path'] = "C:/Databases/Birds_dB/Images"
params['image size'] = [244, 224]
params["batch size"] = 16*2
params['use random flip'] = True
params['learning rate'] = 0.000001
params['dropout rate'] = 0.50
params['num classes'] = 123
params['train steps'] = 65000
params['eval steps'] = 20
params['eval_throttle_secs'] = 600
params['num parallel calls'] = 4

# Run the training job
model.go_train(params) # (See "go_train" below in model script ->)


我的模型模块

import tensorflow as tf
from tensorflow.python.keras import estimator as kes
from tensorflow.python.keras.applications.vgg16 import VGG16
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Dropout, Flatten, Dense
from train_model.input_fn import make_input_fn


def create_estimator(params):
# Import VGG16 model for transfer learning
base_model = VGG16(weights='imagenet')
base_model.summary()

x = base_model.get_layer('fc2').output

x = Dropout(params['dropout rate'])(x)

predictions = Dense(params['num classes'], activation="sigmoid", name="sm_out")(x)

model = Model(inputs=base_model.input, outputs=predictions)

for layer in model.layers:
layer.trainable = True

model.compile(
loss="binary_crossentropy",
optimizer=tf.train.AdamOptimizer(params['learning rate'],
beta1=0.9,
beta2=0.999),
metrics=["categorical_accuracy"]
)



config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.per_process_gpu_memory_fraction = 0.95
run_config = tf.estimator.RunConfig(
session_config=config,
model_dir=params['output path']
)

# Convert to Estimator
estimator_model = kes.model_to_estimator(
keras_model=model,
config=run_config
)

return estimator_model


def go_train(params):
# Create the estimator
Est = create_estimator(params)

# Set up Estimator train and evaluation specifications
train_spec = tf.estimator.TrainSpec(
input_fn=make_input_fn(params['train csv'], tf.estimator.ModeKeys.TRAIN, params, augment=True),
max_steps=params['train steps']
)
eval_spec = tf.estimator.EvalSpec(
input_fn=make_input_fn(params['eval csv'], tf.estimator.ModeKeys.EVAL, params, augment=True),
steps=params['eval steps'], # Evaluates on "eval steps" batches
throttle_secs=params['eval_throttle_secs']
)


# Run training and evaluation
tf.estimator.train_and_evaluate(Est, train_spec, eval_spec)



我的输入模块:

import tensorflow as tf
from keras.applications.vgg16 import preprocess_input

tf.logging.set_verbosity(v=tf.logging.INFO)

HEIGHT = 224
WIDTH = 224
NUM_CHANNELS = 3
NCLASSES = 123


def read_and_preprocess_with_augment(image_bytes, label=None):
return read_and_preprocess(image_bytes, label, augment=True)


def read_and_preprocess(image_bytes, label=None, augment=False):

image = tf.image.decode_jpeg(contents=image_bytes, channels=NUM_CHANNELS)
image = tf.image.convert_image_dtype(image=image, dtype=tf.float32) # 0-1
image = tf.expand_dims(input=image, axis=0) # resize_bilinear needs batches

if augment:

# Resize to slightly larger than target size
image = tf.image.resize_bilinear(images=image, size=[HEIGHT + 50, WIDTH + 50], align_corners=False)

# Image random rotation
degree_angle = tf.random.uniform((), minval=-25, maxval=25, dtype=tf.dtypes.float32)
radian = degree_angle * 3.14 / 180
image = tf.contrib.image.rotate(image, radian, interpolation='NEAREST')

# remove batch dimension
image = tf.squeeze(input=image, axis=0)

# Random Crop
image = tf.random_crop(value=image, size=[HEIGHT, WIDTH, NUM_CHANNELS])
# Random L-R flip
image = tf.image.random_flip_left_right(image=image)
# Random brightness
image = tf.image.random_brightness(image=image, max_delta=63.0 / 255.0)
# Random contrast
image = tf.image.random_contrast(image=image, lower=0.2, upper=1.8)

else:
image = tf.image.resize_bilinear(images=image, size=[HEIGHT, WIDTH], align_corners=False)
image = tf.squeeze(input=image, axis=0) # remove batch dimension

image = tf.cast(tf.round(image * 255), tf.int32)
image = preprocess_input(image)

label = tf.one_hot(tf.strings.to_number(label, out_type=tf.int32), depth=NCLASSES)

return {"input_1": image}, label


def make_input_fn(csv_of_filenames, mode, params, augment=False):
def _input_fn():
def decode_csv(csv_row):
filename, label = tf.decode_csv(records=csv_row, record_defaults=[[""], [""]])
image_bytes = tf.read_file(filename=filename)
return image_bytes, label

# Create tf.data.dataset from filename
dataset = tf.data.TextLineDataset(filenames=csv_of_filenames).map(map_func=decode_csv, num_parallel_calls=params['num parallel calls'])

if augment:
dataset = dataset.map(map_func=read_and_preprocess_with_augment, num_parallel_calls=params['num parallel calls'])
else:
dataset = dataset.map(map_func=read_and_preprocess, num_parallel_calls=params['num parallel calls'])

if mode == tf.estimator.ModeKeys.TRAIN:
num_epochs = None
dataset = dataset.shuffle(buffer_size=10*params["batch size"])
else:
num_epochs = 1

dataset = dataset.repeat(count=num_epochs).batch(batch_size=params["batch size"]).prefetch(4)
images, labels = dataset.make_one_shot_iterator().get_next()

return images, labels
return _input_fn


PC错误输出

如上所述,在我的GPU结果上本地运行时,以上代码是以下一系列错误消息(缩写):

将2716的检查点保存到....
...
...
  _evaluate中的文件“ C:... \ estimator.py”,第501行
    self._evaluate_build_graph(input_fn,hook,checkpoint_path))

_evaluate_build_graph中的文件“ C:... \ estimator.py”,行1501
    self._call_model_fn_eval(input_fn,self.config))

_call_model_fn_eval中的文件“ C:... \ estimator.py”,第1534行
    input_fn,ModeKeys.EVAL)

文件“ C:... \ estimator.py”,行1022,在_get_features_and_labels_from_input_fn中
    self._call_input_fn(input_fn,模式))

_call_input_fn中的文件“ C:... \ estimator.py”,行1113
    返回input_fn(** kwargs)

_input_fn中的文件“ C:... \ input_fn.py”,第71行
    数据集=数据集.map(map_func = read_and_preprocess_with_augment,num_parallel_calls = params ['num并行调用'])

地图中的文件“ C:... dataset_ops.py”,行1776
    自我,map_func,num_parallel_calls,preserve_cardinality = False))

init文件“ C:... \ dataset_ops.py”,行3239
    ** flat_structure(self))

文件“ C:... \ gen_dataset_ops.py”,行4179,在parallel_map_dataset中
    名称=名称)

_apply_op_helper中的文件“ C:... \ op_def_library.py”,行366
    g = ops._get_graph_from_inputs(_Flatten(keywords.values()))

_get_graph_from_inputs中的文件“ C:... \ ops.py”,行6135
    _assert_same_graph(原始图形元素,graph_element)

_assert_same_graph中的文件“ C:... ops.py”,第6071行
    (项目,original_item))

ValueError:Tensor(“ Const:0”,shape =(3,),dtype = float32)必须与Tensor(“ ParallelMapDataset:0”,shape =(),dtype = variant,device = / device来自同一张图: CPU:0)。

在Google Cloud上输出错误

服务
副本母版0退出,其非零状态为1。
追溯(最近一次通话):[...]

文件“ /usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/estimator.py”,第1534行,位于_call_model_fn_eval input_fn,ModeKeys.EVAL中)

文件“ /usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/estimator.py”,第1022行,位于_get_features_and_labels_from_input_fn self._call_input_fn(input_fn,模式))

_call_input_fn中的文件“ /usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/estimator.py”,行1113返回input_fn(** kwargs)

在_input_fn数据集=数据集.map(map_func = read_and_preprocess_with_augment,num_parallel_calls = params ['num并行调用']中,文件“ /root/.local/lib/python3.5/site-packages/train_model/input_fn.py”,第87行)

地图自我中的文件“ /usr/local/lib/python3.5/dist-packages/tensorflow/python/data/ops/dataset_ops.py”,行1776,map_func,num_parallel_calls,preserve_cardinality = False))

文件“ /usr/local/lib/python3.5/dist-packages/tensorflow/python/data/ops/dataset_ops.py”,第3239行,init ** flat_structure(self))文件“ / usr / local / lib /python3.5/dist-packages/tensorflow/python/ops/gen_dataset_ops.py“,第4179行,在parallel_map_dataset name = name中)文件” /usr/local/lib/python//3.5-dist-packages/tensorflow/python/ framework / op_def_library.py”,行366,在_apply_op_helper中g = ops._get_graph_from_inputs(_Flatten(keywords.values()))

在_get_graph_from_inputs _assert_same_graph(original_graph_element,graph_element)中的文件“ /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py”,行6135

_assert_same_graph中的文件“ /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py”,第6071行(项目,original_item))

ValueError:Tensor(“ Const_1:0”,shape =(3,),dtype = float32,device = / device:CPU:0)必须与Tensor(“ ParallelMapDataset:0”,shape =(),来自同一张图dtype = variant,设备= /设备:CPU:0)。

任何帮助/提示深表感谢。我被困在这一点上,不知道如何调试这一点!

最佳答案

使用以下预处理功能:

from tensorflow.keras.applications.mobilenet import preprocess_input


它具有与VGG预处理输入相同的功能。

关于python - Estimator中的TensorFlow图错误(ValueError:Tensor(...)必须与Tensor(...)来自同一图),我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/58015667/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com