gpt4 book ai didi

python - NotFoundError(请参阅上面的回溯): Key Variable not found in checkpoint

转载 作者:行者123 更新时间:2023-12-01 09:02:17 25 4
gpt4 key购买 nike

当我使用以下方法恢复保存的模型时:

checkpoint = tf.train.get_checkpoint_state(config.pre_model_dir)
if checkpoint and checkpoint.model_checkpoint_path:
saver.restore(session, checkpoint.model_checkpoint_path)

我收到此错误:

INFO:tensorflow:Restoring parameters from ./saved_model/10_zones/10/network--1685000
---------------------------------------------------------------------------
NotFoundError Traceback (most recent call last)
/usr/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
1321 try:
-> 1322 return fn(*args)
1323 except errors.OpError as e:

/usr/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run_fn(feed_dict, fetch_list, target_list, options, run_metadata)
1306 return self._call_tf_sessionrun(
-> 1307 options, feed_dict, fetch_list, target_list, run_metadata)
1308

/usr/lib/python3.6/site-packages/tensorflow/python/client/session.py in _call_tf_sessionrun(self, options, feed_dict, fetch_list, target_list, run_metadata)
1408 self._session, options, feed_dict, fetch_list, target_list,
-> 1409 run_metadata)
1410 else:

NotFoundError: Key Variable not found in checkpoint
[[Node: save/RestoreV2 = RestoreV2[dtypes=[DT_INT32, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, ..., DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]
[[Node: save/RestoreV2/_21 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_18_save/RestoreV2", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]

During handling of the above exception, another exception occurred:

NotFoundError Traceback (most recent call last)
<ipython-input-97-0cbd09927b40> in <module>()
42 checkpoint = tf.train.get_checkpoint_state(config.pre_model_dir)
43 if checkpoint and checkpoint.model_checkpoint_path:
---> 44 saver.restore(session, checkpoint.model_checkpoint_path)
45 print("loaded the model")
46 else:

/usr/lib/python3.6/site-packages/tensorflow/python/training/saver.py in restore(self, sess, save_path)
1800 else:
1801 sess.run(self.saver_def.restore_op_name,
-> 1802 {self.saver_def.filename_tensor_name: save_path})
1803
1804 @staticmethod

/usr/lib/python3.6/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
898 try:
899 result = self._run(None, fetches, feed_dict, options_ptr,
--> 900 run_metadata_ptr)
901 if run_metadata:
902 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/usr/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
1133 if final_fetches or final_targets or (handle and feed_dict_tensor):
1134 results = self._do_run(handle, final_targets, final_fetches,
-> 1135 feed_dict_tensor, options, run_metadata)
1136 else:
1137 results = []

/usr/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1314 if handle is None:
1315 return self._do_call(_run_fn, feeds, fetches, targets, options,
-> 1316 run_metadata)
1317 else:
1318 return self._do_call(_prun_fn, handle, feeds, fetches)

/usr/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
1333 except KeyError:
1334 pass
-> 1335 raise type(e)(node_def, op, message)
1336
1337 def _extend_graph(self):

NotFoundError: Key Variable not found in checkpoint
[[Node: save/RestoreV2 = RestoreV2[dtypes=[DT_INT32, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, ..., DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]
[[Node: save/RestoreV2/_21 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_18_save/RestoreV2", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]

Caused by op 'save/RestoreV2', defined at:
File "/usr/lib64/python3.6/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib64/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/usr/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "/usr/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
app.start()
File "/usr/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 486, in start
self.io_loop.start()
File "/usr/lib64/python3.6/site-packages/tornado/platform/asyncio.py", line 127, in start
self.asyncio_loop.run_forever()
File "/usr/lib64/python3.6/asyncio/base_events.py", line 422, in run_forever
self._run_once()
File "/usr/lib64/python3.6/asyncio/base_events.py", line 1432, in _run_once
handle._run()
File "/usr/lib64/python3.6/asyncio/events.py", line 145, in _run
self._callback(*self._args)
File "/usr/lib64/python3.6/site-packages/tornado/platform/asyncio.py", line 117, in _handle_events
handler_func(fileobj, events)
File "/usr/lib64/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
return fn(*args, **kwargs)
File "/usr/lib64/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
self._handle_recv()
File "/usr/lib64/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
self._run_callback(callback, msg)
File "/usr/lib64/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
callback(*args, **kwargs)
File "/usr/lib64/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
return fn(*args, **kwargs)
File "/usr/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "/usr/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
handler(stream, idents, msg)
File "/usr/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "/usr/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/usr/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
raw_cell, store_history, silent, shell_futures)
File "/usr/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2903, in run_ast_nodes
if self.run_code(code, result):
File "/usr/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-97-0cbd09927b40>", line 26, in <module>
saver = tf.train.Saver()
File "/usr/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1338, in __init__
self.build()
File "/usr/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1347, in build
self._build(self._filename, build_save=True, build_restore=True)
File "/usr/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1384, in _build
build_save=build_save, build_restore=build_restore)
File "/usr/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 835, in _build_internal
restore_sequentially, reshape)
File "/usr/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 472, in _AddRestoreOps
restore_sequentially)
File "/usr/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 886, in bulk_restore
return io_ops.restore_v2(filename_tensor, names, slices, dtypes)
File "/usr/lib/python3.6/site-packages/tensorflow/python/ops/gen_io_ops.py", line 1463, in restore_v2
shape_and_slices=shape_and_slices, dtypes=dtypes, name=name)
File "/usr/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/usr/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3392, in create_op
op_def=op_def)
File "/usr/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1718, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access

NotFoundError (see above for traceback): Key Variable not found in checkpoint
[[Node: save/RestoreV2 = RestoreV2[dtypes=[DT_INT32, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, ..., DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]
[[Node: save/RestoreV2/_21 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_18_save/RestoreV2", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]

我搜索了这个错误,发现有一个 tf bug,需要使用完整的相对路径来加载模型。于是我为 config.pre_model_dir 分别尝试了以下两个值:'./saved_model/10_zones/10' 和 os.path.abspath(config.pre_model_dir+'./../saved_model/10_zones/10')。两者都导致了相同的错误。

我还使用以下代码检查了检查点中保存的变量的名称:

from tensorflow.contrib.framework.python.framework import checkpoint_utils

var_list = checkpoint_utils.list_variables(config.pre_model_dir)
for v in var_list:
print(v)

输出结果是:

('actor/main_net/layer1/biases/Variable', [90])
('actor/main_net/layer1/biases/Variable/Adam', [90])
('actor/main_net/layer1/biases/Variable/Adam_1', [90])
('actor/main_net/layer1/weights/Variable', [30, 90])
('actor/main_net/layer1/weights/Variable/Adam', [30, 90])
('actor/main_net/layer1/weights/Variable/Adam_1', [30, 90])
('actor/main_net/layer2/biases/Variable', [60])
('actor/main_net/layer2/biases/Variable/Adam', [60])
('actor/main_net/layer2/biases/Variable/Adam_1', [60])
('actor/main_net/layer2/weights/Variable', [90, 60])
('actor/main_net/layer2/weights/Variable/Adam', [90, 60])
('actor/main_net/layer2/weights/Variable/Adam_1', [90, 60])
('actor/main_net/layer3/biases/Variable', [30])
('actor/main_net/layer3/biases/Variable/Adam', [30])
('actor/main_net/layer3/biases/Variable/Adam_1', [30])
('actor/main_net/layer3/weights/Variable', [60, 30])
('actor/main_net/layer3/weights/Variable/Adam', [60, 30])
('actor/main_net/layer3/weights/Variable/Adam_1', [60, 30])
('actor/main_net/layer4/biases/Variable', [10])
('actor/main_net/layer4/biases/Variable/Adam', [10])
('actor/main_net/layer4/biases/Variable/Adam_1', [10])
('actor/main_net/layer4/weights/Variable', [30, 10])
('actor/main_net/layer4/weights/Variable/Adam', [30, 10])
('actor/main_net/layer4/weights/Variable/Adam_1', [30, 10])
('actor/target_net/layer1/biases/Variable', [90])
('actor/target_net/layer1/weights/Variable', [30, 90])
('actor/target_net/layer2/biases/Variable', [60])
('actor/target_net/layer2/weights/Variable', [90, 60])
('actor/target_net/layer3/biases/Variable', [30])
('actor/target_net/layer3/weights/Variable', [60, 30])
('actor/target_net/layer4/biases/Variable', [10])
('actor/target_net/layer4/weights/Variable', [30, 10])
('beta1_power', [])
('beta1_power_1', [])
('beta2_power', [])
('beta2_power_1', [])
('critic/main_net/l1/biases', [90])
('critic/main_net/l1/biases/Adam', [90])
('critic/main_net/l1/biases/Adam_1', [90])
('critic/main_net/l1/weights', [40, 90])
('critic/main_net/l1/weights/Adam', [40, 90])
('critic/main_net/l1/weights/Adam_1', [40, 90])
('critic/main_net/l2/biases', [60])
('critic/main_net/l2/biases/Adam', [60])
('critic/main_net/l2/biases/Adam_1', [60])
('critic/main_net/l2/weights', [90, 60])
('critic/main_net/l2/weights/Adam', [90, 60])
('critic/main_net/l2/weights/Adam_1', [90, 60])
('critic/main_net/l3/biases', [30])
('critic/main_net/l3/biases/Adam', [30])
('critic/main_net/l3/biases/Adam_1', [30])
('critic/main_net/l3/weights', [60, 30])
('critic/main_net/l3/weights/Adam', [60, 30])
('critic/main_net/l3/weights/Adam_1', [60, 30])
('critic/main_net/l4/bias', [1])
('critic/main_net/l4/bias/Adam', [1])
('critic/main_net/l4/bias/Adam_1', [1])
('critic/main_net/l4/kernel', [30, 1])
('critic/main_net/l4/kernel/Adam', [30, 1])
('critic/main_net/l4/kernel/Adam_1', [30, 1])
('critic/target_net/l1/biases', [90])
('critic/target_net/l1/weights', [40, 90])
('critic/target_net/l2/biases', [60])
('critic/target_net/l2/weights', [90, 60])
('critic/target_net/l3/biases', [30])
('critic/target_net/l3/weights', [60, 30])
('critic/target_net/l4/bias', [1])
('critic/target_net/l4/kernel', [30, 1])

而 tf.global_variables() 在我当前模型中返回的结果如下,两者基本一致:

<tf.Variable 'actor/main_net/layer1/weights/Variable:0' shape=(30, 90) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer1/biases/Variable:0' shape=(90,) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer2/weights/Variable:0' shape=(90, 60) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer2/biases/Variable:0' shape=(60,) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer3/weights/Variable:0' shape=(60, 30) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer3/biases/Variable:0' shape=(30,) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer4/weights/Variable:0' shape=(30, 10) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer4/biases/Variable:0' shape=(10,) dtype=float32_ref>,
<tf.Variable 'actor/target_net/layer1/weights/Variable:0' shape=(30, 90) dtype=float32_ref>,
<tf.Variable 'actor/target_net/layer1/biases/Variable:0' shape=(90,) dtype=float32_ref>,
<tf.Variable 'actor/target_net/layer2/weights/Variable:0' shape=(90, 60) dtype=float32_ref>,
<tf.Variable 'actor/target_net/layer2/biases/Variable:0' shape=(60,) dtype=float32_ref>,
<tf.Variable 'actor/target_net/layer3/weights/Variable:0' shape=(60, 30) dtype=float32_ref>,
<tf.Variable 'actor/target_net/layer3/biases/Variable:0' shape=(30,) dtype=float32_ref>,
<tf.Variable 'actor/target_net/layer4/weights/Variable:0' shape=(30, 10) dtype=float32_ref>,
<tf.Variable 'actor/target_net/layer4/biases/Variable:0' shape=(10,) dtype=float32_ref>,
<tf.Variable 'Variable:0' shape=() dtype=int32_ref>,
<tf.Variable 'beta1_power:0' shape=() dtype=float32_ref>,
<tf.Variable 'beta2_power:0' shape=() dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer1/weights/Variable/Adam:0' shape=(30, 90) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer1/weights/Variable/Adam_1:0' shape=(30, 90) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer1/biases/Variable/Adam:0' shape=(90,) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer1/biases/Variable/Adam_1:0' shape=(90,) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer2/weights/Variable/Adam:0' shape=(90, 60) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer2/weights/Variable/Adam_1:0' shape=(90, 60) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer2/biases/Variable/Adam:0' shape=(60,) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer2/biases/Variable/Adam_1:0' shape=(60,) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer3/weights/Variable/Adam:0' shape=(60, 30) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer3/weights/Variable/Adam_1:0' shape=(60, 30) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer3/biases/Variable/Adam:0' shape=(30,) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer3/biases/Variable/Adam_1:0' shape=(30,) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer4/weights/Variable/Adam:0' shape=(30, 10) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer4/weights/Variable/Adam_1:0' shape=(30, 10) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer4/biases/Variable/Adam:0' shape=(10,) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer4/biases/Variable/Adam_1:0' shape=(10,) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l1/weights:0' shape=(40, 90) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l1/biases:0' shape=(90,) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l2/weights:0' shape=(90, 60) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l2/biases:0' shape=(60,) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l3/weights:0' shape=(60, 30) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l3/biases:0' shape=(30,) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l4/kernel:0' shape=(30, 1) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l4/bias:0' shape=(1,) dtype=float32_ref>,
<tf.Variable 'critic/target_net/l1/weights:0' shape=(40, 90) dtype=float32_ref>,
<tf.Variable 'critic/target_net/l1/biases:0' shape=(90,) dtype=float32_ref>,
<tf.Variable 'critic/target_net/l2/weights:0' shape=(90, 60) dtype=float32_ref>,
<tf.Variable 'critic/target_net/l2/biases:0' shape=(60,) dtype=float32_ref>,
<tf.Variable 'critic/target_net/l3/weights:0' shape=(60, 30) dtype=float32_ref>,
<tf.Variable 'critic/target_net/l3/biases:0' shape=(30,) dtype=float32_ref>,
<tf.Variable 'critic/target_net/l4/kernel:0' shape=(30, 1) dtype=float32_ref>,
<tf.Variable 'critic/target_net/l4/bias:0' shape=(1,) dtype=float32_ref>,
<tf.Variable 'beta1_power_1:0' shape=() dtype=float32_ref>,
<tf.Variable 'beta2_power_1:0' shape=() dtype=float32_ref>,
<tf.Variable 'critic/main_net/l1/weights/Adam:0' shape=(40, 90) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l1/weights/Adam_1:0' shape=(40, 90) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l1/biases/Adam:0' shape=(90,) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l1/biases/Adam_1:0' shape=(90,) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l2/weights/Adam:0' shape=(90, 60) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l2/weights/Adam_1:0' shape=(90, 60) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l2/biases/Adam:0' shape=(60,) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l2/biases/Adam_1:0' shape=(60,) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l3/weights/Adam:0' shape=(60, 30) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l3/weights/Adam_1:0' shape=(60, 30) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l3/biases/Adam:0' shape=(30,) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l3/biases/Adam_1:0' shape=(30,) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l4/kernel/Adam:0' shape=(30, 1) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l4/kernel/Adam_1:0' shape=(30, 1) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l4/bias/Adam:0' shape=(1,) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l4/bias/Adam_1:0' shape=(1,) dtype=float32_ref>

这两个列表的唯一区别是 <tf.Variable 'Variable:0' shape=() dtype=int32_ref> ,我不知道这是做什么用的以及它是如何生成的。但是,我不认为这就是问题所在,因为我其他可以正常恢复的模型也都有这个变量。

感谢您为解决此错误提供的任何帮助和评论。

最佳答案

我通过使用以下代码在恢复时排除该单个变量解决了问题:

variables = slim.get_variables_to_restore()
variables_to_restore = [v for v in variables if 'global_step_counter' not in v.name.split('/')[0]]

saver = tf.train.Saver(variables_to_restore)

关于python - NotFoundError(请参阅上面的回溯): Key Variable not found in checkpoint,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/52376692/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com