
python - Tensorflow: a model wrapper that can release GPU resources


Here is a wrapper around a frozen TensorFlow .pb model (ImageNet classification):

import tensorflow as tf
import numpy as np
import cv2
from numba import cuda


class ModelWrapper():
    def __init__(self, model_filepath):
        self.graph_def = self.load_graph_def(model_filepath)
        self.graph = self.load_graph(self.graph_def)
        self.set_inputs_and_outputs()
        self.sess = tf.Session(graph=self.graph)

        print(self.__class__.__name__, 'call __init__')  #

    def load_graph_def(self, model_filepath):
        # Expects frozen graph in .pb format
        with tf.gfile.GFile(model_filepath, "rb") as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        return graph_def

    def load_graph(self, graph_def):
        with tf.Graph().as_default() as graph:
            tf.import_graph_def(graph_def, name="")
        return graph

    def set_inputs_and_outputs(self):
        # Inputs: every Placeholder op in the graph
        input_list = []
        for op in self.graph.get_operations():  # tensorflow.python.framework.ops.Operation
            if op.type == "Placeholder":
                input_list.append(op.name)
        print('Inputs:', input_list)

        # Outputs: nodes that are never consumed as an input by any other node
        all_name_list = []
        input_name_list = []
        for node in self.graph_def.node:  # tensorflow.core.framework.node_def_pb2.NodeDef
            all_name_list.append(node.name)
            input_name_list.extend(node.input)
        output_list = list(set(all_name_list) - set(input_name_list))
        print('Outputs:', output_list)

        self.inputs = []
        self.input_tensor_names = [name + ":0" for name in input_list]
        for input_tensor_name in self.input_tensor_names:
            self.inputs.append(self.graph.get_tensor_by_name(input_tensor_name))

        self.outputs = []
        self.output_tensor_names = [name + ":0" for name in output_list]
        for output_tensor_name in self.output_tensor_names:
            self.outputs.append(self.graph.get_tensor_by_name(output_tensor_name))

        # Read the (batch, height, width, channels) shape of the single input placeholder
        input_dim_list = []
        for op in self.graph.get_operations():  # tensorflow.python.framework.ops.Operation
            if op.type == "Placeholder":
                bs = op.get_attr('shape').dim[0].size
                h = op.get_attr('shape').dim[1].size
                w = op.get_attr('shape').dim[2].size
                c = op.get_attr('shape').dim[3].size
                input_dim_list.append([bs, h, w, c])
        assert len(input_dim_list) == 1
        _, self.input_img_h, self.input_img_w, _ = input_dim_list[0]

    def predict(self, img):
        h, w, c = img.shape
        if h != self.input_img_h or w != self.input_img_w:
            img = cv2.resize(img, (self.input_img_w, self.input_img_h))

        batch = img[np.newaxis, ...]
        feed_dict = {self.inputs[0]: batch}
        outputs = self.sess.run(self.outputs, feed_dict=feed_dict)  # (1, 1001)
        output = outputs[0]
        return output

    def __del__(self):
        print(self.__class__.__name__, 'call __del__')  #
        import time  #
        time.sleep(3)  #
        cuda.close()  # ask numba to destroy the current CUDA context

What I want to do is free the GPU memory once I no longer need a model. In this example I simply create and delete the model in a loop, but in real life it could be several different models. First, download the frozen graph:
wget https://storage.googleapis.com/download.tensorflow.org/models/inception_v3_2016_08_28_frozen.pb.tar.gz
tar -xvzf inception_v3_2016_08_28_frozen.pb.tar.gz
rm -f imagenet_slim_labels.txt
rm -f inception_v3_2016_08_28_frozen.pb.tar.gz

import os
import time

import tensorflow as tf
import numpy as np

from model_wrapper import ModelWrapper

MODEL_FILEPATH = './inception_v3_2016_08_28_frozen.pb'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)


def create_and_delete_in_loop():
    for i in range(10):
        print('-' * 60)
        print('i:', i)
        model = ModelWrapper(MODEL_FILEPATH)
        input_batch = np.zeros((model.input_img_h, model.input_img_w, 3), np.uint8)
        y_pred = model.predict(input_batch)
        print('y_pred.shape', y_pred.shape)
        print('np.argmax(y_pred)', np.argmax(y_pred))
        del model


if __name__ == "__main__":
    create_and_delete_in_loop()

    print('START WAITING')
    time.sleep(10)
    print('END OF THE PROGRAM!')

Output:
------------------------------------------------------------
i: 0
Inputs: ['input']
Outputs: ['InceptionV3/Predictions/Reshape_1']
ModelWrapper call __init__
y_pred.shape (1, 1001)
np.argmax(y_pred) 112
ModelWrapper call __del__
------------------------------------------------------------
i: 1
Inputs: ['input']
Outputs: ['InceptionV3/Predictions/Reshape_1']
ModelWrapper call __init__
Segmentation fault (core dumped)

What is the correct way to free the GPU memory?

Best Answer

TL;DR: run your function as a new process+.

tf.reset_default_graph() is not guaranteed to release memory#. When a process dies, all of the memory it was given, including your GPU memory, is released. Not only does this help keep things neatly compartmentalized, it also lets you analyze how much CPU, GPU, RAM and GPU memory each process consumes.

For example, if you have these functions,

def train_model(x, y, params):
    model = ModelWrapper(params.filepath)
    model.fit(x, y, epochs=params.epochs)


def predict_model(x, params):
    model = ModelWrapper(params.filepath)
    y_pred = model.predict(x)
    print(y_pred.shape)

you can use them like this:

import multiprocessing

for i in range(8):
    print(f"Training Model {i} from {params.filepath}")
    process_train = multiprocessing.Process(target=train_model, args=(x_train, y_train, params))
    process_train.start()
    process_train.join()

print("Predicting")
process_predict = multiprocessing.Process(target=predict_model, args=(x_train, params))
process_predict.start()
process_predict.join()

This way, Python fires up a fresh process for your task, and that process runs with its own memory.
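Getting a result back out of the child works the same way: pass a multiprocessing.Queue in, have the worker put the prediction on it, and let all the TF/GPU state die with the process. The following is a minimal sketch of mine, not from the answer above; run_prediction is a hypothetical helper around the question's ModelWrapper:

import multiprocessing

import numpy as np

from model_wrapper import ModelWrapper

MODEL_FILEPATH = './inception_v3_2016_08_28_frozen.pb'


def run_prediction(model_filepath, img, result_queue):
    # Hypothetical worker: the TF session and its GPU memory live and
    # die entirely inside this child process.
    model = ModelWrapper(model_filepath)
    result_queue.put(model.predict(img))


if __name__ == "__main__":
    result_queue = multiprocessing.Queue()
    img = np.zeros((299, 299, 3), np.uint8)  # dummy input; Inception v3 expects 299x299
    p = multiprocessing.Process(target=run_prediction,
                                args=(MODEL_FILEPATH, img, result_queue))
    p.start()
    y_pred = result_queue.get()  # read before join() to avoid blocking on a full pipe
    p.join()
    print(y_pred.shape)  # (1, 1001)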

Bonus tip: if you have lots of CPUs and GPUs available, you can also choose to run the processes in parallel: in that case just call process_train.join() after the loop instead of inside it. If you have eight GPUs, you can use this parent script to serve the arguments, while each individual process should run on a different GPU.
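The answer doesn't spell out how each child ends up on its own GPU; a common way (my assumption, not part of the original answer) is to set CUDA_VISIBLE_DEVICES inside the child before TensorFlow initializes CUDA. A sketch along those lines, with a hypothetical predict_on_gpu worker and assuming eight GPUs:

import multiprocessing
import os

import numpy as np


def predict_on_gpu(gpu_id, model_filepath, img):
    # Hypothetical worker: hide all but one GPU from this child. The env
    # var must be set before TensorFlow initializes CUDA, which is why
    # model_wrapper (which imports tensorflow) is imported only afterwards.
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
    from model_wrapper import ModelWrapper
    model = ModelWrapper(model_filepath)
    print('GPU', gpu_id, 'argmax:', np.argmax(model.predict(img)))


if __name__ == "__main__":
    multiprocessing.set_start_method('spawn')  # each child gets a fresh interpreter
    img = np.zeros((299, 299, 3), np.uint8)    # dummy input
    processes = []
    for gpu_id in range(8):                    # assumes eight GPUs
        p = multiprocessing.Process(target=predict_on_gpu,
                                    args=(gpu_id, './inception_v3_2016_08_28_frozen.pb', img))
        p.start()
        processes.append(p)
    for p in processes:                        # join after the loop, so they run in parallel
        p.join()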

# Before I started using processes I tried various different things, individually and in combination:

tf.reset_default_graph()
K.clear_session()  # K is the Keras backend
cuda.select_device(0); cuda.close()
model = get_new_model()  # overwrite
model = None
del model
gc.collect()

+ I also considered threads and subprocess.Popen, but I settled on multiprocessing because it gives full decoupling, which makes managing and allocating resources much easier.

Regarding python - Tensorflow: a model wrapper that can release GPU resources, a similar question can be found on Stack Overflow: https://stackoverflow.com/questions/58792739/
