I have the following code for running an inference server with FastAPI. Over about 8 hours my RAM usage grows by roughly 4 GiB.
What is even more interesting: when I stop the container, the RAM is not released.
For instance, before running the Docker container I see 2 GiB of RAM in use; as soon as I start the container, usage jumps to 4 GiB and then starts to grow slowly (after 8 hours it is around 8 GiB). After stopping the container I still see 6 GiB in use... which means roughly 4 GiB of RAM has leaked.
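(The readings above are host-level figures. For reference, one way to take such a reading from Python is sketched below; it assumes the third-party psutil package, which the server code itself does not use.)

import psutil  # assumption: installed separately, not part of the server code below

# Print how much host RAM is currently in use, in GiB
used_gib = psutil.virtual_memory().used / 2**30
print(f"host RAM used: {used_gib:.1f} GiB")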
Where could my problem be?
import tensorflow as tf
import cv2
import numpy as np
import yaml
import time
from loguru import logger
import tracemalloc
# from flask import Flask, render_template, request, make_response
from PIL import Image
import os
import io
import json
import base64
import gc
# tf.config.gpu.set_per_process_memory_fraction(0.75)
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
# ========= Function for preprocessing a raw image and preparing it for inference =========
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    # Resize and pad image while meeting stride-multiple constraints
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better val mAP)
        r = min(r, 1.0)
    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios
    dw /= 2  # divide padding into 2 sides
    dh /= 2
    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (dw, dh)
# ========= Function for loading the model weights =========
def load_graph(frozen_graph_filename):
    with tf.io.gfile.GFile(frozen_graph_filename, "rb") as f:
        graph_def = tf.compat.v1.GraphDef()
        graph_def.ParseFromString(f.read())
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def)
    return graph
# ========= Config class =========
class create_config():
    def __init__(self, file):
        with open(file, 'r') as f:
            data = yaml.safe_load(f)
        self.modelPath = data['modelPath']
        self.logFile = data['logFile']
        self.loggingLevel = data['loggingLevel']
        self.host = data['host']
        self.port = data['port']
# ========= Function for processing the YOLO outputs =========
def YOLOdetect(output_data):  # input = interpreter, output is boxes(xyxy), classes, scores
    output_data = output_data[0]  # x(1, 25200, 7) to x(25200, 7)
    boxes = np.squeeze(output_data[..., :4])  # boxes [25200, 4]
    scores = np.squeeze(output_data[..., 4:5])  # confidences [25200, 1]
    classes = classFilter(output_data[..., 5:])  # get classes
    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
    x, y, w, h = boxes[..., 0], boxes[..., 1], boxes[..., 2], boxes[..., 3]  # xywh
    xyxy = [x - w / 2, y - h / 2, x + w / 2, y + h / 2]  # xywh to xyxy [4, 25200]
    return xyxy, scores, classes  # output is boxes(x,y,x,y), classes(int), scores(float) [predictions length]
# ========= Function for extracting just the class indices =========
def classFilter(classdata):
    classes = []  # create a list
    for i in range(classdata.shape[0]):  # loop through all predictions
        classes.append(classdata[i].argmax())  # get the best classification location
    return classes  # return classes (int)
# ========= Function for scaling coordinates back to the original image resolution =========
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
    # Rescale coords (xyxy) from img1_shape to img0_shape
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]
    coords[:, [0, 2]] -= pad[0]  # x padding
    coords[:, [1, 3]] -= pad[1]  # y padding
    coords[:, :4] /= gain
    clip_coords(coords, img0_shape)
    return coords
# ========= Function that clips coordinates to the image bounds =========
def clip_coords(boxes, shape):
    # Clip xyxy bounding boxes to image shape (height, width)
    boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1])  # x1, x2
    boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0])  # y1, y2
# ========= Model class =========
class Yolov5:
    @logger.catch
    def __init__(self, model_weight):
        self.model_weight = model_weight
        self._start()

    @logger.catch
    def _start(self):
        logger.info(' ========= Loading model weights ========= ')
        self.graph = load_graph(self.model_weight)
        self.x = self.graph.get_tensor_by_name('import/x:0')
        self.y = self.graph.get_tensor_by_name('import/Identity:0')
        # self.sess = tf.compat.v1.Session('', self.graph)
        self.sess = tf.compat.v1.Session(graph=self.graph, config=tf.compat.v1.ConfigProto(log_device_placement=True))

    @logger.catch
    def forward(self, image):
        y_out = self.sess.run(self.y, feed_dict={self.x: image})
        return y_out

    @logger.catch
    def process_image(self, im0):
        im = letterbox(im0, (640, 640), stride=32, auto=False)[0]  # padded resize
        h, w = im.shape[:2]
        # im = im[..., ::-1]  # HWC to CHW, BGR to RGB
        im = np.ascontiguousarray(im)  # contiguous
        im = im.astype('float32')
        im /= 255  # 0 - 255 to 0.0 - 1.0
        if len(im.shape) == 3:
            im = im[None]  # expand for batch dim
        return im, cv2.cvtColor(im0, cv2.COLOR_BGR2RGB), h, w, im0.shape

    @logger.catch
    def postprocess(self, y, h, w, old_shape):
        y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]
        y[0][..., :4] *= [w, h, w, h]  # xywh normalized to pixels
        xyxy, scores, classes = YOLOdetect(y)
        xyxy = np.array(xyxy).T
        indexes = tf.image.non_max_suppression(xyxy, scores, max_output_size=10, iou_threshold=0.5, score_threshold=0.3)
        filtered_xyxy = xyxy[indexes, :]
        filtered_scores = scores[np.array(indexes)]
        scaled_xyxy = scale_coords((h, w), filtered_xyxy, old_shape).round()
        return scaled_xyxy, filtered_scores

    @logger.catch
    def run_local(self, image):
        # PREPROCESS
        im, im0, h, w, old_shape = self.process_image(image)
        # INFERENCE
        y = self.forward(im)
        # POSTPROCESS
        coords, scores = self.postprocess(y, h, w, old_shape)
        return coords, scores
config = create_config('extra/config.yml')
# print('======= Config created =======')
# model = Yolov5(config.modelPath)
# print('======= Model loaded =======')
# print('======= GPU MEMORY =======', tf.config.experimental.get_memory_usage('GPU:0'))
# image_path = 'extra/test_image.jpg'
# # Load the image with OpenCV
# image = cv2.imread(image_path)
# TOTAL_IMAGES = 10000
# start = time.time()
# # INFERENCE
# for _ in range(TOTAL_IMAGES):
# model.run_local(image)
# end = time.time()
# # print('======= GPU MEMORY =======', tf.config.experimental.get_memory_usage('GPU:0'))
# print('======= TOTAL TIME =======', end - start)
# print('======= TIME PER IMAGE =======', (end - start) / TOTAL_IMAGES)
#===============================================================================================================
# Logger setup
logger.add(config.logFile, format="{time} {level} {message}", level=config.loggingLevel)
import io
import uvicorn
from fastapi import FastAPI, File, UploadFile
from fastapi.responses import StreamingResponse
from PIL import Image
from pathlib import Path
from pydantic import BaseModel
app = FastAPI()
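# ========= Request schema: base64-encoded image and confidence threshold =========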
class PredictRequest(BaseModel):
    img: str
    targetPercent: float
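# ========= Inference wrapper: runs the model on the GPU and clears the Keras session =========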
def get_prediction(image):
    with tf.device('/GPU:0'):
        results = model.run_local(image)
    tf.keras.backend.clear_session()
    return results
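# ========= /predict/ endpoint: decode the base64 image, run inference, return detections =========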
@app.post("/predict/")
async def predict_image(request: PredictRequest):
    base64_image = request.img
    conf_threshold = request.targetPercent
    img_data = base64.b64decode(base64_image)
    image_np = np.frombuffer(img_data, np.uint8)
    image = cv2.imdecode(image_np, cv2.IMREAD_COLOR)
    results = get_prediction(image)
    res = []
    for coord, score in zip(results[0], results[1]):
        x1, y1, x2, y2 = list(map(int, coord))
        confidence = float(score)
        if confidence >= conf_threshold:
            res.append({'pointMin': (x1, y1), 'pointMax': (x2, y2), 'label': "people", 'percent': confidence})
    del results, img_data, image_np, image
    gc.collect()
    return json.dumps({'params': res})
if __name__ == '__main__':
    logger.info('starting yolov5 webservice... (TF)')
    logger.info(f"cuda is available: {len(tf.config.list_physical_devices('GPU')) != 0}")
    try:
        model = Yolov5(config.modelPath)
    except Exception as e:
        logger.error(f"couldn't load model weights with error {e}")
    uvicorn.run(app, host=config.host, port=config.port)
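The tracemalloc module is imported at the top of the file but never used. Below is a minimal sketch (illustrative only, not part of the running server) of how it could be wired in to see which Python-level allocations grow between two points in time; note that tracemalloc only tracks Python allocations, so growth inside TensorFlow's native runtime would not show up here.

import tracemalloc

tracemalloc.start()
snapshot_before = tracemalloc.take_snapshot()

# ... serve a batch of requests here ...

snapshot_after = tracemalloc.take_snapshot()
# Print the ten source lines whose allocations grew the most between snapshots
for stat in snapshot_after.compare_to(snapshot_before, 'lineno')[:10]:
    print(stat)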