How can an engine model built with C++ be used for inference from a Python program? #20

Open
fanghaiquan1 opened this issue Dec 25, 2023 · 1 comment

@fanghaiquan1

I built a TensorRT engine model with C++. How can I run inference with it from a Python program?
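
For context, the .engine file serialized by the TensorRT C++ API is an ordinary plan file, so the Python runtime can deserialize it directly; a minimal sketch (the file name here is a placeholder):

import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
with open("model.engine", "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())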

@haoyang6551

import cv2
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt

class HostDeviceMem:
    """A helper class to hold host and device memory."""
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

def load_engine(engine_file_path):
    with open(engine_file_path, 'rb') as f:
        return trt.Runtime(trt.Logger(trt.Logger.WARNING)).deserialize_cuda_engine(f.read())

def allocate_buffers(engine):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()

    for i in range(engine.num_io_tensors):
        tensor_name = engine.get_tensor_name(i)
        size = trt.volume(engine.get_tensor_shape(tensor_name))
        dtype = trt.nptype(engine.get_tensor_dtype(tensor_name))

        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)  # page-locked memory buffer
        device_mem = cuda.mem_alloc(host_mem.nbytes)

        # Append the device buffer address to device bindings.
        bindings.append(int(device_mem))

        # Append to the appropriate input/output list.
        if engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.INPUT:
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))

    return inputs, outputs, bindings, stream

def do_inference(context, bindings, inputs, outputs, stream):
    # Transfer input data to the GPU
    for i in range(len(inputs)):
        cuda.memcpy_htod_async(inputs[i].device, inputs[i].host, stream)

    # Tell the context where each tensor lives on the device
    # (context.engine is the ICudaEngine this context was created from)
    for i in range(len(bindings)):
        context.set_tensor_address(context.engine.get_tensor_name(i), bindings[i])

    # Run inference
    context.execute_async_v3(stream_handle=stream.handle)

    # Transfer predictions back
    for i in range(len(outputs)):
        cuda.memcpy_dtoh_async(outputs[i].host, outputs[i].device, stream)

    # Synchronize the stream
    stream.synchronize()

    return outputs

# Load the TensorRT engine serialized by the C++ application
engine = load_engine("/workspace/project/gpen11ms_clean_engine_test.engine")
context = engine.create_execution_context()

# Allocate host/device buffers for every I/O tensor
inputs, outputs, bindings, stream = allocate_buffers(engine)

# Read and preprocess the input image
img_path = '/workspace/project/input/1.2_00.png'
input_img = cv2.imread(img_path, cv2.IMREAD_COLOR)
fixed_height = 512
fixed_width = 512
input_img = cv2.resize(input_img, (fixed_width, fixed_height))
img = input_img.astype(np.float32)[:, :, ::-1] / 255.0  # BGR -> RGB, scale to [0, 1]
img = img.transpose((2, 0, 1))  # HWC -> CHW
img = (img - 0.5) / 0.5  # normalize to [-1, 1]
img = np.expand_dims(img, axis=0).astype(np.float32)  # add batch dimension

# Copy the input data into the page-locked host buffer
np.copyto(inputs[0].host, img.ravel())

# Run inference
outputs = do_inference(context, bindings, inputs, outputs, stream)
print("Output shape:", outputs[0].host.shape)

img = outputs[0].host.reshape(1, 3, 512, 512)
img = img.transpose(0, 2, 3, 1)  # NCHW -> NHWC
img = img.squeeze(0)  # drop the batch dimension -> (512, 512, 3)
img = img.clip(-1, 1)  # clamp to [-1, 1]
img = (img + 1) * 0.5 * 255  # map back to [0, 255]
img = img[:, :, ::-1]  # RGB -> BGR for OpenCV
img = img.astype(np.uint8)

# Save the result
cv2.imwrite('test.png', img)
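
A note for engines built with dynamic shapes (not the case above): allocate_buffers() sizes its buffers from engine.get_tensor_shape(), which returns -1 for dynamic dimensions. In that case the runtime shape has to be set on the context first, roughly like this (the (1, 3, 512, 512) shape is an assumption matching the preprocessing above):

name = engine.get_tensor_name(0)
if -1 in tuple(engine.get_tensor_shape(name)):        # dynamic dimension present
    context.set_input_shape(name, (1, 3, 512, 512))   # pick a concrete shape for this run
    size = trt.volume(context.get_tensor_shape(name)) # resolved shape for sizing the buffers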

This works for me with pycuda==2024.1.2 and tensorrt==10.3.0.
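
Since a serialized engine is tied to the TensorRT version (and GPU) it was built with, a quick sanity check on the Python side can save some confusion (standard version attributes of the two packages, as far as I know):

import tensorrt as trt
import pycuda

print("TensorRT:", trt.__version__)    # should match the version the C++ build used, e.g. 10.3.0
print("pycuda:", pycuda.VERSION_TEXT)  # e.g. 2024.1.2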
