I'm trying to run a binary semantic segmentation model that I want to speed up with TensorRT. I'm basing this on https://stackoverflow.com/questions/59280745/inference-with-tensorrt-engine-file-on-python/67492525#67492525:~:text=I%20have-,updated,-%40Oguz%20Vuruskaner%27s%20answer and https://tengteng.medium.com/example-inference-code-to-run-tensorrt-10-0-32ea93fdcc2e

INFO:
- WSL2 with Ubuntu 22.04
- CUDA 12.4
- TensorRT version 10.7
- input is an image (1x3x1024x1024), output is (1x1x1024x1024)

Checking the engine file gives:

&&&& PASSED TensorRT.trtexec [TensorRT v100700] [b23] # /usr/src/tensorrt/bin/trtexec --loadEngine=unet_mobileone.trt --shapes=input:1x3x1024x1024 --verbose

Current code:

import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit  # note: importing this is required, it initializes the CUDA context
import tensorrt as trt
from PIL import Image

class TensorRTInference:
    def __init__(self, engine_path):
        # initialize
        self.logger = trt.Logger(trt.Logger.ERROR)
        self.runtime = trt.Runtime(self.logger)
        # setup
        self.engine = self.load_engine(engine_path)
        self.context = self.engine.create_execution_context()
        # allocate buffers
        self.inputs, self.outputs, self.bindings, self.stream = self.allocate_buffers(
            self.engine
        )

    def load_engine(self, engine_path):
        # loads the model from the given file path
        with open(engine_path, "rb") as f:
            engine = self.runtime.deserialize_cuda_engine(f.read())
        return engine
    class HostDeviceMem:
        def __init__(self, host_mem, device_mem, shape):
            # keeping track of addresses
            self.host = host_mem
            self.device = device_mem
            # keeping track of shape to un-flatten it later
            self.shape = shape
    def allocate_buffers(self, engine):
        inputs, outputs, bindings = [], [], []
        stream = cuda.Stream()
        for i in range(engine.num_io_tensors):
            tensor_name = engine.get_tensor_name(i)
            shape = engine.get_tensor_shape(tensor_name)
            size = trt.volume(shape)
            dtype = trt.nptype(engine.get_tensor_dtype(tensor_name))
            # allocate host and device buffers
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            # append the device buffer address to device bindings
            bindings.append(int(device_mem))
            # append to the appropriate input/output list
            if engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.INPUT:
                inputs.append(self.HostDeviceMem(host_mem, device_mem, shape))
            else:
                outputs.append(self.HostDeviceMem(host_mem, device_mem, shape))
        return inputs, outputs, bindings, stream
    def infer(self, input_data):
        # transfer input data to the device
        # (np.copyto requires input_data.size to match the allocated buffer size exactly)
        np.copyto(self.inputs[0].host, input_data.ravel())
        cuda.memcpy_htod_async(self.inputs[0].device, self.inputs[0].host, self.stream)
        # set the tensor addresses
        for i in range(self.engine.num_io_tensors):
            self.context.set_tensor_address(
                self.engine.get_tensor_name(i), self.bindings[i]
            )
        # run inference
        self.context.execute_async_v3(stream_handle=self.stream.handle)
        # transfer predictions back
        for i in range(len(self.outputs)):
            cuda.memcpy_dtoh_async(
                self.outputs[i].host, self.outputs[i].device, self.stream
            )
        # synchronize the stream
        self.stream.synchronize()
        # un-flatten the outputs
        outputs = []
        for i in range(len(self.outputs)):
            output = self.outputs[i].host
            output = output.reshape(self.outputs[i].shape)
            outputs.append(output)
        return outputs

if __name__ == "__main__":
    engine_path = "unet_mobilenet_2.trt"
    trt_inference = TensorRTInference(engine_path)

    img = Image.open("data/test/images/lineImage0.jpeg")
    img_array = np.array(img)
    inputs = img_array.transpose(2, 0, 1)  # (3, 1024, 1024)

    # Run inference
    output_data = trt_inference.infer(inputs)
Problem:

np.copyto(self.inputs[0].host, input_data.ravel())

ValueError: could not broadcast input array from shape (5953536) into shape (3145728)

In general, there is no clear example for this, so any help is appreciated.
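Note that 3145728 = 3 × 1024 × 1024, i.e. exactly the engine's 1x3x1024x1024 input, while the flattened image has 5953536 elements, so the JPEG being loaded is evidently not a 3-channel 1024x1024 image. As a first check, here is a minimal sketch (using only the TensorRT 10 Python API already shown above; `describe_engine` is a hypothetical helper, not part of the original code) that prints what the deserialized engine actually expects:

import tensorrt as trt

def describe_engine(engine):
    # print every I/O tensor's mode, name, shape, dtype and element count
    for i in range(engine.num_io_tensors):
        name = engine.get_tensor_name(i)
        mode = engine.get_tensor_mode(name)    # trt.TensorIOMode.INPUT or .OUTPUT
        shape = engine.get_tensor_shape(name)  # e.g. (1, 3, 1024, 1024)
        dtype = engine.get_tensor_dtype(name)  # e.g. trt.DataType.FLOAT
        print(mode, name, tuple(shape), dtype, trt.volume(shape))

For example, calling describe_engine(trt_inference.engine) right after constructing TensorRTInference shows the shape and dtype the input copy has to match.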
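And a hedged preprocessing sketch that resizes the image to whatever shape the engine reports, instead of assuming the file on disk is already 1024x1024 RGB. `preprocess_for_engine` and the bilinear resampling are illustrative choices, not part of the original code, and any normalization the model was trained with (scaling to [0, 1], mean/std) would still need to be added:

import numpy as np
import tensorrt as trt
from PIL import Image

def preprocess_for_engine(engine, image_path):
    # assumes I/O tensor 0 is the static-shape input, as in the code above
    name = engine.get_tensor_name(0)
    n, c, h, w = engine.get_tensor_shape(name)         # e.g. (1, 3, 1024, 1024)
    dtype = trt.nptype(engine.get_tensor_dtype(name))
    img = Image.open(image_path).convert("RGB")        # drops any alpha channel
    img = img.resize((w, h), Image.BILINEAR)           # PIL expects (width, height)
    arr = np.asarray(img, dtype=dtype)                 # HWC
    return np.ascontiguousarray(arr.transpose(2, 0, 1))  # CHW, matches infer()

With this, inputs = preprocess_for_engine(trt_inference.engine, "data/test/images/lineImage0.jpeg") would replace the manual Image.open/transpose block in __main__.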