Проблема с изменением размера изображения при выполнении скрипта с моделью, созданной с помощью yolov5

Проблема с изменением размера изображения при выполнении скрипта с моделью, созданной с помощью yolov5 ⇐ Python

1 сообщение • Страница 1 из 1

Anonymous

Проблема с изменением размера изображения при выполнении скрипта с моделью, созданной с помощью yolov5

Цитата

Сообщение Anonymous » 16 янв 2025, 03:27

Я использую роботизированную руку HiWonder JetMax Vision с камерой, чтобы выполнять некоторые действия — касаться значка настроек на экране телефона Samsung. Я обучил модель YOLOv5 на своём наборе данных с размером изображения 640, но при запуске скрипта он завершается с ошибкой:

Код: Выделить всё

ValueError: cannot reshape array of size 151200 into shape (1,3,80,80,6).

Ниже приведён скрипт, который я использую:
#!/usr/bin/env python3
import os
import sys
import cv2
import numpy as np
import queue
from yolov5_tensorrt import Yolov5TensorRT

# Model configuration.
# The engine path is resolved relative to sys.path[0] rather than the current
# working directory.
TRT_ENGINE_PATH = os.path.join(sys.path[0], "models/android6.trt")  # Path to your TensorRT model
# NOTE(review): the reported failure "cannot reshape array of size 151200 into
# shape (1,3,80,80,6)" matches 3 * (80*80 + 40*40 + 20*20) * 6 values, which
# suggests the engine returns one concatenated output while the wrapper expects
# a separate tensor per scale — confirm against how the engine was exported.
TRT_INPUT_SIZE = 640  # Default input size for the model
TRT_CLASS_NAMES = ('samsungsettings',)  # Class names in the model
TRT_NUM_CLASSES = len(TRT_CLASS_NAMES)  # Derived so it always matches the names tuple
# One color per class, looked up by class id when drawing boxes.
# NOTE(review): OpenCV drawing calls normally take BGR, so (255, 0, 0) would be blue — confirm.
COLORS = [(255, 0, 0)]  # Assign a distinct color for the settings icon bounding box

# Initialize components (runs at import time, so the engine file must exist then).
yolov5 = Yolov5TensorRT(TRT_ENGINE_PATH, TRT_INPUT_SIZE, TRT_NUM_CLASSES)

def process_frame(frame, input_size):
    """Run detection on ``frame`` and draw the labelled boxes onto it.

    The frame is resized to ``input_size`` for the model, and the returned
    boxes are scaled back to the frame's own resolution before drawing.

    Args:
        frame: Image array; it is drawn on in place.
        input_size: Two-element size passed to ``cv2.resize``; element 0
            scales x coordinates and element 1 scales y coordinates.

    Returns:
        The same ``frame`` object, annotated.
    """
    net_w = input_size[0]
    net_h = input_size[1]

    # The model sees a resized copy; the caller's frame keeps its resolution.
    model_input = cv2.resize(frame, input_size)
    raw_outputs = yolov5.detect(model_input)

    # 0.6 is the confidence threshold handed to the wrapper's post-processing.
    boxes, confs, classes = yolov5.post_process(model_input, raw_outputs, 0.6)

    frame_h, frame_w = frame.shape[:2]
    for box, score, class_idx in zip(boxes, confs, classes):
        # Map model-space coordinates back to frame-space pixels.
        left = int(box[0] / net_w * frame_w)
        top = int(box[1] / net_h * frame_h)
        right = int(box[2] / net_w * frame_w)
        bottom = int(box[3] / net_h * frame_h)
        caption = TRT_CLASS_NAMES[class_idx]

        cv2.rectangle(frame, (left, top), (right, bottom), COLORS[class_idx], 2)
        cv2.putText(
            frame,
            f"{caption} {score:.2f}",
            (left, top - 5),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            COLORS[class_idx],
            2,
        )

    return frame

def main(input_size=(640, 640)):
    """Show live detections from camera 0 until 'q' is pressed or a read fails.

    Args:
        input_size: Two-element size forwarded to ``process_frame``.
    """
    # Open video capture
    cap = cv2.VideoCapture(0)  # Adjust the camera index as needed

    if not cap.isOpened():
        print("Error: Could not open video source.")
        return

    try:
        while True:
            ret, frame = cap.read()
            # Check the read result before using the frame: after a failed
            # read the frame is not usable, and converting it would raise
            # instead of reaching this message.
            if not ret:
                print("Failed to grab frame.")
                break

            # NOTE(review): the detector receives a single-channel image here;
            # confirm the model/wrapper really expects grayscale input.
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Process the current frame for object detection
            result_frame = process_frame(gray, input_size)

            # Display the resulting frame
            cv2.imshow("Icon Detection", result_frame)

            # Break the loop on 'q' key press
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    finally:
        # Always free the camera and close the preview window.
        cap.release()
        cv2.destroyAllWindows()

if __name__ == '__main__':
    # Script entry point; edit the tuple below to try a different input size.
    main(input_size=(640, 640))

скрипт yolov5_tensorrt.py

Код: Выделить всё

import cv2
import sys
import os
import tensorrt as trt
import pycuda.autoinit
import pycuda.driver as cuda
import numpy as np
import math

# Simple helper data class that's a little nicer to use than a 2-tuple.
class HostDeviceMem:
    """Bundle two buffers, exposed as ``.host`` and ``.device``."""

    def __init__(self, host_mem, device_mem):
        self.host, self.device = host_mem, device_mem

    def __str__(self):
        # Same layout as before: a heading line followed by each buffer's str().
        pieces = ("Host:\n", str(self.host), "\nDevice:\n", str(self.device))
        return "".join(pieces)

    def __repr__(self):
        # repr and str are intentionally the same text.
        return self.__str__()

def sigmoid_v(array):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-array))``."""
    denominator = 1.0 + np.exp(-array)
    return np.reciprocal(denominator)

def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of a scalar.

    ``math.exp`` raises ``OverflowError`` for large positive arguments, so the
    naive formula fails for very negative ``x``. The exponential is therefore
    always taken of a non-positive number, which can only underflow to 0.0.

    Args:
        x: A real number.

    Returns:
        A float in the closed interval [0.0, 1.0].
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # For x < 0, exp(x) / (1 + exp(x)) is the same function algebraically.
    e = math.exp(x)
    return e / (1 + e)

# Appears to implement box suppression by overlap: candidates are visited in
# descending score order and the overlap of the current best box with the
# remaining ones is computed.
# NOTE(review): this paste has lost its indentation and is truncated (see the
# note near the end); `classes` and `iou_thres` are not used in the visible part.
def non_max_suppression(boxes, confs, classes, iou_thres=0.6):
# Columns 0..3 of `boxes` are read as x1, y1, x2, y2.
x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2]
y2 = boxes[:, 3]
# Areas use the inclusive-pixel convention (+1 on each side length).
areas = (x2 - x1 + 1) * (y2 - y1 + 1)
# Candidate indices sorted by descending confidence.
order = confs.flatten().argsort()[::-1]
keep = []
while order.size >  0:
# The highest-scoring remaining candidate is always kept.
i = order[0]
keep.append(i)
# Intersection rectangle of box i with every other remaining box.
xx1 = np.maximum(x1[i], x1[order[1:]])
yy1 = np.maximum(y1[i], y1[order[1:]])
xx2 = np.minimum(x2[i], x2[order[1:]])
yy2 = np.minimum(y2[i], y2[order[1:]])
# Clamp to zero so non-overlapping boxes give an empty intersection.
w = np.maximum(0.0, xx2 - xx1 + 1)
h = np.maximum(0.0, yy2 - yy1 + 1)
inter = w * h
# Intersection over union of box i with each remaining box.
ovr = inter / (areas[i] + areas[order[1:]] - inter)
# NOTE(review): the next line is cut off mid-expression; the text between two
# comparison operators was presumably stripped as markup. The remaining lines
# use conf_thres, pred, xc, nms and nms_thres, none of which are defined in
# this function, so they presumably belong to a later definition. Restore this
# section from the original file before running it.
inds = np.where(ovr  conf_thres
pred = pred[xc]
return nms(pred, nms_thres)

Скрипт работает нормально, когда я использую размер изображения 160, но тогда камера не распознаёт значок на экране Android. Я также протестировал набор данных на Roboflow, и там он отлично работает с моей веб-камерой при размере 640. Буду очень признателен за любые рекомендации.

Подробнее здесь: https://stackoverflow.com/questions/793 ... ing-yolov5

1736987261

Anonymous

Я использую роботизированную руку HiWonder JetMax Vision с камерой, чтобы выполнять некоторые действия — касаться значка настроек на экране телефона Samsung. Я обучил модель YOLOv5 на своём наборе данных с размером изображения 640, но при запуске скрипта он завершается с ошибкой:
[code]ValueError: cannot reshape array of size 151200 into shape (1,3,80,80,6).[/code]
Ниже приведён скрипт, который я использую:
[code]#!/usr/bin/env python3
import os
import sys
import cv2
import numpy as np
import queue
from yolov5_tensorrt import Yolov5TensorRT

# Model configuration.
# The engine path is resolved relative to sys.path[0] rather than the current
# working directory.
TRT_ENGINE_PATH = os.path.join(sys.path[0], "models/android6.trt")  # Path to your TensorRT model
# NOTE(review): the reported failure "cannot reshape array of size 151200 into
# shape (1,3,80,80,6)" matches 3 * (80*80 + 40*40 + 20*20) * 6 values, which
# suggests the engine returns one concatenated output while the wrapper expects
# a separate tensor per scale — confirm against how the engine was exported.
TRT_INPUT_SIZE = 640  # Default input size for the model
TRT_CLASS_NAMES = ('samsungsettings',)  # Class names in the model
TRT_NUM_CLASSES = len(TRT_CLASS_NAMES)  # Derived so it always matches the names tuple
# One color per class, looked up by class id when drawing boxes.
# NOTE(review): OpenCV drawing calls normally take BGR, so (255, 0, 0) would be blue — confirm.
COLORS = [(255, 0, 0)]  # Assign a distinct color for the settings icon bounding box

# Initialize components (runs at import time, so the engine file must exist then).
yolov5 = Yolov5TensorRT(TRT_ENGINE_PATH, TRT_INPUT_SIZE, TRT_NUM_CLASSES)

def process_frame(frame, input_size):
    """Run detection on ``frame`` and draw the labelled boxes onto it.

    The frame is resized to ``input_size`` for the model, and the returned
    boxes are scaled back to the frame's own resolution before drawing.

    Args:
        frame: Image array; it is drawn on in place.
        input_size: Two-element size passed to ``cv2.resize``; element 0
            scales x coordinates and element 1 scales y coordinates.

    Returns:
        The same ``frame`` object, annotated.
    """
    net_w = input_size[0]
    net_h = input_size[1]

    # The model sees a resized copy; the caller's frame keeps its resolution.
    model_input = cv2.resize(frame, input_size)
    raw_outputs = yolov5.detect(model_input)

    # 0.6 is the confidence threshold handed to the wrapper's post-processing.
    boxes, confs, classes = yolov5.post_process(model_input, raw_outputs, 0.6)

    frame_h, frame_w = frame.shape[:2]
    for box, score, class_idx in zip(boxes, confs, classes):
        # Map model-space coordinates back to frame-space pixels.
        left = int(box[0] / net_w * frame_w)
        top = int(box[1] / net_h * frame_h)
        right = int(box[2] / net_w * frame_w)
        bottom = int(box[3] / net_h * frame_h)
        caption = TRT_CLASS_NAMES[class_idx]

        cv2.rectangle(frame, (left, top), (right, bottom), COLORS[class_idx], 2)
        cv2.putText(
            frame,
            f"{caption} {score:.2f}",
            (left, top - 5),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            COLORS[class_idx],
            2,
        )

    return frame

def main(input_size=(640, 640)):
    """Show live detections from camera 0 until 'q' is pressed or a read fails.

    Args:
        input_size: Two-element size forwarded to ``process_frame``.
    """
    # Open video capture
    cap = cv2.VideoCapture(0)  # Adjust the camera index as needed

    if not cap.isOpened():
        print("Error: Could not open video source.")
        return

    try:
        while True:
            ret, frame = cap.read()
            # Check the read result before using the frame: after a failed
            # read the frame is not usable, and converting it would raise
            # instead of reaching this message.
            if not ret:
                print("Failed to grab frame.")
                break

            # NOTE(review): the detector receives a single-channel image here;
            # confirm the model/wrapper really expects grayscale input.
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Process the current frame for object detection
            result_frame = process_frame(gray, input_size)

            # Display the resulting frame
            cv2.imshow("Icon Detection", result_frame)

            # Break the loop on 'q' key press
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    finally:
        # Always free the camera and close the preview window.
        cap.release()
        cv2.destroyAllWindows()

if __name__ == '__main__':
    # Script entry point; edit the tuple below to try a different input size.
    main(input_size=(640, 640))
[/code]
скрипт yolov5_tensorrt.py
[code]import cv2
import sys
import os
import tensorrt as trt
import pycuda.autoinit
import pycuda.driver as cuda
import numpy as np
import math

# Simple helper data class that's a little nicer to use than a 2-tuple.
class HostDeviceMem:
    """Bundle two buffers, exposed as ``.host`` and ``.device``."""

    def __init__(self, host_mem, device_mem):
        self.host, self.device = host_mem, device_mem

    def __str__(self):
        # Same layout as before: a heading line followed by each buffer's str().
        pieces = ("Host:\n", str(self.host), "\nDevice:\n", str(self.device))
        return "".join(pieces)

    def __repr__(self):
        # repr and str are intentionally the same text.
        return self.__str__()

def sigmoid_v(array):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-array))``."""
    denominator = 1.0 + np.exp(-array)
    return np.reciprocal(denominator)

def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of a scalar.

    ``math.exp`` raises ``OverflowError`` for large positive arguments, so the
    naive formula fails for very negative ``x``. The exponential is therefore
    always taken of a non-positive number, which can only underflow to 0.0.

    Args:
        x: A real number.

    Returns:
        A float in the closed interval [0.0, 1.0].
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # For x < 0, exp(x) / (1 + exp(x)) is the same function algebraically.
    e = math.exp(x)
    return e / (1 + e)

# Appears to implement box suppression by overlap: candidates are visited in
# descending score order and the overlap of the current best box with the
# remaining ones is computed.
# NOTE(review): this paste has lost its indentation and is truncated (see the
# note near the end); `classes` and `iou_thres` are not used in the visible part.
def non_max_suppression(boxes, confs, classes, iou_thres=0.6):
# Columns 0..3 of `boxes` are read as x1, y1, x2, y2.
x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2]
y2 = boxes[:, 3]
# Areas use the inclusive-pixel convention (+1 on each side length).
areas = (x2 - x1 + 1) * (y2 - y1 + 1)
# Candidate indices sorted by descending confidence.
order = confs.flatten().argsort()[::-1]
keep = []
while order.size >  0:
# The highest-scoring remaining candidate is always kept.
i = order[0]
keep.append(i)
# Intersection rectangle of box i with every other remaining box.
xx1 = np.maximum(x1[i], x1[order[1:]])
yy1 = np.maximum(y1[i], y1[order[1:]])
xx2 = np.minimum(x2[i], x2[order[1:]])
yy2 = np.minimum(y2[i], y2[order[1:]])
# Clamp to zero so non-overlapping boxes give an empty intersection.
w = np.maximum(0.0, xx2 - xx1 + 1)
h = np.maximum(0.0, yy2 - yy1 + 1)
inter = w * h
# Intersection over union of box i with each remaining box.
ovr = inter / (areas[i] + areas[order[1:]] - inter)
# NOTE(review): the next line is cut off mid-expression; the text between two
# comparison operators was presumably stripped as markup. The remaining lines
# use conf_thres, pred, xc, nms and nms_thres, none of which are defined in
# this function, so they presumably belong to a later definition. Restore this
# section from the original file before running it.
inds = np.where(ovr  conf_thres
pred = pred[xc]
return nms(pred, nms_thres)

[/code]
Скрипт работает нормально, когда я использую размер изображения 160, но тогда камера не распознаёт значок на экране Android. Я также протестировал набор данных на Roboflow, и там он отлично работает с моей веб-камерой при размере 640. Буду очень признателен за любые рекомендации.

Подробнее здесь: [url]https://stackoverflow.com/questions/79360063/image-resizing-issue-while-executing-script-with-a-model-created-using-yolov5[/url]

Ответить Пред. тема След. тема

1 сообщение • Страница 1 из 1

Быстрый ответ

Заголовок:

Имя пользователя:

Изменение регистра текста:

Смайлики

Ещё смайлики…

К этому ответу прикреплено по крайней мере одно вложение.

Если вы не хотите добавлять вложения, оставьте поля пустыми. Можно прикреплять файлы, перетаскивая их в окно сообщения.

Максимально разрешённый размер вложения: 15 МБ.

Имя файла:

Комментарий к файлу:

Имя файла	Комментарий к файлу	Размер	Статус

Похожие темы

Ответы

Просмотры

Последнее сообщение

Проблема с изменением размера изображения при выполнении скрипта с моделью, созданной с помощью yolov5

Последнее сообщение Anonymous « 16 янв 2025, 06:11
Добавлено в форуме Python

Anonymous » 16 янв 2025, 06:11 » в форуме Python

Я использую роботизированную руку HiWonder jetmax Vision с камерой, чтобы выполнять некоторые действия по касанию значка настроек телефона Samsung. Я обучил свой набор данных с помощью yolov5 с размером изображения 640, но когда я выполняю свой...

0 Ответы

15 Просмотры

Последнее сообщение Anonymous
16 янв 2025, 06:11
Холодный запуск AnalyzeDocumentAsync с предварительно созданной моделью

Последнее сообщение Anonymous « 30 окт 2024, 14:18
Добавлено в форуме C#

Anonymous » 30 окт 2024, 14:18 » в форуме C#

В консольном приложении .net framework 4.8 я использую приведенный ниже код (из примеров Azure) с ценовой категорией S0 и местоположением во Франции.
Каждый раз, когда я запускаю приложение, первый вызов AnalyzeDocumentAsync занимает около 20 секунд,...

0 Ответы

12 Просмотры

Последнее сообщение Anonymous
30 окт 2024, 14:18
Pytorch не работает с обученной моделью + предварительно обученной моделью (Intel Open Vino)

Последнее сообщение Гость « 29 фев 2024, 09:54
Добавлено в форуме Python

Гость » 29 фев 2024, 09:54 » в форуме Python

def CLASAAAAABOX(CLASAAAAA,frame): frameHeight=frame.shape FrameWidth=frame.shape blob=cv2.dnn.blobFromImage(frame, 1.0, (672, 384), swapRB=False, crop=True) CLASAAAAA.setInput(blob) detection = CLASAAAAA.forward() bboxs=[] for i in range...

0 Ответы

102 Просмотры

Последнее сообщение Гость
29 фев 2024, 09:54
Pytorch не работает с обученной моделью + предварительно обученной моделью (Intel Open Vino)

Последнее сообщение Гость « 12 мар 2024, 09:21
Добавлено в форуме Python

Гость » 12 мар 2024, 09:21 » в форуме Python

def PeopleBox(PeopleNet,frame):
frameHeight=frame.shape
frameWidth=frame.shape
blob=cv2.dnn.blobFromImage(frame, 1.0, (672,384), swapRB=False, crop=True)
PeopleNet.setInput(blob)
detection=PeopleNet.forward()
bboxs=[]
for i in...

0 Ответы

89 Просмотры

Последнее сообщение Гость
12 мар 2024, 09:21
Огромная разница между моделью (входные данные, обучение = True) и моделью (входные данные, обучение = False)

Последнее сообщение Anonymous « 28 окт 2024, 19:16
Добавлено в форуме Python

Anonymous » 28 окт 2024, 19:16 » в форуме Python

Мне было поручено реализовать модель машинного обучения на основе прочитанной мной статьи. Для этого в документе рекомендуется использовать определенный тип меры надежности прогноза: модель выполняет M стохастических проходов вперед с M различными...

0 Ответы

56 Просмотры

Последнее сообщение Anonymous
28 окт 2024, 19:16

Вернуться в «Python»