Код: Выделить всё
import torch
from groundingdino.util.inference import load_model, load_image, predict, annotate
import cv2
# Load the model
model = load_model("../GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py",
"../GroundingDINO/weights/groundingdino_swint_ogc.pth")
IMAGE_PATH = ".asset/cat_dog.jpeg"
TEXT_PROMPT = "person . animal . bird . object"
BOX_THRESHOLD = 0.35
TEXT_THRESHOLD = 0.25
# Load the image
image_source, image = load_image(IMAGE_PATH)
# Perform prediction
boxes, logits, phrases = predict(
model=model,
image=image,
caption=TEXT_PROMPT,
box_threshold=BOX_THRESHOLD,
text_threshold=TEXT_THRESHOLD
)
# Get image dimensions
ht, wd = image_source.shape[:2]
print(ht, wd, image_source.shape[:2])
# Convert bounding boxes to absolute coordinates
abs_box = boxes * torch.tensor([wd, ht, wd, ht])
abs_box = [abs_bo.numpy().astype("int") for abs_bo in abs_box]
annotated_frame = annotate(image_source=image_source, boxes=boxes, logits=logits, phrases=phrases)
for abs_bo in abs_box:
cv2.rectangle(annotated_frame, (abs_bo[0], abs_bo[1]),[![enter image description here][1]][1] (abs_bo[2], abs_bo[3]), (255, 0, 0), 2)
cv2.imwrite("annotated_image.jpg", annotated_frame)
Голубые коробки сделаны из «абсолютных» координат, любое понимание того, как манипулировать данными, возвращенными для получения абсолютных координат, было бы очень полезно, спасибо.
Подробнее здесь: https://stackoverflow.com/questions/793 ... t-function