Это мой код для расчета bbox:
Код: Выделить всё
from doctr.models import ocr_predictor
from doctr.io import DocumentFile
# Use docTR to analyze the image and obtain the result
line_boundaries = []
model = ocr_predictor(pretrained=True) #setting preserve_aspect_ratio=False or symmetric_pad=False didn't make a difference.
doc = DocumentFile.from_images(img_path)
result = model(doc)
# Extract bounding box coordinates for each line
for page in result.pages:
for block in page.blocks:
for line in block.lines:
#Multiplied relative coordinates with page dimension to get absolute coordinates
x_min, y_min, x_max, y_max = round(line.geometry[0][0] * page.dimensions[0]), round(line.geometry[0][1] * page.dimensions[1]), round(line.geometry[1][0] * page.dimensions[0]), round(line.geometry[1][1] * page.dimensions[1])
line_boundaries.append((x_min, y_min, x_max, y_max))
[(531, 148, 1321, 184), (2725, 148, 3061, 177), (526, 254, 3071, 295), (526, 288, 3071, 332), (535, 324, 3071, 363), ... ]
Это функция, которую я использовал для рисования прямоугольников:< /p>
Код: Выделить всё
import cv2
from google.colab.patches import cv2_imshow #to use instead of cv2.imshow as it causes collab to crash
def draw_rectangles(image_path, line_boundaries):
"""
Draws rectangles on an image using the provided line boundaries.
Args:
image_path: The path to the image file.
line_boundaries: A list of line boundaries, where each boundary is a list of four points.
Returns:
None
"""
# Load the image
image = cv2.imread(img_path)
# Iterate over the line boundaries and draw rectangles
for boundary in line_boundaries:
#x1, y1, x2, y2 = int(boundary[0][0]), int(boundary[0][1]), int(boundary[2][0]), int(boundary[2][1])
x1, y1, x2, y2 = map(int, boundary) # Convert coordinates to integers
cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
# Display the image with rectangles
cv2_imshow(image) # Use cv2_imshow instead of cv2.imshow for collab only
cv2.waitKey(0)
cv2.destroyAllWindows()
[img]https://i .sstatic.net/A2gVCOV8.png[/img]
Я попробовал это без округления, которое не имело никакого значения, а также пробовал использовать только предиктор, но безрезультатно. Установка save_aspect_ratio=False или symmetric_pad=False в ocr_predictor также не изменила ситуацию.
Подробнее здесь: https://stackoverflow.com/questions/787 ... t-in-doctr