Автоматически поворачивать документ, если он не находится в нужном положении для распознавания текста с помощью ocr [зак

Автоматически поворачивать документ, если он не находится в нужном положении для распознавания текста с помощью ocr [зак ⇐ Python

1 сообщение • Страница 1 из 1

Anonymous

Автоматически поворачивать документ, если он не находится в нужном положении для распознавания текста с помощью ocr [зак

Цитата

Сообщение Anonymous » 13 окт 2024, 01:13

Я начал с импорта библиотек и загрузки изображения для теста:

Код: Выделить всё

import cv2
import numpy as np
from imutils.perspective import four_point_transform
import pytesseract
import os

# Test image. Raw strings keep Windows backslashes literal, so every path
# separator is written exactly once (the original mixed the raw prefix with
# doubled backslashes, which put "\\" pairs into the actual path).
# The main section guards against a failed load with ``image is not None``.
image = cv2.imread(r'C:\Users\pcmic\fung\kiri.jpg')

# Location of the Tesseract executable used by pytesseract.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Factor applied to every preview window so large scans fit on screen.
scale = 0.5
# NOTE(review): ``font`` is not referenced by the code visible here -- confirm
# whether it is still needed.
font = cv2.FONT_HERSHEY_SIMPLEX

Функция обработки изображения (image_processing):

Код: Выделить всё

def image_processing(image):
    """Binarise a BGR image with a fixed global threshold.

    The image is converted to grayscale and thresholded at 128 with
    ``cv2.THRESH_BINARY`` (maximum value 255).

    Args:
        image: Image passed to ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``.

    Returns:
        The thresholded single-channel image.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (used_threshold, image); only the image matters.
    binary = cv2.threshold(grayscale, 128, 255, cv2.THRESH_BINARY)[1]
    return binary

Это функция поворота (rotate_if_needed):

Код: Выделить всё

def rotate_if_needed(image, contour):
    """Rotate ``image`` by the skew angle of the box enclosing ``contour``.

    The skew is taken from ``cv2.minAreaRect`` and folded into the range
    [-45, 45] degrees so that the smallest correcting rotation is applied.
    The output canvas is enlarged so that no part of the image is cut off.

    Args:
        image: Image to rotate (only ``image.shape[:2]`` is inspected).
        contour: Point set handed to ``cv2.minAreaRect``.

    Returns:
        The rotated image on an enlarged canvas; border pixels are
        replicated from the image edge.
    """
    # minAreaRect returns ((cx, cy), (w, h), angle); only the angle is used.
    angle = cv2.minAreaRect(contour)[-1]

    # A rectangle looks the same every 90 degrees, so fold the reported angle
    # into [-45, 45]. Older OpenCV releases report [-90, 0) while newer ones
    # report (0, 90]; handling both signs keeps an already-upright page from
    # being turned by a quarter turn.
    if angle < -45:
        angle += 90
    elif angle > 45:
        angle -= 90

    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)

    # Size of the axis-aligned box that contains the rotated image.
    cos = np.abs(M[0, 0])
    sin = np.abs(M[0, 1])
    new_w = int((h * sin) + (w * cos))
    new_h = int((h * cos) + (w * sin))

    # Shift the transform so the rotated content is centred on the new canvas.
    M[0, 2] += (new_w / 2) - center[0]
    M[1, 2] += (new_h / 2) - center[1]

    rotated = cv2.warpAffine(
        image,
        M,
        (new_w, new_h),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )

    return rotated

Функция scan_detection для определения угла наклона документа:

Код: Выделить всё

def scan_detection(image):
    """Locate the document outline in ``image`` and return a rotated copy.

    The outline is the largest contour (area above 1000) whose polygonal
    approximation has exactly four vertices. It is stored in the
    module-level ``document_contour``; when no such contour exists the
    global keeps the full-frame rectangle.

    Args:
        image: BGR image to analyse.

    Returns:
        The result of ``rotate_if_needed(image, document_contour)``.
    """
    global document_contour

    height, width = image.shape[:2]
    # Fallback outline: the whole frame.
    document_contour = np.array([[0, 0], [width, 0], [width, height], [0, height]])

    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (5, 5), 0)
    binary = cv2.threshold(
        smoothed, 127, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )[1]

    contours, _hierarchy = cv2.findContours(
        binary, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
    )

    # Candidates are visited from largest to smallest, so the first
    # four-cornered one is the largest, and nothing after the first
    # too-small contour can qualify.
    for candidate in sorted(contours, key=cv2.contourArea, reverse=True):
        if cv2.contourArea(candidate) <= 1000:
            break
        perimeter = cv2.arcLength(candidate, True)
        outline = cv2.approxPolyDP(candidate, 0.015 * perimeter, True)
        if len(outline) == 4:
            document_contour = outline
            break

    return rotate_if_needed(image, document_contour)

Основной процесс отображения и обработки изображения

Код: Выделить всё

if image is not None:
    # Work on a copy so the loaded image itself stays untouched.
    frame_copy = image.copy()

    # Create the output directory before anything is written into it;
    # cv2.imwrite does not create missing directories.
    output_dir = "output"
    os.makedirs(output_dir, exist_ok=True)

    # Detect the document outline. scan_detection() publishes it through the
    # module-level ``document_contour``, whose coordinates refer to the
    # unrotated frame. The rotated image it returns lives on a different
    # canvas, so it must not be combined with those coordinates.
    scan_detection(frame_copy)

    # Display the original image
    cv2.imshow("Input Image", cv2.resize(image, (int(scale * image.shape[1]), int(scale * image.shape[0]))))

    # Apply the four-point transform in the same coordinate frame in which
    # the outline was found; the transform itself straightens the page.
    warped = four_point_transform(frame_copy, document_contour.reshape(4, 2))
    cv2.imshow("Warped", cv2.resize(warped, (int(scale * warped.shape[1]), int(scale * warped.shape[0]))))

    # Threshold the warped image and trim a 10-pixel border on every side.
    processed = image_processing(warped)
    processed = processed[10:processed.shape[0] - 10, 10:processed.shape[1] - 10]
    cv2.imshow("Processed", cv2.resize(processed, (int(scale * processed.shape[1]), int(scale * processed.shape[0]))))

    # Save the processed image and report a failed write instead of
    # ignoring it.
    output_path = os.path.join(output_dir, "scanned_image.jpg")
    if not cv2.imwrite(output_path, processed):
        print("Could not write " + output_path)

    # Perform OCR on the processed image
    ocr_text = pytesseract.image_to_string(processed)
    print("Extracted Text from Image:")
    print(ocr_text)

    # TODO: the OCR text is only printed; it is not yet written to output_dir.

    # Keep the preview windows open until a key is pressed.
    cv2.waitKey(0)
else:
    print("Could not load the input image; check the path passed to cv2.imread.")

cv2.destroyAllWindows()

Код приведён здесь в том порядке, в котором я его писал.
Я ценю вашу помощь, ребята.
Этот код взят из сети; он помогает, но не выполняет свою задачу. Я ожидал, что входное изображение будет повёрнуто, если оно ориентировано неправильно, чтобы распознавание текста (OCR) работало хорошо. Посмотрите приложенный скриншот.
Пример результата

Подробнее здесь: https://stackoverflow.com/questions/790 ... on-with-oc

1728771207

Anonymous

Я начал с импорта библиотек и загрузки изображения для теста:
[code]import cv2
import numpy as np
from imutils.perspective import four_point_transform
import pytesseract
import os

image = cv2.imread(r'C:\\Users\\pcmic\\fung\\kiri.jpg')

pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'

scale = 0.5
font = cv2.FONT_HERSHEY_SIMPLEX
[/code]
Функция обработки изображения (image_processing):
[code]def image_processing(image):
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, threshold = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY)
return threshold
[/code]
Это функция поворота (rotate_if_needed):
[code]def rotate_if_needed(image, contour):
# Get the minimum area bounding box for the contour
rect = cv2.minAreaRect(contour)
box = cv2.boxPoints(rect)
box = np.int0(box)

# Calculate the angle of rotation
angle = rect[-1]

# Adjust the angle to ensure the document is straight
if angle < -45:
angle += 90

# Expand the canvas size to prevent cutting off during rotation
(h, w) = image.shape[:2]
center = (w // 2, h // 2)
M = cv2.getRotationMatrix2D(center, angle, 1.0)

# Calculate the new bounding dimensions after rotation
cos = np.abs(M[0, 0])
sin = np.abs(M[0, 1])
new_w = int((h * sin) + (w * cos))
new_h = int((h * cos) + (w * sin))

# Adjust the rotation matrix to take into account translation
M[0, 2] += (new_w / 2) - center[0]
M[1, 2] += (new_h / 2) - center[1]

# Perform the rotation
rotated = cv2.warpAffine(image, M, (new_w, new_h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)

return rotated
[/code]
Функция scan_detection для определения угла наклона документа:
[code]def scan_detection(image):
global document_contour

document_contour = np.array([[0, 0], [image.shape[1], 0], [image.shape[1], image.shape[0]], [0, image.shape[0]]])

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5, 5), 0)
_, threshold = cv2.threshold(blur, 127, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

contours, _ = cv2.findContours(threshold, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
contours = sorted(contours, key=cv2.contourArea, reverse=True)

max_area = 0
for contour in contours:
area = cv2.contourArea(contour)
if area > 1000:
peri = cv2.arcLength(contour, True)
approx = cv2.approxPolyDP(contour, 0.015 * peri, True)
if area > max_area and len(approx) == 4:
document_contour = approx
max_area = area

# Rotate the image if the document is not straight
rotated_image = rotate_if_needed(image, document_contour)

return rotated_image
[/code]
Основной процесс отображения и обработки изображения
[code]if image is not None:
# Copy the original image
frame_copy = image.copy()

# Perform document scan detection and rotation
rotated_image = scan_detection(frame_copy)

# Display the original image
cv2.imshow("Input Image", cv2.resize(image, (int(scale * image.shape[1]), int(scale * image.shape[0]))))

# Apply four-point transform to get a top-down view of the document
warped = four_point_transform(rotated_image, document_contour.reshape(4, 2))
cv2.imshow("Warped", cv2.resize(warped, (int(scale * warped.shape[1]), int(scale * warped.shape[0]))))

# Process the warped image (thresholding)
processed = image_processing(warped)
processed = processed[10:processed.shape[0] - 10, 10:processed.shape[1] - 10]
cv2.imshow("Processed", cv2.resize(processed, (int(scale * processed.shape[1]), int(scale * processed.shape[0]))))

# Save the processed image
cv2.imwrite("output/scanned_image.jpg", processed)

# Perform OCR on the processed image
ocr_text = pytesseract.image_to_string(processed)
print("Extracted Text from Image:")
print(ocr_text)

# Create the output directory if it doesn't exist
output_dir = "output"
if not os.path.exists(output_dir):
os.makedirs(output_dir)

# Save OCR results to a file in
cv2.waitKey(0)

cv2.destroyAllWindows()
[/code]
Код приведён здесь в том порядке, в котором я его писал.
Я ценю вашу помощь, ребята.
Этот код взят из сети; он помогает, но не выполняет свою задачу. Я ожидал, что входное изображение будет повёрнуто, если оно ориентировано неправильно, чтобы распознавание текста (OCR) работало хорошо. Посмотрите приложенный скриншот.
[b]Пример результата[/b] 

Подробнее здесь: [url]https://stackoverflow.com/questions/79077620/auto-turn-a-document-if-its-not-in-the-right-angel-for-text-recognition-with-oc[/url]

Ответить Пред. тема След. тема

1 сообщение • Страница 1 из 1

Быстрый ответ

Заголовок:

Имя пользователя:

Изменение регистра текста:

Смайлики

Ещё смайлики…

К этому ответу прикреплено по крайней мере одно вложение.

Если вы не хотите добавлять вложения, оставьте поля пустыми. Можно прикреплять файлы, перетаскивая их в окно сообщения.

Максимально разрешённый размер вложения: 15 МБ.

Имя файла:

Комментарий к файлу:

Имя файла	Комментарий к файлу	Размер	Статус

Похожие темы

Ответы

Просмотры

Последнее сообщение

Атомизировать автоматически перевернуть документ, если он не в том положении для распознавания текста

Последнее сообщение Anonymous « 11 окт 2024, 12:50
Добавлено в форуме Python

Anonymous » 11 окт 2024, 12:50 » в форуме Python

Я начал с импорта библиотеки и образа для запуска теста
import cv2
import numpy as np
from imutils.perspective import four_point_transform
import pytesseract
import os

image = cv2.imread(r'C:\\Users\\pcmic\\fung\\kiri.jpg')...

0 Ответы

19 Просмотры

Последнее сообщение Anonymous
11 окт 2024, 12:50
Автоматически поворачивайте документ, если он не под прямым углом для распознавания текста с помощью OCR [закрыто]

Последнее сообщение Anonymous « 13 окт 2024, 02:52
Добавлено в форуме Python

Anonymous » 13 окт 2024, 02:52 » в форуме Python

Я начал с импорта библиотеки и образа для запуска теста
import cv2
import numpy as np
from imutils.perspective import four_point_transform
import pytesseract
import os

image = cv2.imread(r'C:\\Users\\pcmic\\fung\\kiri.jpg')...

0 Ответы

15 Просмотры

Последнее сообщение Anonymous
13 окт 2024, 02:52
OCR для распознавания коротких участков рукописного текста

Последнее сообщение Anonymous « 02 мар 2024, 19:23
Добавлено в форуме Linux

Anonymous » 02 мар 2024, 19:23 » в форуме Linux

I am looking for a handwriting OCR application or library that would run off-line on Linux. The purpose would be to recognize short text pieces entered with a pen (graphics tablet) in a screen annotation program. Certain input requirements like not...

0 Ответы

54 Просмотры

Последнее сообщение Anonymous
02 мар 2024, 19:23
Bootstrap Toolp в неправильном положении при начальном падении, затем в правильном положении

Последнее сообщение Anonymous « 13 фев 2025, 10:09
Добавлено в форуме Html

Anonymous » 13 фев 2025, 10:09 » в форуме Html

Я использую подсказки инструментов из начальной загрузки Twitter на Div на веб -странице. Подсказка инициализируется, но на первом наведении он находится в неправильном положении; Однако в последующих парящих всплеск инструментов находится в...

0 Ответы

17 Просмотры

Последнее сообщение Anonymous
13 фев 2025, 10:09
Bootstrap Toolp в неправильном положении при начальном падении, затем в правильном положении

Последнее сообщение Anonymous « 13 фев 2025, 10:09
Добавлено в форуме CSS

Anonymous » 13 фев 2025, 10:09 » в форуме CSS

Я использую подсказки инструментов из начальной загрузки Twitter на Div на веб -странице. Подсказка инициализируется, но на первом наведении он находится в неправильном положении; Однако в последующих парящих всплеск инструментов находится в...

0 Ответы

16 Просмотры

Последнее сообщение Anonymous
13 фев 2025, 10:09

Вернуться в «Python»