Обнаружение арифметических операторов в изображении

Обнаружение арифметических операторов в изображении ⇐ Python

1 сообщение • Страница 1 из 1

Anonymous

Обнаружение арифметических операторов в изображении

Цитата

Сообщение Anonymous » 11 июл 2024, 23:56

Я создал сценарий OCR, используя keras_ocr. Входные данные представляют собой блок-схему (оттенки серого). Я хочу извлечь тексты и координаты фигур изображения блок-схемы. Однако он не извлекает арифметические операторы, такие как «+,-,*,/». Иногда он также не обнаруживает числовые значения. Вот мой полный сценарий.

Код: Выделить всё

# Import necessary libraries
import os
import matplotlib.pyplot as plt
import keras_ocr
import cv2
import numpy as np
from google.colab import drive
from symspellpy.symspellpy import SymSpell, Verbosity
import pkg_resources

class OCRProcessor:
    """Extract recognized words and their bounding rectangles from an image.

    Thin wrapper around a keras-ocr ``Pipeline``.

    NOTE(review): the stock keras-ocr recognizer presumably only emits
    characters from its default alphabet (digits and lowercase letters), which
    would explain why symbols such as ``+ - * /`` never show up in the
    results -- confirm against the keras-ocr documentation.
    """

    def __init__(self):
        # Create a pipeline for OCR processing
        self.pipeline = keras_ocr.pipeline.Pipeline()

    @staticmethod
    def _box_to_rect(box):
        """Collapse a polygon of (x, y) points into ``[xmin, ymin, xmax, ymax]``.

        Coordinates are truncated to ``int``.
        """
        xs = [point[0] for point in box]
        ys = [point[1] for point in box]
        return [int(min(xs)), int(min(ys)), int(max(xs)), int(max(ys))]

    def __get_bbox(self, image_path):
        """Run OCR on the image at *image_path*.

        Returns a ``(texts, boxes)`` pair of parallel lists: the recognized
        strings and, for each one, its axis-aligned rectangle as
        ``[xmin, ymin, xmax, ymax]``. The RGB image is kept on ``self.image``.

        Best-effort: any exception is reported with ``print`` and turned into
        ``([], [])`` instead of propagating.
        """
        try:
            # Read the image using OpenCV
            image = cv2.imread(image_path)
            if image is None:
                raise ValueError(f"Image at path {image_path} could not be read.")

            # Convert the image to RGB (keras-ocr expects RGB images)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # Feed the array decoded above to the pipeline instead of reading
            # and converting the same file a second time.
            self.image = image
            prediction_groups = self.pipeline.recognize([image])

            if not prediction_groups or not prediction_groups[0]:
                return [], []

            # Extract the bounding boxes and text
            texts = []
            results = []
            for text, box in prediction_groups[0]:
                texts.append(text)
                results.append(self._box_to_rect(box))

            return texts, results
        except Exception as e:
            print(f"An error occurred in __get_bbox: {e}")
            return [], []

    def process_image(self, image_path):
        """Public entry point; see ``__get_bbox`` for the return contract."""
        return self.__get_bbox(image_path)

# Define the function to correct text
def correct_text(text_array):
    """Spell-correct OCR tokens with SymSpell, leaving non-word tokens alone.

    Tokens that contain no alphabetic character (numbers, operators,
    punctuation) are passed through unchanged: running them through a
    dictionary lookup with an edit distance of 2 can replace them with an
    unrelated English word. Tokens with at least one letter are replaced by
    the closest dictionary suggestion when one exists.

    Returns a new list with the same length and order as *text_array*.
    """
    tokens = list(text_array)

    def has_letters(token):
        return any(ch.isalpha() for ch in token)

    # Nothing to spell-check: skip building SymSpell and loading the dictionary.
    if not any(has_letters(token) for token in tokens):
        return tokens

    # Initialize SymSpell object
    sym_spell = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)

    # Load the dictionary
    dictionary_path = pkg_resources.resource_filename(
        "symspellpy", "frequency_dictionary_en_82_765.txt")
    sym_spell.load_dictionary(dictionary_path, term_index=0, count_index=1)

    corrected_text_array = []
    for text in tokens:
        if not has_letters(text):
            corrected_text_array.append(text)
            continue
        suggestions = sym_spell.lookup(text, Verbosity.CLOSEST, max_edit_distance=2)
        corrected_text_array.append(suggestions[0].term if suggestions else text)
    return corrected_text_array

# Main function
def main(image_path):
    """Run OCR on *image_path*, spell-correct the words and print a report.

    Returns ``(extracted_texts, corrected_texts, coordinates)``; all three are
    empty lists when no text was detected.
    """
    processor = OCRProcessor()
    raw_words, boxes = processor.process_image(image_path)

    if raw_words:
        # Correct the extracted text
        fixed_words = correct_text(raw_words)

        # Print the results
        report = (
            ("Extracted Texts:", raw_words),
            ("Corrected Texts:", fixed_words),
            ("Extracted Coordinates:", boxes),
        )
        for label, value in report:
            print(label, value)

        return raw_words, fixed_words, boxes

    print(f"No text detected in the image at path {image_path}.")
    return [], [], []

# Example usage (you can update the image path as needed)
# Runs the OCR step on a hard-coded path and unpacks the three lists main() returns.
image_path = '/content/Test2.jpg'
ex_text, cr_text, ex_co = main(image_path)

# NOTE(review): detect_shapes is neither defined nor imported anywhere in the
# visible script; as shown, this call would raise NameError. Confirm it is
# defined elsewhere before running.
ex_shape, ex_coor = detect_shapes(image_path)

# Print or use the results
print("Detected Shapes:", ex_shape)
print("Coordinates for Shapes:", ex_coor)

Есть ли какое-либо решение этой проблемы?

Подробнее здесь: https://stackoverflow.com/questions/78736359/detect-arithmetic-operators-in-an-image

1720731407

Anonymous

Я создал сценарий OCR, используя [b]keras_ocr[/b]. Входные данные представляют собой блок-схему (оттенки серого). Я хочу извлечь тексты и координаты фигур изображения блок-схемы. Однако он не извлекает арифметические операторы, такие как «+,-,*,/». Иногда он также не обнаруживает числовые значения. Вот мой полный сценарий.
[code]# Import necessary libraries
import os
import matplotlib.pyplot as plt
import keras_ocr
import cv2
import numpy as np
from google.colab import drive
from symspellpy.symspellpy import SymSpell, Verbosity
import pkg_resources

class OCRProcessor:
    """Extract recognized words and their bounding rectangles from an image.

    Thin wrapper around a keras-ocr ``Pipeline``.

    NOTE(review): the stock keras-ocr recognizer presumably only emits
    characters from its default alphabet (digits and lowercase letters), which
    would explain why symbols such as ``+ - * /`` never show up in the
    results -- confirm against the keras-ocr documentation.
    """

    def __init__(self):
        # Create a pipeline for OCR processing
        self.pipeline = keras_ocr.pipeline.Pipeline()

    @staticmethod
    def _box_to_rect(box):
        """Collapse a polygon of (x, y) points into ``[xmin, ymin, xmax, ymax]``.

        Coordinates are truncated to ``int``.
        """
        xs = [point[0] for point in box]
        ys = [point[1] for point in box]
        return [int(min(xs)), int(min(ys)), int(max(xs)), int(max(ys))]

    def __get_bbox(self, image_path):
        """Run OCR on the image at *image_path*.

        Returns a ``(texts, boxes)`` pair of parallel lists: the recognized
        strings and, for each one, its axis-aligned rectangle as
        ``[xmin, ymin, xmax, ymax]``. The RGB image is kept on ``self.image``.

        Best-effort: any exception is reported with ``print`` and turned into
        ``([], [])`` instead of propagating.
        """
        try:
            # Read the image using OpenCV
            image = cv2.imread(image_path)
            if image is None:
                raise ValueError(f"Image at path {image_path} could not be read.")

            # Convert the image to RGB (keras-ocr expects RGB images)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # Feed the array decoded above to the pipeline instead of reading
            # and converting the same file a second time.
            self.image = image
            prediction_groups = self.pipeline.recognize([image])

            if not prediction_groups or not prediction_groups[0]:
                return [], []

            # Extract the bounding boxes and text
            texts = []
            results = []
            for text, box in prediction_groups[0]:
                texts.append(text)
                results.append(self._box_to_rect(box))

            return texts, results
        except Exception as e:
            print(f"An error occurred in __get_bbox: {e}")
            return [], []

    def process_image(self, image_path):
        """Public entry point; see ``__get_bbox`` for the return contract."""
        return self.__get_bbox(image_path)

# Define the function to correct text
def correct_text(text_array):
    """Spell-correct OCR tokens with SymSpell, leaving non-word tokens alone.

    Tokens that contain no alphabetic character (numbers, operators,
    punctuation) are passed through unchanged: running them through a
    dictionary lookup with an edit distance of 2 can replace them with an
    unrelated English word. Tokens with at least one letter are replaced by
    the closest dictionary suggestion when one exists.

    Returns a new list with the same length and order as *text_array*.
    """
    tokens = list(text_array)

    def has_letters(token):
        return any(ch.isalpha() for ch in token)

    # Nothing to spell-check: skip building SymSpell and loading the dictionary.
    if not any(has_letters(token) for token in tokens):
        return tokens

    # Initialize SymSpell object
    sym_spell = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)

    # Load the dictionary
    dictionary_path = pkg_resources.resource_filename(
        "symspellpy", "frequency_dictionary_en_82_765.txt")
    sym_spell.load_dictionary(dictionary_path, term_index=0, count_index=1)

    corrected_text_array = []
    for text in tokens:
        if not has_letters(text):
            corrected_text_array.append(text)
            continue
        suggestions = sym_spell.lookup(text, Verbosity.CLOSEST, max_edit_distance=2)
        corrected_text_array.append(suggestions[0].term if suggestions else text)
    return corrected_text_array

# Main function
def main(image_path):
    """Run OCR on *image_path*, spell-correct the words and print a report.

    Returns ``(extracted_texts, corrected_texts, coordinates)``; all three are
    empty lists when no text was detected.
    """
    processor = OCRProcessor()
    raw_words, boxes = processor.process_image(image_path)

    if raw_words:
        # Correct the extracted text
        fixed_words = correct_text(raw_words)

        # Print the results
        report = (
            ("Extracted Texts:", raw_words),
            ("Corrected Texts:", fixed_words),
            ("Extracted Coordinates:", boxes),
        )
        for label, value in report:
            print(label, value)

        return raw_words, fixed_words, boxes

    print(f"No text detected in the image at path {image_path}.")
    return [], [], []

# Example usage (you can update the image path as needed)
# Runs the OCR step on a hard-coded path and unpacks the three lists main() returns.
image_path = '/content/Test2.jpg'
ex_text, cr_text, ex_co = main(image_path)

# NOTE(review): detect_shapes is neither defined nor imported anywhere in the
# visible script; as shown, this call would raise NameError. Confirm it is
# defined elsewhere before running.
ex_shape, ex_coor = detect_shapes(image_path)

# Print or use the results
print("Detected Shapes:", ex_shape)
print("Coordinates for Shapes:", ex_coor)
[/code]
Есть ли какое-либо решение этой проблемы? 

Подробнее здесь: [url]https://stackoverflow.com/questions/78736359/detect-arithmetic-operators-in-an-image[/url]

Ответить Пред. тема След. тема

1 сообщение • Страница 1 из 1

Быстрый ответ

Заголовок:

Имя пользователя:

Изменение регистра текста:

Смайлики

Ещё смайлики…

К этому ответу прикреплено по крайней мере одно вложение.

Если вы не хотите добавлять вложения, оставьте поля пустыми. Можно прикреплять файлы, перетаскивая их в окно сообщения.

Максимально разрешённый размер вложения: 15 МБ.

Имя файла:

Комментарий к файлу:

Имя файла	Комментарий к файлу	Размер	Статус

Похожие темы

Ответы

Просмотры

Последнее сообщение

Какова правильная/лучшая практика перегрузки арифметических операторов в C++? [дубликат]

Последнее сообщение Anonymous « 07 окт 2024, 01:59
Добавлено в форуме C++

Anonymous » 07 окт 2024, 01:59 » в форуме C++

Я знаю, что это зависит от варианта использования, поэтому приведу пример. Я создал класс (в целях обучения) для представления рационального числа:
class Rational
{
int numerator = 0;
int denominator = 0;

public:
// CONSTRUCTORS
Rational(int n,...

0 Ответы

6 Просмотры

Последнее сообщение Anonymous
07 окт 2024, 01:59
Использование отражения C++ для автоматической генерации арифметических операторов?

Последнее сообщение Anonymous « 24 июн 2025, 02:32
Добавлено в форуме C++

Anonymous » 24 июн 2025, 02:32 » в форуме C++

Поскольку предложение о отражении только что было принято в черновик C ++ 26, мне интересно, можно ли использовать ожидаемые объекты для автоматического генерации нескольких арифметических операторов для класса? | = , > = , будет ли способ...

0 Ответы

4 Просмотры

Последнее сообщение Anonymous
24 июн 2025, 02:32
Использование отражения C++ для автоматической генерации арифметических операторов?

Последнее сообщение Anonymous « 24 июн 2025, 21:54
Добавлено в форуме C++

Anonymous » 24 июн 2025, 21:54 » в форуме C++

Поскольку предложение о отражении только что было принято в черновик C ++ 26, мне интересно, можно ли использовать ожидаемые объекты для автоматического генерации нескольких арифметических операторов для класса? | = , > = , будет ли способ...

0 Ответы

4 Просмотры

Последнее сообщение Anonymous
24 июн 2025, 21:54
Каков наиболее эффективный способ выполнения арифметических действий с несколькими общими числами, содержащимися в общей

Последнее сообщение Anonymous « 02 ноя 2024, 16:39
Добавлено в форуме C#

Anonymous » 02 ноя 2024, 16:39 » в форуме C#

Каков наиболее эффективный способ выполнения арифметических операций над несколькими общими числами, завернутыми в общую структуру на C# в DotNet 8+? Использовать небезопасный код МОЖЕТ. На самом деле есть два подвопроса:
Извлечение примитивных...

0 Ответы

10 Просмотры

Последнее сообщение Anonymous
02 ноя 2024, 16:39
Каков наиболее эффективный способ выполнения арифметических действий с несколькими общими числами, содержащимися в общей

Последнее сообщение Anonymous « 02 ноя 2024, 18:45
Добавлено в форуме C#

Anonymous » 02 ноя 2024, 18:45 » в форуме C#

Каков наиболее эффективный способ выполнения арифметических операций над несколькими общими числами, завернутыми в неуправляемую, универсальную и изменяемую структуру на C# в DotNet 8+? МОЖНО использовать небезопасный код.
Наиболее эффективный...

0 Ответы

11 Просмотры

Последнее сообщение Anonymous
02 ноя 2024, 18:45

Вернуться в «Python»