Интеграция камеры XIAO ESP32S3 с Flask для функций оптического распознавания символов и TTS

Интеграция камеры XIAO ESP32S3 с Flask для функций оптического распознавания символов и TTS ⇐ Python

1 сообщение • Страница 1 из 1

Anonymous

Интеграция камеры XIAO ESP32S3 с Flask для функций оптического распознавания символов и TTS

Цитата

Сообщение Anonymous » 20 окт 2024, 04:14

В текущей реализации веб-сервер Flask настроен на получение изображений JPEG с камеры ESP32. Сервер использует Tesseract OCR для извлечения текста из изображений, а затем преобразует этот текст в речь с помощью библиотеки gTTS. Однако когда ESP32 пытается отправить изображения на сервер, он постоянно получает код ответа HTTP 400, указывающий на «неверный запрос». Эта проблема предполагает, что сервер неправильно обрабатывает входящие данные изображения.
КОД PYTHON:
from flask import Flask, request, jsonify, send_file, render_template
import pytesseract
from PIL import Image
import numpy as np
import cv2
from gtts import gTTS
import os
import time

app = Flask(__name__)

# Tesseract OCR path (Adjust this for your system)
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

@app.route('/')
def index():
return render_template('index.html') # Serve the HTML page for uploading images

@app.route('/upload', methods=['POST'])
def upload_image():
print("Files received:", request.files) # Log received files
try:
if 'image' not in request.files:
return jsonify({'error': 'No image uploaded'}), 400

file = request.files['image']

if file.filename == '':
return jsonify({'error': 'No selected file'}), 400

# Check if the uploaded file is a JPEG image
if not file.content_type in ['image/jpeg', 'image/jpg']:
return jsonify({'error': 'File type not supported. Please upload a JPEG image.'}), 400

try:
# Open the image file
image = Image.open(file)
# Convert to JPEG format
image = image.convert('RGB')
jpeg_image_path = "uploaded_image.jpeg"
image.save(jpeg_image_path, "JPEG") # Save as JPEG
except Exception as e:
return jsonify({'error': f'Failed to process image: {str(e)}'}), 400

# Load the JPEG image for OCR processing
open_cv_image = cv2.imread(jpeg_image_path)

start_time_ocr = time.time()
extracted_text = pytesseract.image_to_string(open_cv_image)
end_time_ocr = time.time()
ocr_time = end_time_ocr - start_time_ocr

if not extracted_text.strip():
return jsonify({'error': 'No text could be extracted from the image'}), 400

start_time_tts = time.time()
tts = gTTS(extracted_text)
audio_file = "speech.mp3"
tts.save(audio_file)
end_time_tts = time.time()
tts_time = end_time_tts - start_time_tts

return jsonify({
'extracted_text': extracted_text,
'ocr_time': ocr_time,
'tts_time': tts_time,
'audio_url': '/play_audio',
'message': 'Audio generated successfully. Click to play.'
}), 200

except Exception as e:
return jsonify({'error': str(e)}), 500

@app.route('/play_audio')
def play_audio():
try:
return send_file("speech.mp3", as_attachment=False)
except Exception as e:
return jsonify({'error': str(e)}), 500

@app.after_request
def after_request(response):
# Remove the audio file after serving
try:
if os.path.exists("speech.mp3"):
os.remove("speech.mp3") # Remove the file after serving
if os.path.exists("uploaded_image.jpeg"):
os.remove("uploaded_image.jpeg") # Remove the uploaded image after processing
except Exception as e:
print(f'Error removing file: {e}') # Log the error
return response

if __name__ == '__main__':
app.run(host='0.0.0.0', port=5000, debug=True)

КОД ESP32
#include "esp_camera.h"
#include "WiFi.h"
#include "HTTPClient.h"

#define CAMERA_MODEL_XIAO_ESP32S3 // Has PSRAM
#include "camera_pins.h"

unsigned long lastCaptureTime = 0; // Last shooting time
int imageCount = 1; // File Counter
bool camera_sign = false; // Check camera status

const char* ssid = "PLDTHOMEFIBRazyjK"; // Replace with your network SSID
const char* password = "PLDTWIFIDCF6k!"; // Replace with your network password
const char* serverUrl = "http://192.168.1.29:5000/upload"; // Replace with your server URL

void setup() {
Serial.begin(115200);
while (!Serial); // Wait for the serial monitor to open

// Connect to WiFi
WiFi.begin(ssid, password);
while (WiFi.status() != WL_CONNECTED) {
delay(500);
Serial.print(".");
}
Serial.println("Connected to WiFi");

camera_config_t config;
config.ledc_channel = LEDC_CHANNEL_0;
config.ledc_timer = LEDC_TIMER_0;
config.pin_d0 = Y2_GPIO_NUM;
config.pin_d1 = Y3_GPIO_NUM;
config.pin_d2 = Y4_GPIO_NUM;
config.pin_d3 = Y5_GPIO_NUM;
config.pin_d4 = Y6_GPIO_NUM;
config.pin_d5 = Y7_GPIO_NUM;
config.pin_d6 = Y8_GPIO_NUM;
config.pin_d7 = Y9_GPIO_NUM;
config.pin_xclk = XCLK_GPIO_NUM;
config.pin_pclk = PCLK_GPIO_NUM;
config.pin_vsync = VSYNC_GPIO_NUM;
config.pin_href = HREF_GPIO_NUM;
config.pin_sscb_sda = SIOD_GPIO_NUM;
config.pin_sscb_scl = SIOC_GPIO_NUM;
config.pin_pwdn = PWDN_GPIO_NUM;
config.pin_reset = RESET_GPIO_NUM;
config.xclk_freq_hz = 20000000;
config.frame_size = FRAMESIZE_UXGA;
config.pixel_format = PIXFORMAT_JPEG; // for streaming
config.grab_mode = CAMERA_GRAB_WHEN_EMPTY;
config.fb_location = CAMERA_FB_IN_PSRAM;
config.jpeg_quality = 12;
config.fb_count = 1;

if (config.pixel_format == PIXFORMAT_JPEG) {
if (psramFound()) {
config.jpeg_quality = 10;
config.fb_count = 2;
config.grab_mode = CAMERA_GRAB_LATEST;
} else {
// Limit the frame size when PSRAM is not available
config.frame_size = FRAMESIZE_UXGA;
config.fb_location = CAMERA_FB_IN_DRAM;
}
} else {
config.frame_size = FRAMESIZE_240X240;
#if CONFIG_IDF_TARGET_ESP32S3
config.fb_count = 2;
#endif
}

// camera init
esp_err_t err = esp_camera_init(&config);
if (err != ESP_OK) {
Serial.printf("Camera init failed with error 0x%x", err);
return;
}

camera_sign = true; // Camera initialized
Serial.println("Camera initialized successfully");
}

void sendPhoto(const char* filename, uint8_t* buf, size_t len) {
if (WiFi.status() == WL_CONNECTED) {
HTTPClient http;
http.begin(serverUrl); // Specify the URL
http.addHeader("Content-Type", "image/jpeg"); // Specify content-type header

// Send the HTTP POST request
int httpResponseCode = http.POST(buf, len);
if (httpResponseCode > 0) {
Serial.printf("HTTP Response code: %d\n", httpResponseCode);
} else {
Serial.printf("HTTP POST failed: %s\n", http.errorToString(httpResponseCode).c_str());
}

// Free resources
http.end();
} else {
Serial.println("WiFi not connected");
}
}

void loop() {
if (camera_sign) {
unsigned long now = millis();

// If it has been more than 10 seconds since the last shot, take a picture and send it
if ((now - lastCaptureTime) >= 10000) {
camera_fb_t *fb = esp_camera_fb_get();
if (!fb) {
Serial.println("Failed to get camera frame buffer");
return;
}

char filename[32];
sprintf(filename, "/image.jpg");
Serial.printf("Image Size: %zu bytes\n", fb->len); // Debug image size
sendPhoto(filename, fb->buf, fb->len);
Serial.printf("Sent picture: %s\r\n", filename);
imageCount++;
lastCaptureTime = now;

// Release image buffer
esp_camera_fb_return(fb);
}
}
}

ESP32 должен успешно отправить изображения JPEG на сервер Flask, который затем обработает эти изображения, не возвращая ошибку 400.
Вывод в последовательном формате esp32 монитор
HTTP Response code: 400
Sent picture: /image.jpg
Image Size: 108403 bytes
HTTP Response code: 400
Sent picture: /image.jpg
Image Size: 110773 bytes

Вывод в Python
192.168.1.43 - - [20/Oct/2024 09:11:13] "POST /upload HTTP/1.1" 400 -
192.168.1.43 - - [20/Oct/2024 09:11:30] "POST /upload HTTP/1.1" 400 -
192.168.1.43 - - [20/Oct/2024 09:11:46] "POST /upload HTTP/1.1" 400 -
192.168.1.43 - - [20/Oct/2024 09:11:54] "POST /upload HTTP/1.1" 400 -
192.168.1.43 - - [20/Oct/2024 09:12:07] "POST /upload HTTP/1.1" 400 -
192.168.1.43 - - [20/Oct/2024 09:12:20] "POST /upload HTTP/1.1" 400 -

Подробнее здесь: https://stackoverflow.com/questions/791 ... ctionality

1729386865

Anonymous

В текущей реализации веб-сервер Flask настроен на получение изображений JPEG с камеры ESP32. Сервер использует Tesseract OCR для извлечения текста из изображений, а затем преобразует этот текст в речь с помощью библиотеки gTTS. Однако когда ESP32 пытается отправить изображения на сервер, он постоянно получает код ответа HTTP 400, указывающий на «неверный запрос». Эта проблема предполагает, что сервер неправильно обрабатывает входящие данные изображения.
КОД PYTHON:
from flask import Flask, request, jsonify, send_file, render_template
import pytesseract
from PIL import Image
import numpy as np
import cv2
from gtts import gTTS
import os
import time

app = Flask(__name__)

# Tesseract OCR path (Adjust this for your system)
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

@app.route('/')
def index():
return render_template('index.html')  # Serve the HTML page for uploading images

@app.route('/upload', methods=['POST'])
def upload_image():
print("Files received:", request.files)  # Log received files
try:
if 'image' not in request.files:
return jsonify({'error': 'No image uploaded'}), 400

file = request.files['image']

if file.filename == '':
return jsonify({'error': 'No selected file'}), 400

# Check if the uploaded file is a JPEG image
if not file.content_type in ['image/jpeg', 'image/jpg']:
return jsonify({'error': 'File type not supported. Please upload a JPEG image.'}), 400

try:
# Open the image file
image = Image.open(file)
# Convert to JPEG format
image = image.convert('RGB')
jpeg_image_path = "uploaded_image.jpeg"
image.save(jpeg_image_path, "JPEG")  # Save as JPEG
except Exception as e:
return jsonify({'error': f'Failed to process image: {str(e)}'}), 400

# Load the JPEG image for OCR processing
open_cv_image = cv2.imread(jpeg_image_path)

start_time_ocr = time.time()
extracted_text = pytesseract.image_to_string(open_cv_image)
end_time_ocr = time.time()
ocr_time = end_time_ocr - start_time_ocr

if not extracted_text.strip():
return jsonify({'error': 'No text could be extracted from the image'}), 400

start_time_tts = time.time()
tts = gTTS(extracted_text)
audio_file = "speech.mp3"
tts.save(audio_file)
end_time_tts = time.time()
tts_time = end_time_tts - start_time_tts

return jsonify({
'extracted_text': extracted_text,
'ocr_time': ocr_time,
'tts_time': tts_time,
'audio_url': '/play_audio',
'message': 'Audio generated successfully.  Click to play.'
}), 200

except Exception as e:
return jsonify({'error': str(e)}), 500

@app.route('/play_audio')
def play_audio():
try:
return send_file("speech.mp3", as_attachment=False)
except Exception as e:
return jsonify({'error': str(e)}), 500

@app.after_request
def after_request(response):
# Remove the audio file after serving
try:
if os.path.exists("speech.mp3"):
os.remove("speech.mp3")  # Remove the file after serving
if os.path.exists("uploaded_image.jpeg"):
os.remove("uploaded_image.jpeg")  # Remove the uploaded image after processing
except Exception as e:
print(f'Error removing file: {e}')  # Log the error
return response

if __name__ == '__main__':
app.run(host='0.0.0.0', port=5000, debug=True)


КОД ESP32
#include "esp_camera.h"
#include "WiFi.h"
#include "HTTPClient.h"

#define CAMERA_MODEL_XIAO_ESP32S3 // Has PSRAM
#include "camera_pins.h"

unsigned long lastCaptureTime = 0; // Last shooting time
int imageCount = 1;                // File Counter
bool camera_sign = false;          // Check camera status

const char* ssid = "PLDTHOMEFIBRazyjK";            // Replace with your network SSID
const char* password = "PLDTWIFIDCF6k!";     // Replace with your network password
const char* serverUrl = "http://192.168.1.29:5000/upload"; // Replace with your server URL

void setup() {
Serial.begin(115200);
while (!Serial); // Wait for the serial monitor to open

// Connect to WiFi
WiFi.begin(ssid, password);
while (WiFi.status() != WL_CONNECTED) {
delay(500);
Serial.print(".");
}
Serial.println("Connected to WiFi");

camera_config_t config;
config.ledc_channel = LEDC_CHANNEL_0;
config.ledc_timer = LEDC_TIMER_0;
config.pin_d0 = Y2_GPIO_NUM;
config.pin_d1 = Y3_GPIO_NUM;
config.pin_d2 = Y4_GPIO_NUM;
config.pin_d3 = Y5_GPIO_NUM;
config.pin_d4 = Y6_GPIO_NUM;
config.pin_d5 = Y7_GPIO_NUM;
config.pin_d6 = Y8_GPIO_NUM;
config.pin_d7 = Y9_GPIO_NUM;
config.pin_xclk = XCLK_GPIO_NUM;
config.pin_pclk = PCLK_GPIO_NUM;
config.pin_vsync = VSYNC_GPIO_NUM;
config.pin_href = HREF_GPIO_NUM;
config.pin_sscb_sda = SIOD_GPIO_NUM;
config.pin_sscb_scl = SIOC_GPIO_NUM;
config.pin_pwdn = PWDN_GPIO_NUM;
config.pin_reset = RESET_GPIO_NUM;
config.xclk_freq_hz = 20000000;
config.frame_size = FRAMESIZE_UXGA;
config.pixel_format = PIXFORMAT_JPEG; // for streaming
config.grab_mode = CAMERA_GRAB_WHEN_EMPTY;
config.fb_location = CAMERA_FB_IN_PSRAM;
config.jpeg_quality = 12;
config.fb_count = 1;

if (config.pixel_format == PIXFORMAT_JPEG) {
if (psramFound()) {
config.jpeg_quality = 10;
config.fb_count = 2;
config.grab_mode = CAMERA_GRAB_LATEST;
} else {
// Limit the frame size when PSRAM is not available
config.frame_size = FRAMESIZE_UXGA;
config.fb_location = CAMERA_FB_IN_DRAM;
}
} else {
config.frame_size = FRAMESIZE_240X240;
#if CONFIG_IDF_TARGET_ESP32S3
config.fb_count = 2;
#endif
}

// camera init
esp_err_t err = esp_camera_init(&config);
if (err != ESP_OK) {
Serial.printf("Camera init failed with error 0x%x", err);
return;
}

camera_sign = true; // Camera initialized
Serial.println("Camera initialized successfully");
}

void sendPhoto(const char* filename, uint8_t* buf, size_t len) {
if (WiFi.status() == WL_CONNECTED) {
HTTPClient http;
http.begin(serverUrl); // Specify the URL
http.addHeader("Content-Type", "image/jpeg"); // Specify content-type header

// Send the HTTP POST request
int httpResponseCode = http.POST(buf, len);
if (httpResponseCode >  0) {
Serial.printf("HTTP Response code: %d\n", httpResponseCode);
} else {
Serial.printf("HTTP POST failed: %s\n", http.errorToString(httpResponseCode).c_str());
}

// Free resources
http.end();
} else {
Serial.println("WiFi not connected");
}
}

void loop() {
if (camera_sign) {
unsigned long now = millis();

// If it has been more than 10 seconds since the last shot, take a picture and send it
if ((now - lastCaptureTime) >= 10000) {
camera_fb_t *fb = esp_camera_fb_get();
if (!fb) {
Serial.println("Failed to get camera frame buffer");
return;
}

char filename[32];
sprintf(filename, "/image.jpg");
Serial.printf("Image Size: %zu bytes\n", fb->len); // Debug image size
sendPhoto(filename, fb->buf, fb->len);
Serial.printf("Sent picture: %s\r\n", filename);
imageCount++;
lastCaptureTime = now;

// Release image buffer
esp_camera_fb_return(fb);
}
}
}


ESP32 должен успешно отправить изображения JPEG на сервер Flask, который затем обработает эти изображения, не возвращая ошибку 400.
Вывод в последовательном формате esp32 монитор
HTTP Response code: 400
Sent picture: /image.jpg
Image Size: 108403 bytes
HTTP Response code: 400
Sent picture: /image.jpg
Image Size: 110773 bytes

Вывод в Python
192.168.1.43 - - [20/Oct/2024 09:11:13] "POST /upload HTTP/1.1" 400 -
192.168.1.43 - - [20/Oct/2024 09:11:30] "POST /upload HTTP/1.1" 400 -
192.168.1.43 - - [20/Oct/2024 09:11:46] "POST /upload HTTP/1.1" 400 -
192.168.1.43 - - [20/Oct/2024 09:11:54] "POST /upload HTTP/1.1" 400 -
192.168.1.43 - - [20/Oct/2024 09:12:07] "POST /upload HTTP/1.1" 400 -
192.168.1.43 - - [20/Oct/2024 09:12:20] "POST /upload HTTP/1.1" 400 -
 

Подробнее здесь: [url]https://stackoverflow.com/questions/79105302/integration-of-xiao-esp32s3-camera-with-flask-for-ocr-and-tts-functionality[/url]

Ответить Пред. тема След. тема

1 сообщение • Страница 1 из 1

Быстрый ответ

Заголовок:

Имя пользователя:

Изменение регистра текста:

Смайлики

Ещё смайлики…

К этому ответу прикреплено по крайней мере одно вложение.

Если вы не хотите добавлять вложения, оставьте поля пустыми. Можно прикреплять файлы, перетаскивая их в окно сообщения.

Максимально разрешённый размер вложения: 15 МБ.

Имя файла:

Комментарий к файлу:

Имя файла	Комментарий к файлу	Размер	Статус

Похожие темы

Ответы

Просмотры

Последнее сообщение

Интеграция камеры XIAO ESP32S3 с Flask для функций оптического распознавания символов и TTS

Последнее сообщение Anonymous « 19 окт 2024, 18:33
Добавлено в форуме Python

Anonymous » 19 окт 2024, 18:33 » в форуме Python

В текущей реализации веб-сервер Flask настроен на получение изображений JPEG с камеры ESP32. Сервер использует Tesseract OCR для извлечения текста из изображений, а затем преобразует этот текст в речь с помощью библиотеки gTTS. Однако когда ESP32...

0 Ответы

28 Просмотры

Последнее сообщение Anonymous
19 окт 2024, 18:33
Предварительная обработка изображений для оптического распознавания символов (pytesseract)

Последнее сообщение Anonymous « 02 мар 2024, 19:27
Добавлено в форуме Python

Anonymous » 02 мар 2024, 19:27 » в форуме Python

I'm trying to OCR image with pytesseract. Once I do the OCR for below image the result shows as WV over

What are the image pre-processing techniques that can be use to enhance this image by filling missing parts of text.

Enhance image OCR...

0 Ответы

116 Просмотры

Последнее сообщение Anonymous
02 мар 2024, 19:27
Предварительная обработка изображений в Python для оптического распознавания символов

Последнее сообщение Anonymous « 17 сен 2024, 20:19
Добавлено в форуме Python

Anonymous » 17 сен 2024, 20:19 » в форуме Python

Я занимаюсь предварительной обработкой изображений для оптического распознавания символов в Python. Я преобразовал PDF в двоичные изображения. Я получаю такой результат:

Я хочу, чтобы результат был примерно таким

https...

0 Ответы

18 Просмотры

Последнее сообщение Anonymous
17 сен 2024, 20:19
Предварительная обработка изображений в Python для оптического распознавания символов

Последнее сообщение Anonymous « 18 сен 2024, 00:13
Добавлено в форуме Python

Anonymous » 18 сен 2024, 00:13 » в форуме Python

Я занимаюсь предварительной обработкой изображений для оптического распознавания символов в Python. Я преобразовал PDF в двоичные изображения. Я получаю такой результат:

Я хочу, чтобы результат был примерно таким

https...

0 Ответы

20 Просмотры

Последнее сообщение Anonymous
18 сен 2024, 00:13
Предварительная обработка изображений в Python для оптического распознавания символов [закрыто]

Последнее сообщение Anonymous « 18 сен 2024, 17:53
Добавлено в форуме Python

Anonymous » 18 сен 2024, 17:53 » в форуме Python

Я занимаюсь предварительной обработкой изображений для оптического распознавания символов в Python. Я преобразовал PDF в двоичные изображения. Я получаю такой результат:

Я хочу, чтобы результат был примерно таким

https...

0 Ответы

16 Просмотры

Последнее сообщение Anonymous
18 сен 2024, 17:53

Вернуться в «Python»