API Python flask для преобразования DOCX в PDFPython

Программы на Python
Ответить
Anonymous
 API Python flask для преобразования DOCX в PDF

Сообщение Anonymous »

Я создал API Python Flask для преобразования файлов DOCX в PDF. Он отлично работает, когда я запускаю его вручную через командную строку. Однако я хотел бы превратить его в службу Windows, чтобы он мог запускаться автоматически и перезапускаться при необходимости.
Я попробовал использовать NSSM, чтобы настроить его как службу, и пока все казалось, работало, служба не могла открыть файлы DOCX, что бы я ни пытался.
Я также экспериментировал с pywin32, но запросы полностью завершались неудачей после запуска службы.
Я не думаю, что это разрешение проблема. Я добавил ведение журнала в свой код и убедился, что файлы DOCX получены правильно, имеют правильные разрешения и имеют правильный формат.
Есть ли у кого-нибудь идеи, как решить эту проблему?
Вот мой ОБНОВЛЕННЫЙ код:

Код: Выделить всё

import win32com.client
import pythoncom
import inspect, os
from flask import Flask, request, send_file
import tempfile
import logging
import time
import stat
import getpass

# Set up logging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

app = Flask(__name__)

# WordHandler class to handle opening and closing Word application
class WordHandler:
def __init__(self):
self.word = None
logger.debug("WordHandler initialized")

def __enter__(self):
logger.debug("Starting Word application...")
start_time = time.time()
pythoncom.CoInitialize()
try:
self.word = win32com.client.DispatchEx("Word.Application")
logger.debug(f"Word application started in {time.time() - start_time:.2f} seconds")
except Exception as ex:
logger.error("Error starting Word application", exc_info=ex)
raise
return self

def __exit__(self, exc_type, exc_val, exc_tb):
logger.debug("Closing Word application...")
try:
if self.word:
self.word.Quit()
pythoncom.CoUninitialize()
logger.debug("Word application closed")
except Exception as ex:
logger.error("Error closing Word application", exc_info=ex)
raise

def process_document(self, docx_file):
"""Handles both TOC update and PDF conversion in a single Word session"""
logger.debug(f"Opening document: {docx_file}")
start_time = time.time()

if not os.path.exists(docx_file):
logger.error(f"The file does not exist: {docx_file}")
raise Exception(f"The file does not exist: {docx_file}")

try:
# Log Word application state
logger.debug(f"Word application object exists: {self.word is not None}")

doc = self.word.Documents.Open(docx_file)
# Check if document was opened successfully
if doc is None:
logger.error("Failed to open document - Word returned None")
raise Exception("Failed to open document in Word")

logger.debug(f"Document opened in {time.time() - start_time:.2f} seconds")
logger.debug(f"Document object exists: {doc is not None}")

# Add diagnostics for the document
try:
logger.info(f"Document name: {doc.Name}")
logger.info(f"Document path: {doc.Path}")
logger.info(f"Document pages: {doc.ComputeStatistics(2)}")  # 2 is for page count
logger.info(f"Document paragraphs count: {len(doc.Paragraphs)}")
except Exception as doc_ex:
logger.error(f"Error accessing document properties: {str(doc_ex)}")
raise Exception("Failed to access document properties")

# Check for Table of Contents
try:
toc_count = doc.TablesOfContents.Count
logger.info(f"Number of Tables of Contents found: {toc_count}")
except Exception as toc_ex:
logger.error(f"Error checking TOC count: {str(toc_ex)}")
toc_count = 0

# Only try to update TOC if it exists
if toc_count >  0:
logger.debug("Updating Table of Contents...")
toc_start = time.time()
doc.TablesOfContents(1).Update()
logger.debug(f"TOC updated in {time.time() - toc_start:.2f} seconds")
doc.Save()
else:
logger.warning("No Table of Contents found in document - skipping TOC update")

# Convert to PDF
pdf_file = docx_file.replace('.docx', '.pdf')
logger.debug(f"Converting to PDF: {pdf_file}")
pdf_start = time.time()
doc.SaveAs(pdf_file, FileFormat=17)
logger.debug(f"PDF conversion completed in {time.time() - pdf_start:.2f} seconds")
doc.Close()
logger.debug("Document closed")
return pdf_file

except Exception as ex:
logger.error(f"Failed to process document. Exception: {str(ex)}", exc_info=True)
if 'doc' in locals() and doc:
try:
doc.Close(SaveChanges=False)
logger.debug("Document closed after error")
except:
pass
raise Exception("Failed to process document. Check the file and try again.")

@app.route('/status', methods=['GET'])
def status():
return {'status': 'API is running', 'version': '1.0'}, 200

@app.route('/convert', methods=['POST'])
def convert():
logger.debug("Received conversion request")
if 'file' not in request.files:
logger.warning("No file provided in request")
return 'No file provided', 400

file = request.files['file']
if not file.filename.endswith('.docx'):
logger.warning(f"Invalid file format: {file.filename}")
return 'Invalid file format.  Please upload a .docx file', 400

temp_dir = None
temp_docx = None
pdf_path = None

try:
logger.debug("Creating temporary directory...")
temp_dir = tempfile.mkdtemp()
temp_docx = os.path.join(temp_dir, 'input.docx')
logger.debug(f"Saving uploaded file to {temp_docx}")
file.save(temp_docx)

# Add file checks and logging
logger.info(f"File exists: {os.path.exists(temp_docx)}")
logger.info(f"Absolute file path: {os.path.abspath(temp_docx)}")

# Check file permissions
file_stat = os.stat(temp_docx)
permissions = stat.filemode(file_stat.st_mode)
logger.info(f"File permissions: {permissions}")

# Get current user
current_user = getpass.getuser()
logger.info(f"Current user: {current_user}")

logger.debug("Starting document processing...")
process_start = time.time()
with WordHandler() as word_handler:
pdf_path = word_handler.process_document(temp_docx)
logger.debug(f"Document processing completed in {time.time() - process_start:.2f} seconds")

response = send_file(
pdf_path,
mimetype='application/pdf',
as_attachment=True,
download_name='converted.pdf'
)

@response.call_on_close
def cleanup():
logger.debug("Starting cleanup...")
try:
if temp_docx and os.path.exists(temp_docx):
os.unlink(temp_docx)
if temp_dir and os.path.exists(temp_dir):
os.rmdir(temp_dir)
if pdf_path and os.path.exists(pdf_path):
os.unlink(pdf_path)
logger.debug("Cleanup completed")
except Exception as e:
logger.error(f"Error during cleanup", exc_info=e)

return response

except Exception as e:
logger.error("Error during conversion request", exc_info=e)
return str(e), 500

if __name__ == "__main__":
app.run(host='0.0.0.0', port=5001, debug=True)
А вот логи:

Код: Выделить всё

WARNING:werkzeug: * Debugger is active!
INFO:werkzeug: * Debugger PIN: 366-636-785
DEBUG:__main__:Received conversion request
DEBUG:__main__:Creating temporary directory...
DEBUG:__main__:Saving uploaded file to C:\Users\User\AppData\Local\Temp\tmp5dwqrhb1\input.docx
INFO:__main__:File exists: True
INFO:__main__:Absolute file path: C:\Users\User\AppData\Local\Temp\tmp5dwqrhb1\input.docx
INFO:__main__:File permissions: -rw-rw-rw-
INFO:__main__:Current user: User
DEBUG:__main__:Starting document processing...
DEBUG:__main__:WordHandler initialized
DEBUG:__main__:Starting Word application...
DEBUG:__main__:Word application started in 0.35 seconds
DEBUG:__main__:Opening document: C:\Users\User\AppData\Local\Temp\tmp5dwqrhb1\input.docx
DEBUG:__main__:Word application object exists: True
ERROR:__main__:Failed to open document - Word returned None
ERROR:__main__:Failed to process document. Exception: Failed to open document in Word
Traceback (most recent call last):
File "C:\Users\User\Desktop\docx2pdf\app.py", line 64, in process_document
raise Exception("Failed to open document in Word")
Exception: Failed to open document in Word
DEBUG:__main__:Closing Word application...
DEBUG:__main__:Word application closed
ERROR:__main__:Error during conversion request
Traceback (most recent call last):
File "C:\Users\User\Desktop\docx2pdf\app.py", line 64, in process_document
raise Exception("Failed to open document in Word")
Exception: Failed to open document in Word

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "C:\Users\User\Desktop\docx2pdf\app.py", line 160, in convert
pdf_path = word_handler.process_document(temp_docx)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\User\Desktop\docx2pdf\app.py", line 115, in process_document
raise Exception("Failed to process document. Check the file and try again.")
Exception: Failed to process document.  Check the file and try again.
INFO:werkzeug:172.16.120.156 - - [09/Jan/2025 12:51:05] "[35m[1mPOST /convert HTTP/1.1[0m" 500 -
Заранее спасибо.

Подробнее здесь: https://stackoverflow.com/questions/793 ... ocx-to-pdf
Ответить

Быстрый ответ

Изменение регистра текста: 
Смайлики
:) :( :oops: :roll: :wink: :muza: :clever: :sorry: :angel: :read: *x)
Ещё смайлики…
   
К этому ответу прикреплено по крайней мере одно вложение.

Если вы не хотите добавлять вложения, оставьте поля пустыми.

Максимально разрешённый размер вложения: 15 МБ.

Вернуться в «Python»