Мой вопрос: как можно обновить код, чтобы избавиться от ошибок, связанных с устаревшими (deprecated) методами?
< /ol>
Код: Выделить всё
import PyPDF2
import openpyxl
def pdf_to_text(pdf_file):
    """Return the text of every page of ``pdf_file`` concatenated into one string.

    The file is opened in binary mode and the pages are read in order; the
    per-page text is appended without any separator between pages.
    """
    text = ""
    with open(pdf_file, "rb") as file:
        pdf_reader = PyPDF2.PdfFileReader(file)
        # NOTE: getNumPages() was removed in PyPDF2 3.0.0 and raises
        # DeprecationError there; len(reader.pages) is the replacement.
        for page_num in range(pdf_reader.getNumPages()):
            # NOTE(review): PdfFileReader, getPage and extractText are the same
            # 1.x-style names -- presumably removed as well; verify against the
            # PyPDF2 migration guide.
            page = pdf_reader.getPage(page_num)
            text += page.extractText()
    return text
def save_text_to_excel(text, excel_file):
    """Write ``text`` into a new workbook, one line per row in the first column.

    Args:
        text: The text to store; it is split on "\\n" and each piece becomes
            one cell.
        excel_file: Destination passed to ``Workbook.save``.
    """
    wb = openpyxl.Workbook()
    ws = wb.active
    # Row numbering starts at 1, matching the original enumerate(lines, 1).
    for row_idx, content in enumerate(text.split("\n"), start=1):
        ws.cell(row=row_idx, column=1, value=content)
    wb.save(excel_file)
if __name__ == "__main__":
    # Script entry point: extract the text of the input PDF and write it to
    # the output workbook.
    excel_file = "output.xlsx"
    pdf_file = "PDF_File_name.pdf"
    save_text_to_excel(pdf_to_text(pdf_file), excel_file)
Поэтому я обновил этот код Python:
Вывод: "PyPDF2.errors.DeprecationError: reader.getNumPages is deprecated and was removed in PyPDF2 3.0.0. Use len(reader.pages) instead." Сообщение ссылается на руководство по миграции (…/user/migration-1-to-2.html), в котором указано, как обновить код:
Код: Выделить всё
import PyPDF2
import openpyxl


def pdf_to_text(pdf_file):
    """Return the text of every page of ``pdf_file`` concatenated into one string."""
    text = ""
    with open(pdf_file, "rb") as file:
        pdf_reader = PyPDF2.PdfReader(file)
        # NOTE: the reader class uses the new name, but getNumPages() was
        # removed in PyPDF2 3.0.0 and raises DeprecationError there;
        # len(reader.pages) is the replacement.
        for page_num in range(pdf_reader.getNumPages()):
            # NOTE(review): getPage/extractText are the same 1.x-style names --
            # presumably removed as well; verify against the migration guide.
            page = pdf_reader.getPage(page_num)
            text += page.extractText()
    return text


def save_text_to_excel(text, excel_file):
    """Write ``text`` into a new workbook, one "\\n"-separated line per row of column 1."""
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    lines = text.split("\n")
    for row_num, line in enumerate(lines, 1):
        sheet.cell(row=row_num, column=1, value=line)
    workbook.save(excel_file)


if __name__ == "__main__":
    # Script entry point: convert the PDF named below into output.xlsx.
    pdf_file = "PDF_File_name.pdf"
    excel_file = "output.xlsx"
    pdf_text = pdf_to_text(pdf_file)
    save_text_to_excel(pdf_text, excel_file)
Код: Выделить всё
import PyPDF2
import openpyxl
def pdf_to_text(pdf_file):
    """Return the text of every page of ``pdf_file`` concatenated into one string."""
    text = ""
    with open(pdf_file, "rb") as file:
        pdf_reader = PyPDF2.PdfReader(file)
        # BUG: `len` is a Python builtin, not a method of the reader, and the
        # name `reader` is not defined in this function. The intended
        # expression is len(pdf_reader.pages).
        for page_num in range(pdf_reader.len(reader.pages)):
            # NOTE(review): getPage/extractText are 1.x-style names --
            # presumably removed in PyPDF2 3.0.0 too; verify against the
            # migration guide.
            page = pdf_reader.getPage(page_num)
            text += page.extractText()
    return text
def save_text_to_excel(text, excel_file):
    """Store ``text`` in a fresh workbook, placing each line in its own row.

    Args:
        text: Text to store; split on "\\n", one piece per cell of column 1.
        excel_file: Destination passed to ``Workbook.save``.
    """
    book = openpyxl.Workbook()
    active_sheet = book.active
    row_number = 0
    for piece in text.split("\n"):
        # Advance first so the first line lands in row 1.
        row_number += 1
        active_sheet.cell(row=row_number, column=1, value=piece)
    book.save(excel_file)
if __name__ == "__main__":
    # Script entry point: read the PDF, then write its text to the workbook.
    pdf_file, excel_file = "PDF_File_name.pdf", "output.xlsx"
    save_text_to_excel(pdf_to_text(pdf_file), excel_file)
< ol start = "4">
Я обновил код на основе комментария Abdul Aziz Barkat:
«Опечатка: pdf_reader.len(reader.pages). Сравните с len(reader.pages), как указано в сообщении об устаревании... Нужно писать len(pdf_reader.pages): len — это встроенная функция Python.»
< /ol>
Код: Выделить всё
import PyPDF2
import openpyxl
def pdf_to_text(pdf_file):
    """Extract the text of every page of a PDF and return it as one string.

    Uses only the current PyPDF2 reader API (``PdfReader``, ``reader.pages``,
    ``page.extract_text()``), so it does not hit the DeprecationError raised
    by the removed ``getNumPages`` / ``getPage`` / ``extractText`` methods.

    Args:
        pdf_file: Path of the PDF file to read.

    Returns:
        The text of all pages concatenated in page order, with no separator
        inserted between pages.
    """
    with open(pdf_file, "rb") as file:
        pdf_reader = PyPDF2.PdfReader(file)
        # Join inside the ``with`` block: the file must still be open while
        # the pages are being read from it.
        return "".join(page.extract_text() for page in pdf_reader.pages)
def save_text_to_excel(text, excel_file):
    """Save ``text`` to a new workbook with one line per row of column 1.

    Args:
        text: Text to store; it is split on "\\n" and every piece is written
            to its own cell.
        excel_file: Destination passed to ``Workbook.save``.
    """
    wb = openpyxl.Workbook()
    target = wb.active
    # enumerate(..., start=1) keeps the original row numbering, beginning at 1.
    for row_idx, row_text in enumerate(text.split("\n"), start=1):
        target.cell(row=row_idx, column=1, value=row_text)
    wb.save(excel_file)
if __name__ == "__main__":
    # Script entry point: convert computers.pdf into output.xlsx.
    excel_file = "output.xlsx"
    pdf_file = "computers.pdf"
    save_text_to_excel(pdf_to_text(pdf_file), excel_file)
Подробнее здесь: https://stackoverflow.com/questions/769 ... -python-co
Мобильная версия