
На данный момент я нахожусь на этапе записи таблицы в файл Excel. Вот код, который у меня есть:
Код: Выделить всё
import pdfplumber
import csv
import os
# Define extraction regions for each table (adjust coordinates as needed)
regions = [
(10, 100, 600, 260),
]
# Region for Table 1
# Add more regions for additional tables if needed
# Define the desired headers
# Specify the directory and filename for saving the CSV file
output_directory = "C:/Users/myname/Downloads"
output_filename = "clients_info.csv"
output_path = os.path.join(output_directory, output_filename)
with pdfplumber.open("C:/Users/myname/Downloads/clients.pdf") as pdf:
for region_index, region in enumerate(regions):
x1, y1, x2, y2 = region
tables_data = [] # Store data for all tables in this region
page = pdf.pages[0] # Extracting tables from the first page
table = page.within_bbox((x1, y1, x2, y2)).extract_table()
# Extract header row and filter out None values
header_row = [cell for cell in table[0] if cell is not None]
# Extract data rows and remove None values
for row in table[1:]:
filtered_row = [cell if cell is not None else "" for cell in row]
tables_data.append(filtered_row)
# Write the data for this region to a CSV file
with open(output_path, "w", newline="") as csvfile:
writer = csv.writer(csvfile)
writer.writerow(header_row) # Write the filtered header row to the CSV file
for row in tables_data:
writer.writerow(row) # Write the data rows to the CSV file
Большое спасибо за помощь.
Подробнее здесь: https://stackoverflow.com/questions/784 ... pdfplumber