Использование Python и pyxlsb для поиска данных в файле xlsb

Использование Python и pyxlsb для поиска данных в файле xlsb ⇐ Python

1 сообщение • Страница 1 из 1

Anonymous

Использование Python и pyxlsb для поиска данных в файле xlsb

Цитата

Сообщение Anonymous » 08 янв 2025, 21:31

Цели
У меня есть книга с несколькими листами. На каждом листе в первом столбце всегда указано время в секундах, а в верхней строке — заголовки. Я хочу, чтобы код находил на листе заданный диапазон времени и столбец с заданным заголовком, вычислял среднее значение этих данных и записывал его в CSV-файл.
Проблема: код невероятно медленный. Есть ли способ лучше? На каждом листе 30 000 строк и 300 столбцов. Код работает только на изменённом, сокращённом наборе данных.
import pandas as pd
from pyxlsb import open_workbook
import datetime

print("Start", datetime.datetime.now())
# Define the paths
csv_file = r"L:\Projects\P1563 V4\Data\Summary\ANR Sweeps\Python\ANR Sweeps_Python_simple.csv"

# Read the CSV file (without headers)
csv_data = pd.read_csv(csv_file, encoding='ISO-8859-1', header=None)

# Extract parameters from the CSV file using numeric indexing
xlsb_path = csv_data.iloc[2, 0] # Cell A3
xlsb_file = csv_data.iloc[2, 1] # Cell B3
sheet_name = csv_data.iloc[0, 4] # Cell E1
column_header = csv_data.iloc[1, 4] # Cell E2
start_time = float(csv_data.iloc[2, 2]) # Cell C3
end_time = float(csv_data.iloc[2, 3]) # Cell D3

print("CSV open", datetime.datetime.now())

# Full path to the .xlsb file
xlsb_full_path = f"{xlsb_path}{xlsb_file}.xlsb"

with open_workbook(xlsb_full_path) as wb:
with wb.get_sheet(sheet_name) as sheet:
# Get the first row (assuming headers are in the first row)
headers = next(sheet.rows())

# Create a mapping of header name to its index
header_map = {cell.v: idx for idx, cell in enumerate(headers)}

# Directly access the index of the desired column header
if column_header in header_map:
column_index = header_map[column_header]
print(f"Header '{column_header}' is at index: {column_index}")
else:
print(f"Header '{column_header}' not found.")

# Get the indices of the required columns
data_col_idx = header_map[column_header]
time_col_idx = 0

# Initialize variables for sum and count
total = 0
count = 0
next(sheet.rows()) #skip column headers

# Iterate through the rows
for row in sheet.rows():
try:
# Get the time and data values
time_value = row[time_col_idx].v
data_value = row[data_col_idx].v
# Use this to skip column header
if isinstance(time_value, float):
if start_time

Подробнее здесь: https://stackoverflow.com/questions/793 ... -from-xlsb

1736361070

Anonymous

[b]Цели[/b]
У меня есть книга с несколькими листами. На каждом листе в первом столбце всегда указано время в секундах, а в верхней строке — заголовки. Я хочу, чтобы код находил на листе заданный диапазон времени и столбец с заданным заголовком, вычислял среднее значение этих данных и записывал его в CSV-файл.
[b]Проблема[/b] Код невероятно медленный. Есть ли способ лучше? На каждом листе 30 000 строк и 300 столбцов. Код работает только на изменённом, сокращённом наборе данных.
import pandas as pd
from pyxlsb import open_workbook
import datetime

print("Start", datetime.datetime.now())
# Define the paths
csv_file = r"L:\Projects\P1563 V4\Data\Summary\ANR Sweeps\Python\ANR Sweeps_Python_simple.csv"

# Read the CSV file (without headers)
csv_data = pd.read_csv(csv_file, encoding='ISO-8859-1', header=None)

# Extract parameters from the CSV file using numeric indexing
xlsb_path = csv_data.iloc[2, 0]  # Cell A3
xlsb_file = csv_data.iloc[2, 1]  # Cell B3
sheet_name = csv_data.iloc[0, 4]  # Cell E1
column_header = csv_data.iloc[1, 4]  # Cell E2
start_time = float(csv_data.iloc[2, 2])  # Cell C3
end_time = float(csv_data.iloc[2, 3])  # Cell D3

print("CSV open", datetime.datetime.now())

# Full path to the .xlsb file
xlsb_full_path = f"{xlsb_path}{xlsb_file}.xlsb"

with open_workbook(xlsb_full_path) as wb:
with wb.get_sheet(sheet_name) as sheet:
# Get the first row (assuming headers are in the first row)
headers = next(sheet.rows())

# Create a mapping of header name to its index
header_map = {cell.v: idx for idx, cell in enumerate(headers)}

# Directly access the index of the desired column header
if column_header in header_map:
column_index = header_map[column_header]
print(f"Header '{column_header}' is at index: {column_index}")
else:
print(f"Header '{column_header}' not found.")

# Get the indices of the required columns
data_col_idx = header_map[column_header]
time_col_idx = 0

# Initialize variables for sum and count
total = 0
count = 0
next(sheet.rows()) #skip column headers

# Iterate through the rows
for row in sheet.rows():
try:
# Get the time and data values
time_value = row[time_col_idx].v
data_value = row[data_col_idx].v
# Use this to skip column header
if isinstance(time_value, float):
if start_time 

Подробнее здесь: [url]https://stackoverflow.com/questions/79340041/using-python-and-pyxlsb-lookup-data-from-xlsb[/url]