Код: Выделить всё
from pathlib import Path
import uuid
import polars as pl
def repartition(directory_to_repartition: "str | Path", target_size: int) -> None:
    """Rewrite the parquet files in a directory as evenly sized partitions.

    Every regular ``*.parquet`` file directly inside the directory is read
    and concatenated into a single in-memory frame. That frame is sliced
    into consecutive row ranges, each slice is written to a new
    ``<uuid4>.parquet`` file in the same directory, and the original files
    are then removed.

    NOTE(review): the original code called ``DataFrame.split_partitions``,
    which polars does not provide. ``target_size`` is interpreted here as
    the approximate *in-memory* size, in bytes, of each output partition
    (based on ``DataFrame.estimated_size()``); the on-disk parquet size
    will usually differ because of encoding and compression. Confirm this
    matches what callers expect.

    Args:
        directory_to_repartition: Directory holding the parquet files.
        target_size: Approximate number of in-memory bytes per output
            file. Must be positive.

    Raises:
        ValueError: If ``target_size`` is not positive.
    """
    if target_size <= 0:
        raise ValueError(f"target_size must be positive, got {target_size!r}")

    repart_dir = Path(directory_to_repartition)
    # Sorted so that row order in the output is reproducible; restricted to
    # regular files so a directory named "*.parquet" is never unlinked.
    old_paths = sorted(
        entry
        for entry in repart_dir.iterdir()
        if entry.suffix == '.parquet' and entry.is_file()
    )
    if not old_paths:
        # Nothing to do; pl.concat() rejects an empty list.
        return

    # The whole dataset is held in memory at once; that may or may not be
    # tolerable depending on its size.
    big_frame = pl.concat([pl.read_parquet(path) for path in old_paths])

    total_rows = big_frame.height
    total_bytes = big_frame.estimated_size()
    if total_rows == 0 or total_bytes == 0:
        # Keep one (empty) file so the schema survives the rewrite.
        new_frames = [big_frame]
    else:
        # Convert the byte target into a row count using the average row
        # size, clamped to the range [1, total_rows].
        rows_per_file = (total_rows * target_size) // total_bytes
        rows_per_file = min(max(1, rows_per_file), total_rows)
        new_frames = big_frame.iter_slices(n_rows=rows_per_file)

    # All new files are written before any old file is deleted, so a failed
    # write never loses data (it can leave duplicates behind instead).
    for frame in new_frames:
        frame.write_parquet(repart_dir / f"{uuid.uuid4()}.parquet")

    for old in old_paths:
        # Best effort: the file may already have been removed by someone else.
        old.unlink(missing_ok=True)
Подробнее здесь: https://stackoverflow.com/questions/750 ... le-storage