Код: Выделить всё
import polars as pl
import requests
from pathlib import Path
# Download an NDJSON sample, cache it on disk, then print its "html" column
# and export that column to a plain-text file with Polars.
url = "https://raw.githubusercontent.com/leanhdung1994/files/main/processedStep1_enwiktionary_namespace_0_43.ndjson"
workingDir = r"E:\Personal Projects\tmp\tarFiles"
outNdjson = Path(workingDir, "wiktionary.ndjson")
outTxt = Path(workingDir, "wiktionary.txt")

# Download. The timeout keeps a stalled connection from hanging the script
# indefinitely; raise_for_status() turns HTTP 4xx/5xx into an exception.
resp = requests.get(url, timeout=60)
resp.raise_for_status()

# Save the raw response bytes so Polars can scan the file from disk.
with open(outNdjson, "wb") as f:
    f.write(resp.content)

# Read with Polars (lazy scan; nothing is loaded until collect()/sink).
df = pl.scan_ndjson(outNdjson)
print(df.select("html").collect())

# quote_style="never" writes each string verbatim instead of wrapping it in
# double quotes and doubling embedded quotes, which is what the default CSV
# quoting does for values containing quotes, separators or newlines.
# NOTE(review): with quoting disabled, a value that itself contains a newline
# will span several lines in the output file - confirm that is acceptable.
df.select("html").sink_csv(outTxt, include_header=False, quote_style="never")
Код: Выделить всё
shape: (23, 1)
┌─────────────────────────────────┐
│ html                            │
│ ---                             │
│ str                             │
╞═════════════════════════════════╡
│ playabilities
Подробнее здесь: [url]https://stackoverflow.com/questions/79829204/polars-how-to-write-a-column-of-strings-into-a-txt-file-without-escaping[/url]
Мобильная версия