Код: Выделить всё
from pyspark.sql import SparkSession
# Target names. `database_name`, `temporary_table` and `table_name` are not
# defined in this snippet and must be supplied by the surrounding code.
DB = database_name
TMP_TBL = temporary_table
TBL = table_name

# Reuse the active Spark session, or start one if none exists.
sesh = SparkSession.builder.getOrCreate()

# Convert `df` (defined outside this snippet) into a Spark DataFrame.
df_spark = sesh.createDataFrame(df)

# Register the data under TMP_TBL, the view name the INSERT statement selects
# from. The previous `TMP_TABLE` was never defined and raised NameError.
df_spark.createOrReplaceTempView(TMP_TBL)
# SQL text for the three statements. Each string starts and ends with a
# newline and is assembled from adjacent literals, one SQL clause per line.

# Create the database under /tmp/<DB> unless it already exists.
create_db_query = (
    "\n"
    f"CREATE DATABASE IF NOT EXISTS {DB}\n"
    'COMMENT "This is a database"\n'
    f'LOCATION "/tmp/{DB}"\n'
)

# Create the Delta table under /tmp/<DB>/<TBL> unless it already exists, with
# the optimizeWrite and autoCompact auto-optimize properties set to true.
# NOTE(review): no column list is given; presumably this relies on Delta data
# already existing at the location -- confirm.
create_table_query = (
    "\n"
    f"CREATE TABLE IF NOT EXISTS {DB}.{TBL}\n"
    "USING DELTA\n"
    "TBLPROPERTIES (delta.autoOptimize.optimizeWrite = true, "
    "delta.autoOptimize.autoCompact = true)\n"
    'COMMENT "This is a table"\n'
    f'LOCATION "/tmp/{DB}/{TBL}";\n'
)

# Append every row of the temporary view to the target table.
insert_query = (
    "\n"
    f"INSERT INTO TABLE {DB}.{TBL} select * from {TMP_TBL}\n"
)
# Execute the statements in order: create the database, then the table, then
# load the rows. The DataFrames returned by sesh.sql() are discarded.
for _statement in (create_db_query, create_table_query, insert_query):
    sesh.sql(_statement)
Подробнее здесь: https://stackoverflow.com/questions/752 ... -spark-sql