Snowflake SQL:
Код: Выделить всё
WITH pat_base AS (
SELECT * FROM MOD_DB.TEST.PATIENT_TABLE
),
pat_id AS (
SELECT
A.PATIENT_SURRG_ID,
A.SELECTED_DATA_SOURCE_TRACK_ID_REF,
f.value:value::string AS “PID.PID.3.CX.1_Patient_Identifier_ID1”,
A.FHIR_BIRTH_DATE AS “PID.PID.7_Date_of_Birth”
FROM pat_base A,
LATERAL FLATTEN(input => A.FHIR_IDENTIFIER) f
QUALIFY ROW_NUMBER() OVER (PARTITION BY A.PATIENT_SURRG_ID ORDER BY f.index) = 1
),
pat_nm AS (
SELECT
A.PATIENT_SURRG_ID,
A.SELECTED_DATA_SOURCE_TRACK_ID_REF,
f1.value:given::STRING AS “PID.PID.5.XPN.1.FN.1_Patient_Last_Name”
FROM pat_base A,
LATERAL FLATTEN(input => A.FHIR_NAME) f1
QUALIFY ROW_NUMBER() OVER (PARTITION BY A.PATIENT_SURRG_ID ORDER BY f1.index) = 1
),
pat AS (
SELECT
b.PATIENT_SURRG_ID,
b.SELECTED_DATA_SOURCE_TRACK_ID_REF,
id.“PID.PID.3.CX.1_Patient_Identifier_ID1”,
“PID.PID.7_Date_of_Birth”,
nm.“PID.PID.5.XPN.1.FN.1_Patient_Last_Name”
FROM pat_base b
LEFT JOIN pat_id id ON b.PATIENT_SURRG_ID = id.PATIENT_SURRG_ID
LEFT JOIN pat_nm nm ON b.PATIENT_SURRG_ID = nm.PATIENT_SURRG_ID
) SELECT TOP 1000 * FROM pat
Код: Выделить всё
pat_base = session.table(f"MOD_DB.TEST.PATIENT_TABLE")
pat_id = (pat_base.alias(“A”)
.join_table_function(“flatten”, F.col(“FHIR_IDENTIFIER”))
.select(
F.col(“PATIENT_SURRG_ID”),
F.col(“SELECTED_DATA_SOURCE_TRACK_ID_REF”),
F.col(“VALUE”)[“value”].cast(“string”).alias(“PID.PID.3.CX.1_Patient_Identifier_ID1”),
F.col(“FHIR_BIRTH_DATE”).alias(“PID.PID.7_Date_of_Birth”),
F.col(“INDEX”).alias(“f_index”)
)
.with_column(“rn”, F.row_number().over(
Window.partition_by(F.col(“PATIENT_SURRG_ID”)).order_by(F.col(“f_index”))
))
.filter(F.col(“rn”) == 1)
.drop(“rn”, “f_index”))
pat_nm = (pat_base.alias(“A”)
.join_table_function(“flatten”, F.col(“FHIR_NAME”))
.select(
F.col(“PATIENT_SURRG_ID”),
F.col(“SELECTED_DATA_SOURCE_TRACK_ID_REF”),
F.col(“VALUE”)[“given”].cast(“string”).alias(“PID.PID.5.XPN.1.FN.1_Patient_Last_Name”),
F.col(“INDEX”).alias(“f1_index”)
)
.with_column(“rn”, F.row_number().over(
Window.partition_by(F.col(“PATIENT_SURRG_ID”)).order_by(F.col(“f1_index”))
))
.filter(F.col(“rn”) == 1)
.drop(“rn”, “f1_index”))
pat = (pat_base.join(pat_id, pat_base[“PATIENT_SURRG_ID”] == pat_id[“PATIENT_SURRG_ID”], “left”)
.join(pat_nm, pat_base[“PATIENT_SURRG_ID”] == pat_nm[“PATIENT_SURRG_ID”], “left”)
.select(
pat_base[“PATIENT_SURRG_ID”].alias(“PATIENT_SURRG_ID”),
pat_base[“SELECTED_DATA_SOURCE_TRACK_ID_REF”].alias(“SELECTED_DATA_SOURCE_TRACK_ID_REF”),
pat_id.col(“PID.PID.3.CX.1_Patient_Identifier_ID1”),
pat_id.col(“PID.PID.7_Date_of_Birth”),
pat_nm.col(“PID.PID.5.XPN.1.FN.1_Patient_Last_Name”)
))
Код: Выделить всё
GlobalStats:
partitionsTotal=3691
partitionsAssigned=3691
bytesAssigned=10954727936
Operations:
1:0 ->Result PATIENT_TABLE.PATIENT_SURRG_ID, PATIENT_TABLE.SELECTED_DATA_SOURCE_TRACK_ID_REF, SNOWPARK_RIGHT.“PID.PID.3.CX.1_Patient_Identifier_ID1”, SNOWPARK_RIGHT.“PID.PID.7_Date_of_Birth”, SNOWPARK_RIGHT.“PID.PID.5.XPN.1.FN.1_Patient_Last_Name”
1:1 ->LeftOuterJoin joinKey: (SNOWPARK_RIGHT.“r_0001_PATIENT_SURRG_ID” = PATIENT_TABLE.PATIENT_SURRG_ID)
1:2 ->Filter ROW_NUMBER() OVER (PARTITION BY PATIENT_TABLE.PATIENT_SURRG_ID ORDER BY T_RIGHT.INDEX ASC NULLS FIRST) = 1
1:3 ->WindowFunction ROW_NUMBER() OVER (PARTITION BY PATIENT_TABLE.PATIENT_SURRG_ID ORDER BY T_RIGHT.INDEX ASC NULLS FIRST)
1:4 ->Flatten T_LEFT.FHIR_IDENTIFIER
1:5 ->Filter PATIENT_TABLE.PATIENT_SURRG_ID IS NOT NULL
1:6 ->TableScan MOD_DB.TEST.PATIENT_TABLE PATIENT_SURRG_ID, FHIR_IDENTIFIER, FHIR_BIRTH_DATE {partitionsTotal=3691, partitionsAssigned=3691, bytesAssigned=10954727936}
1:7 ->LeftOuterJoin joinKey: (SNOWPARK_RIGHT.PATIENT_SURRG_ID = PATIENT_TABLE.PATIENT_SURRG_ID)
1:8 ->Filter ROW_NUMBER() OVER (PARTITION BY PATIENT_TABLE.PATIENT_SURRG_ID ORDER BY T_RIGHT.INDEX ASC NULLS FIRST) = 1
1:9 ->WindowFunction ROW_NUMBER() OVER (PARTITION BY PATIENT_TABLE.PATIENT_SURRG_ID ORDER BY T_RIGHT.INDEX ASC NULLS FIRST)
1:10 ->Flatten T_LEFT.FHIR_NAME
1:11 ->Filter PATIENT_TABLE.PATIENT_SURRG_ID IS NOT NULL
1:12 ->TableScan MOD_DB.TEST.PATIENT_TABLE PATIENT_SURRG_ID, FHIR_NAME {partitionsTotal=3691, partitionsAssigned=3691, bytesAssigned=10954727936}
1:13 ->TableScan MOD_DB.TEST.PATIENT_TABLE PATIENT_SURRG_ID, SELECTED_DATA_SOURCE_TRACK_ID_REF {partitionsTotal=3691, partitionsAssigned=3691, bytesAssigned=10954727936}
None
Код: Выделить всё
Logical Execution Plan:
GlobalStats:
partitionsTotal=3691
partitionsAssigned=3691
bytesAssigned=10954727936
Operations:
1:0 ->Result B.PATIENT_SURRG_ID, B.SELECTED_DATA_SOURCE_TRACK_ID_REF, SYS_VW.“PID.PID.3.CX.1_Patient_Identifier_ID1_0”, SYS_VW.“PID.PID.7_Date_of_Birth_1”, SYS_VW.“PID.PID.5.XPN.1.FN.1_Patient_Last_Name_0”
1:1 ->LeftOuterJoin joinKey: (SYS_VW.PATIENT_SURRG_ID_2 = B.PATIENT_SURRG_ID)
1:2 ->Filter ROW_NUMBER() OVER (PARTITION BY A.PATIENT_SURRG_ID ORDER BY F.INDEX ASC NULLS LAST) = 1
1:3 ->WindowFunction ROW_NUMBER() OVER (PARTITION BY A.PATIENT_SURRG_ID ORDER BY F.INDEX ASC NULLS LAST)
1:4 ->Flatten SYS_VW.FHIR_IDENTIFIER_2
1:5 ->WithReference A
1:6 ->Filter A.PATIENT_SURRG_ID IS NOT NULL
1:7 ->WithClause PAT_BASE
1:8 ->TableScan MOD_DB.TEST.PATIENT_TABLE PATIENT_SURRG_ID, SELECTED_DATA_SOURCE_TRACK_ID_REF, FHIR_IDENTIFIER, FHIR_NAME, FHIR_BIRTH_DATE {partitionsTotal=3691, partitionsAssigned=3691, bytesAssigned=10954727936}
1:9 ->LeftOuterJoin joinKey: (SYS_VW.PATIENT_SURRG_ID_1 = B.PATIENT_SURRG_ID)
1:10 ->Filter ROW_NUMBER() OVER (PARTITION BY A.PATIENT_SURRG_ID ORDER BY F1.INDEX ASC NULLS LAST) = 1
1:11 ->WindowFunction ROW_NUMBER() OVER (PARTITION BY A.PATIENT_SURRG_ID ORDER BY F1.INDEX ASC NULLS LAST)
1:12 ->Flatten SYS_VW.FHIR_NAME_1
1:13 ->WithReference A
1:14 ->Filter A.PATIENT_SURRG_ID IS NOT NULL
1:15 ->WithReference B
None
Также сканированные байты различаются:
Код: Выделить всё
Bytes scanned(raw SQL): 4.74GB
Bytes scanned(Snowpark): 5.79GB
“SELECTED_DATA_SOURCE_TRACK_SYSTEM_REF” AS “SELECTED_DATA_SOURCE_TRACK_SYSTEM_REF”,
“l_0020_SELECTED_DATA_SOURCE_TRACK_ID_REF” AS “l_0020_SELECTED_DATA_SOURCE_TRACK_ID_REF”,
“SELECTED_DATA_SOURCE_ID_REF” AS “SELECTED_DATA_SOURCE_ID_REF”,
“SELECTED_DATA_SOURCE_NAME_REF” AS “SELECTED_DATA_SOURCE_NAME_REF”,
“SELECTED_DATA_SOURCE_TENANT” AS “SELECTED_DATA_SOURCE_TENANT”,
“SELECTED_BUNDLE_TRACK_SYSTEM_REF” AS “SELECTED_BUNDLE_TRACK_SYSTEM_REF”,
“SELECTED_BUNDLE_TRACK_ID_REF” AS “SELECTED_BUNDLE_TRACK_ID_REF”,
“SELECTED_BUNDLE_ID_SYSTEM_REF” AS “SELECTED_BUNDLE_ID_SYSTEM_REF”,
“SELECTED_FHIR_BUNDLE_ID_REF” AS “SELECTED_FHIR_BUNDLE_ID_REF”,
“SELECTED_BUNDLE_CLINICAL_DATA_TYPE_REF” AS “SELECTED_BUNDLE_CLINICAL_DATA_TYPE_REF”,
“SELECTED_PATIENT_RESOURCE_ID_SYSTEM” AS “SELECTED_PATIENT_RESOURCE_ID_SYSTEM”,
“SELECTED_FHIR_PATIENT_RESOURCE_ID” AS “SELECTED_FHIR_PATIENT_RESOURCE_ID”,
“FHIR_IDENTIFIER” AS “FHIR_IDENTIFIER”,
“FHIR_ACTIVE” AS “FHIR_ACTIVE”,
“FHIR_NAME” AS “FHIR_NAME”,
“FHIR_TELECOM” AS “FHIR_TELECOM”,
“FHIR_GENDER” AS “FHIR_GENDER”,
“FHIR_BIRTH_DATE” AS “FHIR_BIRTH_DATE”,
“FHIR_DECEASED_BOOLEAN” AS “FHIR_DECEASED_BOOLEAN”,
“FHIR_DECEASED_DTM” AS “FHIR_DECEASED_DTM”,
“FHIR_ADDRESS” AS “FHIR_ADDRESS”,
“FHIR_MARITAL_STATUS” AS “FHIR_MARITAL_STATUS”,
“FHIR_MULTIPLE_BIRTH_BOOLEAN” AS “FHIR_MULTIPLE_BIRTH_BOOLEAN”,
“FHIR_MULTIPLE_BIRTH_INTEGER” AS “FHIR_MULTIPLE_BIRTH_INTEGER”,
“FHIR_PHOTO” AS “FHIR_PHOTO”,
“FHIR_CONTACT” AS “FHIR_CONTACT”,
“FHIR_COMMUNICATION” AS “FHIR_COMMUNICATION”,
“FHIR_GENERAL_PRACTITIONER” AS “FHIR_GENERAL_PRACTITIONER”,
“FHIR_MANAGING_ORGANIZATION” AS “FHIR_MANAGING_ORGANIZATION”,
“FHIR_LINK” AS “FHIR_LINK”,
“FHIR_EXTENSION” AS “FHIR_EXTENSION”,
“STATUS” AS “STATUS”,
“VALIDATION” AS “VALIDATION”,
“HISTORY_FLAG” AS “HISTORY_FLAG”,
“DELETE_FLAG” AS “DELETE_FLAG”,
“DATA_QUALITY_DSC” AS “DATA_QUALITY_DSC”,
“FDTN_ROW_INSERT_TS” AS “FDTN_ROW_INSERT_TS”,
“FDTN_ROW_UPDATE_TS” AS “FDTN_ROW_UPDATE_TS”,
“r_0021_PATIENT_SURRG_ID” AS “r_0021_PATIENT_SURRG_ID”,
“r_0021_SELECTED_DATA_SOURCE_TRACK_ID_REF” AS “r_0021_SELECTED_DATA_SOURCE_TRACK_ID_REF”,
“PID.PID.3.CX.1_Patient_Identifier_ID1” AS “PID.PID.3.CX.1_Patient_Identifier_ID1”,
“PID.PID.7_Date_of_Birth” AS “PID.PID.7_Date_of_Birth”
FROM (
SELECT *
FROM (
(
SELECT
“PATIENT_SURRG_ID” AS “l_0020_PATIENT_SURRG_ID”,
“SUBMIT_IDENTIFIER_SYSTEM_REF” AS “SUBMIT_IDENTIFIER_SYSTEM_REF”,
“SUBMIT_IDENTIFIER_TYPE_REF” AS “SUBMIT_IDENTIFIER_TYPE_REF”,
“SUBMIT_IDENTIFIER_VALUE_REF” AS “SUBMIT_IDENTIFIER_VALUE_REF”,
“SUBMIT_NAME_REF” AS “SUBMIT_NAME_REF”,
“SELECTED_DATA_SOURCE_TRACK_SYSTEM_REF” AS “SELECTED_DATA_SOURCE_TRACK_SYSTEM_REF”,
“SELECTED_DATA_SOURCE_TRACK_ID_REF” AS “l_0020_SELECTED_DATA_SOURCE_TRACK_ID_REF”,
“SELECTED_DATA_SOURCE_ID_REF” AS “SELECTED_DATA_SOURCE_ID_REF”,
“SELECTED_DATA_SOURCE_NAME_REF” AS “SELECTED_DATA_SOURCE_NAME_REF”,
“SELECTED_DATA_SOURCE_TENANT” AS “SELECTED_DATA_SOURCE_TENANT”,
“SELECTED_BUNDLE_TRACK_SYSTEM_REF” AS “SELECTED_BUNDLE_TRACK_SYSTEM_REF”,
“SELECTED_BUNDLE_TRACK_ID_REF” AS “SELECTED_BUNDLE_TRACK_ID_REF”,
“SELECTED_BUNDLE_ID_SYSTEM_REF” AS “SELECTED_BUNDLE_ID_SYSTEM_REF”,
“SELECTED_FHIR_BUNDLE_ID_REF” AS “SELECTED_FHIR_BUNDLE_ID_REF”,
“SELECTED_BUNDLE_CLINICAL_DATA_TYPE_REF” AS “SELECTED_BUNDLE_CLINICAL_DATA_TYPE_REF”,
“SELECTED_PATIENT_RESOURCE_ID_SYSTEM” AS “SELECTED_PATIENT_RESOURCE_ID_SYSTEM”,
“SELECTED_FHIR_PATIENT_RESOURCE_ID” AS “SELECTED_FHIR_PATIENT_RESOURCE_ID”,
“FHIR_IDENTIFIER” AS “FHIR_IDENTIFIER”,
“FHIR_ACTIVE” AS “FHIR_ACTIVE”,
“FHIR_NAME” AS “FHIR_NAME”,
“FHIR_TELECOM” AS “FHIR_TELECOM”,
“FHIR_GENDER” AS “FHIR_GENDER”,
“FHIR_BIRTH_DATE” AS “FHIR_BIRTH_DATE”,
“FHIR_DECEASED_BOOLEAN” AS “FHIR_DECEASED_BOOLEAN”,
“FHIR_DECEASED_DTM” AS “FHIR_DECEASED_DTM”,
“FHIR_ADDRESS” AS “FHIR_ADDRESS”,
“FHIR_MARITAL_STATUS” AS “FHIR_MARITAL_STATUS”,
“FHIR_MULTIPLE_BIRTH_BOOLEAN” AS “FHIR_MULTIPLE_BIRTH_BOOLEAN”,
“FHIR_MULTIPLE_BIRTH_INTEGER” AS “FHIR_MULTIPLE_BIRTH_INTEGER”,
“FHIR_PHOTO” AS “FHIR_PHOTO”,
“FHIR_CONTACT” AS “FHIR_CONTACT”,
“FHIR_COMMUNICATION” AS “FHIR_COMMUNICATION”,
“FHIR_GENERAL_PRACTITIONER” AS “FHIR_GENERAL_PRACTITIONER”,
“FHIR_MANAGING_ORGANIZATION” AS “FHIR_MANAGING_ORGANIZATION”,
“FHIR_LINK” AS “FHIR_LINK”,
“FHIR_EXTENSION” AS “FHIR_EXTENSION”,
“STATUS” AS “STATUS”,
“VALIDATION” AS “VALIDATION”,
“HISTORY_FLAG” AS “HISTORY_FLAG”,
“DELETE_FLAG” AS “DELETE_FLAG”,
“DATA_QUALITY_DSC” AS “DATA_QUALITY_DSC”,
“FDTN_ROW_INSERT_TS” AS “FDTN_ROW_INSERT_TS”,
“FDTN_ROW_UPDATE_TS” AS “FDTN_ROW_UPDATE_TS”
FROM MOD_DB.TEST.PATIENT_TABLE
) AS SNOWPARK_LEFT
LEFT OUTER JOIN
(
SELECT
“PATIENT_SURRG_ID” AS “r_0021_PATIENT_SURRG_ID”,
“SELECTED_DATA_SOURCE_TRACK_ID_REF” AS “r_0021_SELECTED_DATA_SOURCE_TRACK_ID_REF”,
“PID.PID.3.CX.1_Patient_Identifier_ID1” AS “PID.PID.3.CX.1_Patient_Identifier_ID1”,
“PID.PID.7_Date_of_Birth” AS “PID.PID.7_Date_of_Birth”
FROM (
SELECT
“PATIENT_SURRG_ID”,
“SELECTED_DATA_SOURCE_TRACK_ID_REF”,
“PID.PID.3.CX.1_Patient_Identifier_ID1”,
“PID.PID.7_Date_of_Birth”,
“F_INDEX”,
row_number() OVER (PARTITION BY “PATIENT_SURRG_ID” ORDER BY
“F_INDEX” ASC NULLS FIRST ) AS “RN”
FROM (
SELECT
“PATIENT_SURRG_ID”,
“SELECTED_DATA_SOURCE_TRACK_ID_REF”,
CAST (“VALUE”[‘value’] AS STRING) AS “PID.PID.3.CX.1_Patient_Identifier_ID1”,
“FHIR_BIRTH_DATE” AS “PID.PID.7_Date_of_Birth”,
“INDEX” AS “F_INDEX”
FROM (
SELECT
T_LEFT.,
T_RIGHT.
FROM (
SELECT * FROM MOD_DB.TEST.PATIENT_TABLE
) AS T_LEFT
JOIN
TABLE (flatten(“FHIR_IDENTIFIER”) ) AS T_RIGHT
)
)
)
WHERE
(“RN” = 1)
) AS SNOWPARK_RIGHT
ON (“l_0020_PATIENT_SURRG_ID” = “r_0021_PATIENT_SURRG_ID”)
)
)
) AS SNOWPARK_LEFT
LEFT OUTER JOIN
(
SELECT
“PATIENT_SURRG_ID” AS “PATIENT_SURRG_ID”,
“SELECTED_DATA_SOURCE_TRACK_ID_REF” AS “SELECTED_DATA_SOURCE_TRACK_ID_REF”,
“PID.PID.5.XPN.1.FN.1_Patient_Last_Name” AS “PID.PID.5.XPN.1.FN.1_Patient_Last_Name”
FROM (
SELECT
“PATIENT_SURRG_ID”,
“SELECTED_DATA_SOURCE_TRACK_ID_REF”,
“PID.PID.5.XPN.1.FN.1_Patient_Last_Name”,
“F1_INDEX”,
row_number() OVER (PARTITION BY “PATIENT_SURRG_ID” ORDER BY
“F1_INDEX” ASC NULLS FIRST ) AS “RN”
FROM (
SELECT
“PATIENT_SURRG_ID”,
“SELECTED_DATA_SOURCE_TRACK_ID_REF”,
CAST (“VALUE”[‘given’] AS STRING) AS “PID.PID.5.XPN.1.FN.1_Patient_Last_Name”,
“INDEX” AS “F1_INDEX”
FROM (
SELECT
T_LEFT.,
T_RIGHT.
FROM (
SELECT * FROM MOD_DB.TEST.PATIENT_TABLE
) AS T_LEFT
JOIN
TABLE (flatten(“FHIR_NAME”) ) AS T_RIGHT
)
)
)
WHERE
(“RN” = 1)
) AS SNOWPARK_RIGHT
ON (“l_0020_PATIENT_SURRG_ID” = “PATIENT_SURRG_ID”)
)
)
LIMIT 10
Итак, мои вопросы следующие:
- Как мне сгенерировать предложение With в коде Snowpark?
- Как мне согласовать мой план запроса с необработанным SQL? Теоретически это должно быть возможно, поскольку все, что я делаю в SQL, должно быть достижимо с той же производительностью в Snowpark, если нет лучше.
Подробнее здесь: https://stackoverflow.com/questions/798 ... vs-snowsql
Мобильная версия