From 8e0ade08b0a3138b11d800c1df6e7219fd9a9b2d Mon Sep 17 00:00:00 2001
From: Jenny Medina
Date: Tue, 12 Nov 2024 09:56:15 -0500
Subject: [PATCH 1/8] initialize table

---
Review note: creates the dynamic table from two CTEs (latest timestamp per
id, then a join back to the snapshots). The ROW_NUMBER() window in this
revision partitions by snapshot_timestamp; PATCH 3/8 changes it to id.

 ...V2.21.0__verificationsubmission_latest.sql | 38 +++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql

diff --git a/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql b/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql
new file mode 100644
index 00000000..8552c5c7
--- /dev/null
+++ b/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql
@@ -0,0 +1,38 @@
+use schema {{database_name}}.synapse; --noqa: JJ01,PRS,TMP,CP01
+
+CREATE OR REPLACE DYNAMIC TABLE VERIFICATIONSUBMISSION_LATEST
+    TARGET_LAG = '1 day'
+    WAREHOUSE = compute_xsmall
+    AS
+    WITH latest_rows AS (
+        SELECT
+            verificationsubmissionsnapshots.id AS the_id,
+            MAX(snapshot_timestamp) AS latest_timestamp
+        FROM
+            verificationsubmissionsnapshots
+        GROUP BY
+            verificationsubmissionsnapshots.id
+    ),
+    latest_unique_rows AS (
+        SELECT
+            verificationsubmissionsnapshots.*,
+            ROW_NUMBER() OVER (
+                PARTITION BY snapshot_timestamp
+                ORDER BY snapshot_timestamp DESC
+            ) AS row_num
+        FROM
+            verificationsubmissionsnapshots
+        JOIN
+            latest_rows
+        ON
+            verificationsubmissionsnapshots.id = latest_rows.the_id
+            AND verificationsubmissionsnapshots.snapshot_timestamp = latest_rows.latest_timestamp
+    )
+    SELECT
+        *
+    FROM
+        latest_unique_rows
+    WHERE
+        row_num = 1
+    ORDER BY
+        latest_unique_rows.id;

From f7b8ea619f4ac0d4c29cb35524f29dbca2eaf95d Mon Sep 17 00:00:00 2001
From: Jenny Medina
Date: Tue, 12 Nov 2024 10:08:04 -0500
Subject: [PATCH 2/8] fix syntax, exclude row_num from final table

---
Review note: renames the CTE key alias (the_id -> latest_id), groups by that
alias, and drops the helper row_num column from the final projection via
`* EXCLUDE (row_num)`.

 .../V2.21.0__verificationsubmission_latest.sql | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql b/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql
index 8552c5c7..fb11e2b8 100644
--- a/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql
+++ b/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql
@@ -6,12 +6,12 @@ CREATE OR REPLACE DYNAMIC TABLE VERIFICATIONSUBMISSION_LATEST
     AS
     WITH latest_rows AS (
         SELECT
-            verificationsubmissionsnapshots.id AS the_id,
+            id AS latest_id,
             MAX(snapshot_timestamp) AS latest_timestamp
         FROM
             verificationsubmissionsnapshots
         GROUP BY
-            verificationsubmissionsnapshots.id
+            latest_id
     ),
     latest_unique_rows AS (
         SELECT
@@ -25,11 +25,11 @@ CREATE OR REPLACE DYNAMIC TABLE VERIFICATIONSUBMISSION_LATEST
         JOIN
             latest_rows
         ON
-            verificationsubmissionsnapshots.id = latest_rows.the_id
+            verificationsubmissionsnapshots.id = latest_rows.latest_id
             AND verificationsubmissionsnapshots.snapshot_timestamp = latest_rows.latest_timestamp
     )
     SELECT
-        *
+        * EXCLUDE (row_num)
     FROM
         latest_unique_rows
     WHERE

From cf0b89bee015c70a452a211f59a869b2f48d1305 Mon Sep 17 00:00:00 2001
From: Jenny Medina
Date: Tue, 12 Nov 2024 11:18:27 -0500
Subject: [PATCH 3/8] typo

---
Review note: two changes in this patch. The statement becomes
CREATE ... IF NOT EXISTS instead of CREATE OR REPLACE, and the ROW_NUMBER()
window is partitioned by id rather than snapshot_timestamp.

 .../dynamic_tables/V2.21.0__verificationsubmission_latest.sql | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql b/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql
index fb11e2b8..83cfef29 100644
--- a/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql
+++ b/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql
@@ -1,6 +1,6 @@
 use schema {{database_name}}.synapse; --noqa: JJ01,PRS,TMP,CP01
 
-CREATE OR REPLACE DYNAMIC TABLE VERIFICATIONSUBMISSION_LATEST
+CREATE DYNAMIC TABLE IF NOT EXISTS VERIFICATIONSUBMISSION_LATEST
     TARGET_LAG = '1 day'
     WAREHOUSE = compute_xsmall
     AS
@@ -17,7 +17,7 @@ CREATE OR REPLACE DYNAMIC TABLE VERIFICATIONSUBMISSION_LATEST
         SELECT
             verificationsubmissionsnapshots.*,
             ROW_NUMBER() OVER (
-                PARTITION BY snapshot_timestamp
+                PARTITION BY id
                 ORDER BY snapshot_timestamp DESC
             ) AS row_num
         FROM

From 62b49db111a7ebc92d3f0de77ef576949129c95a Mon Sep 17 00:00:00 2001
From: Jenny Medina
Date: Tue, 12 Nov 2024 11:59:48 -0500
Subject: [PATCH 4/8] use QUALIFY instead of the WHERE clause

---
Review note: the `row_num = 1` filter moves from the outer WHERE into a
QUALIFY on the window function inside the CTE, so the row_num column and the
`EXCLUDE (row_num)` projection are removed.

 .../V2.21.0__verificationsubmission_latest.sql | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql b/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql
index 83cfef29..caf43924 100644
--- a/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql
+++ b/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql
@@ -16,10 +16,6 @@ CREATE DYNAMIC TABLE IF NOT EXISTS VERIFICATIONSUBMISSION_LATEST
     latest_unique_rows AS (
         SELECT
             verificationsubmissionsnapshots.*,
-            ROW_NUMBER() OVER (
-                PARTITION BY id
-                ORDER BY snapshot_timestamp DESC
-            ) AS row_num
         FROM
             verificationsubmissionsnapshots
         JOIN
@@ -27,12 +23,14 @@ CREATE DYNAMIC TABLE IF NOT EXISTS VERIFICATIONSUBMISSION_LATEST
         ON
             verificationsubmissionsnapshots.id = latest_rows.latest_id
             AND verificationsubmissionsnapshots.snapshot_timestamp = latest_rows.latest_timestamp
+        QUALIFY ROW_NUMBER() OVER (
+                PARTITION BY id
+                ORDER BY snapshot_timestamp DESC
+            ) = 1
     )
     SELECT
-        * EXCLUDE (row_num)
+        *
     FROM
         latest_unique_rows
-    WHERE
-        row_num = 1
     ORDER BY
         latest_unique_rows.id;

From 6df2f82224e9de2277173863f6334a11ca970e4c Mon Sep 17 00:00:00 2001
From: Jenny Medina
Date: Tue, 12 Nov 2024 12:04:32 -0500
Subject: [PATCH 5/8] specify order

---
Review note: spells out the sort direction (ASC) on the final ORDER BY.

 .../dynamic_tables/V2.21.0__verificationsubmission_latest.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql b/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql
index caf43924..68f6ac85 100644
--- a/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql
+++ b/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql
@@ -33,4 +33,4 @@ CREATE DYNAMIC TABLE IF NOT EXISTS VERIFICATIONSUBMISSION_LATEST
     FROM
         latest_unique_rows
     ORDER BY
-        latest_unique_rows.id;
+        latest_unique_rows.id ASC;

From 423133b82fe4c31a64bd25d533500b950ca6fe4f Mon Sep 17 00:00:00 2001
From: Jenny Medina
Date: Wed, 13 Nov 2024 10:31:19 -0500
Subject: [PATCH 6/8] simplify query

---
Review note: removes the latest_rows CTE and the join back to it; the
QUALIFY ROW_NUMBER() ... = 1 filter alone now keeps one row per id. The
source table is referenced by its fully qualified name under synapse_raw.

 .../V2.21.0__verificationsubmission_latest.sql | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql b/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql
index 68f6ac85..be44910a 100644
--- a/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql
+++ b/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql
@@ -4,25 +4,11 @@ CREATE DYNAMIC TABLE IF NOT EXISTS VERIFICATIONSUBMISSION_LATEST
     TARGET_LAG = '1 day'
     WAREHOUSE = compute_xsmall
     AS
-    WITH latest_rows AS (
-        SELECT
-            id AS latest_id,
-            MAX(snapshot_timestamp) AS latest_timestamp
-        FROM
-            verificationsubmissionsnapshots
-        GROUP BY
-            latest_id
-    ),
-    latest_unique_rows AS (
+    WITH latest_unique_rows AS (
         SELECT
             verificationsubmissionsnapshots.*,
         FROM
-            verificationsubmissionsnapshots
-        JOIN
-            latest_rows
-        ON
-            verificationsubmissionsnapshots.id = latest_rows.latest_id
-            AND verificationsubmissionsnapshots.snapshot_timestamp = latest_rows.latest_timestamp
+            {{database_name}}.synapse_raw.verificationsubmissionsnapshots --noqa: TMP
         QUALIFY ROW_NUMBER() OVER (
                 PARTITION BY id
                 ORDER BY snapshot_timestamp DESC

From 0fba27a378afdd1caad138fc2c305897fb23a26b Mon Sep 17 00:00:00 2001
From: Jenny Medina
Date: Wed, 13 Nov 2024 13:15:36 -0500
Subject: [PATCH 7/8] add change_timestamp as precaution

---
Review note: adds an explanatory SQL comment above the CTE and makes
change_timestamp the primary sort key of the dedup window, with
snapshot_timestamp as the secondary key.

 .../dynamic_tables/V2.21.0__verificationsubmission_latest.sql | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql b/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql
index be44910a..ffd2072c 100644
--- a/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql
+++ b/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql
@@ -4,6 +4,8 @@ CREATE DYNAMIC TABLE IF NOT EXISTS VERIFICATIONSUBMISSION_LATEST
     TARGET_LAG = '1 day'
     WAREHOUSE = compute_xsmall
     AS
+    -- We deduplicate simply by selecting the latest record for each
+    -- verification submission ID...
     WITH latest_unique_rows AS (
         SELECT
             verificationsubmissionsnapshots.*,
@@ -11,7 +13,7 @@ CREATE DYNAMIC TABLE IF NOT EXISTS VERIFICATIONSUBMISSION_LATEST
             {{database_name}}.synapse_raw.verificationsubmissionsnapshots --noqa: TMP
         QUALIFY ROW_NUMBER() OVER (
                 PARTITION BY id
-                ORDER BY snapshot_timestamp DESC
+                ORDER BY change_timestamp DESC, snapshot_timestamp DESC
             ) = 1
     )
     SELECT

From 005773630b36853fc96ea7c4d31647d0af5cd0b9 Mon Sep 17 00:00:00 2001
From: Jenny Medina
Date: Thu, 14 Nov 2024 12:48:35 -0500
Subject: [PATCH 8/8] 2 week window

---
Review note: restricts the dedup input to snapshots whose SNAPSHOT_TIMESTAMP
falls within the last 14 days of CURRENT_TIMESTAMP. An id with no snapshot in
that window is filtered out before the QUALIFY runs and so will not appear in
the resulting table -- confirm that this is the intended behaviour.

 .../dynamic_tables/V2.21.0__verificationsubmission_latest.sql | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql b/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql
index ffd2072c..e16b0b31 100644
--- a/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql
+++ b/synapse_data_warehouse/synapse/dynamic_tables/V2.21.0__verificationsubmission_latest.sql
@@ -11,6 +11,8 @@ CREATE DYNAMIC TABLE IF NOT EXISTS VERIFICATIONSUBMISSION_LATEST
             verificationsubmissionsnapshots.*,
         FROM
             {{database_name}}.synapse_raw.verificationsubmissionsnapshots --noqa: TMP
+        WHERE
+            SNAPSHOT_TIMESTAMP >= CURRENT_TIMESTAMP - INTERVAL '14 DAYS'
         QUALIFY ROW_NUMBER() OVER (
                 PARTITION BY id
                 ORDER BY change_timestamp DESC, snapshot_timestamp DESC