From acb0154db7c28ecaa99c0b3645ca75d5062192cc Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 9 Oct 2024 21:30:11 +0000 Subject: [PATCH 001/166] Initial test --- dbt/models/proximity/docs.md | 8 +++++ ...mity.dist_pin_to_traffic_daily_traffic.sql | 30 +++++++++++++++++++ dbt/models/spatial/docs.md | 8 +++++ dbt/models/spatial/schema.yml | 3 ++ 4 files changed, 49 insertions(+) create mode 100644 dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic.sql diff --git a/dbt/models/proximity/docs.md b/dbt/models/proximity/docs.md index 8faaa1b5d..55c5430e8 100644 --- a/dbt/models/proximity/docs.md +++ b/dbt/models/proximity/docs.md @@ -217,6 +217,14 @@ Distance from each PIN to the nearest sports stadium. **Primary Key**: `pin10`, `year` {% enddocs %} +# dist_pin_to_traffic_daily_traffic + +{% docs table_dist_pin_to_traffic_daily_traffic %} +Distance from each PIN to the valid value of daily traffic. + +**Primary Key**: `pin10`, `year` +{% enddocs %} + # dist_pin_to_university {% docs table_dist_pin_to_university %} diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic.sql new file mode 100644 index 000000000..6803d1e05 --- /dev/null +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic.sql @@ -0,0 +1,30 @@ +-- CTAS to create a table of distance to the nearest Metra route for each PIN +{{ + config( + materialized='table', + partitioned_by=['year'], + bucketed_by=['pin10'], + bucket_count=1 + ) +}} + +WITH traffic AS ( -- noqa: ST03 + SELECT * + FROM {{ source('spatial', 'traffic') }} + WHERE annual_traffic > 0 + AND annual_traffic IS NOT NULL +) + +SELECT + pcl.pin10, + ARBITRARY(xy.road_name) AS nearest_road_name, + ARBITRARY(xy.dist_ft) AS nearest_road_dist_ft, + ARBITRARY(xy.year) AS nearest_road_data_year, + ARBITRARY(xy.annual_traffic) AS nearest_road_annual_traffic, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +INNER JOIN ( 
{{ dist_to_nearest_geometry('traffic') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year +GROUP BY pcl.pin10, pcl.year diff --git a/dbt/models/spatial/docs.md b/dbt/models/spatial/docs.md index 096221bb8..cd3528c15 100644 --- a/dbt/models/spatial/docs.md +++ b/dbt/models/spatial/docs.md @@ -499,6 +499,14 @@ Includes townships within the City of Chicago, which are technically defunct. Dictionary to cleanup transit route and stop names. {% enddocs %} +# traffic + +{% docs table_traffic %} +Traffic data derived from Illinois Department of Transportation + +**Geometry:** `MULTILINESTRING` +{% enddocs %} + # transit_route {% docs table_transit_route %} diff --git a/dbt/models/spatial/schema.yml b/dbt/models/spatial/schema.yml index 0982b8106..513900073 100644 --- a/dbt/models/spatial/schema.yml +++ b/dbt/models/spatial/schema.yml @@ -174,6 +174,9 @@ sources: - name: township description: '{{ doc("table_township") }}' + - name: traffic + description: '{{ doc("table_traffic") }}' + - name: transit_dict description: '{{ doc("table_transit_dict") }}' From 0652b3a3639fdc3cc474957201b802c6fb2dfa50 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 9 Oct 2024 21:32:08 +0000 Subject: [PATCH 002/166] Sort headings --- dbt/models/spatial/docs.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dbt/models/spatial/docs.md b/dbt/models/spatial/docs.md index cd3528c15..a0d2cd9d9 100644 --- a/dbt/models/spatial/docs.md +++ b/dbt/models/spatial/docs.md @@ -493,12 +493,6 @@ Includes townships within the City of Chicago, which are technically defunct. **Geometry:** `MULTIPOLYGON` {% enddocs %} -# transit_dict - -{% docs table_transit_dict %} -Dictionary to cleanup transit route and stop names. 
-{% enddocs %} - # traffic {% docs table_traffic %} @@ -507,6 +501,12 @@ Traffic data derived from Illinois Department of Transportation **Geometry:** `MULTILINESTRING` {% enddocs %} +# transit_dict + +{% docs table_transit_dict %} +Dictionary to cleanup transit route and stop names. +{% enddocs %} + # transit_route {% docs table_transit_route %} From 5191e58c99a4741bb3b2b7262d05a325af338b6c Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 10 Oct 2024 14:27:43 +0000 Subject: [PATCH 003/166] New test --- ...mity.dist_pin_to_traffic_daily_traffic.sql | 4 +++ ...ist_pin_to_traffic_daily_traffic_Minor.sql | 31 +++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_Minor.sql diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic.sql index 6803d1e05..8aa822f25 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic.sql @@ -13,6 +13,10 @@ WITH traffic AS ( -- noqa: ST03 FROM {{ source('spatial', 'traffic') }} WHERE annual_traffic > 0 AND annual_traffic IS NOT NULL + AND ( + road_type = 'Interstate' + OR road_type = 'Freeway And Expressway' + ) ) SELECT diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_Minor.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_Minor.sql new file mode 100644 index 000000000..127016c05 --- /dev/null +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_Minor.sql @@ -0,0 +1,31 @@ +-- CTAS to create a table of distance to the nearest Metra route for each PIN +{{ + config( + materialized='table', + partitioned_by=['year'], + bucketed_by=['pin10'], + bucket_count=1 + ) +}} + +WITH traffic AS ( -- noqa: ST03 + SELECT * + FROM {{ source('spatial', 'traffic') }} + WHERE annual_traffic > 0 + AND annual_traffic IS 
NOT NULL + AND road_type = 'Minor Arterial' +) + +SELECT + pcl.pin10, + ARBITRARY(xy.road_name) AS nearest_road_name, + ARBITRARY(xy.dist_ft) AS nearest_road_dist_ft, + ARBITRARY(xy.year) AS nearest_road_data_year, + ARBITRARY(xy.annual_traffic) AS nearest_road_annual_traffic, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +INNER JOIN ( {{ dist_to_nearest_geometry('traffic') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year +GROUP BY pcl.pin10, pcl.year From c4807fb7897a064260ec1f2b697981c85dfa09af Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 10 Oct 2024 14:58:22 +0000 Subject: [PATCH 004/166] local --- ...ist_pin_to_traffic_daily_traffic_local.sql | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_local.sql diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_local.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_local.sql new file mode 100644 index 000000000..c586f328f --- /dev/null +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_local.sql @@ -0,0 +1,31 @@ +-- CTAS to create a table of distance to the nearest Metra route for each PIN +{{ + config( + materialized='table', + partitioned_by=['year'], + bucketed_by=['pin10'], + bucket_count=1 + ) +}} + +WITH traffic AS ( -- noqa: ST03 + SELECT * + FROM {{ source('spatial', 'traffic') }} + WHERE annual_traffic > 0 + AND annual_traffic IS NOT NULL + AND road_type = 'Local Road or Street' +) + +SELECT + pcl.pin10, + ARBITRARY(xy.road_name) AS nearest_road_name, + ARBITRARY(xy.dist_ft) AS nearest_road_dist_ft, + ARBITRARY(xy.year) AS nearest_road_data_year, + ARBITRARY(xy.annual_traffic) AS nearest_road_annual_traffic, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +INNER JOIN ( {{ dist_to_nearest_geometry('traffic') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + 
AND pcl.year = xy.pin_year +GROUP BY pcl.pin10, pcl.year From 6e2a088ec272f28fbf17109d3f2513b2b8d3434d Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 10 Oct 2024 15:00:49 +0000 Subject: [PATCH 005/166] surface width --- ...dist_pin_to_traffic_daily_traffic copy.sql | 32 +++++++++++++++++++ ...in_to_traffic_daily_traffic_Minor copy.sql | 29 +++++++++++++++++ ...in_to_traffic_daily_traffic_local copy.sql | 29 +++++++++++++++++ 3 files changed, 90 insertions(+) create mode 100644 dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic copy.sql create mode 100644 dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_Minor copy.sql create mode 100644 dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_local copy.sql diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic copy.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic copy.sql new file mode 100644 index 000000000..c64d9ea5a --- /dev/null +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic copy.sql @@ -0,0 +1,32 @@ +-- CTAS to create a table of distance to the nearest Metra route for each PIN +{{ + config( + materialized='table', + partitioned_by=['year'], + bucketed_by=['pin10'], + bucket_count=1 + ) +}} + +WITH traffic AS ( -- noqa: ST03 + SELECT * + FROM {{ source('spatial', 'traffic') }} + WHERE ( + road_type = 'Interstate' + OR road_type = 'Freeway And Expressway' + ) +) + +SELECT + pcl.pin10, + ARBITRARY(xy.road_name) AS nearest_road_name, + ARBITRARY(xy.dist_ft) AS nearest_road_dist_ft, + ARBITRARY(xy.year) AS nearest_road_data_year, + ARBITRARY(xy.surface_width) AS nearest_surface_width, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +INNER JOIN ( {{ dist_to_nearest_geometry('traffic') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year +GROUP BY pcl.pin10, pcl.year diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_Minor 
copy.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_Minor copy.sql new file mode 100644 index 000000000..56810b3a1 --- /dev/null +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_Minor copy.sql @@ -0,0 +1,29 @@ +-- CTAS to create a table of distance to the nearest Metra route for each PIN +{{ + config( + materialized='table', + partitioned_by=['year'], + bucketed_by=['pin10'], + bucket_count=1 + ) +}} + +WITH traffic AS ( -- noqa: ST03 + SELECT * + FROM {{ source('spatial', 'traffic') }} + WHERE road_type = 'Minor Arterial' +) + +SELECT + pcl.pin10, + ARBITRARY(xy.road_name) AS nearest_road_name, + ARBITRARY(xy.dist_ft) AS nearest_road_dist_ft, + ARBITRARY(xy.year) AS nearest_road_data_year, + ARBITRARY(xy.surface_width) AS nearest_surface_width, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +INNER JOIN ( {{ dist_to_nearest_geometry('traffic') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year +GROUP BY pcl.pin10, pcl.year diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_local copy.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_local copy.sql new file mode 100644 index 000000000..8ad53d3d3 --- /dev/null +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_local copy.sql @@ -0,0 +1,29 @@ +-- CTAS to create a table of distance to the nearest Metra route for each PIN +{{ + config( + materialized='table', + partitioned_by=['year'], + bucketed_by=['pin10'], + bucket_count=1 + ) +}} + +WITH traffic AS ( -- noqa: ST03 + SELECT * + FROM {{ source('spatial', 'traffic') }} + WHERE road_type = 'Local Road or Street' +) + +SELECT + pcl.pin10, + ARBITRARY(xy.road_name) AS nearest_road_name, + ARBITRARY(xy.dist_ft) AS nearest_road_dist_ft, + ARBITRARY(xy.year) AS nearest_road_data_year, + ARBITRARY(xy.surface_width) AS nearest_surface_width, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +INNER 
JOIN ( {{ dist_to_nearest_geometry('traffic') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year +GROUP BY pcl.pin10, pcl.year From ccc67fac0f77ad7dc722e8e3a53f0b05045a3e52 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 10 Oct 2024 16:08:49 +0000 Subject: [PATCH 006/166] More tests --- ... => proximity.dist_pin_to_traffic_width.sql} | 0 ...oximity.dist_pin_to_traffic_width_minor.sql} | 17 +++++++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) rename dbt/models/proximity/{proximity.dist_pin_to_traffic_daily_traffic_local copy.sql => proximity.dist_pin_to_traffic_width.sql} (100%) rename dbt/models/proximity/{proximity.dist_pin_to_traffic_daily_traffic_Minor copy.sql => proximity.dist_pin_to_traffic_width_minor.sql} (74%) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_local copy.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_width.sql similarity index 100% rename from dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_local copy.sql rename to dbt/models/proximity/proximity.dist_pin_to_traffic_width.sql diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_Minor copy.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_width_minor.sql similarity index 74% rename from dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_Minor copy.sql rename to dbt/models/proximity/proximity.dist_pin_to_traffic_width_minor.sql index 56810b3a1..7b4c49bf7 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_Minor copy.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_width_minor.sql @@ -1,17 +1,25 @@ --- CTAS to create a table of distance to the nearest Metra route for each PIN -{{ +{{ config( materialized='table', partitioned_by=['year'], bucketed_by=['pin10'], bucket_count=1 - ) + ) }} WITH traffic AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} WHERE road_type = 'Minor 
Arterial' +), + +distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435, + pin10, + year + FROM {{ source('spatial', 'parcel') }} ) SELECT @@ -20,8 +28,9 @@ SELECT ARBITRARY(xy.dist_ft) AS nearest_road_dist_ft, ARBITRARY(xy.year) AS nearest_road_data_year, ARBITRARY(xy.surface_width) AS nearest_surface_width, + ARBITRARY(xy.surface_type) AS nearest_surface_type, pcl.year -FROM {{ source('spatial', 'parcel') }} AS pcl +FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 From 6a9c130a58b33dd5a35587c9f61658bd7c31207b Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 10 Oct 2024 18:42:56 +0000 Subject: [PATCH 007/166] Query improvements --- ...ist_pin_to_traffic_daily_traffic_minor.sql | 31 +++++++++++++++++++ ...ity.dist_pin_to_traffic_width_highway.sql} | 0 2 files changed, 31 insertions(+) create mode 100644 dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_minor.sql rename dbt/models/proximity/{proximity.dist_pin_to_traffic_daily_traffic copy.sql => proximity.dist_pin_to_traffic_width_highway.sql} (100%) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_minor.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_minor.sql new file mode 100644 index 000000000..127016c05 --- /dev/null +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_minor.sql @@ -0,0 +1,31 @@ +-- CTAS to create a table of distance to the nearest Metra route for each PIN +{{ + config( + materialized='table', + partitioned_by=['year'], + bucketed_by=['pin10'], + bucket_count=1 + ) +}} + +WITH traffic AS ( -- noqa: ST03 + SELECT * + FROM {{ source('spatial', 'traffic') }} + WHERE annual_traffic > 0 + AND annual_traffic IS NOT NULL + AND road_type = 'Minor Arterial' +) + +SELECT + pcl.pin10, + ARBITRARY(xy.road_name) AS nearest_road_name, + ARBITRARY(xy.dist_ft) AS nearest_road_dist_ft, + ARBITRARY(xy.year) AS 
nearest_road_data_year, + ARBITRARY(xy.annual_traffic) AS nearest_road_annual_traffic, + pcl.year +FROM {{ source('spatial', 'parcel') }} AS pcl +INNER JOIN ( {{ dist_to_nearest_geometry('traffic') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year +GROUP BY pcl.pin10, pcl.year diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic copy.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_width_highway.sql similarity index 100% rename from dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic copy.sql rename to dbt/models/proximity/proximity.dist_pin_to_traffic_width_highway.sql From 12b2ebf4dfce2eb300a0713e87c4bc546bee9833 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 10 Oct 2024 19:19:04 +0000 Subject: [PATCH 008/166] Make all distinct_pins --- ...mity.dist_pin_to_traffic_daily_traffic.sql | 21 +++++++++---- ...ist_pin_to_traffic_daily_traffic_Minor.sql | 31 ------------------- ...ist_pin_to_traffic_daily_traffic_local.sql | 11 ++++++- ...ist_pin_to_traffic_daily_traffic_minor.sql | 11 ++++++- ...mity.dist_pin_to_traffic_width_highway.sql | 11 ++++++- 5 files changed, 45 insertions(+), 40 deletions(-) delete mode 100644 dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_Minor.sql diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic.sql index 8aa822f25..b9d119b2a 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic.sql @@ -17,18 +17,27 @@ WITH traffic AS ( -- noqa: ST03 road_type = 'Interstate' OR road_type = 'Freeway And Expressway' ) +), + +distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435, + pin10, + year + FROM {{ source('spatial', 'parcel') }} ) SELECT pcl.pin10, - ARBITRARY(xy.road_name) AS nearest_road_name, - ARBITRARY(xy.dist_ft) AS 
nearest_road_dist_ft, - ARBITRARY(xy.year) AS nearest_road_data_year, - ARBITRARY(xy.annual_traffic) AS nearest_road_annual_traffic, + MIN(xy.road_name) AS nearest_road_name, -- Using MIN() as a placeholder + MIN(xy.dist_ft) AS nearest_road_dist_ft, + MIN(xy.year) AS nearest_road_data_year, + MIN(xy.annual_traffic) AS nearest_road_annual_traffic, pcl.year -FROM {{ source('spatial', 'parcel') }} AS pcl +FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 AND pcl.year = xy.pin_year -GROUP BY pcl.pin10, pcl.year +GROUP BY pcl.pin10, pcl.year; diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_Minor.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_Minor.sql deleted file mode 100644 index 127016c05..000000000 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_Minor.sql +++ /dev/null @@ -1,31 +0,0 @@ --- CTAS to create a table of distance to the nearest Metra route for each PIN -{{ - config( - materialized='table', - partitioned_by=['year'], - bucketed_by=['pin10'], - bucket_count=1 - ) -}} - -WITH traffic AS ( -- noqa: ST03 - SELECT * - FROM {{ source('spatial', 'traffic') }} - WHERE annual_traffic > 0 - AND annual_traffic IS NOT NULL - AND road_type = 'Minor Arterial' -) - -SELECT - pcl.pin10, - ARBITRARY(xy.road_name) AS nearest_road_name, - ARBITRARY(xy.dist_ft) AS nearest_road_dist_ft, - ARBITRARY(xy.year) AS nearest_road_data_year, - ARBITRARY(xy.annual_traffic) AS nearest_road_annual_traffic, - pcl.year -FROM {{ source('spatial', 'parcel') }} AS pcl -INNER JOIN ( {{ dist_to_nearest_geometry('traffic') }} ) AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year -GROUP BY pcl.pin10, pcl.year diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_local.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_local.sql index 
c586f328f..ef8923d8f 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_local.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_local.sql @@ -14,6 +14,15 @@ WITH traffic AS ( -- noqa: ST03 WHERE annual_traffic > 0 AND annual_traffic IS NOT NULL AND road_type = 'Local Road or Street' +), + +distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435, + pin10, + year + FROM {{ source('spatial', 'parcel') }} ) SELECT @@ -23,7 +32,7 @@ SELECT ARBITRARY(xy.year) AS nearest_road_data_year, ARBITRARY(xy.annual_traffic) AS nearest_road_annual_traffic, pcl.year -FROM {{ source('spatial', 'parcel') }} AS pcl +FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_minor.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_minor.sql index 127016c05..6e3251a64 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_minor.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_minor.sql @@ -14,6 +14,15 @@ WITH traffic AS ( -- noqa: ST03 WHERE annual_traffic > 0 AND annual_traffic IS NOT NULL AND road_type = 'Minor Arterial' +), + +distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435, + pin10, + year + FROM {{ source('spatial', 'parcel') }} ) SELECT @@ -23,7 +32,7 @@ SELECT ARBITRARY(xy.year) AS nearest_road_data_year, ARBITRARY(xy.annual_traffic) AS nearest_road_annual_traffic, pcl.year -FROM {{ source('spatial', 'parcel') }} AS pcl +FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_width_highway.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_width_highway.sql index c64d9ea5a..692c62bc7 100644 --- 
a/dbt/models/proximity/proximity.dist_pin_to_traffic_width_highway.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_width_highway.sql @@ -15,6 +15,15 @@ WITH traffic AS ( -- noqa: ST03 road_type = 'Interstate' OR road_type = 'Freeway And Expressway' ) +), + +distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435, + pin10, + year + FROM {{ source('spatial', 'parcel') }} ) SELECT @@ -24,7 +33,7 @@ SELECT ARBITRARY(xy.year) AS nearest_road_data_year, ARBITRARY(xy.surface_width) AS nearest_surface_width, pcl.year -FROM {{ source('spatial', 'parcel') }} AS pcl +FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 From a22e31463bed0a5062d2bc643d7ac3d09a31838e Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 11 Oct 2024 13:56:50 +0000 Subject: [PATCH 009/166] Make traffic_width unique --- .../proximity.dist_pin_to_traffic_width.sql | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_width.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_width.sql index 8ad53d3d3..8376475e4 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_width.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_width.sql @@ -12,6 +12,15 @@ WITH traffic AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} WHERE road_type = 'Local Road or Street' +), + +distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435, + pin10, + year + FROM {{ source('spatial', 'parcel') }} ) SELECT @@ -20,8 +29,9 @@ SELECT ARBITRARY(xy.dist_ft) AS nearest_road_dist_ft, ARBITRARY(xy.year) AS nearest_road_data_year, ARBITRARY(xy.surface_width) AS nearest_surface_width, + ARBITRARY(xy.surface_type) AS nearest_surface_type, pcl.year -FROM {{ source('spatial', 'parcel') }} AS pcl +FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 
= xy.y_3435 From d835273ca8ec2869a00e7276886e95bc9b96517b Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 11 Oct 2024 14:19:57 +0000 Subject: [PATCH 010/166] switch to daily_traffic --- .../proximity.dist_pin_to_traffic_daily_traffic.sql | 6 +++--- .../proximity.dist_pin_to_traffic_daily_traffic_local.sql | 6 +++--- .../proximity.dist_pin_to_traffic_daily_traffic_minor.sql | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic.sql index b9d119b2a..7ef57d384 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic.sql @@ -11,8 +11,8 @@ WITH traffic AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} - WHERE annual_traffic > 0 - AND annual_traffic IS NOT NULL + WHERE daily_traffic > 0 + AND daily_traffic IS NOT NULL AND ( road_type = 'Interstate' OR road_type = 'Freeway And Expressway' @@ -33,7 +33,7 @@ SELECT MIN(xy.road_name) AS nearest_road_name, -- Using MIN() as a placeholder MIN(xy.dist_ft) AS nearest_road_dist_ft, MIN(xy.year) AS nearest_road_data_year, - MIN(xy.annual_traffic) AS nearest_road_annual_traffic, + MIN(xy.daily_traffic) AS nearest_road_daily_traffic, pcl.year FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic') }} ) AS xy diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_local.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_local.sql index ef8923d8f..b2172abcf 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_local.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_local.sql @@ -11,8 +11,8 @@ WITH traffic AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} - WHERE annual_traffic > 0 - AND annual_traffic IS NOT NULL + WHERE daily_traffic > 
0 + AND daily_traffic IS NOT NULL AND road_type = 'Local Road or Street' ), @@ -30,7 +30,7 @@ SELECT ARBITRARY(xy.road_name) AS nearest_road_name, ARBITRARY(xy.dist_ft) AS nearest_road_dist_ft, ARBITRARY(xy.year) AS nearest_road_data_year, - ARBITRARY(xy.annual_traffic) AS nearest_road_annual_traffic, + ARBITRARY(xy.daily_traffic) AS nearest_road_daily_traffic, pcl.year FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic') }} ) AS xy diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_minor.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_minor.sql index 6e3251a64..0daa3982d 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_minor.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_minor.sql @@ -11,8 +11,8 @@ WITH traffic AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} - WHERE annual_traffic > 0 - AND annual_traffic IS NOT NULL + WHERE daily_traffic > 0 + AND daily_traffic IS NOT NULL AND road_type = 'Minor Arterial' ), @@ -30,7 +30,7 @@ SELECT ARBITRARY(xy.road_name) AS nearest_road_name, ARBITRARY(xy.dist_ft) AS nearest_road_dist_ft, ARBITRARY(xy.year) AS nearest_road_data_year, - ARBITRARY(xy.annual_traffic) AS nearest_road_annual_traffic, + ARBITRARY(xy.daily_traffic) AS nearest_road_daily_traffic, pcl.year FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic') }} ) AS xy From 7bd08b84192473b2a61ce8f930339e42d7082618 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 11 Oct 2024 16:12:27 +0000 Subject: [PATCH 011/166] Try master --- .../proximity.dist_pin_to_traffic_master.sql | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql new file mode 100644 index 000000000..b2237d39f 
--- /dev/null +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -0,0 +1,63 @@ +{{ + config( + materialized='table', + partitioned_by=['year'], + bucketed_by=['pin10'], + bucket_count=1 + ) +}} + +WITH traffic_highway AS ( + -- Local Road or Street traffic data + SELECT * + FROM {{ source('spatial', 'traffic') }} + WHERE daily_traffic > 0 + AND daily_traffic IS NOT NULL + AND road_type = 'Freeway And Expressway' +), + +traffic_minor_arterial AS ( + -- Minor Arterial traffic data + SELECT * + FROM {{ source('spatial', 'traffic') }} + WHERE daily_traffic > 0 + AND daily_traffic IS NOT NULL + AND road_type = 'Minor Arterial' +), + +distinct_pins AS ( + -- Select distinct pins from the parcel dataset + SELECT DISTINCT + x_3435, + y_3435, + pin10, + year + FROM {{ source('spatial', 'parcel') }} +) + +SELECT + pcl.pin10, + -- Nearest local road values + ARBITRARY(local_xy.road_name) AS nearest_local_road_name, + ARBITRARY(local_xy.dist_ft) AS nearest_local_road_dist_ft, + ARBITRARY(local_xy.year) AS nearest_local_road_data_year, + ARBITRARY(local_xy.daily_traffic) AS nearest_local_road_daily_traffic, + -- Nearest minor arterial values + ARBITRARY(arterial_xy.road_name) AS nearest_arterial_road_name, + ARBITRARY(arterial_xy.dist_ft) AS nearest_arterial_road_dist_ft, + ARBITRARY(arterial_xy.year) AS nearest_arterial_road_data_year, + ARBITRARY(arterial_xy.daily_traffic) AS nearest_arterial_road_daily_traffic, + pcl.year +FROM distinct_pins AS pcl +-- Join for nearest local road +LEFT JOIN traffic_highway AS local_xy + ON pcl.x_3435 = local_xy.x_3435 + AND pcl.y_3435 = local_xy.y_3435 + AND pcl.year = local_xy.pin_year +-- Join for nearest minor arterial +LEFT JOIN + traffic_minor_arterial AS arterial_xy + ON pcl.x_3435 = arterial_xy.x_3435 + AND pcl.y_3435 = arterial_xy.y_3435 + AND pcl.year = arterial_xy.pin_year +GROUP BY pcl.pin10, pcl.year From ca769428e2ca5e2fd182e4eee41f6e9ad0139083 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 11 Oct 2024 
17:21:52 +0000 Subject: [PATCH 012/166] Fix master? --- ..._pin_to_traffic_daily_traffic_highway.sql} | 8 +- .../proximity.dist_pin_to_traffic_master.sql | 84 +++++++++++-------- 2 files changed, 55 insertions(+), 37 deletions(-) rename dbt/models/proximity/{proximity.dist_pin_to_traffic_daily_traffic.sql => proximity.dist_pin_to_traffic_daily_traffic_highway.sql} (79%) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_highway.sql similarity index 79% rename from dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic.sql rename to dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_highway.sql index 7ef57d384..755846c57 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_highway.sql @@ -30,10 +30,10 @@ distinct_pins AS ( SELECT pcl.pin10, - MIN(xy.road_name) AS nearest_road_name, -- Using MIN() as a placeholder - MIN(xy.dist_ft) AS nearest_road_dist_ft, - MIN(xy.year) AS nearest_road_data_year, - MIN(xy.daily_traffic) AS nearest_road_daily_traffic, + ARBITRARY(xy.road_name) AS nearest_road_name, + ARBITRARY(xy.dist_ft) AS nearest_road_dist_ft, + ARBITRARY(xy.year) AS nearest_road_data_year, + ARBITRARY(xy.daily_traffic) AS nearest_road_daily_traffic, pcl.year FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic') }} ) AS xy diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index b2237d39f..92f8e4507 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -1,3 +1,5 @@ +-- CTAS to create a table of distance to the nearest road type +-- (minor arterial and highway) for each PIN {{ config( materialized='table', @@ -7,57 +9,73 @@ ) }} -WITH 
traffic_highway AS ( - -- Local Road or Street traffic data +WITH traffic_minor AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} WHERE daily_traffic > 0 AND daily_traffic IS NOT NULL - AND road_type = 'Freeway And Expressway' + AND road_type = 'Minor Arterial' ), -traffic_minor_arterial AS ( - -- Minor Arterial traffic data +traffic_highway AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} WHERE daily_traffic > 0 AND daily_traffic IS NOT NULL - AND road_type = 'Minor Arterial' + AND ( + road_type = 'Interstate' + OR road_type = 'Freeway And Expressway' + ) ), distinct_pins AS ( - -- Select distinct pins from the parcel dataset SELECT DISTINCT x_3435, y_3435, pin10, year FROM {{ source('spatial', 'parcel') }} +), + +-- Select nearest road from Minor Arterial +nearest_minor AS ( + SELECT + pcl.pin10, + ARBITRARY(xy.road_name) AS nearest_road_name, + ARBITRARY(xy.dist_ft) AS nearest_road_dist_ft, + ARBITRARY(xy.year) AS nearest_road_data_year, + ARBITRARY(xy.daily_traffic) AS nearest_road_daily_traffic, + 'Minor Arterial' AS road_type, + pcl.year + FROM distinct_pins AS pcl + INNER JOIN ( {{ dist_to_nearest_geometry('traffic_minor') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year + GROUP BY pcl.pin10, pcl.year +), + +-- Select nearest road from Highway (Interstate, Freeway, Expressway) +nearest_highway AS ( + SELECT + pcl.pin10, + ARBITRARY(xy.road_name) AS nearest_road_name, + ARBITRARY(xy.dist_ft) AS nearest_road_dist_ft, + ARBITRARY(xy.year) AS nearest_road_data_year, + ARBITRARY(xy.daily_traffic) AS nearest_road_daily_traffic, + 'Highway' AS road_type, + pcl.year + FROM distinct_pins AS pcl + INNER JOIN ( {{ dist_to_nearest_geometry('traffic_highway') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year + GROUP BY pcl.pin10, pcl.year ) -SELECT - pcl.pin10, - -- Nearest local road values - ARBITRARY(local_xy.road_name) AS 
nearest_local_road_name, - ARBITRARY(local_xy.dist_ft) AS nearest_local_road_dist_ft, - ARBITRARY(local_xy.year) AS nearest_local_road_data_year, - ARBITRARY(local_xy.daily_traffic) AS nearest_local_road_daily_traffic, - -- Nearest minor arterial values - ARBITRARY(arterial_xy.road_name) AS nearest_arterial_road_name, - ARBITRARY(arterial_xy.dist_ft) AS nearest_arterial_road_dist_ft, - ARBITRARY(arterial_xy.year) AS nearest_arterial_road_data_year, - ARBITRARY(arterial_xy.daily_traffic) AS nearest_arterial_road_daily_traffic, - pcl.year -FROM distinct_pins AS pcl --- Join for nearest local road -LEFT JOIN traffic_highway AS local_xy - ON pcl.x_3435 = local_xy.x_3435 - AND pcl.y_3435 = local_xy.y_3435 - AND pcl.year = local_xy.pin_year --- Join for nearest minor arterial -LEFT JOIN - traffic_minor_arterial AS arterial_xy - ON pcl.x_3435 = arterial_xy.x_3435 - AND pcl.y_3435 = arterial_xy.y_3435 - AND pcl.year = arterial_xy.pin_year -GROUP BY pcl.pin10, pcl.year +-- Combine the two results with UNION ALL +SELECT * +FROM nearest_minor +UNION ALL +SELECT * +FROM nearest_highway From 8ba251d6aab56c7f71e537d64506cfaab09ac8f0 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 11 Oct 2024 19:37:28 +0000 Subject: [PATCH 013/166] Another master test --- .../proximity.dist_pin_to_traffic_master.sql | 78 +++++++------------ ...imity.dist_pin_to_traffic_width_local.sql} | 0 2 files changed, 30 insertions(+), 48 deletions(-) rename dbt/models/proximity/{proximity.dist_pin_to_traffic_width.sql => proximity.dist_pin_to_traffic_width_local.sql} (100%) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 92f8e4507..dc00de67d 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -1,14 +1,3 @@ --- CTAS to create a table of distance to the nearest road type --- (minor arterial and highway) for each PIN 
-{{ - config( - materialized='table', - partitioned_by=['year'], - bucketed_by=['pin10'], - bucket_count=1 - ) -}} - WITH traffic_minor AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} @@ -37,45 +26,38 @@ distinct_pins AS ( FROM {{ source('spatial', 'parcel') }} ), --- Select nearest road from Minor Arterial -nearest_minor AS ( - SELECT - pcl.pin10, - ARBITRARY(xy.road_name) AS nearest_road_name, - ARBITRARY(xy.dist_ft) AS nearest_road_dist_ft, - ARBITRARY(xy.year) AS nearest_road_data_year, - ARBITRARY(xy.daily_traffic) AS nearest_road_daily_traffic, - 'Minor Arterial' AS road_type, - pcl.year - FROM distinct_pins AS pcl - INNER JOIN ( {{ dist_to_nearest_geometry('traffic_minor') }} ) AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year - GROUP BY pcl.pin10, pcl.year -), - --- Select nearest road from Highway (Interstate, Freeway, Expressway) -nearest_highway AS ( +-- Select nearest road from Minor Arterial and Highway +nearest_road AS ( SELECT pcl.pin10, - ARBITRARY(xy.road_name) AS nearest_road_name, - ARBITRARY(xy.dist_ft) AS nearest_road_dist_ft, - ARBITRARY(xy.year) AS nearest_road_data_year, - ARBITRARY(xy.daily_traffic) AS nearest_road_daily_traffic, - 'Highway' AS road_type, - pcl.year + pcl.year, + xy_minor.road_name AS nearest_minor_road_name, + xy_minor.dist_ft AS nearest_minor_road_dist_ft, + xy_minor.daily_traffic AS nearest_minor_daily_traffic, + xy_highway.road_name AS nearest_highway_road_name, + xy_highway.dist_ft AS nearest_highway_road_dist_ft, + xy_highway.daily_traffic AS nearest_highway_daily_traffic FROM distinct_pins AS pcl - INNER JOIN ( {{ dist_to_nearest_geometry('traffic_highway') }} ) AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year - GROUP BY pcl.pin10, pcl.year + LEFT JOIN ( {{ dist_to_nearest_geometry('traffic_minor') }} ) AS xy_minor + ON pcl.x_3435 = xy_minor.x_3435 + AND pcl.y_3435 = xy_minor.y_3435 + AND pcl.year = 
xy_minor.pin_year + LEFT JOIN + ( {{ dist_to_nearest_geometry('traffic_highway') }} ) AS xy_highway + ON pcl.x_3435 = xy_highway.x_3435 + AND pcl.y_3435 = xy_highway.y_3435 + AND pcl.year = xy_highway.pin_year ) --- Combine the two results with UNION ALL -SELECT * -FROM nearest_minor -UNION ALL -SELECT * -FROM nearest_highway +-- Combine the data into a single row for each PIN +SELECT + pin10, + year, + ARBITRARY(nearest_minor_road_name) AS nearest_minor_road_name, + ARBITRARY(nearest_minor_road_dist_ft) AS nearest_minor_road_dist_ft, + ARBITRARY(nearest_minor_daily_traffic) AS nearest_minor_daily_traffic, + ARBITRARY(nearest_highway_road_name) AS nearest_highway_road_name, + ARBITRARY(nearest_highway_road_dist_ft) AS nearest_highway_road_dist_ft, + ARBITRARY(nearest_highway_daily_traffic) AS nearest_highway_daily_traffic +FROM nearest_road +GROUP BY pin10, year diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_width.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_width_local.sql similarity index 100% rename from dbt/models/proximity/proximity.dist_pin_to_traffic_width.sql rename to dbt/models/proximity/proximity.dist_pin_to_traffic_width_local.sql From 1cf60f27f7aa58c0e2c175bc4dff4e9907d6e5ac Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 11 Oct 2024 21:04:43 +0000 Subject: [PATCH 014/166] Another master test --- .../proximity.dist_pin_to_traffic_master.sql | 73 +++++++++++-------- 1 file changed, 44 insertions(+), 29 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index dc00de67d..d711cb01c 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -26,38 +26,53 @@ distinct_pins AS ( FROM {{ source('spatial', 'parcel') }} ), --- Select nearest road from Minor Arterial and Highway -nearest_road AS ( +-- Select nearest road from Minor Arterial 
+nearest_minor AS ( SELECT pcl.pin10, - pcl.year, - xy_minor.road_name AS nearest_minor_road_name, - xy_minor.dist_ft AS nearest_minor_road_dist_ft, - xy_minor.daily_traffic AS nearest_minor_daily_traffic, - xy_highway.road_name AS nearest_highway_road_name, - xy_highway.dist_ft AS nearest_highway_road_dist_ft, - xy_highway.daily_traffic AS nearest_highway_daily_traffic + ARBITRARY(xy.road_name) AS nearest_minor_road_name, + ARBITRARY(xy.dist_ft) AS nearest_minor_road_dist_ft, + ARBITRARY(xy.year) AS nearest_minor_road_data_year, + ARBITRARY(xy.daily_traffic) AS nearest_minor_road_daily_traffic, + pcl.year FROM distinct_pins AS pcl - LEFT JOIN ( {{ dist_to_nearest_geometry('traffic_minor') }} ) AS xy_minor - ON pcl.x_3435 = xy_minor.x_3435 - AND pcl.y_3435 = xy_minor.y_3435 - AND pcl.year = xy_minor.pin_year - LEFT JOIN - ( {{ dist_to_nearest_geometry('traffic_highway') }} ) AS xy_highway - ON pcl.x_3435 = xy_highway.x_3435 - AND pcl.y_3435 = xy_highway.y_3435 - AND pcl.year = xy_highway.pin_year + INNER JOIN ( {{ dist_to_nearest_geometry('traffic_minor') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year + GROUP BY pcl.pin10, pcl.year +), + +-- Select nearest road from Highway (Interstate, Freeway, Expressway) +nearest_highway AS ( + SELECT + pcl.pin10, + ARBITRARY(xy.road_name) AS nearest_highway_road_name, + ARBITRARY(xy.dist_ft) AS nearest_highway_road_dist_ft, + ARBITRARY(xy.year) AS nearest_highway_road_data_year, + ARBITRARY(xy.daily_traffic) AS nearest_highway_road_daily_traffic, + pcl.year + FROM distinct_pins AS pcl + INNER JOIN ( {{ dist_to_nearest_geometry('traffic_highway') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year + GROUP BY pcl.pin10, pcl.year ) --- Combine the data into a single row for each PIN +-- Join the results based on pin10 and year SELECT - pin10, - year, - ARBITRARY(nearest_minor_road_name) AS nearest_minor_road_name, - 
ARBITRARY(nearest_minor_road_dist_ft) AS nearest_minor_road_dist_ft, - ARBITRARY(nearest_minor_daily_traffic) AS nearest_minor_daily_traffic, - ARBITRARY(nearest_highway_road_name) AS nearest_highway_road_name, - ARBITRARY(nearest_highway_road_dist_ft) AS nearest_highway_road_dist_ft, - ARBITRARY(nearest_highway_daily_traffic) AS nearest_highway_daily_traffic -FROM nearest_road -GROUP BY pin10, year + COALESCE(minor.pin10, highway.pin10) AS pin10, + COALESCE(minor.year, highway.year) AS year, + minor.nearest_minor_road_name, + minor.nearest_minor_road_dist_ft, + minor.nearest_minor_road_data_year, + minor.nearest_minor_road_daily_traffic, + highway.nearest_highway_road_name, + highway.nearest_highway_road_dist_ft, + highway.nearest_highway_road_data_year, + highway.nearest_highway_road_daily_traffic +FROM nearest_minor AS minor +FULL OUTER JOIN nearest_highway AS highway + ON minor.pin10 = highway.pin10 + AND minor.year = highway.year From d3ba2590718b9dc064888db4ffec7c8cac8409ed Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 11 Oct 2024 21:13:42 +0000 Subject: [PATCH 015/166] Add config --- .../proximity/proximity.dist_pin_to_traffic_master.sql | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index d711cb01c..fe43b6091 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -1,3 +1,13 @@ +-- CTAS to create a table of distance to the nearest Metra route for each PIN +{{ + config( + materialized='table', + partitioned_by=['year'], + bucketed_by=['pin10'], + bucket_count=1 + ) +}} + WITH traffic_minor AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} From e34b8c89904142bcbad0a2d0b6d2c452009e15ee Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 11 Oct 2024 21:40:52 +0000 Subject: [PATCH 016/166] switch to width 
--- .../proximity.dist_pin_to_traffic_master.sql | 22 ++++++++----------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index fe43b6091..de09bb3c5 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -11,20 +11,16 @@ WITH traffic_minor AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} - WHERE daily_traffic > 0 - AND daily_traffic IS NOT NULL - AND road_type = 'Minor Arterial' + WHERE road_type = 'Minor Arterial' ), traffic_highway AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} - WHERE daily_traffic > 0 - AND daily_traffic IS NOT NULL - AND ( - road_type = 'Interstate' - OR road_type = 'Freeway And Expressway' - ) + WHERE ( + road_type = 'Interstate' + OR road_type = 'Freeway And Expressway' + ) ), distinct_pins AS ( @@ -43,7 +39,7 @@ nearest_minor AS ( ARBITRARY(xy.road_name) AS nearest_minor_road_name, ARBITRARY(xy.dist_ft) AS nearest_minor_road_dist_ft, ARBITRARY(xy.year) AS nearest_minor_road_data_year, - ARBITRARY(xy.daily_traffic) AS nearest_minor_road_daily_traffic, + ARBITRARY(xy.surface_width) AS nearest_minor_road_surface_width, pcl.year FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic_minor') }} ) AS xy @@ -60,7 +56,7 @@ nearest_highway AS ( ARBITRARY(xy.road_name) AS nearest_highway_road_name, ARBITRARY(xy.dist_ft) AS nearest_highway_road_dist_ft, ARBITRARY(xy.year) AS nearest_highway_road_data_year, - ARBITRARY(xy.daily_traffic) AS nearest_highway_road_daily_traffic, + ARBITRARY(xy.surface_width) AS nearest_highway_road_surface_width, pcl.year FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic_highway') }} ) AS xy @@ -77,11 +73,11 @@ SELECT minor.nearest_minor_road_name, minor.nearest_minor_road_dist_ft, 
minor.nearest_minor_road_data_year, - minor.nearest_minor_road_daily_traffic, + minor.nearest_minor_road_surface_width, highway.nearest_highway_road_name, highway.nearest_highway_road_dist_ft, highway.nearest_highway_road_data_year, - highway.nearest_highway_road_daily_traffic + highway.nearest_highway_road_surface_width FROM nearest_minor AS minor FULL OUTER JOIN nearest_highway AS highway ON minor.pin10 = highway.pin10 From 4ae5554ac626266c4ec15db9b633d419533071c4 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 11 Oct 2024 21:44:29 +0000 Subject: [PATCH 017/166] Make year last column --- dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index de09bb3c5..4f3141e85 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -69,7 +69,6 @@ nearest_highway AS ( -- Join the results based on pin10 and year SELECT COALESCE(minor.pin10, highway.pin10) AS pin10, - COALESCE(minor.year, highway.year) AS year, minor.nearest_minor_road_name, minor.nearest_minor_road_dist_ft, minor.nearest_minor_road_data_year, @@ -77,7 +76,8 @@ SELECT highway.nearest_highway_road_name, highway.nearest_highway_road_dist_ft, highway.nearest_highway_road_data_year, - highway.nearest_highway_road_surface_width + highway.nearest_highway_road_surface_width, + COALESCE(minor.year, highway.year) AS year FROM nearest_minor AS minor FULL OUTER JOIN nearest_highway AS highway ON minor.pin10 = highway.pin10 From d2a45faf2b6bfc81685bb76c8a432f69ebcfa3e2 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 15 Oct 2024 20:19:24 +0000 Subject: [PATCH 018/166] Merge into master --- ...t_pin_to_traffic_daily_traffic_highway.sql | 43 ----- ...ist_pin_to_traffic_daily_traffic_local.sql | 40 ---- 
...ist_pin_to_traffic_daily_traffic_minor.sql | 40 ---- .../proximity.dist_pin_to_traffic_master.sql | 174 +++++++++++++++--- ...mity.dist_pin_to_traffic_width_highway.sql | 41 ----- ...ximity.dist_pin_to_traffic_width_local.sql | 39 ---- ...ximity.dist_pin_to_traffic_width_minor.sql | 38 ---- 7 files changed, 152 insertions(+), 263 deletions(-) delete mode 100644 dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_highway.sql delete mode 100644 dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_local.sql delete mode 100644 dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_minor.sql delete mode 100644 dbt/models/proximity/proximity.dist_pin_to_traffic_width_highway.sql delete mode 100644 dbt/models/proximity/proximity.dist_pin_to_traffic_width_local.sql delete mode 100644 dbt/models/proximity/proximity.dist_pin_to_traffic_width_minor.sql diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_highway.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_highway.sql deleted file mode 100644 index 755846c57..000000000 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_highway.sql +++ /dev/null @@ -1,43 +0,0 @@ --- CTAS to create a table of distance to the nearest Metra route for each PIN -{{ - config( - materialized='table', - partitioned_by=['year'], - bucketed_by=['pin10'], - bucket_count=1 - ) -}} - -WITH traffic AS ( -- noqa: ST03 - SELECT * - FROM {{ source('spatial', 'traffic') }} - WHERE daily_traffic > 0 - AND daily_traffic IS NOT NULL - AND ( - road_type = 'Interstate' - OR road_type = 'Freeway And Expressway' - ) -), - -distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435, - pin10, - year - FROM {{ source('spatial', 'parcel') }} -) - -SELECT - pcl.pin10, - ARBITRARY(xy.road_name) AS nearest_road_name, - ARBITRARY(xy.dist_ft) AS nearest_road_dist_ft, - ARBITRARY(xy.year) AS nearest_road_data_year, - ARBITRARY(xy.daily_traffic) AS 
nearest_road_daily_traffic, - pcl.year -FROM distinct_pins AS pcl -INNER JOIN ( {{ dist_to_nearest_geometry('traffic') }} ) AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year -GROUP BY pcl.pin10, pcl.year; diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_local.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_local.sql deleted file mode 100644 index b2172abcf..000000000 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_local.sql +++ /dev/null @@ -1,40 +0,0 @@ --- CTAS to create a table of distance to the nearest Metra route for each PIN -{{ - config( - materialized='table', - partitioned_by=['year'], - bucketed_by=['pin10'], - bucket_count=1 - ) -}} - -WITH traffic AS ( -- noqa: ST03 - SELECT * - FROM {{ source('spatial', 'traffic') }} - WHERE daily_traffic > 0 - AND daily_traffic IS NOT NULL - AND road_type = 'Local Road or Street' -), - -distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435, - pin10, - year - FROM {{ source('spatial', 'parcel') }} -) - -SELECT - pcl.pin10, - ARBITRARY(xy.road_name) AS nearest_road_name, - ARBITRARY(xy.dist_ft) AS nearest_road_dist_ft, - ARBITRARY(xy.year) AS nearest_road_data_year, - ARBITRARY(xy.daily_traffic) AS nearest_road_daily_traffic, - pcl.year -FROM distinct_pins AS pcl -INNER JOIN ( {{ dist_to_nearest_geometry('traffic') }} ) AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year -GROUP BY pcl.pin10, pcl.year diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_minor.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_minor.sql deleted file mode 100644 index 0daa3982d..000000000 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_daily_traffic_minor.sql +++ /dev/null @@ -1,40 +0,0 @@ --- CTAS to create a table of distance to the nearest Metra route for each PIN -{{ - config( - materialized='table', - 
partitioned_by=['year'], - bucketed_by=['pin10'], - bucket_count=1 - ) -}} - -WITH traffic AS ( -- noqa: ST03 - SELECT * - FROM {{ source('spatial', 'traffic') }} - WHERE daily_traffic > 0 - AND daily_traffic IS NOT NULL - AND road_type = 'Minor Arterial' -), - -distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435, - pin10, - year - FROM {{ source('spatial', 'parcel') }} -) - -SELECT - pcl.pin10, - ARBITRARY(xy.road_name) AS nearest_road_name, - ARBITRARY(xy.dist_ft) AS nearest_road_dist_ft, - ARBITRARY(xy.year) AS nearest_road_data_year, - ARBITRARY(xy.daily_traffic) AS nearest_road_daily_traffic, - pcl.year -FROM distinct_pins AS pcl -INNER JOIN ( {{ dist_to_nearest_geometry('traffic') }} ) AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year -GROUP BY pcl.pin10, pcl.year diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 4f3141e85..00a56edda 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -1,4 +1,4 @@ --- CTAS to create a table of distance to the nearest Metra route for each PIN +-- CTAS to create a table of distance to the nearest road for each PIN {{ config( materialized='table', @@ -14,13 +14,34 @@ WITH traffic_minor AS ( -- noqa: ST03 WHERE road_type = 'Minor Arterial' ), -traffic_highway AS ( -- noqa: ST03 +traffic_interstate AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} - WHERE ( - road_type = 'Interstate' - OR road_type = 'Freeway And Expressway' - ) + WHERE road_type = 'Interstate' +), + +traffic_freeway AS ( -- noqa: ST03 + SELECT * + FROM {{ source('spatial', 'traffic') }} + WHERE road_type = 'Freeway and Expressway' +), + +traffic_principal AS ( -- noqa: ST03 + SELECT * + FROM {{ source('spatial', 'traffic') }} + WHERE road_type = 'Other Principal Arterial' +), + +traffic_major_collector AS ( -- 
noqa: ST03 + SELECT * + FROM {{ source('spatial', 'traffic') }} + WHERE road_type = 'Major Collector' +), + +traffic_minor_collector AS ( -- noqa: ST03 + SELECT * + FROM {{ source('spatial', 'traffic') }} + WHERE road_type = 'Minor Collector' ), distinct_pins AS ( @@ -49,17 +70,89 @@ nearest_minor AS ( GROUP BY pcl.pin10, pcl.year ), --- Select nearest road from Highway (Interstate, Freeway, Expressway) -nearest_highway AS ( +-- Select nearest road from Interstate +nearest_interstate AS ( + SELECT + pcl.pin10, + ARBITRARY(xy.road_name) AS nearest_interstate_road_name, + ARBITRARY(xy.dist_ft) AS nearest_interstate_road_dist_ft, + ARBITRARY(xy.year) AS nearest_interstate_road_data_year, + ARBITRARY(xy.surface_width) AS nearest_interstate_road_surface_width, + pcl.year + FROM distinct_pins AS pcl + INNER JOIN ( {{ dist_to_nearest_geometry('traffic_interstate') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year + GROUP BY pcl.pin10, pcl.year +), + +-- Select nearest road from Freeway and Expressway +nearest_freeway AS ( + SELECT + pcl.pin10, + ARBITRARY(xy.road_name) AS nearest_freeway_road_name, + ARBITRARY(xy.dist_ft) AS nearest_freeway_road_dist_ft, + ARBITRARY(xy.year) AS nearest_freeway_road_data_year, + ARBITRARY(xy.surface_width) AS nearest_freeway_road_surface_width, + pcl.year + FROM distinct_pins AS pcl + INNER JOIN ( {{ dist_to_nearest_geometry('traffic_freeway') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year + GROUP BY pcl.pin10, pcl.year +), + +-- Select nearest road from Other Principal Arterial +nearest_principal AS ( + SELECT + pcl.pin10, + ARBITRARY(xy.road_name) AS nearest_principal_road_name, + ARBITRARY(xy.dist_ft) AS nearest_principal_road_dist_ft, + ARBITRARY(xy.year) AS nearest_principal_road_data_year, + ARBITRARY(xy.surface_width) AS nearest_principal_road_surface_width, + pcl.year + FROM distinct_pins AS pcl + INNER JOIN ( {{ 
dist_to_nearest_geometry('traffic_principal') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year + GROUP BY pcl.pin10, pcl.year +), + +-- Select nearest road from Major Collector +nearest_major_collector AS ( + SELECT + pcl.pin10, + ARBITRARY(xy.road_name) AS nearest_major_collector_road_name, + ARBITRARY(xy.dist_ft) AS nearest_major_collector_road_dist_ft, + ARBITRARY(xy.year) AS nearest_major_collector_road_data_year, + ARBITRARY(xy.surface_width) + AS nearest_major_collector_road_surface_width, + pcl.year + FROM distinct_pins AS pcl + INNER JOIN + ( {{ dist_to_nearest_geometry('traffic_major_collector') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year + GROUP BY pcl.pin10, pcl.year +), + +-- Select nearest road from Minor Collector +nearest_minor_collector AS ( SELECT pcl.pin10, - ARBITRARY(xy.road_name) AS nearest_highway_road_name, - ARBITRARY(xy.dist_ft) AS nearest_highway_road_dist_ft, - ARBITRARY(xy.year) AS nearest_highway_road_data_year, - ARBITRARY(xy.surface_width) AS nearest_highway_road_surface_width, + ARBITRARY(xy.road_name) AS nearest_minor_collector_road_name, + ARBITRARY(xy.dist_ft) AS nearest_minor_collector_road_dist_ft, + ARBITRARY(xy.year) AS nearest_minor_collector_road_data_year, + ARBITRARY(xy.surface_width) + AS nearest_minor_collector_road_surface_width, pcl.year FROM distinct_pins AS pcl - INNER JOIN ( {{ dist_to_nearest_geometry('traffic_highway') }} ) AS xy + INNER JOIN + ( {{ dist_to_nearest_geometry('traffic_minor_collector') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 AND pcl.year = xy.pin_year @@ -68,17 +161,54 @@ nearest_highway AS ( -- Join the results based on pin10 and year SELECT - COALESCE(minor.pin10, highway.pin10) AS pin10, + COALESCE( + minor.pin10, + interstate.pin10, + freeway.pin10, + principal.pin10, + major_collector.pin10, + minor_collector.pin10 + ) AS pin10, minor.nearest_minor_road_name, 
minor.nearest_minor_road_dist_ft, minor.nearest_minor_road_data_year, minor.nearest_minor_road_surface_width, - highway.nearest_highway_road_name, - highway.nearest_highway_road_dist_ft, - highway.nearest_highway_road_data_year, - highway.nearest_highway_road_surface_width, - COALESCE(minor.year, highway.year) AS year + interstate.nearest_interstate_road_name, + interstate.nearest_interstate_road_dist_ft, + interstate.nearest_interstate_road_data_year, + interstate.nearest_interstate_road_surface_width, + freeway.nearest_freeway_road_name, + freeway.nearest_freeway_road_dist_ft, + freeway.nearest_freeway_road_data_year, + freeway.nearest_freeway_road_surface_width, + principal.nearest_principal_road_name, + principal.nearest_principal_road_dist_ft, + principal.nearest_principal_road_data_year, + principal.nearest_principal_road_surface_width, + major_collector.nearest_major_collector_road_name, + major_collector.nearest_major_collector_road_dist_ft, + major_collector.nearest_major_collector_road_data_year, + major_collector.nearest_major_collector_road_surface_width, + minor_collector.nearest_minor_collector_road_name, + minor_collector.nearest_minor_collector_road_dist_ft, + minor_collector.nearest_minor_collector_road_data_year, + minor_collector.nearest_minor_collector_road_surface_width, + COALESCE( + minor.year, + interstate.year, + freeway.year, + principal.year, + major_collector.year, + minor_collector.year + ) AS year FROM nearest_minor AS minor -FULL OUTER JOIN nearest_highway AS highway - ON minor.pin10 = highway.pin10 - AND minor.year = highway.year +FULL OUTER JOIN nearest_interstate AS interstate + ON minor.pin10 = interstate.pin10 AND minor.year = interstate.year +FULL OUTER JOIN nearest_freeway AS freeway + ON minor.pin10 = freeway.pin10 AND minor.year = freeway.year +FULL OUTER JOIN nearest_principal AS principal + ON minor.pin10 = principal.pin10 AND minor.year = principal.year +FULL OUTER JOIN nearest_major_collector AS major_collector + ON 
minor.pin10 = major_collector.pin10 AND minor.year = major_collector.year +FULL OUTER JOIN nearest_minor_collector AS minor_collector + ON minor.pin10 = minor_collector.pin10 AND minor.year = minor_collector.year diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_width_highway.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_width_highway.sql deleted file mode 100644 index 692c62bc7..000000000 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_width_highway.sql +++ /dev/null @@ -1,41 +0,0 @@ --- CTAS to create a table of distance to the nearest Metra route for each PIN -{{ - config( - materialized='table', - partitioned_by=['year'], - bucketed_by=['pin10'], - bucket_count=1 - ) -}} - -WITH traffic AS ( -- noqa: ST03 - SELECT * - FROM {{ source('spatial', 'traffic') }} - WHERE ( - road_type = 'Interstate' - OR road_type = 'Freeway And Expressway' - ) -), - -distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435, - pin10, - year - FROM {{ source('spatial', 'parcel') }} -) - -SELECT - pcl.pin10, - ARBITRARY(xy.road_name) AS nearest_road_name, - ARBITRARY(xy.dist_ft) AS nearest_road_dist_ft, - ARBITRARY(xy.year) AS nearest_road_data_year, - ARBITRARY(xy.surface_width) AS nearest_surface_width, - pcl.year -FROM distinct_pins AS pcl -INNER JOIN ( {{ dist_to_nearest_geometry('traffic') }} ) AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year -GROUP BY pcl.pin10, pcl.year diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_width_local.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_width_local.sql deleted file mode 100644 index 8376475e4..000000000 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_width_local.sql +++ /dev/null @@ -1,39 +0,0 @@ --- CTAS to create a table of distance to the nearest Metra route for each PIN -{{ - config( - materialized='table', - partitioned_by=['year'], - bucketed_by=['pin10'], - bucket_count=1 - ) -}} - -WITH traffic AS ( -- noqa: ST03 - SELECT * 
- FROM {{ source('spatial', 'traffic') }} - WHERE road_type = 'Local Road or Street' -), - -distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435, - pin10, - year - FROM {{ source('spatial', 'parcel') }} -) - -SELECT - pcl.pin10, - ARBITRARY(xy.road_name) AS nearest_road_name, - ARBITRARY(xy.dist_ft) AS nearest_road_dist_ft, - ARBITRARY(xy.year) AS nearest_road_data_year, - ARBITRARY(xy.surface_width) AS nearest_surface_width, - ARBITRARY(xy.surface_type) AS nearest_surface_type, - pcl.year -FROM distinct_pins AS pcl -INNER JOIN ( {{ dist_to_nearest_geometry('traffic') }} ) AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year -GROUP BY pcl.pin10, pcl.year diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_width_minor.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_width_minor.sql deleted file mode 100644 index 7b4c49bf7..000000000 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_width_minor.sql +++ /dev/null @@ -1,38 +0,0 @@ -{{ - config( - materialized='table', - partitioned_by=['year'], - bucketed_by=['pin10'], - bucket_count=1 - ) -}} - -WITH traffic AS ( -- noqa: ST03 - SELECT * - FROM {{ source('spatial', 'traffic') }} - WHERE road_type = 'Minor Arterial' -), - -distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435, - pin10, - year - FROM {{ source('spatial', 'parcel') }} -) - -SELECT - pcl.pin10, - ARBITRARY(xy.road_name) AS nearest_road_name, - ARBITRARY(xy.dist_ft) AS nearest_road_dist_ft, - ARBITRARY(xy.year) AS nearest_road_data_year, - ARBITRARY(xy.surface_width) AS nearest_surface_width, - ARBITRARY(xy.surface_type) AS nearest_surface_type, - pcl.year -FROM distinct_pins AS pcl -INNER JOIN ( {{ dist_to_nearest_geometry('traffic') }} ) AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year -GROUP BY pcl.pin10, pcl.year From aace136d52b43e4d11dde619b1b61be4a3814a20 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 15 Oct 2024 20:52:21 +0000 
Subject: [PATCH 019/166] Try to remove 2014 --- dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 00a56edda..d90bc1e64 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -42,6 +42,7 @@ traffic_minor_collector AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} WHERE road_type = 'Minor Collector' + AND year >= 2014 ), distinct_pins AS ( From 3f070dea0044a975737dd3f10319e8b65cac7bea Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 15 Oct 2024 20:53:42 +0000 Subject: [PATCH 020/166] Remove minor_collector --- .../proximity.dist_pin_to_traffic_master.sql | 38 +------------------ 1 file changed, 2 insertions(+), 36 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index d90bc1e64..db30a32af 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -38,13 +38,6 @@ traffic_major_collector AS ( -- noqa: ST03 WHERE road_type = 'Major Collector' ), -traffic_minor_collector AS ( -- noqa: ST03 - SELECT * - FROM {{ source('spatial', 'traffic') }} - WHERE road_type = 'Minor Collector' - AND year >= 2014 -), - distinct_pins AS ( SELECT DISTINCT x_3435, @@ -141,25 +134,6 @@ nearest_major_collector AS ( GROUP BY pcl.pin10, pcl.year ), --- Select nearest road from Minor Collector -nearest_minor_collector AS ( - SELECT - pcl.pin10, - ARBITRARY(xy.road_name) AS nearest_minor_collector_road_name, - ARBITRARY(xy.dist_ft) AS nearest_minor_collector_road_dist_ft, - ARBITRARY(xy.year) AS nearest_minor_collector_road_data_year, - ARBITRARY(xy.surface_width) - AS 
nearest_minor_collector_road_surface_width, - pcl.year - FROM distinct_pins AS pcl - INNER JOIN - ( {{ dist_to_nearest_geometry('traffic_minor_collector') }} ) AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year - GROUP BY pcl.pin10, pcl.year -) - -- Join the results based on pin10 and year SELECT COALESCE( @@ -167,8 +141,7 @@ SELECT interstate.pin10, freeway.pin10, principal.pin10, - major_collector.pin10, - minor_collector.pin10 + major_collector.pin10 ) AS pin10, minor.nearest_minor_road_name, minor.nearest_minor_road_dist_ft, @@ -190,17 +163,12 @@ SELECT major_collector.nearest_major_collector_road_dist_ft, major_collector.nearest_major_collector_road_data_year, major_collector.nearest_major_collector_road_surface_width, - minor_collector.nearest_minor_collector_road_name, - minor_collector.nearest_minor_collector_road_dist_ft, - minor_collector.nearest_minor_collector_road_data_year, - minor_collector.nearest_minor_collector_road_surface_width, COALESCE( minor.year, interstate.year, freeway.year, principal.year, - major_collector.year, - minor_collector.year + major_collector.year ) AS year FROM nearest_minor AS minor FULL OUTER JOIN nearest_interstate AS interstate @@ -211,5 +179,3 @@ FULL OUTER JOIN nearest_principal AS principal ON minor.pin10 = principal.pin10 AND minor.year = principal.year FULL OUTER JOIN nearest_major_collector AS major_collector ON minor.pin10 = major_collector.pin10 AND minor.year = major_collector.year -FULL OUTER JOIN nearest_minor_collector AS minor_collector - ON minor.pin10 = minor_collector.pin10 AND minor.year = minor_collector.year From b50e8133f5c0584de0ae2eaf71598334575d9a4a Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 15 Oct 2024 20:55:01 +0000 Subject: [PATCH 021/166] Remove parcel.year = pin.year --- .../proximity.dist_pin_to_traffic_master.sql | 36 +++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git 
a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index db30a32af..13a864793 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -38,6 +38,12 @@ traffic_major_collector AS ( -- noqa: ST03 WHERE road_type = 'Major Collector' ), +traffic_minor_collector AS ( -- noqa: ST03 + SELECT * + FROM {{ source('spatial', 'traffic') }} + WHERE road_type = 'Minor Collector' +), + distinct_pins AS ( SELECT DISTINCT x_3435, @@ -134,6 +140,24 @@ nearest_major_collector AS ( GROUP BY pcl.pin10, pcl.year ), +-- Select nearest road from Minor Collector +nearest_minor_collector AS ( + SELECT + pcl.pin10, + ARBITRARY(xy.road_name) AS nearest_minor_collector_road_name, + ARBITRARY(xy.dist_ft) AS nearest_minor_collector_road_dist_ft, + ARBITRARY(xy.year) AS nearest_minor_collector_road_data_year, + ARBITRARY(xy.surface_width) + AS nearest_minor_collector_road_surface_width, + pcl.year + FROM distinct_pins AS pcl + INNER JOIN + ( {{ dist_to_nearest_geometry('traffic_minor_collector') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + GROUP BY pcl.pin10, pcl.year +) + -- Join the results based on pin10 and year SELECT COALESCE( @@ -141,7 +165,8 @@ SELECT interstate.pin10, freeway.pin10, principal.pin10, - major_collector.pin10 + major_collector.pin10, + minor_collector.pin10 ) AS pin10, minor.nearest_minor_road_name, minor.nearest_minor_road_dist_ft, @@ -163,12 +188,17 @@ SELECT major_collector.nearest_major_collector_road_dist_ft, major_collector.nearest_major_collector_road_data_year, major_collector.nearest_major_collector_road_surface_width, + minor_collector.nearest_minor_collector_road_name, + minor_collector.nearest_minor_collector_road_dist_ft, + minor_collector.nearest_minor_collector_road_data_year, + minor_collector.nearest_minor_collector_road_surface_width, COALESCE( minor.year, 
interstate.year, freeway.year, principal.year, - major_collector.year + major_collector.year, + minor_collector.year ) AS year FROM nearest_minor AS minor FULL OUTER JOIN nearest_interstate AS interstate @@ -179,3 +209,5 @@ FULL OUTER JOIN nearest_principal AS principal ON minor.pin10 = principal.pin10 AND minor.year = principal.year FULL OUTER JOIN nearest_major_collector AS major_collector ON minor.pin10 = major_collector.pin10 AND minor.year = major_collector.year +FULL OUTER JOIN nearest_minor_collector AS minor_collector + ON minor.pin10 = minor_collector.pin10 AND minor.year = minor_collector.year From ebbc52ef67d6c448d20eb35dc09102331b922062 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 15 Oct 2024 21:02:52 +0000 Subject: [PATCH 022/166] Remove minor again --- .../proximity.dist_pin_to_traffic_master.sql | 36 ++----------------- 1 file changed, 2 insertions(+), 34 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 13a864793..db30a32af 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -38,12 +38,6 @@ traffic_major_collector AS ( -- noqa: ST03 WHERE road_type = 'Major Collector' ), -traffic_minor_collector AS ( -- noqa: ST03 - SELECT * - FROM {{ source('spatial', 'traffic') }} - WHERE road_type = 'Minor Collector' -), - distinct_pins AS ( SELECT DISTINCT x_3435, @@ -140,24 +134,6 @@ nearest_major_collector AS ( GROUP BY pcl.pin10, pcl.year ), --- Select nearest road from Minor Collector -nearest_minor_collector AS ( - SELECT - pcl.pin10, - ARBITRARY(xy.road_name) AS nearest_minor_collector_road_name, - ARBITRARY(xy.dist_ft) AS nearest_minor_collector_road_dist_ft, - ARBITRARY(xy.year) AS nearest_minor_collector_road_data_year, - ARBITRARY(xy.surface_width) - AS nearest_minor_collector_road_surface_width, - pcl.year - FROM distinct_pins AS pcl - INNER 
JOIN - ( {{ dist_to_nearest_geometry('traffic_minor_collector') }} ) AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - GROUP BY pcl.pin10, pcl.year -) - -- Join the results based on pin10 and year SELECT COALESCE( @@ -165,8 +141,7 @@ SELECT interstate.pin10, freeway.pin10, principal.pin10, - major_collector.pin10, - minor_collector.pin10 + major_collector.pin10 ) AS pin10, minor.nearest_minor_road_name, minor.nearest_minor_road_dist_ft, @@ -188,17 +163,12 @@ SELECT major_collector.nearest_major_collector_road_dist_ft, major_collector.nearest_major_collector_road_data_year, major_collector.nearest_major_collector_road_surface_width, - minor_collector.nearest_minor_collector_road_name, - minor_collector.nearest_minor_collector_road_dist_ft, - minor_collector.nearest_minor_collector_road_data_year, - minor_collector.nearest_minor_collector_road_surface_width, COALESCE( minor.year, interstate.year, freeway.year, principal.year, - major_collector.year, - minor_collector.year + major_collector.year ) AS year FROM nearest_minor AS minor FULL OUTER JOIN nearest_interstate AS interstate @@ -209,5 +179,3 @@ FULL OUTER JOIN nearest_principal AS principal ON minor.pin10 = principal.pin10 AND minor.year = principal.year FULL OUTER JOIN nearest_major_collector AS major_collector ON minor.pin10 = major_collector.pin10 AND minor.year = major_collector.year -FULL OUTER JOIN nearest_minor_collector AS minor_collector - ON minor.pin10 = minor_collector.pin10 AND minor.year = minor_collector.year From e8671f71179737f31e6bf1a805b0d87cf949c6d1 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 15 Oct 2024 21:05:44 +0000 Subject: [PATCH 023/166] Remove Freeway --- .../proximity.dist_pin_to_traffic_master.sql | 31 ------------------- 1 file changed, 31 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index db30a32af..aabdc2296 100644 --- 
a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -20,12 +20,6 @@ traffic_interstate AS ( -- noqa: ST03 WHERE road_type = 'Interstate' ), -traffic_freeway AS ( -- noqa: ST03 - SELECT * - FROM {{ source('spatial', 'traffic') }} - WHERE road_type = 'Freeway and Expressway' -), - traffic_principal AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} @@ -81,23 +75,6 @@ nearest_interstate AS ( GROUP BY pcl.pin10, pcl.year ), --- Select nearest road from Freeway and Expressway -nearest_freeway AS ( - SELECT - pcl.pin10, - ARBITRARY(xy.road_name) AS nearest_freeway_road_name, - ARBITRARY(xy.dist_ft) AS nearest_freeway_road_dist_ft, - ARBITRARY(xy.year) AS nearest_freeway_road_data_year, - ARBITRARY(xy.surface_width) AS nearest_freeway_road_surface_width, - pcl.year - FROM distinct_pins AS pcl - INNER JOIN ( {{ dist_to_nearest_geometry('traffic_freeway') }} ) AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year - GROUP BY pcl.pin10, pcl.year -), - -- Select nearest road from Other Principal Arterial nearest_principal AS ( SELECT @@ -139,7 +116,6 @@ SELECT COALESCE( minor.pin10, interstate.pin10, - freeway.pin10, principal.pin10, major_collector.pin10 ) AS pin10, @@ -151,10 +127,6 @@ SELECT interstate.nearest_interstate_road_dist_ft, interstate.nearest_interstate_road_data_year, interstate.nearest_interstate_road_surface_width, - freeway.nearest_freeway_road_name, - freeway.nearest_freeway_road_dist_ft, - freeway.nearest_freeway_road_data_year, - freeway.nearest_freeway_road_surface_width, principal.nearest_principal_road_name, principal.nearest_principal_road_dist_ft, principal.nearest_principal_road_data_year, @@ -166,15 +138,12 @@ SELECT COALESCE( minor.year, interstate.year, - freeway.year, principal.year, major_collector.year ) AS year FROM nearest_minor AS minor FULL OUTER JOIN nearest_interstate AS interstate ON minor.pin10 = 
interstate.pin10 AND minor.year = interstate.year -FULL OUTER JOIN nearest_freeway AS freeway - ON minor.pin10 = freeway.pin10 AND minor.year = freeway.year FULL OUTER JOIN nearest_principal AS principal ON minor.pin10 = principal.pin10 AND minor.year = principal.year FULL OUTER JOIN nearest_major_collector AS major_collector From 967114c997d58daf5741bd80e3d40b708a8e1514 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 15 Oct 2024 21:11:48 +0000 Subject: [PATCH 024/166] Start from begining --- .../proximity.dist_pin_to_traffic_master.sql | 112 ++++-------------- 1 file changed, 23 insertions(+), 89 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index aabdc2296..4f3141e85 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -1,4 +1,4 @@ --- CTAS to create a table of distance to the nearest road for each PIN +-- CTAS to create a table of distance to the nearest Metra route for each PIN {{ config( materialized='table', @@ -14,22 +14,13 @@ WITH traffic_minor AS ( -- noqa: ST03 WHERE road_type = 'Minor Arterial' ), -traffic_interstate AS ( -- noqa: ST03 +traffic_highway AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} - WHERE road_type = 'Interstate' -), - -traffic_principal AS ( -- noqa: ST03 - SELECT * - FROM {{ source('spatial', 'traffic') }} - WHERE road_type = 'Other Principal Arterial' -), - -traffic_major_collector AS ( -- noqa: ST03 - SELECT * - FROM {{ source('spatial', 'traffic') }} - WHERE road_type = 'Major Collector' + WHERE ( + road_type = 'Interstate' + OR road_type = 'Freeway And Expressway' + ) ), distinct_pins AS ( @@ -58,93 +49,36 @@ nearest_minor AS ( GROUP BY pcl.pin10, pcl.year ), --- Select nearest road from Interstate -nearest_interstate AS ( - SELECT - pcl.pin10, - ARBITRARY(xy.road_name) AS nearest_interstate_road_name, - 
ARBITRARY(xy.dist_ft) AS nearest_interstate_road_dist_ft, - ARBITRARY(xy.year) AS nearest_interstate_road_data_year, - ARBITRARY(xy.surface_width) AS nearest_interstate_road_surface_width, - pcl.year - FROM distinct_pins AS pcl - INNER JOIN ( {{ dist_to_nearest_geometry('traffic_interstate') }} ) AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year - GROUP BY pcl.pin10, pcl.year -), - --- Select nearest road from Other Principal Arterial -nearest_principal AS ( +-- Select nearest road from Highway (Interstate, Freeway, Expressway) +nearest_highway AS ( SELECT pcl.pin10, - ARBITRARY(xy.road_name) AS nearest_principal_road_name, - ARBITRARY(xy.dist_ft) AS nearest_principal_road_dist_ft, - ARBITRARY(xy.year) AS nearest_principal_road_data_year, - ARBITRARY(xy.surface_width) AS nearest_principal_road_surface_width, + ARBITRARY(xy.road_name) AS nearest_highway_road_name, + ARBITRARY(xy.dist_ft) AS nearest_highway_road_dist_ft, + ARBITRARY(xy.year) AS nearest_highway_road_data_year, + ARBITRARY(xy.surface_width) AS nearest_highway_road_surface_width, pcl.year FROM distinct_pins AS pcl - INNER JOIN ( {{ dist_to_nearest_geometry('traffic_principal') }} ) AS xy + INNER JOIN ( {{ dist_to_nearest_geometry('traffic_highway') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 AND pcl.year = xy.pin_year GROUP BY pcl.pin10, pcl.year -), - --- Select nearest road from Major Collector -nearest_major_collector AS ( - SELECT - pcl.pin10, - ARBITRARY(xy.road_name) AS nearest_major_collector_road_name, - ARBITRARY(xy.dist_ft) AS nearest_major_collector_road_dist_ft, - ARBITRARY(xy.year) AS nearest_major_collector_road_data_year, - ARBITRARY(xy.surface_width) - AS nearest_major_collector_road_surface_width, - pcl.year - FROM distinct_pins AS pcl - INNER JOIN - ( {{ dist_to_nearest_geometry('traffic_major_collector') }} ) AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year - GROUP BY pcl.pin10, 
pcl.year -), +) -- Join the results based on pin10 and year SELECT - COALESCE( - minor.pin10, - interstate.pin10, - principal.pin10, - major_collector.pin10 - ) AS pin10, + COALESCE(minor.pin10, highway.pin10) AS pin10, minor.nearest_minor_road_name, minor.nearest_minor_road_dist_ft, minor.nearest_minor_road_data_year, minor.nearest_minor_road_surface_width, - interstate.nearest_interstate_road_name, - interstate.nearest_interstate_road_dist_ft, - interstate.nearest_interstate_road_data_year, - interstate.nearest_interstate_road_surface_width, - principal.nearest_principal_road_name, - principal.nearest_principal_road_dist_ft, - principal.nearest_principal_road_data_year, - principal.nearest_principal_road_surface_width, - major_collector.nearest_major_collector_road_name, - major_collector.nearest_major_collector_road_dist_ft, - major_collector.nearest_major_collector_road_data_year, - major_collector.nearest_major_collector_road_surface_width, - COALESCE( - minor.year, - interstate.year, - principal.year, - major_collector.year - ) AS year + highway.nearest_highway_road_name, + highway.nearest_highway_road_dist_ft, + highway.nearest_highway_road_data_year, + highway.nearest_highway_road_surface_width, + COALESCE(minor.year, highway.year) AS year FROM nearest_minor AS minor -FULL OUTER JOIN nearest_interstate AS interstate - ON minor.pin10 = interstate.pin10 AND minor.year = interstate.year -FULL OUTER JOIN nearest_principal AS principal - ON minor.pin10 = principal.pin10 AND minor.year = principal.year -FULL OUTER JOIN nearest_major_collector AS major_collector - ON minor.pin10 = major_collector.pin10 AND minor.year = major_collector.year +FULL OUTER JOIN nearest_highway AS highway + ON minor.pin10 = highway.pin10 + AND minor.year = highway.year From ba0b0a1608361f0f8d17083f921befee2cf350b3 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 15 Oct 2024 21:23:09 +0000 Subject: [PATCH 025/166] Separate freeway --- .../proximity.dist_pin_to_traffic_master.sql | 
70 +++++++++++++------ 1 file changed, 48 insertions(+), 22 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 4f3141e85..5d7c57946 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -1,4 +1,4 @@ --- CTAS to create a table of distance to the nearest Metra route for each PIN +-- CTAS to create a table of distance to the nearest road for each PIN {{ config( materialized='table', @@ -14,13 +14,16 @@ WITH traffic_minor AS ( -- noqa: ST03 WHERE road_type = 'Minor Arterial' ), -traffic_highway AS ( -- noqa: ST03 +traffic_interstate AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} - WHERE ( - road_type = 'Interstate' - OR road_type = 'Freeway And Expressway' - ) + WHERE road_type = 'Interstate' +), + +traffic_freeway AS ( -- noqa: ST03 + SELECT * + FROM {{ source('spatial', 'traffic') }} + WHERE road_type = 'Freeway And Expressway' ), distinct_pins AS ( @@ -49,17 +52,34 @@ nearest_minor AS ( GROUP BY pcl.pin10, pcl.year ), --- Select nearest road from Highway (Interstate, Freeway, Expressway) -nearest_highway AS ( +-- Select nearest road from Interstate +nearest_interstate AS ( + SELECT + pcl.pin10, + ARBITRARY(xy.road_name) AS nearest_interstate_road_name, + ARBITRARY(xy.dist_ft) AS nearest_interstate_road_dist_ft, + ARBITRARY(xy.year) AS nearest_interstate_road_data_year, + ARBITRARY(xy.surface_width) AS nearest_interstate_road_surface_width, + pcl.year + FROM distinct_pins AS pcl + INNER JOIN ( {{ dist_to_nearest_geometry('traffic_interstate') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year + GROUP BY pcl.pin10, pcl.year +), + +-- Select nearest road from Freeway And Expressway +nearest_freeway AS ( SELECT pcl.pin10, - ARBITRARY(xy.road_name) AS nearest_highway_road_name, - ARBITRARY(xy.dist_ft) AS 
nearest_highway_road_dist_ft, - ARBITRARY(xy.year) AS nearest_highway_road_data_year, - ARBITRARY(xy.surface_width) AS nearest_highway_road_surface_width, + ARBITRARY(xy.road_name) AS nearest_freeway_road_name, + ARBITRARY(xy.dist_ft) AS nearest_freeway_road_dist_ft, + ARBITRARY(xy.year) AS nearest_freeway_road_data_year, + ARBITRARY(xy.surface_width) AS nearest_freeway_road_surface_width, pcl.year FROM distinct_pins AS pcl - INNER JOIN ( {{ dist_to_nearest_geometry('traffic_highway') }} ) AS xy + INNER JOIN ( {{ dist_to_nearest_geometry('traffic_freeway') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 AND pcl.year = xy.pin_year @@ -68,17 +88,23 @@ nearest_highway AS ( -- Join the results based on pin10 and year SELECT - COALESCE(minor.pin10, highway.pin10) AS pin10, + COALESCE(minor.pin10, interstate.pin10, freeway.pin10) AS pin10, minor.nearest_minor_road_name, minor.nearest_minor_road_dist_ft, minor.nearest_minor_road_data_year, minor.nearest_minor_road_surface_width, - highway.nearest_highway_road_name, - highway.nearest_highway_road_dist_ft, - highway.nearest_highway_road_data_year, - highway.nearest_highway_road_surface_width, - COALESCE(minor.year, highway.year) AS year + interstate.nearest_interstate_road_name, + interstate.nearest_interstate_road_dist_ft, + interstate.nearest_interstate_road_data_year, + interstate.nearest_interstate_road_surface_width, + freeway.nearest_freeway_road_name, + freeway.nearest_freeway_road_dist_ft, + freeway.nearest_freeway_road_data_year, + freeway.nearest_freeway_road_surface_width, + COALESCE(minor.year, interstate.year, freeway.year) AS year FROM nearest_minor AS minor -FULL OUTER JOIN nearest_highway AS highway - ON minor.pin10 = highway.pin10 - AND minor.year = highway.year +FULL OUTER JOIN nearest_interstate AS interstate + ON minor.pin10 = interstate.pin10 AND minor.year = interstate.year +FULL OUTER JOIN nearest_freeway AS freeway + ON COALESCE(minor.pin10, interstate.pin10) = freeway.pin10 + AND 
COALESCE(minor.year, interstate.year) = freeway.year From aa91cf82a49ae9292d14454e0e40fc374e08d0e9 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 15 Oct 2024 21:24:25 +0000 Subject: [PATCH 026/166] Add principal --- .../proximity.dist_pin_to_traffic_master.sql | 35 +++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 5d7c57946..c514abf4e 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -26,6 +26,12 @@ traffic_freeway AS ( -- noqa: ST03 WHERE road_type = 'Freeway And Expressway' ), +traffic_principal AS ( -- noqa: ST03 + SELECT * + FROM {{ source('spatial', 'traffic') }} + WHERE road_type = 'Other Principal Arterial' +), + distinct_pins AS ( SELECT DISTINCT x_3435, @@ -84,11 +90,29 @@ nearest_freeway AS ( AND pcl.y_3435 = xy.y_3435 AND pcl.year = xy.pin_year GROUP BY pcl.pin10, pcl.year +), + +-- Select nearest road from Other Principal Arterial +nearest_principal AS ( + SELECT + pcl.pin10, + ARBITRARY(xy.road_name) AS nearest_principal_road_name, + ARBITRARY(xy.dist_ft) AS nearest_principal_road_dist_ft, + ARBITRARY(xy.year) AS nearest_principal_road_data_year, + ARBITRARY(xy.surface_width) AS nearest_principal_road_surface_width, + pcl.year + FROM distinct_pins AS pcl + INNER JOIN ( {{ dist_to_nearest_geometry('traffic_principal') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year + GROUP BY pcl.pin10, pcl.year ) -- Join the results based on pin10 and year SELECT - COALESCE(minor.pin10, interstate.pin10, freeway.pin10) AS pin10, + COALESCE(minor.pin10, interstate.pin10, freeway.pin10, principal.pin10) + AS pin10, minor.nearest_minor_road_name, minor.nearest_minor_road_dist_ft, minor.nearest_minor_road_data_year, @@ -101,10 +125,17 @@ SELECT 
freeway.nearest_freeway_road_dist_ft, freeway.nearest_freeway_road_data_year, freeway.nearest_freeway_road_surface_width, - COALESCE(minor.year, interstate.year, freeway.year) AS year + principal.nearest_principal_road_name, + principal.nearest_principal_road_dist_ft, + principal.nearest_principal_road_data_year, + principal.nearest_principal_road_surface_width, + COALESCE(minor.year, interstate.year, freeway.year, principal.year) AS year FROM nearest_minor AS minor FULL OUTER JOIN nearest_interstate AS interstate ON minor.pin10 = interstate.pin10 AND minor.year = interstate.year FULL OUTER JOIN nearest_freeway AS freeway ON COALESCE(minor.pin10, interstate.pin10) = freeway.pin10 AND COALESCE(minor.year, interstate.year) = freeway.year +FULL OUTER JOIN nearest_principal AS principal + ON COALESCE(minor.pin10, interstate.pin10, freeway.pin10) = principal.pin10 + AND COALESCE(minor.year, interstate.year, freeway.year) = principal.year From 639468cd3ab3ed025e803e57f23f600656e163e5 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 15 Oct 2024 22:16:47 +0000 Subject: [PATCH 027/166] Try with major --- .../proximity.dist_pin_to_traffic_master.sql | 44 +++++++++++-------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index c514abf4e..37fd15af3 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -26,10 +26,10 @@ traffic_freeway AS ( -- noqa: ST03 WHERE road_type = 'Freeway And Expressway' ), -traffic_principal AS ( -- noqa: ST03 +traffic_major_collector AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} - WHERE road_type = 'Other Principal Arterial' + WHERE road_type = 'Major Collector' ), distinct_pins AS ( @@ -92,17 +92,19 @@ nearest_freeway AS ( GROUP BY pcl.pin10, pcl.year ), --- Select nearest road from Other 
Principal Arterial -nearest_principal AS ( +-- Select nearest road from Major Collector +nearest_major_collector AS ( SELECT pcl.pin10, - ARBITRARY(xy.road_name) AS nearest_principal_road_name, - ARBITRARY(xy.dist_ft) AS nearest_principal_road_dist_ft, - ARBITRARY(xy.year) AS nearest_principal_road_data_year, - ARBITRARY(xy.surface_width) AS nearest_principal_road_surface_width, + ARBITRARY(xy.road_name) AS nearest_major_collector_road_name, + ARBITRARY(xy.dist_ft) AS nearest_major_collector_road_dist_ft, + ARBITRARY(xy.year) AS nearest_major_collector_road_data_year, + ARBITRARY(xy.surface_width) + AS nearest_major_collector_road_surface_width, pcl.year FROM distinct_pins AS pcl - INNER JOIN ( {{ dist_to_nearest_geometry('traffic_principal') }} ) AS xy + INNER JOIN + ( {{ dist_to_nearest_geometry('traffic_major_collector') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 AND pcl.year = xy.pin_year @@ -111,8 +113,9 @@ nearest_principal AS ( -- Join the results based on pin10 and year SELECT - COALESCE(minor.pin10, interstate.pin10, freeway.pin10, principal.pin10) - AS pin10, + COALESCE( + minor.pin10, interstate.pin10, freeway.pin10, major_collector.pin10 + ) AS pin10, minor.nearest_minor_road_name, minor.nearest_minor_road_dist_ft, minor.nearest_minor_road_data_year, @@ -125,17 +128,20 @@ SELECT freeway.nearest_freeway_road_dist_ft, freeway.nearest_freeway_road_data_year, freeway.nearest_freeway_road_surface_width, - principal.nearest_principal_road_name, - principal.nearest_principal_road_dist_ft, - principal.nearest_principal_road_data_year, - principal.nearest_principal_road_surface_width, - COALESCE(minor.year, interstate.year, freeway.year, principal.year) AS year + major_collector.nearest_major_collector_road_name, + major_collector.nearest_major_collector_road_dist_ft, + major_collector.nearest_major_collector_road_data_year, + major_collector.nearest_major_collector_road_surface_width, + COALESCE(minor.year, interstate.year, freeway.year, 
major_collector.year) + AS year FROM nearest_minor AS minor FULL OUTER JOIN nearest_interstate AS interstate ON minor.pin10 = interstate.pin10 AND minor.year = interstate.year FULL OUTER JOIN nearest_freeway AS freeway ON COALESCE(minor.pin10, interstate.pin10) = freeway.pin10 AND COALESCE(minor.year, interstate.year) = freeway.year -FULL OUTER JOIN nearest_principal AS principal - ON COALESCE(minor.pin10, interstate.pin10, freeway.pin10) = principal.pin10 - AND COALESCE(minor.year, interstate.year, freeway.year) = principal.year +FULL OUTER JOIN nearest_major_collector AS major_collector + ON COALESCE(minor.pin10, interstate.pin10, freeway.pin10) + = major_collector.pin10 + AND COALESCE(minor.year, interstate.year, freeway.year) + = major_collector.year From ed4ba5f05600f28c4ffd4e6bb6f6f165598a0d47 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 16 Oct 2024 14:09:23 +0000 Subject: [PATCH 028/166] Add other --- .../proximity.dist_pin_to_traffic_master.sql | 49 +++++++++++++++++-- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 37fd15af3..391287389 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -26,6 +26,12 @@ traffic_freeway AS ( -- noqa: ST03 WHERE road_type = 'Freeway And Expressway' ), +traffic_principal AS ( -- noqa: ST03 + SELECT * + FROM {{ source('spatial', 'traffic') }} + WHERE road_type = 'Other Principal Arterial' +), + traffic_major_collector AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} @@ -92,6 +98,23 @@ nearest_freeway AS ( GROUP BY pcl.pin10, pcl.year ), +-- Select nearest road from Other Principal Arterial +nearest_principal AS ( + SELECT + pcl.pin10, + ARBITRARY(xy.road_name) AS nearest_principal_road_name, + ARBITRARY(xy.dist_ft) AS nearest_principal_road_dist_ft, + 
ARBITRARY(xy.year) AS nearest_principal_road_data_year, + ARBITRARY(xy.surface_width) AS nearest_principal_road_surface_width, + pcl.year + FROM distinct_pins AS pcl + INNER JOIN ( {{ dist_to_nearest_geometry('traffic_principal') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year + GROUP BY pcl.pin10, pcl.year +), + -- Select nearest road from Major Collector nearest_major_collector AS ( SELECT @@ -114,7 +137,11 @@ nearest_major_collector AS ( -- Join the results based on pin10 and year SELECT COALESCE( - minor.pin10, interstate.pin10, freeway.pin10, major_collector.pin10 + minor.pin10, + interstate.pin10, + freeway.pin10, + principal.pin10, + major_collector.pin10 ) AS pin10, minor.nearest_minor_road_name, minor.nearest_minor_road_dist_ft, @@ -128,20 +155,32 @@ SELECT freeway.nearest_freeway_road_dist_ft, freeway.nearest_freeway_road_data_year, freeway.nearest_freeway_road_surface_width, + principal.nearest_principal_road_name, + principal.nearest_principal_road_dist_ft, + principal.nearest_principal_road_data_year, + principal.nearest_principal_road_surface_width, major_collector.nearest_major_collector_road_name, major_collector.nearest_major_collector_road_dist_ft, major_collector.nearest_major_collector_road_data_year, major_collector.nearest_major_collector_road_surface_width, - COALESCE(minor.year, interstate.year, freeway.year, major_collector.year) - AS year + COALESCE( + minor.year, + interstate.year, + freeway.year, + principal.year, + major_collector.year + ) AS year FROM nearest_minor AS minor FULL OUTER JOIN nearest_interstate AS interstate ON minor.pin10 = interstate.pin10 AND minor.year = interstate.year FULL OUTER JOIN nearest_freeway AS freeway ON COALESCE(minor.pin10, interstate.pin10) = freeway.pin10 AND COALESCE(minor.year, interstate.year) = freeway.year +FULL OUTER JOIN nearest_principal AS principal + ON COALESCE(minor.pin10, interstate.pin10, freeway.pin10) = principal.pin10 + AND 
COALESCE(minor.year, interstate.year, freeway.year) = principal.year FULL OUTER JOIN nearest_major_collector AS major_collector - ON COALESCE(minor.pin10, interstate.pin10, freeway.pin10) + ON COALESCE(minor.pin10, interstate.pin10, freeway.pin10, principal.pin10) = major_collector.pin10 - AND COALESCE(minor.year, interstate.year, freeway.year) + AND COALESCE(minor.year, interstate.year, freeway.year, principal.year) = major_collector.year From 0c41137fc1ecd2a047d249827a802f27f4699bbf Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 16 Oct 2024 14:14:31 +0000 Subject: [PATCH 029/166] re-add major --- .../proximity/proximity.dist_pin_to_traffic_master.sql | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 391287389..0cf9f61e1 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -180,7 +180,9 @@ FULL OUTER JOIN nearest_principal AS principal ON COALESCE(minor.pin10, interstate.pin10, freeway.pin10) = principal.pin10 AND COALESCE(minor.year, interstate.year, freeway.year) = principal.year FULL OUTER JOIN nearest_major_collector AS major_collector - ON COALESCE(minor.pin10, interstate.pin10, freeway.pin10, principal.pin10) - = major_collector.pin10 - AND COALESCE(minor.year, interstate.year, freeway.year, principal.year) - = major_collector.year + ON COALESCE( + minor.pin10, interstate.pin10, freeway.pin10, principal.pin10 + ) = major_collector.pin10 + AND COALESCE( + minor.year, interstate.year, freeway.year, principal.year + ) = major_collector.year From 7cc2b22d37bc1e6d8764b04a605c17ee9457c688 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 16 Oct 2024 15:25:54 +0000 Subject: [PATCH 030/166] remove year --- .../proximity.dist_pin_to_traffic_master.sql | 58 +++---------------- 1 file changed, 8 insertions(+), 
50 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 0cf9f61e1..0b2c149a5 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -26,12 +26,6 @@ traffic_freeway AS ( -- noqa: ST03 WHERE road_type = 'Freeway And Expressway' ), -traffic_principal AS ( -- noqa: ST03 - SELECT * - FROM {{ source('spatial', 'traffic') }} - WHERE road_type = 'Other Principal Arterial' -), - traffic_major_collector AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} @@ -42,8 +36,7 @@ distinct_pins AS ( SELECT DISTINCT x_3435, y_3435, - pin10, - year + pin10 FROM {{ source('spatial', 'parcel') }} ), @@ -98,23 +91,6 @@ nearest_freeway AS ( GROUP BY pcl.pin10, pcl.year ), --- Select nearest road from Other Principal Arterial -nearest_principal AS ( - SELECT - pcl.pin10, - ARBITRARY(xy.road_name) AS nearest_principal_road_name, - ARBITRARY(xy.dist_ft) AS nearest_principal_road_dist_ft, - ARBITRARY(xy.year) AS nearest_principal_road_data_year, - ARBITRARY(xy.surface_width) AS nearest_principal_road_surface_width, - pcl.year - FROM distinct_pins AS pcl - INNER JOIN ( {{ dist_to_nearest_geometry('traffic_principal') }} ) AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year - GROUP BY pcl.pin10, pcl.year -), - -- Select nearest road from Major Collector nearest_major_collector AS ( SELECT @@ -137,11 +113,7 @@ nearest_major_collector AS ( -- Join the results based on pin10 and year SELECT COALESCE( - minor.pin10, - interstate.pin10, - freeway.pin10, - principal.pin10, - major_collector.pin10 + minor.pin10, interstate.pin10, freeway.pin10, major_collector.pin10 ) AS pin10, minor.nearest_minor_road_name, minor.nearest_minor_road_dist_ft, @@ -155,34 +127,20 @@ SELECT freeway.nearest_freeway_road_dist_ft, freeway.nearest_freeway_road_data_year, 
freeway.nearest_freeway_road_surface_width, - principal.nearest_principal_road_name, - principal.nearest_principal_road_dist_ft, - principal.nearest_principal_road_data_year, - principal.nearest_principal_road_surface_width, major_collector.nearest_major_collector_road_name, major_collector.nearest_major_collector_road_dist_ft, major_collector.nearest_major_collector_road_data_year, major_collector.nearest_major_collector_road_surface_width, - COALESCE( - minor.year, - interstate.year, - freeway.year, - principal.year, - major_collector.year - ) AS year + COALESCE(minor.year, interstate.year, freeway.year, major_collector.year) + AS year FROM nearest_minor AS minor FULL OUTER JOIN nearest_interstate AS interstate ON minor.pin10 = interstate.pin10 AND minor.year = interstate.year FULL OUTER JOIN nearest_freeway AS freeway ON COALESCE(minor.pin10, interstate.pin10) = freeway.pin10 AND COALESCE(minor.year, interstate.year) = freeway.year -FULL OUTER JOIN nearest_principal AS principal - ON COALESCE(minor.pin10, interstate.pin10, freeway.pin10) = principal.pin10 - AND COALESCE(minor.year, interstate.year, freeway.year) = principal.year FULL OUTER JOIN nearest_major_collector AS major_collector - ON COALESCE( - minor.pin10, interstate.pin10, freeway.pin10, principal.pin10 - ) = major_collector.pin10 - AND COALESCE( - minor.year, interstate.year, freeway.year, principal.year - ) = major_collector.year + ON COALESCE(minor.pin10, interstate.pin10, freeway.pin10) + = major_collector.pin10 + AND COALESCE(minor.year, interstate.year, freeway.year) + = major_collector.year From 03bdf619f053041eee392c67fdcf7dc667d7162a Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 16 Oct 2024 15:27:43 +0000 Subject: [PATCH 031/166] Remove year from join --- dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql | 4 ---- 1 file changed, 4 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql 
b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 0b2c149a5..92c2e6208 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -53,7 +53,6 @@ nearest_minor AS ( INNER JOIN ( {{ dist_to_nearest_geometry('traffic_minor') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year GROUP BY pcl.pin10, pcl.year ), @@ -70,7 +69,6 @@ nearest_interstate AS ( INNER JOIN ( {{ dist_to_nearest_geometry('traffic_interstate') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year GROUP BY pcl.pin10, pcl.year ), @@ -87,7 +85,6 @@ nearest_freeway AS ( INNER JOIN ( {{ dist_to_nearest_geometry('traffic_freeway') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year GROUP BY pcl.pin10, pcl.year ), @@ -106,7 +103,6 @@ nearest_major_collector AS ( ( {{ dist_to_nearest_geometry('traffic_major_collector') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 - AND pcl.year = xy.pin_year GROUP BY pcl.pin10, pcl.year ) From 09ce316c6d4825fe63c63f08447d07b0c3de3beb Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 16 Oct 2024 15:30:05 +0000 Subject: [PATCH 032/166] Remove all pcl.year --- .../proximity.dist_pin_to_traffic_master.sql | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 92c2e6208..ec2db3396 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -47,13 +47,12 @@ nearest_minor AS ( ARBITRARY(xy.road_name) AS nearest_minor_road_name, ARBITRARY(xy.dist_ft) AS nearest_minor_road_dist_ft, ARBITRARY(xy.year) AS nearest_minor_road_data_year, - ARBITRARY(xy.surface_width) AS 
nearest_minor_road_surface_width, - pcl.year + ARBITRARY(xy.surface_width) AS nearest_minor_road_surface_width FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic_minor') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 - GROUP BY pcl.pin10, pcl.year + GROUP BY pcl.pin10 ), -- Select nearest road from Interstate @@ -63,13 +62,12 @@ nearest_interstate AS ( ARBITRARY(xy.road_name) AS nearest_interstate_road_name, ARBITRARY(xy.dist_ft) AS nearest_interstate_road_dist_ft, ARBITRARY(xy.year) AS nearest_interstate_road_data_year, - ARBITRARY(xy.surface_width) AS nearest_interstate_road_surface_width, - pcl.year + ARBITRARY(xy.surface_width) AS nearest_interstate_road_surface_width FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic_interstate') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 - GROUP BY pcl.pin10, pcl.year + GROUP BY pcl.pin10 ), -- Select nearest road from Freeway And Expressway @@ -79,13 +77,12 @@ nearest_freeway AS ( ARBITRARY(xy.road_name) AS nearest_freeway_road_name, ARBITRARY(xy.dist_ft) AS nearest_freeway_road_dist_ft, ARBITRARY(xy.year) AS nearest_freeway_road_data_year, - ARBITRARY(xy.surface_width) AS nearest_freeway_road_surface_width, - pcl.year + ARBITRARY(xy.surface_width) AS nearest_freeway_road_surface_width FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic_freeway') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 - GROUP BY pcl.pin10, pcl.year + GROUP BY pcl.pin10 ), -- Select nearest road from Major Collector @@ -96,14 +93,13 @@ nearest_major_collector AS ( ARBITRARY(xy.dist_ft) AS nearest_major_collector_road_dist_ft, ARBITRARY(xy.year) AS nearest_major_collector_road_data_year, ARBITRARY(xy.surface_width) - AS nearest_major_collector_road_surface_width, - pcl.year + AS nearest_major_collector_road_surface_width FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic_major_collector') }} ) AS xy 
ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 - GROUP BY pcl.pin10, pcl.year + GROUP BY pcl.pin10 ) -- Join the results based on pin10 and year From 7291c2db8e49c7f1a889a04d3631a540b256621e Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 16 Oct 2024 15:34:08 +0000 Subject: [PATCH 033/166] remove year from joins --- .../proximity/proximity.dist_pin_to_traffic_master.sql | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index ec2db3396..da83c67b6 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -122,17 +122,12 @@ SELECT major_collector.nearest_major_collector_road_name, major_collector.nearest_major_collector_road_dist_ft, major_collector.nearest_major_collector_road_data_year, - major_collector.nearest_major_collector_road_surface_width, - COALESCE(minor.year, interstate.year, freeway.year, major_collector.year) - AS year + major_collector.nearest_major_collector_road_surface_width FROM nearest_minor AS minor FULL OUTER JOIN nearest_interstate AS interstate - ON minor.pin10 = interstate.pin10 AND minor.year = interstate.year + ON minor.pin10 = interstate.pin10 FULL OUTER JOIN nearest_freeway AS freeway ON COALESCE(minor.pin10, interstate.pin10) = freeway.pin10 - AND COALESCE(minor.year, interstate.year) = freeway.year FULL OUTER JOIN nearest_major_collector AS major_collector ON COALESCE(minor.pin10, interstate.pin10, freeway.pin10) = major_collector.pin10 - AND COALESCE(minor.year, interstate.year, freeway.year) - = major_collector.year From f0ca25fbeccea282ab71fc4299fafa1b4c5bb4a1 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 16 Oct 2024 15:57:40 +0000 Subject: [PATCH 034/166] Try a bit of refactor --- .../proximity.dist_pin_to_traffic_master.sql | 48 ++++++++++++------- 1 file changed, 32 
insertions(+), 16 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index da83c67b6..1cda71b2c 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -1,4 +1,3 @@ --- CTAS to create a table of distance to the nearest road for each PIN {{ config( materialized='table', @@ -40,58 +39,68 @@ distinct_pins AS ( FROM {{ source('spatial', 'parcel') }} ), --- Select nearest road from Minor Arterial +distinct_years_rhs AS ( + SELECT DISTINCT year + FROM {{ source('spatial', 'traffic') }} + WHERE road_type IS NOT NULL +), + +-- Calculate nearest Minor Arterial road per pin nearest_minor AS ( SELECT pcl.pin10, + xy.year, ARBITRARY(xy.road_name) AS nearest_minor_road_name, ARBITRARY(xy.dist_ft) AS nearest_minor_road_dist_ft, - ARBITRARY(xy.year) AS nearest_minor_road_data_year, + ARBITRARY(xy.year) AS data_year, ARBITRARY(xy.surface_width) AS nearest_minor_road_surface_width FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic_minor') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 - GROUP BY pcl.pin10 + GROUP BY pcl.pin10, xy.year ), --- Select nearest road from Interstate +-- Calculate nearest Interstate road per pin nearest_interstate AS ( SELECT pcl.pin10, + xy.year, ARBITRARY(xy.road_name) AS nearest_interstate_road_name, ARBITRARY(xy.dist_ft) AS nearest_interstate_road_dist_ft, - ARBITRARY(xy.year) AS nearest_interstate_road_data_year, + ARBITRARY(xy.year) AS data_year, ARBITRARY(xy.surface_width) AS nearest_interstate_road_surface_width FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic_interstate') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 - GROUP BY pcl.pin10 + GROUP BY pcl.pin10, xy.year ), --- Select nearest road from Freeway And Expressway +-- Calculate nearest Freeway or Expressway road per 
pin nearest_freeway AS ( SELECT pcl.pin10, + xy.year, ARBITRARY(xy.road_name) AS nearest_freeway_road_name, ARBITRARY(xy.dist_ft) AS nearest_freeway_road_dist_ft, - ARBITRARY(xy.year) AS nearest_freeway_road_data_year, + ARBITRARY(xy.year) AS data_year, ARBITRARY(xy.surface_width) AS nearest_freeway_road_surface_width FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic_freeway') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 - GROUP BY pcl.pin10 + GROUP BY pcl.pin10, xy.year ), --- Select nearest road from Major Collector +-- Calculate nearest Major Collector road per pin nearest_major_collector AS ( SELECT pcl.pin10, + xy.year, ARBITRARY(xy.road_name) AS nearest_major_collector_road_name, ARBITRARY(xy.dist_ft) AS nearest_major_collector_road_dist_ft, - ARBITRARY(xy.year) AS nearest_major_collector_road_data_year, + ARBITRARY(xy.year) AS data_year, ARBITRARY(xy.surface_width) AS nearest_major_collector_road_surface_width FROM distinct_pins AS pcl @@ -99,10 +108,10 @@ nearest_major_collector AS ( ( {{ dist_to_nearest_geometry('traffic_major_collector') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 - GROUP BY pcl.pin10 + GROUP BY pcl.pin10, xy.year ) --- Join the results based on pin10 and year +-- Join all nearest roads by pin10 and year SELECT COALESCE( minor.pin10, interstate.pin10, freeway.pin10, major_collector.pin10 @@ -122,12 +131,19 @@ SELECT major_collector.nearest_major_collector_road_name, major_collector.nearest_major_collector_road_dist_ft, major_collector.nearest_major_collector_road_data_year, - major_collector.nearest_major_collector_road_surface_width + major_collector.nearest_major_collector_road_surface_width, + COALESCE(minor.year, interstate.year, freeway.year, major_collector.year) + AS year FROM nearest_minor AS minor FULL OUTER JOIN nearest_interstate AS interstate - ON minor.pin10 = interstate.pin10 + ON minor.pin10 = interstate.pin10 AND minor.year = interstate.year FULL OUTER JOIN 
nearest_freeway AS freeway ON COALESCE(minor.pin10, interstate.pin10) = freeway.pin10 + AND COALESCE(minor.year, interstate.year) = freeway.year FULL OUTER JOIN nearest_major_collector AS major_collector ON COALESCE(minor.pin10, interstate.pin10, freeway.pin10) = major_collector.pin10 + AND COALESCE(minor.year, interstate.year, freeway.year) + = major_collector.year +WHERE COALESCE(minor.year, interstate.year, freeway.year, major_collector.year) + >= (SELECT MIN(year) FROM distinct_years_rhs) From 205e5c541b77338bb6828562044d4e59be363604 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 16 Oct 2024 16:02:05 +0000 Subject: [PATCH 035/166] rename data_year --- .../proximity/proximity.dist_pin_to_traffic_master.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 1cda71b2c..cc9db6b21 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -52,7 +52,7 @@ nearest_minor AS ( xy.year, ARBITRARY(xy.road_name) AS nearest_minor_road_name, ARBITRARY(xy.dist_ft) AS nearest_minor_road_dist_ft, - ARBITRARY(xy.year) AS data_year, + ARBITRARY(xy.year) AS nearest_minor_road_data_year, ARBITRARY(xy.surface_width) AS nearest_minor_road_surface_width FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic_minor') }} ) AS xy @@ -68,7 +68,7 @@ nearest_interstate AS ( xy.year, ARBITRARY(xy.road_name) AS nearest_interstate_road_name, ARBITRARY(xy.dist_ft) AS nearest_interstate_road_dist_ft, - ARBITRARY(xy.year) AS data_year, + ARBITRARY(xy.year) AS nearest_interstate_road_data_year, ARBITRARY(xy.surface_width) AS nearest_interstate_road_surface_width FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic_interstate') }} ) AS xy @@ -84,7 +84,7 @@ nearest_freeway AS ( xy.year, ARBITRARY(xy.road_name) AS 
nearest_freeway_road_name, ARBITRARY(xy.dist_ft) AS nearest_freeway_road_dist_ft, - ARBITRARY(xy.year) AS data_year, + ARBITRARY(xy.year) AS nearest_freeway_road_data_year, ARBITRARY(xy.surface_width) AS nearest_freeway_road_surface_width FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic_freeway') }} ) AS xy @@ -100,7 +100,7 @@ nearest_major_collector AS ( xy.year, ARBITRARY(xy.road_name) AS nearest_major_collector_road_name, ARBITRARY(xy.dist_ft) AS nearest_major_collector_road_dist_ft, - ARBITRARY(xy.year) AS data_year, + ARBITRARY(xy.year) AS nearest_major_collector_road_data_year, ARBITRARY(xy.surface_width) AS nearest_major_collector_road_surface_width FROM distinct_pins AS pcl From 33e1993cd69062734c41801625768e9c57171515 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 16 Oct 2024 16:26:19 +0000 Subject: [PATCH 036/166] Try to add minor collector --- .../proximity.dist_pin_to_traffic_master.sql | 45 ++++++++++++++++--- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index cc9db6b21..b20715a11 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -93,6 +93,23 @@ nearest_freeway AS ( GROUP BY pcl.pin10, xy.year ), +nearest_minor_collector AS ( + SELECT + pcl.pin10, + xy.year, + ARBITRARY(xy.road_name) AS nearest_minor_collector_road_name, + ARBITRARY(xy.dist_ft) AS nearest_minor_collector_road_dist_ft, + ARBITRARY(xy.year) AS nearest_minor_collector_road_data_year, + ARBITRARY(xy.surface_width) + AS nearest_minor_collector_road_surface_width + FROM distinct_pins AS pcl + INNER JOIN + ( {{ dist_to_nearest_geometry('traffic_minor_collector') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + GROUP BY pcl.pin10, xy.year +), + -- Calculate nearest Major Collector road per pin 
nearest_major_collector AS ( SELECT @@ -114,7 +131,8 @@ nearest_major_collector AS ( -- Join all nearest roads by pin10 and year SELECT COALESCE( - minor.pin10, interstate.pin10, freeway.pin10, major_collector.pin10 + minor.pin10, interstate.pin10, freeway.pin10, major_collector.pin10, + minor_collector.pin10 ) AS pin10, minor.nearest_minor_road_name, minor.nearest_minor_road_dist_ft, @@ -132,8 +150,14 @@ SELECT major_collector.nearest_major_collector_road_dist_ft, major_collector.nearest_major_collector_road_data_year, major_collector.nearest_major_collector_road_surface_width, - COALESCE(minor.year, interstate.year, freeway.year, major_collector.year) - AS year + minor_collector.nearest_minor_collector_road_name, + minor_collector.nearest_minor_collector_road_dist_ft, + minor_collector.nearest_minor_collector_road_data_year, + minor_collector.nearest_minor_collector_road_surface_width, + COALESCE( + minor.year, interstate.year, freeway.year, major_collector.year, + minor_collector.year + ) AS year FROM nearest_minor AS minor FULL OUTER JOIN nearest_interstate AS interstate ON minor.pin10 = interstate.pin10 AND minor.year = interstate.year @@ -145,5 +169,16 @@ FULL OUTER JOIN nearest_major_collector AS major_collector = major_collector.pin10 AND COALESCE(minor.year, interstate.year, freeway.year) = major_collector.year -WHERE COALESCE(minor.year, interstate.year, freeway.year, major_collector.year) - >= (SELECT MIN(year) FROM distinct_years_rhs) +FULL OUTER JOIN nearest_minor_collector AS minor_collector + ON COALESCE( + minor.pin10, interstate.pin10, freeway.pin10, major_collector.pin10 + ) + = minor_collector.pin10 + AND COALESCE( + minor.year, interstate.year, freeway.year, major_collector.year + ) + = minor_collector.year +WHERE COALESCE( + minor.year, interstate.year, freeway.year, major_collector.year, + minor_collector.year + ) >= (SELECT MIN(year) FROM distinct_years_rhs) From ce34f6f0d3d3c406b0395664888d79f3a23acd7b Mon Sep 17 00:00:00 2001 From: 
Damonamajor Date: Wed, 16 Oct 2024 16:32:02 +0000 Subject: [PATCH 037/166] Add CTE --- .../proximity/proximity.dist_pin_to_traffic_master.sql | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index b20715a11..183ffb06d 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -13,6 +13,12 @@ WITH traffic_minor AS ( -- noqa: ST03 WHERE road_type = 'Minor Arterial' ), +traffic_minor_collector AS ( -- noqa: ST03 + SELECT * + FROM {{ source('spatial', 'traffic') }} + WHERE road_type = 'Minor Collector' +), + traffic_interstate AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} From 6469e296c36177060f436975cd0cb0d8b29895bf Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 24 Oct 2024 16:34:24 +0000 Subject: [PATCH 038/166] Add lands and surface type --- .../proximity.dist_pin_to_traffic_master.sql | 77 +++++++------------ 1 file changed, 26 insertions(+), 51 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 183ffb06d..9ed1d4d26 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -13,12 +13,6 @@ WITH traffic_minor AS ( -- noqa: ST03 WHERE road_type = 'Minor Arterial' ), -traffic_minor_collector AS ( -- noqa: ST03 - SELECT * - FROM {{ source('spatial', 'traffic') }} - WHERE road_type = 'Minor Collector' -), - traffic_interstate AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} @@ -45,7 +39,7 @@ distinct_pins AS ( FROM {{ source('spatial', 'parcel') }} ), -distinct_years_rhs AS ( +distinct_years AS ( SELECT DISTINCT year FROM {{ source('spatial', 'traffic') }} WHERE road_type IS NOT NULL @@ -59,7 +53,9 @@ nearest_minor 
AS ( ARBITRARY(xy.road_name) AS nearest_minor_road_name, ARBITRARY(xy.dist_ft) AS nearest_minor_road_dist_ft, ARBITRARY(xy.year) AS nearest_minor_road_data_year, - ARBITRARY(xy.surface_width) AS nearest_minor_road_surface_width + ARBITRARY(xy.surface_width) AS nearest_minor_road_surface_width, + ARBITRARY(xy.surface_type) AS nearest_minor_surface_type, + ARBITRARY(xy.lanes) AS nearest_minor_lanes FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic_minor') }} ) AS xy ON pcl.x_3435 = xy.x_3435 @@ -75,7 +71,9 @@ nearest_interstate AS ( ARBITRARY(xy.road_name) AS nearest_interstate_road_name, ARBITRARY(xy.dist_ft) AS nearest_interstate_road_dist_ft, ARBITRARY(xy.year) AS nearest_interstate_road_data_year, - ARBITRARY(xy.surface_width) AS nearest_interstate_road_surface_width + ARBITRARY(xy.surface_width) AS nearest_interstate_road_surface_width, + ARBITRARY(xy.surface_type) AS nearest_interstate_surface_type, + ARBITRARY(xy.lanes) AS nearest_interstate_lanes FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic_interstate') }} ) AS xy ON pcl.x_3435 = xy.x_3435 @@ -91,7 +89,9 @@ nearest_freeway AS ( ARBITRARY(xy.road_name) AS nearest_freeway_road_name, ARBITRARY(xy.dist_ft) AS nearest_freeway_road_dist_ft, ARBITRARY(xy.year) AS nearest_freeway_road_data_year, - ARBITRARY(xy.surface_width) AS nearest_freeway_road_surface_width + ARBITRARY(xy.surface_width) AS nearest_freeway_road_surface_width, + ARBITRARY(xy.surface_type) AS nearest_freeway_surface_type, + ARBITRARY(xy.lanes) AS nearest_freeway_lanes FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic_freeway') }} ) AS xy ON pcl.x_3435 = xy.x_3435 @@ -99,23 +99,6 @@ nearest_freeway AS ( GROUP BY pcl.pin10, xy.year ), -nearest_minor_collector AS ( - SELECT - pcl.pin10, - xy.year, - ARBITRARY(xy.road_name) AS nearest_minor_collector_road_name, - ARBITRARY(xy.dist_ft) AS nearest_minor_collector_road_dist_ft, - ARBITRARY(xy.year) AS 
nearest_minor_collector_road_data_year, - ARBITRARY(xy.surface_width) - AS nearest_minor_collector_road_surface_width - FROM distinct_pins AS pcl - INNER JOIN - ( {{ dist_to_nearest_geometry('traffic_minor_collector') }} ) AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - GROUP BY pcl.pin10, xy.year -), - -- Calculate nearest Major Collector road per pin nearest_major_collector AS ( SELECT @@ -125,7 +108,9 @@ nearest_major_collector AS ( ARBITRARY(xy.dist_ft) AS nearest_major_collector_road_dist_ft, ARBITRARY(xy.year) AS nearest_major_collector_road_data_year, ARBITRARY(xy.surface_width) - AS nearest_major_collector_road_surface_width + AS nearest_major_collector_road_surface_width, + ARBITRARY(xy.surface_type) AS nearest_major_collector_surface_type, + ARBITRARY(xy.lanes) AS nearest_major_collector_lanes FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic_major_collector') }} ) AS xy @@ -137,33 +122,34 @@ nearest_major_collector AS ( -- Join all nearest roads by pin10 and year SELECT COALESCE( - minor.pin10, interstate.pin10, freeway.pin10, major_collector.pin10, - minor_collector.pin10 + minor.pin10, interstate.pin10, freeway.pin10, major_collector.pin10 ) AS pin10, minor.nearest_minor_road_name, minor.nearest_minor_road_dist_ft, minor.nearest_minor_road_data_year, minor.nearest_minor_road_surface_width, + minor.nearest_minor_surface_type, + minor.nearest_minor_lanes, interstate.nearest_interstate_road_name, interstate.nearest_interstate_road_dist_ft, interstate.nearest_interstate_road_data_year, interstate.nearest_interstate_road_surface_width, + interstate.nearest_interstate_surface_type, + interstate.nearest_interstate_lanes, freeway.nearest_freeway_road_name, freeway.nearest_freeway_road_dist_ft, freeway.nearest_freeway_road_data_year, freeway.nearest_freeway_road_surface_width, + freeway.nearest_freeway_surface_type, + freeway.nearest_freeway_lanes, major_collector.nearest_major_collector_road_name, 
major_collector.nearest_major_collector_road_dist_ft, major_collector.nearest_major_collector_road_data_year, major_collector.nearest_major_collector_road_surface_width, - minor_collector.nearest_minor_collector_road_name, - minor_collector.nearest_minor_collector_road_dist_ft, - minor_collector.nearest_minor_collector_road_data_year, - minor_collector.nearest_minor_collector_road_surface_width, - COALESCE( - minor.year, interstate.year, freeway.year, major_collector.year, - minor_collector.year - ) AS year + major_collector.nearest_major_collector_surface_type, + major_collector.nearest_major_collector_lanes, + COALESCE(minor.year, interstate.year, freeway.year, major_collector.year) + AS year FROM nearest_minor AS minor FULL OUTER JOIN nearest_interstate AS interstate ON minor.pin10 = interstate.pin10 AND minor.year = interstate.year @@ -175,16 +161,5 @@ FULL OUTER JOIN nearest_major_collector AS major_collector = major_collector.pin10 AND COALESCE(minor.year, interstate.year, freeway.year) = major_collector.year -FULL OUTER JOIN nearest_minor_collector AS minor_collector - ON COALESCE( - minor.pin10, interstate.pin10, freeway.pin10, major_collector.pin10 - ) - = minor_collector.pin10 - AND COALESCE( - minor.year, interstate.year, freeway.year, major_collector.year - ) - = minor_collector.year -WHERE COALESCE( - minor.year, interstate.year, freeway.year, major_collector.year, - minor_collector.year - ) >= (SELECT MIN(year) FROM distinct_years_rhs) +WHERE COALESCE(minor.year, interstate.year, freeway.year, major_collector.year) + >= (SELECT MIN(year) FROM distinct_years) From 0e8dc1f028864585643fdcc565b6111e7cbcc899 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 24 Oct 2024 16:40:50 +0000 Subject: [PATCH 039/166] Switch to speed limit --- .../proximity.dist_pin_to_traffic_master.sql | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql 
b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 9ed1d4d26..24528bdf6 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -53,7 +53,7 @@ nearest_minor AS ( ARBITRARY(xy.road_name) AS nearest_minor_road_name, ARBITRARY(xy.dist_ft) AS nearest_minor_road_dist_ft, ARBITRARY(xy.year) AS nearest_minor_road_data_year, - ARBITRARY(xy.surface_width) AS nearest_minor_road_surface_width, + ARBITRARY(xy.speed_limit) AS nearest_minor_road_speed_limit, ARBITRARY(xy.surface_type) AS nearest_minor_surface_type, ARBITRARY(xy.lanes) AS nearest_minor_lanes FROM distinct_pins AS pcl @@ -71,7 +71,7 @@ nearest_interstate AS ( ARBITRARY(xy.road_name) AS nearest_interstate_road_name, ARBITRARY(xy.dist_ft) AS nearest_interstate_road_dist_ft, ARBITRARY(xy.year) AS nearest_interstate_road_data_year, - ARBITRARY(xy.surface_width) AS nearest_interstate_road_surface_width, + ARBITRARY(xy.speed_limit) AS nearest_interstate_road_speed_limit, ARBITRARY(xy.surface_type) AS nearest_interstate_surface_type, ARBITRARY(xy.lanes) AS nearest_interstate_lanes FROM distinct_pins AS pcl @@ -89,7 +89,7 @@ nearest_freeway AS ( ARBITRARY(xy.road_name) AS nearest_freeway_road_name, ARBITRARY(xy.dist_ft) AS nearest_freeway_road_dist_ft, ARBITRARY(xy.year) AS nearest_freeway_road_data_year, - ARBITRARY(xy.surface_width) AS nearest_freeway_road_surface_width, + ARBITRARY(xy.speed_limit) AS nearest_freeway_road_speed_limit, ARBITRARY(xy.surface_type) AS nearest_freeway_surface_type, ARBITRARY(xy.lanes) AS nearest_freeway_lanes FROM distinct_pins AS pcl @@ -107,8 +107,8 @@ nearest_major_collector AS ( ARBITRARY(xy.road_name) AS nearest_major_collector_road_name, ARBITRARY(xy.dist_ft) AS nearest_major_collector_road_dist_ft, ARBITRARY(xy.year) AS nearest_major_collector_road_data_year, - ARBITRARY(xy.surface_width) - AS nearest_major_collector_road_surface_width, + ARBITRARY(xy.speed_limit) + AS 
nearest_major_collector_road_speed_limit, ARBITRARY(xy.surface_type) AS nearest_major_collector_surface_type, ARBITRARY(xy.lanes) AS nearest_major_collector_lanes FROM distinct_pins AS pcl @@ -127,25 +127,25 @@ SELECT minor.nearest_minor_road_name, minor.nearest_minor_road_dist_ft, minor.nearest_minor_road_data_year, - minor.nearest_minor_road_surface_width, + minor.nearest_minor_road_speed_limit, minor.nearest_minor_surface_type, minor.nearest_minor_lanes, interstate.nearest_interstate_road_name, interstate.nearest_interstate_road_dist_ft, interstate.nearest_interstate_road_data_year, - interstate.nearest_interstate_road_surface_width, + interstate.nearest_interstate_road_speed_limit, interstate.nearest_interstate_surface_type, interstate.nearest_interstate_lanes, freeway.nearest_freeway_road_name, freeway.nearest_freeway_road_dist_ft, freeway.nearest_freeway_road_data_year, - freeway.nearest_freeway_road_surface_width, + freeway.nearest_freeway_road_speed_limit, freeway.nearest_freeway_surface_type, freeway.nearest_freeway_lanes, major_collector.nearest_major_collector_road_name, major_collector.nearest_major_collector_road_dist_ft, major_collector.nearest_major_collector_road_data_year, - major_collector.nearest_major_collector_road_surface_width, + major_collector.nearest_major_collector_road_speed_limit, major_collector.nearest_major_collector_surface_type, major_collector.nearest_major_collector_lanes, COALESCE(minor.year, interstate.year, freeway.year, major_collector.year) From b4351bc51ab12a29f60fe161f265a2c227481e4a Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 24 Oct 2024 18:02:28 +0000 Subject: [PATCH 040/166] Lowercase and --- dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 24528bdf6..3da54e238 100644 --- 
a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -22,7 +22,7 @@ traffic_interstate AS ( -- noqa: ST03 traffic_freeway AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} - WHERE road_type = 'Freeway And Expressway' + WHERE road_type = 'Freeway and Expressway' ), traffic_major_collector AS ( -- noqa: ST03 From 6bc3d826d1dfb21f685d4c205fca5484e67da54a Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 24 Oct 2024 18:11:51 +0000 Subject: [PATCH 041/166] Add dailiy_traffic --- .../proximity/proximity.dist_pin_to_traffic_master.sql | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 3da54e238..39b1acb30 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -53,6 +53,7 @@ nearest_minor AS ( ARBITRARY(xy.road_name) AS nearest_minor_road_name, ARBITRARY(xy.dist_ft) AS nearest_minor_road_dist_ft, ARBITRARY(xy.year) AS nearest_minor_road_data_year, + ARBITRARY(xy.daily_traffic) AS nearest_minor_road_daily_traffic, ARBITRARY(xy.speed_limit) AS nearest_minor_road_speed_limit, ARBITRARY(xy.surface_type) AS nearest_minor_surface_type, ARBITRARY(xy.lanes) AS nearest_minor_lanes @@ -71,6 +72,7 @@ nearest_interstate AS ( ARBITRARY(xy.road_name) AS nearest_interstate_road_name, ARBITRARY(xy.dist_ft) AS nearest_interstate_road_dist_ft, ARBITRARY(xy.year) AS nearest_interstate_road_data_year, + ARBITRARY(xy.daily_traffic) AS nearest_interstate_daily_traffic, ARBITRARY(xy.speed_limit) AS nearest_interstate_road_speed_limit, ARBITRARY(xy.surface_type) AS nearest_interstate_surface_type, ARBITRARY(xy.lanes) AS nearest_interstate_lanes @@ -89,6 +91,7 @@ nearest_freeway AS ( ARBITRARY(xy.road_name) AS nearest_freeway_road_name, ARBITRARY(xy.dist_ft) AS 
nearest_freeway_road_dist_ft, ARBITRARY(xy.year) AS nearest_freeway_road_data_year, + ARBITRARY(xy.daily_traffic) AS nearest_freeway_daily_traffic, ARBITRARY(xy.speed_limit) AS nearest_freeway_road_speed_limit, ARBITRARY(xy.surface_type) AS nearest_freeway_surface_type, ARBITRARY(xy.lanes) AS nearest_freeway_lanes @@ -109,6 +112,7 @@ nearest_major_collector AS ( ARBITRARY(xy.year) AS nearest_major_collector_road_data_year, ARBITRARY(xy.speed_limit) AS nearest_major_collector_road_speed_limit, + ARBITRARY(xy.daily_traffic) AS nearest_major_collector_daily_traffic, ARBITRARY(xy.surface_type) AS nearest_major_collector_surface_type, ARBITRARY(xy.lanes) AS nearest_major_collector_lanes FROM distinct_pins AS pcl @@ -127,24 +131,28 @@ SELECT minor.nearest_minor_road_name, minor.nearest_minor_road_dist_ft, minor.nearest_minor_road_data_year, + minor.nearest_minor_road_daily_traffic, minor.nearest_minor_road_speed_limit, minor.nearest_minor_surface_type, minor.nearest_minor_lanes, interstate.nearest_interstate_road_name, interstate.nearest_interstate_road_dist_ft, interstate.nearest_interstate_road_data_year, + interstate.nearest_interstate_daily_traffic, interstate.nearest_interstate_road_speed_limit, interstate.nearest_interstate_surface_type, interstate.nearest_interstate_lanes, freeway.nearest_freeway_road_name, freeway.nearest_freeway_road_dist_ft, freeway.nearest_freeway_road_data_year, + freeway.nearest_freeway_daily_traffic, freeway.nearest_freeway_road_speed_limit, freeway.nearest_freeway_surface_type, freeway.nearest_freeway_lanes, major_collector.nearest_major_collector_road_name, major_collector.nearest_major_collector_road_dist_ft, major_collector.nearest_major_collector_road_data_year, + major_collector.nearest_major_collector_daily_traffic, major_collector.nearest_major_collector_road_speed_limit, major_collector.nearest_major_collector_surface_type, major_collector.nearest_major_collector_lanes, From 373ec21dbf9b14bf9603ea0d8fd1f9c88e72a63d Mon Sep 17 
00:00:00 2001 From: Damonamajor Date: Thu, 24 Oct 2024 19:40:26 +0000 Subject: [PATCH 042/166] Test local in wrong query --- dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 39b1acb30..3e7c1fad6 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -28,7 +28,7 @@ traffic_freeway AS ( -- noqa: ST03 traffic_major_collector AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} - WHERE road_type = 'Major Collector' + WHERE road_type = 'Local Road or Street' ), distinct_pins AS ( From 3ba546f9bfaf03d9fa20d4d6c9c90ba48305b152 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 24 Oct 2024 19:46:26 +0000 Subject: [PATCH 043/166] Try different join syntax --- .../proximity.dist_pin_to_traffic_master.sql | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 3e7c1fad6..f68c99e55 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -158,16 +158,17 @@ SELECT major_collector.nearest_major_collector_lanes, COALESCE(minor.year, interstate.year, freeway.year, major_collector.year) AS year -FROM nearest_minor AS minor +FROM distinct_pins AS distinct_pins +FULL OUTER JOIN nearest_minor AS minor + ON distinct_pins.pin10 = minor.pin10 + AND distinct_pins.year = minor.year FULL OUTER JOIN nearest_interstate AS interstate - ON minor.pin10 = interstate.pin10 AND minor.year = interstate.year + ON distinct_pins.pin10 = interstate.pin10 + AND distinct_pins.year = interstate.year FULL OUTER JOIN nearest_freeway AS freeway - 
ON COALESCE(minor.pin10, interstate.pin10) = freeway.pin10 - AND COALESCE(minor.year, interstate.year) = freeway.year + ON distinct_pins.pin10 = freeway.pin10 + AND distinct_pins.year = freeway.year FULL OUTER JOIN nearest_major_collector AS major_collector - ON COALESCE(minor.pin10, interstate.pin10, freeway.pin10) - = major_collector.pin10 - AND COALESCE(minor.year, interstate.year, freeway.year) - = major_collector.year -WHERE COALESCE(minor.year, interstate.year, freeway.year, major_collector.year) - >= (SELECT MIN(year) FROM distinct_years) + ON distinct_pins.pin10 = major_collector.pin10 + AND distinct_pins.year = major_collector.year +WHERE minor.year >= (SELECT MIN(year) FROM distinct_years) From 6d15e6620839ed632679b7f9d8513dc08ce0192a Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 24 Oct 2024 20:19:42 +0000 Subject: [PATCH 044/166] Add local --- .../proximity.dist_pin_to_traffic_master.sql | 69 +++++++++++++++---- 1 file changed, 55 insertions(+), 14 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index f68c99e55..d4968829f 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -25,12 +25,18 @@ traffic_freeway AS ( -- noqa: ST03 WHERE road_type = 'Freeway and Expressway' ), -traffic_major_collector AS ( -- noqa: ST03 +traffic_local AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} WHERE road_type = 'Local Road or Street' ), +traffic_major_collector AS ( -- noqa: ST03 + SELECT * + FROM {{ source('spatial', 'traffic') }} + WHERE road_type = 'Major Collector' +), + distinct_pins AS ( SELECT DISTINCT x_3435, @@ -102,6 +108,24 @@ nearest_freeway AS ( GROUP BY pcl.pin10, xy.year ), +nearest_local AS ( + SELECT + pcl.pin10, + xy.year, + ARBITRARY(xy.road_name) AS nearest_local_road_name, + ARBITRARY(xy.dist_ft) AS nearest_local_road_dist_ft, + 
ARBITRARY(xy.year) AS nearest_local_road_data_year, + ARBITRARY(xy.daily_traffic) AS nearest_local_daily_traffic, + ARBITRARY(xy.speed_limit) AS nearest_local_road_speed_limit, + ARBITRARY(xy.surface_type) AS nearest_local_surface_type, + ARBITRARY(xy.lanes) AS nearest_local_lanes + FROM distinct_pins AS pcl + INNER JOIN ( {{ dist_to_nearest_geometry('traffic_local') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + GROUP BY pcl.pin10, xy.year +), + -- Calculate nearest Major Collector road per pin nearest_major_collector AS ( SELECT @@ -126,7 +150,8 @@ nearest_major_collector AS ( -- Join all nearest roads by pin10 and year SELECT COALESCE( - minor.pin10, interstate.pin10, freeway.pin10, major_collector.pin10 + minor.pin10, interstate.pin10, freeway.pin10, + local_road.pin10, major_collector.pin10 ) AS pin10, minor.nearest_minor_road_name, minor.nearest_minor_road_dist_ft, @@ -149,6 +174,13 @@ SELECT freeway.nearest_freeway_road_speed_limit, freeway.nearest_freeway_surface_type, freeway.nearest_freeway_lanes, + local_road.nearest_local_road_name, + local_road.nearest_local_road_dist_ft, + local_road.nearest_local_road_data_year, + local_road.nearest_local_daily_traffic, + local_road.nearest_local_road_speed_limit, + local_road.nearest_local_surface_type, + local_road.nearest_local_lanes, major_collector.nearest_major_collector_road_name, major_collector.nearest_major_collector_road_dist_ft, major_collector.nearest_major_collector_road_data_year, @@ -156,19 +188,28 @@ SELECT major_collector.nearest_major_collector_road_speed_limit, major_collector.nearest_major_collector_surface_type, major_collector.nearest_major_collector_lanes, - COALESCE(minor.year, interstate.year, freeway.year, major_collector.year) + COALESCE( + minor.year, interstate.year, freeway.year, + local_road.year, major_collector.year) AS year -FROM distinct_pins AS distinct_pins -FULL OUTER JOIN nearest_minor AS minor - ON distinct_pins.pin10 = minor.pin10 - AND 
distinct_pins.year = minor.year +FROM nearest_minor AS minor FULL OUTER JOIN nearest_interstate AS interstate - ON distinct_pins.pin10 = interstate.pin10 - AND distinct_pins.year = interstate.year + ON minor.pin10 = interstate.pin10 AND minor.year = interstate.year FULL OUTER JOIN nearest_freeway AS freeway - ON distinct_pins.pin10 = freeway.pin10 - AND distinct_pins.year = freeway.year + ON COALESCE(minor.pin10, interstate.pin10) = freeway.pin10 + AND COALESCE(minor.year, interstate.year) = freeway.year +FULL OUTER JOIN nearest_local AS local_road + ON COALESCE(minor.pin10, interstate.pin10, freeway.pin10) + = local_road.pin10 + AND COALESCE(minor.year, interstate.year, freeway.year) + = local_road.year FULL OUTER JOIN nearest_major_collector AS major_collector - ON distinct_pins.pin10 = major_collector.pin10 - AND distinct_pins.year = major_collector.year -WHERE minor.year >= (SELECT MIN(year) FROM distinct_years) + ON COALESCE(minor.pin10, interstate.pin10, freeway.pin10, local_road.pin10) + = major_collector.pin10 + AND COALESCE(minor.year, interstate.year, freeway.year, local_road.year) + = major_collector.year +WHERE COALESCE( + minor.year, interstate.year, freeway.year, + local_road.year, major_collector.year + ) + >= (SELECT MIN(year) FROM distinct_years) From fdae40188fcfb7c186cd3505facfbe1436880eeb Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 24 Oct 2024 20:24:11 +0000 Subject: [PATCH 045/166] Try to add other --- .../proximity.dist_pin_to_traffic_master.sql | 54 ++++++++++++++++++- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index d4968829f..4a117a63b 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -37,6 +37,12 @@ traffic_major_collector AS ( -- noqa: ST03 WHERE road_type = 'Major Collector' ), 
+traffic_other AS ( -- noqa: ST03 + SELECT * + FROM {{ source('spatial', 'traffic') }} + WHERE road_type = 'Other Principal Arterial' +), + distinct_pins AS ( SELECT DISTINCT x_3435, @@ -145,6 +151,25 @@ nearest_major_collector AS ( ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 GROUP BY pcl.pin10, xy.year +), + +-- Calculate nearest Other Principal Arterial road per pin +nearest_other AS ( + SELECT + pcl.pin10, + xy.year, + ARBITRARY(xy.road_name) AS nearest_other_road_name, + ARBITRARY(xy.dist_ft) AS nearest_other_road_dist_ft, + ARBITRARY(xy.year) AS nearest_other_road_data_year, + ARBITRARY(xy.daily_traffic) AS nearest_other_daily_traffic, + ARBITRARY(xy.speed_limit) AS nearest_other_road_speed_limit, + ARBITRARY(xy.surface_type) AS nearest_other_surface_type, + ARBITRARY(xy.lanes) AS nearest_other_lanes + FROM distinct_pins AS pcl + INNER JOIN ( {{ dist_to_nearest_geometry('traffic_other') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + GROUP BY pcl.pin10, xy.year ) -- Join all nearest roads by pin10 and year @@ -188,9 +213,16 @@ SELECT major_collector.nearest_major_collector_road_speed_limit, major_collector.nearest_major_collector_surface_type, major_collector.nearest_major_collector_lanes, + other.nearest_other_road_name, + other.nearest_other_road_dist_ft, + other.nearest_other_road_data_year, + other.nearest_other_daily_traffic, + other.nearest_other_road_speed_limit, + other.nearest_other_surface_type, + other.nearest_other_lanes, COALESCE( minor.year, interstate.year, freeway.year, - local_road.year, major_collector.year) + local_road.year, major_collector.year, other.year) AS year FROM nearest_minor AS minor FULL OUTER JOIN nearest_interstate AS interstate @@ -208,8 +240,26 @@ FULL OUTER JOIN nearest_major_collector AS major_collector = major_collector.pin10 AND COALESCE(minor.year, interstate.year, freeway.year, local_road.year) = major_collector.year +FULL OUTER JOIN nearest_other AS other + ON COALESCE( + minor.pin10, + 
interstate.pin10, + freeway.pin10, + local_road.pin10, + major_collector.pin10 + ) + = other.pin10 + AND COALESCE( + minor.year, + interstate.year, + freeway.year, + local_road.year, + major_collector.year + ) + = other.year WHERE COALESCE( minor.year, interstate.year, freeway.year, - local_road.year, major_collector.year + local_road.year, major_collector.year, + other.year ) >= (SELECT MIN(year) FROM distinct_years) From fe8a1cce8941a9fe09881c7511bd64893f6d7bdd Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 24 Oct 2024 21:31:40 +0000 Subject: [PATCH 046/166] Add minor collector --- .../proximity.dist_pin_to_traffic_master.sql | 50 +++++++++++++++++-- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 4a117a63b..55e2fe9d2 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -13,6 +13,12 @@ WITH traffic_minor AS ( -- noqa: ST03 WHERE road_type = 'Minor Arterial' ), +traffic_minor_collector AS ( -- noqa: ST03 + SELECT * + FROM {{ source('spatial', 'traffic') }} + WHERE road_type = 'Minor Collector' +), + traffic_interstate AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} @@ -76,6 +82,26 @@ nearest_minor AS ( GROUP BY pcl.pin10, xy.year ), +-- Calculate nearest Minor Collector road per pin +nearest_minor_collector AS ( + SELECT + pcl.pin10, + xy.year, + ARBITRARY(xy.road_name) AS nearest_minor_collector_road_name, + ARBITRARY(xy.dist_ft) AS nearest_minor_collector_road_dist_ft, + ARBITRARY(xy.year) AS nearest_minor_collector_road_data_year, + ARBITRARY(xy.daily_traffic) AS nearest_minor_collector_daily_traffic, + ARBITRARY(xy.speed_limit) AS nearest_minor_collector_road_speed_limit, + ARBITRARY(xy.surface_type) AS nearest_minor_collector_surface_type, + ARBITRARY(xy.lanes) AS nearest_minor_collector_lanes + 
FROM distinct_pins AS pcl + INNER JOIN + ( {{ dist_to_nearest_geometry('traffic_minor_collector') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + GROUP BY pcl.pin10, xy.year +), + -- Calculate nearest Interstate road per pin nearest_interstate AS ( SELECT @@ -175,7 +201,7 @@ nearest_other AS ( -- Join all nearest roads by pin10 and year SELECT COALESCE( - minor.pin10, interstate.pin10, freeway.pin10, + minor.pin10, minor_collector.pin10, interstate.pin10, freeway.pin10, local_road.pin10, major_collector.pin10 ) AS pin10, minor.nearest_minor_road_name, @@ -185,6 +211,13 @@ SELECT minor.nearest_minor_road_speed_limit, minor.nearest_minor_surface_type, minor.nearest_minor_lanes, + minor_collector.nearest_minor_collector_road_name, + minor_collector.nearest_minor_collector_road_dist_ft, + minor_collector.nearest_minor_collector_road_data_year, + minor_collector.nearest_minor_collector_daily_traffic, + minor_collector.nearest_minor_collector_road_speed_limit, + minor_collector.nearest_minor_collector_surface_type, + minor_collector.nearest_minor_collector_lanes, interstate.nearest_interstate_road_name, interstate.nearest_interstate_road_dist_ft, interstate.nearest_interstate_road_data_year, @@ -221,7 +254,7 @@ SELECT other.nearest_other_surface_type, other.nearest_other_lanes, COALESCE( - minor.year, interstate.year, freeway.year, + minor.year, minor_collector.year interstate.year, freeway.year, local_road.year, major_collector.year, other.year) AS year FROM nearest_minor AS minor @@ -257,8 +290,19 @@ FULL OUTER JOIN nearest_other AS other major_collector.year ) = other.year -WHERE COALESCE( +FULL OUTER JOIN nearest_minor_collector AS minor_collector + ON COALESCE( + minor.pin10, interstate.pin10, freeway.pin10, + local_road.pin10, major_collector.pin10, other.pin10 + ) + = minor_collector.pin10 + AND COALESCE( minor.year, interstate.year, freeway.year, + local_road.year, major_collector.year, other.year + ) + = minor_collector.year +WHERE 
COALESCE( + minor.year, minor_collector.year, interstate.year, freeway.year, local_road.year, major_collector.year, other.year ) From 601292ab79c1a4033067df94cfb1ac1fb91a3124 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 24 Oct 2024 21:40:33 +0000 Subject: [PATCH 047/166] Add year to join for minor_collector --- .../proximity/proximity.dist_pin_to_traffic_master.sql | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 55e2fe9d2..9fbfbbdfd 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -294,13 +294,12 @@ FULL OUTER JOIN nearest_minor_collector AS minor_collector ON COALESCE( minor.pin10, interstate.pin10, freeway.pin10, local_road.pin10, major_collector.pin10, other.pin10 - ) - = minor_collector.pin10 + ) = minor_collector.pin10 AND COALESCE( minor.year, interstate.year, freeway.year, local_road.year, major_collector.year, other.year - ) - = minor_collector.year + ) = minor_collector.year + AND minor_collector.year >= 2014 WHERE COALESCE( minor.year, minor_collector.year, interstate.year, freeway.year, local_road.year, major_collector.year, From 35bfcf9c664ae5bd254710ed3e27236db26a5ca2 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 24 Oct 2024 21:56:11 +0000 Subject: [PATCH 048/166] Add year to join for minor_collector --- dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 9fbfbbdfd..dbf66a371 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -99,6 +99,7 @@ nearest_minor_collector AS ( ( {{ 
dist_to_nearest_geometry('traffic_minor_collector') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 + AND pcl.year >= 2014 GROUP BY pcl.pin10, xy.year ), From ce877905bd9855c1c6c9cfe079995ba564b59b21 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 25 Oct 2024 14:06:51 +0000 Subject: [PATCH 049/166] Test with only valid daily traffic for local --- .../proximity.dist_pin_to_traffic_master.sql | 102 ++++++------------ 1 file changed, 30 insertions(+), 72 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index dbf66a371..58ae4908b 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -13,12 +13,6 @@ WITH traffic_minor AS ( -- noqa: ST03 WHERE road_type = 'Minor Arterial' ), -traffic_minor_collector AS ( -- noqa: ST03 - SELECT * - FROM {{ source('spatial', 'traffic') }} - WHERE road_type = 'Minor Collector' -), - traffic_interstate AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} @@ -35,12 +29,14 @@ traffic_local AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} WHERE road_type = 'Local Road or Street' + AND daily_traffic IS NOT NULL ), -traffic_major_collector AS ( -- noqa: ST03 +traffic_collector AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} WHERE road_type = 'Major Collector' + OR road_type = 'Minor Collector' ), traffic_other AS ( -- noqa: ST03 @@ -82,27 +78,6 @@ nearest_minor AS ( GROUP BY pcl.pin10, xy.year ), --- Calculate nearest Minor Collector road per pin -nearest_minor_collector AS ( - SELECT - pcl.pin10, - xy.year, - ARBITRARY(xy.road_name) AS nearest_minor_collector_road_name, - ARBITRARY(xy.dist_ft) AS nearest_minor_collector_road_dist_ft, - ARBITRARY(xy.year) AS nearest_minor_collector_road_data_year, - ARBITRARY(xy.daily_traffic) AS nearest_minor_collector_daily_traffic, - 
ARBITRARY(xy.speed_limit) AS nearest_minor_collector_road_speed_limit, - ARBITRARY(xy.surface_type) AS nearest_minor_collector_surface_type, - ARBITRARY(xy.lanes) AS nearest_minor_collector_lanes - FROM distinct_pins AS pcl - INNER JOIN - ( {{ dist_to_nearest_geometry('traffic_minor_collector') }} ) AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.year >= 2014 - GROUP BY pcl.pin10, xy.year -), - -- Calculate nearest Interstate road per pin nearest_interstate AS ( SELECT @@ -160,21 +135,21 @@ nearest_local AS ( ), -- Calculate nearest Major Collector road per pin -nearest_major_collector AS ( +nearest_collector AS ( SELECT pcl.pin10, xy.year, - ARBITRARY(xy.road_name) AS nearest_major_collector_road_name, - ARBITRARY(xy.dist_ft) AS nearest_major_collector_road_dist_ft, - ARBITRARY(xy.year) AS nearest_major_collector_road_data_year, + ARBITRARY(xy.road_name) AS nearest_collector_road_name, + ARBITRARY(xy.dist_ft) AS nearest_collector_road_dist_ft, + ARBITRARY(xy.year) AS nearest_collector_road_data_year, ARBITRARY(xy.speed_limit) - AS nearest_major_collector_road_speed_limit, - ARBITRARY(xy.daily_traffic) AS nearest_major_collector_daily_traffic, - ARBITRARY(xy.surface_type) AS nearest_major_collector_surface_type, - ARBITRARY(xy.lanes) AS nearest_major_collector_lanes + AS nearest_collector_road_speed_limit, + ARBITRARY(xy.daily_traffic) AS nearest_collector_daily_traffic, + ARBITRARY(xy.surface_type) AS nearest_collector_surface_type, + ARBITRARY(xy.lanes) AS nearest_collector_lanes FROM distinct_pins AS pcl INNER JOIN - ( {{ dist_to_nearest_geometry('traffic_major_collector') }} ) AS xy + ( {{ dist_to_nearest_geometry('traffic_collector') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 GROUP BY pcl.pin10, xy.year @@ -202,8 +177,8 @@ nearest_other AS ( -- Join all nearest roads by pin10 and year SELECT COALESCE( - minor.pin10, minor_collector.pin10, interstate.pin10, freeway.pin10, - local_road.pin10, major_collector.pin10 + 
minor.pin10, interstate.pin10, freeway.pin10, + local_road.pin10, collector.pin10 ) AS pin10, minor.nearest_minor_road_name, minor.nearest_minor_road_dist_ft, @@ -212,13 +187,6 @@ SELECT minor.nearest_minor_road_speed_limit, minor.nearest_minor_surface_type, minor.nearest_minor_lanes, - minor_collector.nearest_minor_collector_road_name, - minor_collector.nearest_minor_collector_road_dist_ft, - minor_collector.nearest_minor_collector_road_data_year, - minor_collector.nearest_minor_collector_daily_traffic, - minor_collector.nearest_minor_collector_road_speed_limit, - minor_collector.nearest_minor_collector_surface_type, - minor_collector.nearest_minor_collector_lanes, interstate.nearest_interstate_road_name, interstate.nearest_interstate_road_dist_ft, interstate.nearest_interstate_road_data_year, @@ -240,13 +208,13 @@ SELECT local_road.nearest_local_road_speed_limit, local_road.nearest_local_surface_type, local_road.nearest_local_lanes, - major_collector.nearest_major_collector_road_name, - major_collector.nearest_major_collector_road_dist_ft, - major_collector.nearest_major_collector_road_data_year, - major_collector.nearest_major_collector_daily_traffic, - major_collector.nearest_major_collector_road_speed_limit, - major_collector.nearest_major_collector_surface_type, - major_collector.nearest_major_collector_lanes, + collector.nearest_collector_road_name, + collector.nearest_collector_road_dist_ft, + collector.nearest_collector_road_data_year, + collector.nearest_collector_daily_traffic, + collector.nearest_collector_road_speed_limit, + collector.nearest_collector_surface_type, + collector.nearest_collector_lanes, other.nearest_other_road_name, other.nearest_other_road_dist_ft, other.nearest_other_road_data_year, @@ -255,8 +223,8 @@ SELECT other.nearest_other_surface_type, other.nearest_other_lanes, COALESCE( - minor.year, minor_collector.year interstate.year, freeway.year, - local_road.year, major_collector.year, other.year) + minor.year, interstate.year, 
freeway.year, + local_road.year, collector.year, other.year) AS year FROM nearest_minor AS minor FULL OUTER JOIN nearest_interstate AS interstate @@ -269,18 +237,18 @@ FULL OUTER JOIN nearest_local AS local_road = local_road.pin10 AND COALESCE(minor.year, interstate.year, freeway.year) = local_road.year -FULL OUTER JOIN nearest_major_collector AS major_collector +FULL OUTER JOIN nearest_collector AS collector ON COALESCE(minor.pin10, interstate.pin10, freeway.pin10, local_road.pin10) - = major_collector.pin10 + = collector.pin10 AND COALESCE(minor.year, interstate.year, freeway.year, local_road.year) - = major_collector.year + = collector.year FULL OUTER JOIN nearest_other AS other ON COALESCE( minor.pin10, interstate.pin10, freeway.pin10, local_road.pin10, - major_collector.pin10 + collector.pin10 ) = other.pin10 AND COALESCE( @@ -288,22 +256,12 @@ FULL OUTER JOIN nearest_other AS other interstate.year, freeway.year, local_road.year, - major_collector.year + collector.year ) = other.year -FULL OUTER JOIN nearest_minor_collector AS minor_collector - ON COALESCE( - minor.pin10, interstate.pin10, freeway.pin10, - local_road.pin10, major_collector.pin10, other.pin10 - ) = minor_collector.pin10 - AND COALESCE( - minor.year, interstate.year, freeway.year, - local_road.year, major_collector.year, other.year - ) = minor_collector.year - AND minor_collector.year >= 2014 WHERE COALESCE( - minor.year, minor_collector.year, interstate.year, freeway.year, - local_road.year, major_collector.year, + minor.year, interstate.year, freeway.year, + local_road.year, collector.year, other.year ) >= (SELECT MIN(year) FROM distinct_years) From 40ddfd7884b776d3b2a4497065724fcd594f6250 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 25 Oct 2024 15:00:41 +0000 Subject: [PATCH 050/166] Test macro upload --- dbt/macros/nearest_feature_aggregation.sql | 50 ++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 dbt/macros/nearest_feature_aggregation.sql diff --git 
a/dbt/macros/nearest_feature_aggregation.sql b/dbt/macros/nearest_feature_aggregation.sql new file mode 100644 index 000000000..c0d592976 --- /dev/null +++ b/dbt/macros/nearest_feature_aggregation.sql @@ -0,0 +1,50 @@ +{% macro aggregate_lowest_distance(table_name) %} + with + distances as ( + -- Unpivot all "dist_ft" columns for comparison + select + *, + unnest( + array[ + {% for column in dbt_utils.get_filtered_columns_in_relation( + table_name, "dist_ft" + ) %} + ({{ column }}, '{{ column }}') + {% endfor %} + ] + ) as (distance_value, distance_column) + from {{ table_name }} + ), + min_distances as ( + -- Identify the lowest distance and corresponding column for each row + select + *, + min(distance_value) over (partition by id) as min_distance, + array_agg(distance_column) filter ( + where distance_value = min(distance_value) over (partition by id) + ) as matching_columns + from distances + ), + aggregated as ( + -- Extract the prefix before the first underscore and rename it with + -- "aggregated_" + select + id, + min_distance, + array_agg( + distinct regexp_extract(column_name, '^(.*?)_') + ) as original_prefixes, + array_agg( + distinct 'aggregated_' || regexp_extract(column_name, '^(.*?)_') + ) as aggregated_prefixes + from + ( + select id, unnest(matching_columns) as column_name + from min_distances + ) prefix_agg + group by id, min_distance + ) + select * + from aggregated + ; +{% endmacro %} From a3131c6b0b22a2a12cb53e3dd7defec959f76cc1 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 25 Oct 2024 17:18:14 +0000 Subject: [PATCH 051/166] Test macro --- dbt/macros/nearest_feature_aggregation.sql | 48 ++++++++++--------- .../proximity.dist_pin_to_traffic_master.sql | 37 +++++--------- 2 files changed, 38 insertions(+), 47 deletions(-) diff --git a/dbt/macros/nearest_feature_aggregation.sql b/dbt/macros/nearest_feature_aggregation.sql index c0d592976..ee2271cf2 100644 --- a/dbt/macros/nearest_feature_aggregation.sql +++ 
b/dbt/macros/nearest_feature_aggregation.sql @@ -1,36 +1,41 @@ -{% macro aggregate_lowest_distance(table_name) %} +{% macro aggregate_smallest_feature(table_name, feature_suffix="dist_ft") %} with - distances as ( - -- Unpivot all "dist_ft" columns for comparison + columns as ( + -- Get column names that contain the suffix + select column_name + from information_schema.columns + where + table_name = lower('{{ table_name }}') + and column_name like '%' || '{{ feature_suffix }}' + ), + feature as ( + -- Unpivot all matching columns for comparison select - *, + t.*, unnest( array[ - {% for column in dbt_utils.get_filtered_columns_in_relation( - table_name, "dist_ft" - ) %} - ({{ column }}, '{{ column }}') + {% for column in columns %} + (t.{{ column.column_name }}, '{{ column.column_name }}') {% endfor %} ] - ) as (distance_value, distance_column) - from {{ table_name }} + ) as (feature_value, feature_column) + from {{ table_name }} t ), - min_distances as ( - -- Identify the lowest distance and corresponding column for each row + min_feature as ( + -- Identify the lowest feature and corresponding column for each row select *, - min(distance_value) over (partition by id) as min_distance, - array_agg(distance_column) filter ( - where distance_value = min(distance_value) over (partition by id) + min(feature_value) over (partition by id) as min_feature, + array_agg(feature_column) filter ( + where feature_value = min(feature_value) over (partition by id) ) as matching_columns - from distances + from feature ), aggregated as ( - -- Extract the prefix before the first underscore and rename it with - -- "aggregated_" + -- Extract prefix before first underscore and rename with "aggregated_" select id, - min_distance, + min_feature, array_agg( distinct regexp_extract(column_name, '^(.*?)_') ) as original_prefixes, @@ -39,10 +44,9 @@ ) as aggregated_prefixes from ( - select id, unnest(matching_columns) as column_name - from min_distances + select id, unnest(matching_columns) 
as column_name from min_feature ) prefix_agg - group by id, min_distance + group by id, min_feature ) select * from aggregated diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 58ae4908b..804cb5e99 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -174,7 +174,6 @@ nearest_other AS ( GROUP BY pcl.pin10, xy.year ) --- Join all nearest roads by pin10 and year SELECT COALESCE( minor.pin10, interstate.pin10, freeway.pin10, @@ -224,8 +223,8 @@ SELECT other.nearest_other_lanes, COALESCE( minor.year, interstate.year, freeway.year, - local_road.year, collector.year, other.year) - AS year + local_road.year, collector.year, other.year + ) AS year FROM nearest_minor AS minor FULL OUTER JOIN nearest_interstate AS interstate ON minor.pin10 = interstate.pin10 AND minor.year = interstate.year @@ -233,10 +232,8 @@ FULL OUTER JOIN nearest_freeway AS freeway ON COALESCE(minor.pin10, interstate.pin10) = freeway.pin10 AND COALESCE(minor.year, interstate.year) = freeway.year FULL OUTER JOIN nearest_local AS local_road - ON COALESCE(minor.pin10, interstate.pin10, freeway.pin10) - = local_road.pin10 - AND COALESCE(minor.year, interstate.year, freeway.year) - = local_road.year + ON COALESCE(minor.pin10, interstate.pin10, freeway.pin10) = local_road.pin10 + AND COALESCE(minor.year, interstate.year, freeway.year) = local_road.year FULL OUTER JOIN nearest_collector AS collector ON COALESCE(minor.pin10, interstate.pin10, freeway.pin10, local_road.pin10) = collector.pin10 @@ -244,24 +241,14 @@ FULL OUTER JOIN nearest_collector AS collector = collector.year FULL OUTER JOIN nearest_other AS other ON COALESCE( - minor.pin10, - interstate.pin10, - freeway.pin10, - local_road.pin10, - collector.pin10 - ) - = other.pin10 + minor.pin10, interstate.pin10, freeway.pin10, + local_road.pin10, collector.pin10 + ) = 
other.pin10 AND COALESCE( - minor.year, - interstate.year, - freeway.year, - local_road.year, - collector.year - ) - = other.year + minor.year, interstate.year, freeway.year, + local_road.year, collector.year + ) = other.year WHERE COALESCE( minor.year, interstate.year, freeway.year, - local_road.year, collector.year, - other.year - ) - >= (SELECT MIN(year) FROM distinct_years) + local_road.year, collector.year, other.year + ) >= (SELECT MIN(year) FROM distinct_years) From 364fafb7853d4acb9e3f2b92628148a0eedf74ba Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Mon, 28 Oct 2024 15:13:13 +0000 Subject: [PATCH 052/166] Revert to traffic not null --- .../proximity/proximity.dist_pin_to_traffic_master.sql | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 804cb5e99..29267f725 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -29,7 +29,6 @@ traffic_local AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} WHERE road_type = 'Local Road or Street' - AND daily_traffic IS NOT NULL ), traffic_collector AS ( -- noqa: ST03 @@ -128,7 +127,9 @@ nearest_local AS ( ARBITRARY(xy.surface_type) AS nearest_local_surface_type, ARBITRARY(xy.lanes) AS nearest_local_lanes FROM distinct_pins AS pcl - INNER JOIN ( {{ dist_to_nearest_geometry('traffic_local') }} ) AS xy + INNER JOIN ( + {{ nearest_pin_neighbors('traffic_local', 1, 100) }} + ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 GROUP BY pcl.pin10, xy.year From 28cbeb2fd22916440cbc5ee3f3077386c7d68c31 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Mon, 28 Oct 2024 15:20:15 +0000 Subject: [PATCH 053/166] Test nearest_local new format --- .../proximity.dist_pin_to_traffic_master.sql | 35 ++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff 
--git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 29267f725..9325c2d1a 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -115,7 +115,7 @@ nearest_freeway AS ( GROUP BY pcl.pin10, xy.year ), -nearest_local AS ( +nearest_local_step_1 AS ( SELECT pcl.pin10, xy.year, @@ -135,6 +135,39 @@ nearest_local AS ( GROUP BY pcl.pin10, xy.year ), +missing_matches AS ( + SELECT + pcl.pin10, + pcl.year, + pcl.x_3435, + pcl.y_3435 + FROM {{ source('spatial', 'parcel') }} AS pcl + LEFT JOIN nearest_local_step_1 AS nl + ON pcl.pin10 = nl.pin10 + AND pcl.year = nl.nearest_local_road_data_year + WHERE nl.pin10 IS NULL +), + +nearest_local AS ( + SELECT + mm.pin10, + xy.year, + ARBITRARY(xy.road_name) AS nearest_local_road_name, + ARBITRARY(xy.dist_ft) AS nearest_local_road_dist_ft, + ARBITRARY(xy.year) AS nearest_local_road_data_year, + ARBITRARY(xy.daily_traffic) AS nearest_local_daily_traffic, + ARBITRARY(xy.speed_limit) AS nearest_local_road_speed_limit, + ARBITRARY(xy.surface_type) AS nearest_local_surface_type, + ARBITRARY(xy.lanes) AS nearest_local_lanes + FROM missing_matches AS mm + INNER JOIN ( + {{ nearest_pin_neighbors('missing_matches', 1, 10000) }} + ) AS xy + ON mm.x_3435 = xy.x_3435 + AND mm.y_3435 = xy.y_3435 + GROUP BY mm.pin10, xy.year +), + -- Calculate nearest Major Collector road per pin nearest_collector AS ( SELECT From 387731c9a1e4a1491e1bc30e6b623054a86392ff Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Mon, 28 Oct 2024 15:42:31 +0000 Subject: [PATCH 054/166] Revert --- .../proximity.dist_pin_to_traffic_master.sql | 56 ++++--------------- 1 file changed, 12 insertions(+), 44 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 9325c2d1a..f888ca3a2 100644 --- 
a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -29,6 +29,7 @@ traffic_local AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} WHERE road_type = 'Local Road or Street' + AND daily_traffic IS NOT NULL ), traffic_collector AS ( -- noqa: ST03 @@ -115,59 +116,26 @@ nearest_freeway AS ( GROUP BY pcl.pin10, xy.year ), -nearest_local_step_1 AS ( +nearest_collector AS ( SELECT pcl.pin10, xy.year, - ARBITRARY(xy.road_name) AS nearest_local_road_name, - ARBITRARY(xy.dist_ft) AS nearest_local_road_dist_ft, - ARBITRARY(xy.year) AS nearest_local_road_data_year, - ARBITRARY(xy.daily_traffic) AS nearest_local_daily_traffic, - ARBITRARY(xy.speed_limit) AS nearest_local_road_speed_limit, - ARBITRARY(xy.surface_type) AS nearest_local_surface_type, - ARBITRARY(xy.lanes) AS nearest_local_lanes + ARBITRARY(xy.road_name) AS nearest_collector_road_name, + ARBITRARY(xy.dist_ft) AS nearest_collector_road_dist_ft, + ARBITRARY(xy.year) AS nearest_collector_road_data_year, + ARBITRARY(xy.speed_limit) + AS nearest_collector_road_speed_limit, + ARBITRARY(xy.daily_traffic) AS nearest_collector_daily_traffic, + ARBITRARY(xy.surface_type) AS nearest_collector_surface_type, + ARBITRARY(xy.lanes) AS nearest_collector_lanes FROM distinct_pins AS pcl - INNER JOIN ( - {{ nearest_pin_neighbors('traffic_local', 1, 100) }} - ) AS xy + INNER JOIN + ( {{ dist_to_nearest_geometry('traffic_collector') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 GROUP BY pcl.pin10, xy.year ), -missing_matches AS ( - SELECT - pcl.pin10, - pcl.year, - pcl.x_3435, - pcl.y_3435 - FROM {{ source('spatial', 'parcel') }} AS pcl - LEFT JOIN nearest_local_step_1 AS nl - ON pcl.pin10 = nl.pin10 - AND pcl.year = nl.nearest_local_road_data_year - WHERE nl.pin10 IS NULL -), - -nearest_local AS ( - SELECT - mm.pin10, - xy.year, - ARBITRARY(xy.road_name) AS nearest_local_road_name, - ARBITRARY(xy.dist_ft) AS 
nearest_local_road_dist_ft, - ARBITRARY(xy.year) AS nearest_local_road_data_year, - ARBITRARY(xy.daily_traffic) AS nearest_local_daily_traffic, - ARBITRARY(xy.speed_limit) AS nearest_local_road_speed_limit, - ARBITRARY(xy.surface_type) AS nearest_local_surface_type, - ARBITRARY(xy.lanes) AS nearest_local_lanes - FROM missing_matches AS mm - INNER JOIN ( - {{ nearest_pin_neighbors('missing_matches', 1, 10000) }} - ) AS xy - ON mm.x_3435 = xy.x_3435 - AND mm.y_3435 = xy.y_3435 - GROUP BY mm.pin10, xy.year -), - -- Calculate nearest Major Collector road per pin nearest_collector AS ( SELECT From e70ea40624ce617da7aa218c3b1ce603bfced9e9 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Mon, 28 Oct 2024 15:50:54 +0000 Subject: [PATCH 055/166] Revert --- dbt/macros/nearest_feature_aggregation.sql | 108 +++++++++--------- .../proximity.dist_pin_to_traffic_master.sql | 57 +++++---- 2 files changed, 90 insertions(+), 75 deletions(-) diff --git a/dbt/macros/nearest_feature_aggregation.sql b/dbt/macros/nearest_feature_aggregation.sql index ee2271cf2..4bf6c0959 100644 --- a/dbt/macros/nearest_feature_aggregation.sql +++ b/dbt/macros/nearest_feature_aggregation.sql @@ -1,54 +1,58 @@ {% macro aggregate_smallest_feature(table_name, feature_suffix="dist_ft") %} - with - columns as ( - -- Get column names that contain the suffix - select column_name - from information_schema.columns - where - table_name = lower('{{ table_name }}') - and column_name like '%' || '{{ feature_suffix }}' - ), - feature as ( - -- Unpivot all matching columns for comparison - select - t.*, - unnest( - array[ - {% for column in columns %} - (t.{{ column.column_name }}, '{{ column.column_name }}') - {% endfor %} - ] - ) as (feature_value, feature_column) - from {{ table_name }} t - ), - min_feature as ( - -- Identify the lowest feature and corresponding column for each row - select - *, - min(feature_value) over (partition by id) as min_feature, - array_agg(feature_column) filter ( - where feature_value = 
min(feature_value) over (partition by id) - ) as matching_columns - from feature - ), - aggregated as ( - -- Extract prefix before first underscore and rename with "aggregated_" - select - id, - min_feature, - array_agg( - distinct regexp_extract(column_name, '^(.*?)_') - ) as original_prefixes, - array_agg( - distinct 'aggregated_' || regexp_extract(column_name, '^(.*?)_') - ) as aggregated_prefixes - from - ( - select id, unnest(matching_columns) as column_name from min_feature - ) prefix_agg - group by id, min_feature - ) - select * - from aggregated - ; + {%- set macro_content -%} + {% raw %} + {% macro aggregate_smallest_feature(table_name, feature_suffix="dist_ft") %} + with + columns as ( + select column_name + from information_schema.columns + where + table_name = lower('{{ table_name }}') + and column_name like '%' || '{{ feature_suffix }}' + ), + feature as ( + select + t.*, + unnest( + array[ + {% for column in columns %} + (t.{{ column.column_name }}, '{{ column.column_name }}') + {% endfor %} + ] + ) as (feature_value, feature_column) + from {{ table_name }} t + ), + min_feature as ( + select + *, + min(feature_value) over (partition by id) as min_feature, + array_agg(feature_column) filter ( + where feature_value = min(feature_value) over (partition by id) + ) as matching_columns + from feature + ), + aggregated as ( + select + id, + min_feature, + array_agg( + distinct regexp_extract(column_name, '^(.*?)_') + ) as original_prefixes, + array_agg( + distinct 'aggregated_' || regexp_extract(column_name, '^(.*?)_') + ) as aggregated_prefixes + from + ( + select id, unnest(matching_columns) as column_name from min_feature + ) prefix_agg + group by id, min_feature + ) + select * + from aggregated + ; + {% endmacro %} + {% endraw %} + {%- endset -%} + + {{ macro_content }} {% endmacro %} diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index f888ca3a2..58ae4908b 100644 
--- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -116,21 +116,19 @@ nearest_freeway AS ( GROUP BY pcl.pin10, xy.year ), -nearest_collector AS ( +nearest_local AS ( SELECT pcl.pin10, xy.year, - ARBITRARY(xy.road_name) AS nearest_collector_road_name, - ARBITRARY(xy.dist_ft) AS nearest_collector_road_dist_ft, - ARBITRARY(xy.year) AS nearest_collector_road_data_year, - ARBITRARY(xy.speed_limit) - AS nearest_collector_road_speed_limit, - ARBITRARY(xy.daily_traffic) AS nearest_collector_daily_traffic, - ARBITRARY(xy.surface_type) AS nearest_collector_surface_type, - ARBITRARY(xy.lanes) AS nearest_collector_lanes + ARBITRARY(xy.road_name) AS nearest_local_road_name, + ARBITRARY(xy.dist_ft) AS nearest_local_road_dist_ft, + ARBITRARY(xy.year) AS nearest_local_road_data_year, + ARBITRARY(xy.daily_traffic) AS nearest_local_daily_traffic, + ARBITRARY(xy.speed_limit) AS nearest_local_road_speed_limit, + ARBITRARY(xy.surface_type) AS nearest_local_surface_type, + ARBITRARY(xy.lanes) AS nearest_local_lanes FROM distinct_pins AS pcl - INNER JOIN - ( {{ dist_to_nearest_geometry('traffic_collector') }} ) AS xy + INNER JOIN ( {{ dist_to_nearest_geometry('traffic_local') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 GROUP BY pcl.pin10, xy.year @@ -176,6 +174,7 @@ nearest_other AS ( GROUP BY pcl.pin10, xy.year ) +-- Join all nearest roads by pin10 and year SELECT COALESCE( minor.pin10, interstate.pin10, freeway.pin10, @@ -225,8 +224,8 @@ SELECT other.nearest_other_lanes, COALESCE( minor.year, interstate.year, freeway.year, - local_road.year, collector.year, other.year - ) AS year + local_road.year, collector.year, other.year) + AS year FROM nearest_minor AS minor FULL OUTER JOIN nearest_interstate AS interstate ON minor.pin10 = interstate.pin10 AND minor.year = interstate.year @@ -234,8 +233,10 @@ FULL OUTER JOIN nearest_freeway AS freeway ON COALESCE(minor.pin10, 
interstate.pin10) = freeway.pin10 AND COALESCE(minor.year, interstate.year) = freeway.year FULL OUTER JOIN nearest_local AS local_road - ON COALESCE(minor.pin10, interstate.pin10, freeway.pin10) = local_road.pin10 - AND COALESCE(minor.year, interstate.year, freeway.year) = local_road.year + ON COALESCE(minor.pin10, interstate.pin10, freeway.pin10) + = local_road.pin10 + AND COALESCE(minor.year, interstate.year, freeway.year) + = local_road.year FULL OUTER JOIN nearest_collector AS collector ON COALESCE(minor.pin10, interstate.pin10, freeway.pin10, local_road.pin10) = collector.pin10 @@ -243,14 +244,24 @@ FULL OUTER JOIN nearest_collector AS collector = collector.year FULL OUTER JOIN nearest_other AS other ON COALESCE( - minor.pin10, interstate.pin10, freeway.pin10, - local_road.pin10, collector.pin10 - ) = other.pin10 + minor.pin10, + interstate.pin10, + freeway.pin10, + local_road.pin10, + collector.pin10 + ) + = other.pin10 AND COALESCE( - minor.year, interstate.year, freeway.year, - local_road.year, collector.year - ) = other.year + minor.year, + interstate.year, + freeway.year, + local_road.year, + collector.year + ) + = other.year WHERE COALESCE( minor.year, interstate.year, freeway.year, - local_road.year, collector.year, other.year - ) >= (SELECT MIN(year) FROM distinct_years) + local_road.year, collector.year, + other.year + ) + >= (SELECT MIN(year) FROM distinct_years) From 43937d385f210b0e020c6d4a2fde802d02c7041a Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Mon, 28 Oct 2024 20:16:53 +0000 Subject: [PATCH 056/166] test --- dbt/macros/nearest_feature_aggregation.sql | 67 +---- .../proximity.dist_pin_to_traffic_master.sql | 249 ++++++++++-------- 2 files changed, 152 insertions(+), 164 deletions(-) diff --git a/dbt/macros/nearest_feature_aggregation.sql b/dbt/macros/nearest_feature_aggregation.sql index 4bf6c0959..82125083e 100644 --- a/dbt/macros/nearest_feature_aggregation.sql +++ b/dbt/macros/nearest_feature_aggregation.sql @@ -1,58 +1,11 @@ -{% macro 
aggregate_smallest_feature(table_name, feature_suffix="dist_ft") %} - {%- set macro_content -%} - {% raw %} - {% macro aggregate_smallest_feature(table_name, feature_suffix="dist_ft") %} - with - columns as ( - select column_name - from information_schema.columns - where - table_name = lower('{{ table_name }}') - and column_name like '%' || '{{ feature_suffix }}' - ), - feature as ( - select - t.*, - unnest( - array[ - {% for column in columns %} - (t.{{ column.column_name }}, '{{ column.column_name }}') - {% endfor %} - ] - ) as (feature_value, feature_column) - from {{ table_name }} t - ), - min_feature as ( - select - *, - min(feature_value) over (partition by id) as min_feature, - array_agg(feature_column) filter ( - where feature_value = min(feature_value) over (partition by id) - ) as matching_columns - from feature - ), - aggregated as ( - select - id, - min_feature, - array_agg( - distinct regexp_extract(column_name, '^(.*?)_') - ) as original_prefixes, - array_agg( - distinct 'aggregated_' || regexp_extract(column_name, '^(.*?)_') - ) as aggregated_prefixes - from - ( - select id, unnest(matching_columns) as column_name from min_feature - ) prefix_agg - group by id, min_feature - ) - select * - from aggregated - ; - {% endmacro %} - {% endraw %} - {%- endset -%} - - {{ macro_content }} +{% macro nearest_feature_aggregation(base_columns, characteristics) %} + {%- for characteristic in characteristics %} + case + {%- for base in base_columns %} + when {{ base }} = least({{ base_columns | join(", ") }}) + then {{ base.replace("dist_ft", characteristic) }} + {%- endfor %} + else null + end as closest_{{ characteristic }}, + {%- endfor %} {% endmacro %} diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 58ae4908b..e194bb68a 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ 
-69,8 +69,8 @@ nearest_minor AS ( ARBITRARY(xy.year) AS nearest_minor_road_data_year, ARBITRARY(xy.daily_traffic) AS nearest_minor_road_daily_traffic, ARBITRARY(xy.speed_limit) AS nearest_minor_road_speed_limit, - ARBITRARY(xy.surface_type) AS nearest_minor_surface_type, - ARBITRARY(xy.lanes) AS nearest_minor_lanes + ARBITRARY(xy.surface_type) AS nearest_minor_road_surface_type, + ARBITRARY(xy.lanes) AS nearest_minor_road_lanes FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic_minor') }} ) AS xy ON pcl.x_3435 = xy.x_3435 @@ -86,10 +86,10 @@ nearest_interstate AS ( ARBITRARY(xy.road_name) AS nearest_interstate_road_name, ARBITRARY(xy.dist_ft) AS nearest_interstate_road_dist_ft, ARBITRARY(xy.year) AS nearest_interstate_road_data_year, - ARBITRARY(xy.daily_traffic) AS nearest_interstate_daily_traffic, + ARBITRARY(xy.daily_traffic) AS nearest_interstate_road_daily_traffic, ARBITRARY(xy.speed_limit) AS nearest_interstate_road_speed_limit, - ARBITRARY(xy.surface_type) AS nearest_interstate_surface_type, - ARBITRARY(xy.lanes) AS nearest_interstate_lanes + ARBITRARY(xy.surface_type) AS nearest_interstate_road_surface_type, + ARBITRARY(xy.lanes) AS nearest_interstate_road_lanes FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic_interstate') }} ) AS xy ON pcl.x_3435 = xy.x_3435 @@ -105,10 +105,10 @@ nearest_freeway AS ( ARBITRARY(xy.road_name) AS nearest_freeway_road_name, ARBITRARY(xy.dist_ft) AS nearest_freeway_road_dist_ft, ARBITRARY(xy.year) AS nearest_freeway_road_data_year, - ARBITRARY(xy.daily_traffic) AS nearest_freeway_daily_traffic, + ARBITRARY(xy.daily_traffic) AS nearest_freeway_road_daily_traffic, ARBITRARY(xy.speed_limit) AS nearest_freeway_road_speed_limit, - ARBITRARY(xy.surface_type) AS nearest_freeway_surface_type, - ARBITRARY(xy.lanes) AS nearest_freeway_lanes + ARBITRARY(xy.surface_type) AS nearest_freeway_road_surface_type, + ARBITRARY(xy.lanes) AS nearest_freeway_road_lanes FROM distinct_pins AS pcl 
INNER JOIN ( {{ dist_to_nearest_geometry('traffic_freeway') }} ) AS xy ON pcl.x_3435 = xy.x_3435 @@ -123,10 +123,10 @@ nearest_local AS ( ARBITRARY(xy.road_name) AS nearest_local_road_name, ARBITRARY(xy.dist_ft) AS nearest_local_road_dist_ft, ARBITRARY(xy.year) AS nearest_local_road_data_year, - ARBITRARY(xy.daily_traffic) AS nearest_local_daily_traffic, + ARBITRARY(xy.daily_traffic) AS nearest_local_road_daily_traffic, ARBITRARY(xy.speed_limit) AS nearest_local_road_speed_limit, - ARBITRARY(xy.surface_type) AS nearest_local_surface_type, - ARBITRARY(xy.lanes) AS nearest_local_lanes + ARBITRARY(xy.surface_type) AS nearest_local_road_surface_type, + ARBITRARY(xy.lanes) AS nearest_local_road_lanes FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic_local') }} ) AS xy ON pcl.x_3435 = xy.x_3435 @@ -144,9 +144,9 @@ nearest_collector AS ( ARBITRARY(xy.year) AS nearest_collector_road_data_year, ARBITRARY(xy.speed_limit) AS nearest_collector_road_speed_limit, - ARBITRARY(xy.daily_traffic) AS nearest_collector_daily_traffic, - ARBITRARY(xy.surface_type) AS nearest_collector_surface_type, - ARBITRARY(xy.lanes) AS nearest_collector_lanes + ARBITRARY(xy.daily_traffic) AS nearest_collector_road_daily_traffic, + ARBITRARY(xy.surface_type) AS nearest_collector_road_surface_type, + ARBITRARY(xy.lanes) AS nearest_collector_road_lanes FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic_collector') }} ) AS xy @@ -163,105 +163,140 @@ nearest_other AS ( ARBITRARY(xy.road_name) AS nearest_other_road_name, ARBITRARY(xy.dist_ft) AS nearest_other_road_dist_ft, ARBITRARY(xy.year) AS nearest_other_road_data_year, - ARBITRARY(xy.daily_traffic) AS nearest_other_daily_traffic, + ARBITRARY(xy.daily_traffic) AS nearest_other_road_daily_traffic, ARBITRARY(xy.speed_limit) AS nearest_other_road_speed_limit, - ARBITRARY(xy.surface_type) AS nearest_other_surface_type, - ARBITRARY(xy.lanes) AS nearest_other_lanes + ARBITRARY(xy.surface_type) AS 
nearest_other_road_surface_type, + ARBITRARY(xy.lanes) AS nearest_other_road_lanes FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic_other') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 GROUP BY pcl.pin10, xy.year -) +), -- Join all nearest roads by pin10 and year +final_aggregation AS ( + SELECT + COALESCE( + minor.pin10, interstate.pin10, freeway.pin10, + local_road.pin10, collector.pin10 + ) AS pin10, + minor.nearest_minor_road_name, + minor.nearest_minor_road_dist_ft, + minor.nearest_minor_road_data_year, + minor.nearest_minor_road_daily_traffic, + minor.nearest_minor_road_speed_limit, + minor.nearest_minor_road_surface_type, + minor.nearest_minor_road_lanes, + interstate.nearest_interstate_road_name, + interstate.nearest_interstate_road_dist_ft, + interstate.nearest_interstate_road_data_year, + interstate.nearest_interstate_road_daily_traffic, + interstate.nearest_interstate_road_speed_limit, + interstate.nearest_interstate_road_surface_type, + interstate.nearest_interstate_road_lanes, + freeway.nearest_freeway_road_name, + freeway.nearest_freeway_road_dist_ft, + freeway.nearest_freeway_road_data_year, + freeway.nearest_freeway_road_daily_traffic, + freeway.nearest_freeway_road_speed_limit, + freeway.nearest_freeway_road_surface_type, + freeway.nearest_freeway_road_lanes, + local_road.nearest_local_road_name, + local_road.nearest_local_road_dist_ft, + local_road.nearest_local_road_data_year, + local_road.nearest_local_road_daily_traffic, + local_road.nearest_local_road_speed_limit, + local_road.nearest_local_road_surface_type, + local_road.nearest_local_road_lanes, + collector.nearest_collector_road_name, + collector.nearest_collector_road_dist_ft, + collector.nearest_collector_road_data_year, + collector.nearest_collector_road_daily_traffic, + collector.nearest_collector_road_speed_limit, + collector.nearest_collector_road_surface_type, + collector.nearest_collector_road_lanes, + other.nearest_other_road_name, + 
other.nearest_other_road_dist_ft, + other.nearest_other_road_data_year, + other.nearest_other_road_daily_traffic, + other.nearest_other_road_speed_limit, + other.nearest_other_road_surface_type, + other.nearest_other_road_lanes, + COALESCE( + minor.year, interstate.year, freeway.year, + local_road.year, collector.year, other.year) + AS year + FROM nearest_minor AS minor + FULL OUTER JOIN nearest_interstate AS interstate + ON minor.pin10 = interstate.pin10 + AND minor.year = interstate.year + FULL OUTER JOIN nearest_freeway AS freeway + ON COALESCE(minor.pin10, interstate.pin10) = freeway.pin10 + AND COALESCE(minor.year, interstate.year) = freeway.year + FULL OUTER JOIN nearest_local AS local_road + ON COALESCE(minor.pin10, interstate.pin10, freeway.pin10) + = local_road.pin10 + AND COALESCE(minor.year, interstate.year, freeway.year) + = local_road.year + FULL OUTER JOIN nearest_collector AS collector + ON COALESCE( + minor.pin10, interstate.pin10, + freeway.pin10, local_road.pin10 + ) + = collector.pin10 + AND COALESCE(minor.year, interstate.year, freeway.year, local_road.year) + = collector.year + FULL OUTER JOIN nearest_other AS other + ON COALESCE( + minor.pin10, + interstate.pin10, + freeway.pin10, + local_road.pin10, + collector.pin10 + ) + = other.pin10 + AND COALESCE( + minor.year, + interstate.year, + freeway.year, + local_road.year, + collector.year + ) + = other.year + WHERE COALESCE( + minor.year, interstate.year, freeway.year, + local_road.year, collector.year, + other.year + ) + >= (SELECT MIN(year) FROM distinct_years) +) + + SELECT - COALESCE( - minor.pin10, interstate.pin10, freeway.pin10, - local_road.pin10, collector.pin10 - ) AS pin10, - minor.nearest_minor_road_name, - minor.nearest_minor_road_dist_ft, - minor.nearest_minor_road_data_year, - minor.nearest_minor_road_daily_traffic, - minor.nearest_minor_road_speed_limit, - minor.nearest_minor_surface_type, - minor.nearest_minor_lanes, - interstate.nearest_interstate_road_name, - 
interstate.nearest_interstate_road_dist_ft, - interstate.nearest_interstate_road_data_year, - interstate.nearest_interstate_daily_traffic, - interstate.nearest_interstate_road_speed_limit, - interstate.nearest_interstate_surface_type, - interstate.nearest_interstate_lanes, - freeway.nearest_freeway_road_name, - freeway.nearest_freeway_road_dist_ft, - freeway.nearest_freeway_road_data_year, - freeway.nearest_freeway_daily_traffic, - freeway.nearest_freeway_road_speed_limit, - freeway.nearest_freeway_surface_type, - freeway.nearest_freeway_lanes, - local_road.nearest_local_road_name, - local_road.nearest_local_road_dist_ft, - local_road.nearest_local_road_data_year, - local_road.nearest_local_daily_traffic, - local_road.nearest_local_road_speed_limit, - local_road.nearest_local_surface_type, - local_road.nearest_local_lanes, - collector.nearest_collector_road_name, - collector.nearest_collector_road_dist_ft, - collector.nearest_collector_road_data_year, - collector.nearest_collector_daily_traffic, - collector.nearest_collector_road_speed_limit, - collector.nearest_collector_surface_type, - collector.nearest_collector_lanes, - other.nearest_other_road_name, - other.nearest_other_road_dist_ft, - other.nearest_other_road_data_year, - other.nearest_other_daily_traffic, - other.nearest_other_road_speed_limit, - other.nearest_other_surface_type, - other.nearest_other_lanes, - COALESCE( - minor.year, interstate.year, freeway.year, - local_road.year, collector.year, other.year) - AS year -FROM nearest_minor AS minor -FULL OUTER JOIN nearest_interstate AS interstate - ON minor.pin10 = interstate.pin10 AND minor.year = interstate.year -FULL OUTER JOIN nearest_freeway AS freeway - ON COALESCE(minor.pin10, interstate.pin10) = freeway.pin10 - AND COALESCE(minor.year, interstate.year) = freeway.year -FULL OUTER JOIN nearest_local AS local_road - ON COALESCE(minor.pin10, interstate.pin10, freeway.pin10) - = local_road.pin10 - AND COALESCE(minor.year, interstate.year, freeway.year) 
- = local_road.year -FULL OUTER JOIN nearest_collector AS collector - ON COALESCE(minor.pin10, interstate.pin10, freeway.pin10, local_road.pin10) - = collector.pin10 - AND COALESCE(minor.year, interstate.year, freeway.year, local_road.year) - = collector.year -FULL OUTER JOIN nearest_other AS other - ON COALESCE( - minor.pin10, - interstate.pin10, - freeway.pin10, - local_road.pin10, - collector.pin10 - ) - = other.pin10 - AND COALESCE( - minor.year, - interstate.year, - freeway.year, - local_road.year, - collector.year - ) - = other.year -WHERE COALESCE( - minor.year, interstate.year, freeway.year, - local_road.year, collector.year, - other.year - ) - >= (SELECT MIN(year) FROM distinct_years) + pin10, + nearest_minor_road_dist_ft, + nearest_other_road_dist_ft, + nearest_interstate_road_dist_ft, + + {{ nearest_feature_aggregation( + [ + "nearest_minor_road_dist_ft", + "nearest_interstate_road_dist_ft", + "nearest_other_road_dist_ft", + "nearest_freeway_road_dist_ft", + "nearest_local_road_dist_ft", + "nearest_collector_road_dist_ft" + ], + [ + "name", + "dist_ft", + "data_year", + "daily_traffic", + "speed_limit", + "surface_type", + "lanes" + ] + ) }} + year +FROM final_aggregation; From 35dafbf3e7a76edf52858dcf938fb3da32887c8b Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Mon, 28 Oct 2024 20:23:23 +0000 Subject: [PATCH 057/166] Remove semicolon --- .../proximity.dist_pin_to_traffic_master.sql | 44 +++++++++++++++++-- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index e194bb68a..04f18448d 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -275,10 +275,48 @@ final_aggregation AS ( SELECT pin10, + nearest_minor_road_name, nearest_minor_road_dist_ft, - nearest_other_road_dist_ft, + nearest_minor_road_data_year, + 
nearest_minor_road_daily_traffic, + nearest_minor_road_speed_limit, + nearest_minor_road_surface_type, + nearest_minor_road_lanes, + nearest_interstate_road_name, nearest_interstate_road_dist_ft, - + nearest_interstate_road_data_year, + nearest_interstate_road_daily_traffic, + nearest_interstate_road_speed_limit, + nearest_interstate_road_surface_type, + nearest_interstate_road_lanes, + nearest_freeway_road_name, + nearest_freeway_road_dist_ft, + nearest_freeway_road_data_year, + nearest_freeway_road_daily_traffic, + nearest_freeway_road_speed_limit, + nearest_freeway_road_surface_type, + nearest_freeway_road_lanes, + nearest_local_road_name, + nearest_local_road_dist_ft, + nearest_local_road_data_year, + nearest_local_road_daily_traffic, + nearest_local_road_speed_limit, + nearest_local_road_surface_type, + nearest_local_road_lanes, + nearest_collector_road_name, + nearest_collector_road_dist_ft, + nearest_collector_road_data_year, + nearest_collector_road_daily_traffic, + nearest_collector_road_speed_limit, + nearest_collector_road_surface_type, + nearest_collector_road_lanes, + nearest_other_road_name, + nearest_other_road_dist_ft, + nearest_other_road_data_year, + nearest_other_road_daily_traffic, + nearest_other_road_speed_limit, + nearest_other_road_surface_type, + nearest_other_road_lanes, {{ nearest_feature_aggregation( [ "nearest_minor_road_dist_ft", @@ -299,4 +337,4 @@ SELECT ] ) }} year -FROM final_aggregation; +FROM final_aggregation From fe29661fa5cde784811b1d0eef5b5aeef8f46f6d Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 29 Oct 2024 14:29:52 +0000 Subject: [PATCH 058/166] Try null and all for local --- .../proximity.dist_pin_to_traffic_master.sql | 36 +++++++++++++++---- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 04f18448d..aac1e72f5 100644 --- 
a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -32,6 +32,13 @@ traffic_local AS ( -- noqa: ST03 AND daily_traffic IS NOT NULL ), +traffic_local_null AS ( -- noqa: ST03 + SELECT * + FROM {{ source('spatial', 'traffic') }} + WHERE road_type = 'Local Road or Street' + AND daily_traffic IS NOT NULL +), + traffic_collector AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} @@ -121,17 +128,35 @@ nearest_local AS ( pcl.pin10, xy.year, ARBITRARY(xy.road_name) AS nearest_local_road_name, - ARBITRARY(xy.dist_ft) AS nearest_local_road_dist_ft, + ARBITRARY(xy.dist_ft) AS nearest_local_dist_ft, ARBITRARY(xy.year) AS nearest_local_road_data_year, - ARBITRARY(xy.daily_traffic) AS nearest_local_road_daily_traffic, - ARBITRARY(xy.speed_limit) AS nearest_local_road_speed_limit, - ARBITRARY(xy.surface_type) AS nearest_local_road_surface_type, - ARBITRARY(xy.lanes) AS nearest_local_road_lanes + ARBITRARY(xy.daily_traffic) AS nearest_local_daily_traffic, + ARBITRARY(xy.speed_limit) AS nearest_local_speed_limit, + ARBITRARY(xy.surface_type) AS nearest_local_surface_type, + ARBITRARY(xy.lanes) AS nearest_local_lanes FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic_local') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 GROUP BY pcl.pin10, xy.year + + UNION ALL + + SELECT + pcl.pin10, + xy.year, + ARBITRARY(xy.road_name) AS nearest_local_road_name, + ARBITRARY(xy.dist_ft) AS nearest_local_dist_ft, + ARBITRARY(xy.year) AS nearest_local_road_data_year, + ARBITRARY(xy.daily_traffic) AS nearest_local_daily_traffic, + ARBITRARY(xy.speed_limit) AS nearest_local_speed_limit, + ARBITRARY(xy.surface_type) AS nearest_local_surface_type, + ARBITRARY(xy.lanes) AS nearest_local_lanes + FROM distinct_pins AS pcl + INNER JOIN ( {{ dist_to_nearest_geometry('traffic_local_null') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + GROUP BY 
pcl.pin10, xy.year ), -- Calculate nearest Major Collector road per pin @@ -272,7 +297,6 @@ final_aggregation AS ( >= (SELECT MIN(year) FROM distinct_years) ) - SELECT pin10, nearest_minor_road_name, From 1d118f4305f73ef494316f604cd6b4695d1ac026 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 29 Oct 2024 14:31:36 +0000 Subject: [PATCH 059/166] typo --- dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index aac1e72f5..b7efc0c1f 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -36,7 +36,7 @@ traffic_local_null AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} WHERE road_type = 'Local Road or Street' - AND daily_traffic IS NOT NULL + AND daily_traffic IS NULL ), traffic_collector AS ( -- noqa: ST03 From 3f5f221c2020dbddc16813e20fdadd263bcbee7a Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 29 Oct 2024 14:33:26 +0000 Subject: [PATCH 060/166] add road to query --- .../proximity.dist_pin_to_traffic_master.sql | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index b7efc0c1f..8a08e6c8e 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -128,12 +128,12 @@ nearest_local AS ( pcl.pin10, xy.year, ARBITRARY(xy.road_name) AS nearest_local_road_name, - ARBITRARY(xy.dist_ft) AS nearest_local_dist_ft, + ARBITRARY(xy.dist_ft) AS nearest_local_road_dist_ft, ARBITRARY(xy.year) AS nearest_local_road_data_year, - ARBITRARY(xy.daily_traffic) AS nearest_local_daily_traffic, - ARBITRARY(xy.speed_limit) AS 
nearest_local_speed_limit, - ARBITRARY(xy.surface_type) AS nearest_local_surface_type, - ARBITRARY(xy.lanes) AS nearest_local_lanes + ARBITRARY(xy.daily_traffic) AS nearest_local_road_daily_traffic, + ARBITRARY(xy.speed_limit) AS nearest_local_road_speed_limit, + ARBITRARY(xy.surface_type) AS nearest_local_road_surface_type, + ARBITRARY(xy.lanes) AS nearest_local_road_lanes FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic_local') }} ) AS xy ON pcl.x_3435 = xy.x_3435 @@ -146,12 +146,12 @@ nearest_local AS ( pcl.pin10, xy.year, ARBITRARY(xy.road_name) AS nearest_local_road_name, - ARBITRARY(xy.dist_ft) AS nearest_local_dist_ft, + ARBITRARY(xy.dist_ft) AS nearest_local_road_dist_ft, ARBITRARY(xy.year) AS nearest_local_road_data_year, - ARBITRARY(xy.daily_traffic) AS nearest_local_daily_traffic, - ARBITRARY(xy.speed_limit) AS nearest_local_speed_limit, - ARBITRARY(xy.surface_type) AS nearest_local_surface_type, - ARBITRARY(xy.lanes) AS nearest_local_lanes + ARBITRARY(xy.daily_traffic) AS nearest_local_road_daily_traffic, + ARBITRARY(xy.speed_limit) AS nearest_local_road_speed_limit, + ARBITRARY(xy.surface_type) AS nearest_local_road_surface_type, + ARBITRARY(xy.lanes) AS nearest_local_road_lanes FROM distinct_pins AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('traffic_local_null') }} ) AS xy ON pcl.x_3435 = xy.x_3435 From f6bdb9a10f309a811078d5d4ed24f2b0552295c1 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 29 Oct 2024 20:56:54 +0000 Subject: [PATCH 061/166] Revert to working query --- .../proximity.dist_pin_to_traffic_master.sql | 26 +------------------ 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 8a08e6c8e..04f18448d 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -32,13 +32,6 @@ 
traffic_local AS ( -- noqa: ST03 AND daily_traffic IS NOT NULL ), -traffic_local_null AS ( -- noqa: ST03 - SELECT * - FROM {{ source('spatial', 'traffic') }} - WHERE road_type = 'Local Road or Street' - AND daily_traffic IS NULL -), - traffic_collector AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} @@ -139,24 +132,6 @@ nearest_local AS ( ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 GROUP BY pcl.pin10, xy.year - - UNION ALL - - SELECT - pcl.pin10, - xy.year, - ARBITRARY(xy.road_name) AS nearest_local_road_name, - ARBITRARY(xy.dist_ft) AS nearest_local_road_dist_ft, - ARBITRARY(xy.year) AS nearest_local_road_data_year, - ARBITRARY(xy.daily_traffic) AS nearest_local_road_daily_traffic, - ARBITRARY(xy.speed_limit) AS nearest_local_road_speed_limit, - ARBITRARY(xy.surface_type) AS nearest_local_road_surface_type, - ARBITRARY(xy.lanes) AS nearest_local_road_lanes - FROM distinct_pins AS pcl - INNER JOIN ( {{ dist_to_nearest_geometry('traffic_local_null') }} ) AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - GROUP BY pcl.pin10, xy.year ), -- Calculate nearest Major Collector road per pin @@ -297,6 +272,7 @@ final_aggregation AS ( >= (SELECT MIN(year) FROM distinct_years) ) + SELECT pin10, nearest_minor_road_name, From 7dd95cdde6e10131445370f972b499f3c67fd979 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 8 Nov 2024 16:57:22 +0000 Subject: [PATCH 062/166] Test pin to local --- .../proximity/proximity.dist_pin_to_local.sql | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 dbt/models/proximity/proximity.dist_pin_to_local.sql diff --git a/dbt/models/proximity/proximity.dist_pin_to_local.sql b/dbt/models/proximity/proximity.dist_pin_to_local.sql new file mode 100644 index 000000000..af3347ffd --- /dev/null +++ b/dbt/models/proximity/proximity.dist_pin_to_local.sql @@ -0,0 +1,61 @@ +{{ + config( + materialized='table', + partitioned_by=['year'], + bucketed_by=['pin10'], + bucket_count=1 + ) +}} + 
+WITH traffic_local AS ( -- noqa: ST03 + SELECT + *, + NTILE(20) OVER () AS partition_num + FROM spatial.traffic + WHERE road_type = 'Local Road or Street' +), + +distinct_pins AS ( + SELECT DISTINCT + x_3435, + y_3435, + pin10 + FROM {{ source('spatial', 'parcel') }} +), + +ranked_nearest_local AS ( + SELECT + pcl.pin10, + xy.year, + xy.road_name AS nearest_local_road_name, + xy.dist_ft AS nearest_local_road_dist_ft, + xy.year AS nearest_local_road_data_year, + xy.daily_traffic AS nearest_local_road_daily_traffic, + xy.speed_limit AS nearest_local_road_speed_limit, + xy.surface_type AS nearest_local_road_surface_type, + xy.lanes AS nearest_local_road_lanes, + ROW_NUMBER() + OVER (PARTITION BY pcl.pin10, xy.year ORDER BY xy.dist_ft) + AS row_num + FROM distinct_pins AS pcl + INNER JOIN ( {{ dist_to_nearest_geometry('traffic_local') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 +), + +nearest_local AS ( + SELECT + pin10, + nearest_local_road_name, + nearest_local_road_dist_ft, + nearest_local_road_data_year, + nearest_local_road_daily_traffic, + nearest_local_road_speed_limit, + nearest_local_road_surface_type, + nearest_local_road_lanes, + year + FROM ranked_nearest_local + WHERE row_num = 1 +) + +SELECT * FROM nearest_local; From 97d41bbb578eca1a9b1a0670e152fb955598a2e9 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 8 Nov 2024 17:22:12 +0000 Subject: [PATCH 063/166] Test Local v2 --- .../proximity/proximity.dist_pin_to_local.sql | 43 +++++++++++-------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_local.sql b/dbt/models/proximity/proximity.dist_pin_to_local.sql index af3347ffd..c57571091 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_local.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_local.sql @@ -24,23 +24,30 @@ distinct_pins AS ( ), ranked_nearest_local AS ( - SELECT - pcl.pin10, - xy.year, - xy.road_name AS nearest_local_road_name, - xy.dist_ft AS 
nearest_local_road_dist_ft, - xy.year AS nearest_local_road_data_year, - xy.daily_traffic AS nearest_local_road_daily_traffic, - xy.speed_limit AS nearest_local_road_speed_limit, - xy.surface_type AS nearest_local_road_surface_type, - xy.lanes AS nearest_local_road_lanes, - ROW_NUMBER() - OVER (PARTITION BY pcl.pin10, xy.year ORDER BY xy.dist_ft) - AS row_num - FROM distinct_pins AS pcl - INNER JOIN ( {{ dist_to_nearest_geometry('traffic_local') }} ) AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 + {% for partition_num in range(1, 21) %} + SELECT + pcl.pin10, + xy.year, + xy.road_name AS nearest_local_road_name, + xy.dist_ft AS nearest_local_road_dist_ft, + xy.year AS nearest_local_road_data_year, + xy.daily_traffic AS nearest_local_road_daily_traffic, + xy.speed_limit AS nearest_local_road_speed_limit, + xy.surface_type AS nearest_local_road_surface_type, + xy.lanes AS nearest_local_road_lanes, + ROW_NUMBER() + OVER (PARTITION BY pcl.pin10, xy.year ORDER BY xy.dist_ft) + AS row_num + FROM distinct_pins AS pcl + INNER JOIN ( + {{ dist_to_nearest_geometry('traffic_local') }} + ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + WHERE xy.partition_num = {{ partition_num }} + + {% if not loop.last %} UNION ALL {% endif %} + {% endfor %} ), nearest_local AS ( @@ -58,4 +65,4 @@ nearest_local AS ( WHERE row_num = 1 ) -SELECT * FROM nearest_local; +SELECT * FROM nearest_local From 4e22dcc295fa5d01676194d5767e322787259b3f Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 8 Nov 2024 18:46:42 +0000 Subject: [PATCH 064/166] Test v3 --- .../proximity.dist_pin_to_local_2.sql | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 dbt/models/proximity/proximity.dist_pin_to_local_2.sql diff --git a/dbt/models/proximity/proximity.dist_pin_to_local_2.sql b/dbt/models/proximity/proximity.dist_pin_to_local_2.sql new file mode 100644 index 000000000..f54d6b9b1 --- /dev/null +++ 
b/dbt/models/proximity/proximity.dist_pin_to_local_2.sql @@ -0,0 +1,63 @@ +{{ + config( + materialized='table', + partitioned_by=['year'], + bucketed_by=['pin10'], + bucket_count=1 + ) +}} + +WITH traffic_local AS ( -- noqa: ST03 + SELECT + *, + -- Cast traffic geometry to the correct type + ST_GEOMFROMBINARY(geometry_3435) AS geometry_cast + FROM {{ source('spatial', 'traffic') }} + WHERE road_type = 'Local Road or Street' +), + +township_geometry AS ( -- noqa: ST03 + SELECT + *, + -- Cast township geometry to the correct type + ST_GEOMFROMBINARY(geometry_3435) AS geometry_cast + FROM {{ source('spatial', 'township') }} +), + +joined_traffic_township AS ( -- noqa: ST03 + SELECT + tl.*, + town.township_code, + ST_DISTANCE(tl.geometry_cast, town.geometry_cast) + AS distance_to_township + FROM traffic_local AS tl + INNER JOIN township_geometry AS town + ON ST_INTERSECTS(tl.geometry_cast, town.geometry_cast) +), + +distinct_pins AS ( -- noqa: ST03 + SELECT DISTINCT + x_3435, + y_3435, + town_code, + pin10 + FROM {{ source('spatial', 'parcel') }} +) + +SELECT + pcl.pin10, + xy.road_name AS nearest_local_road_name, + xy.dist_ft AS nearest_local_road_dist_ft, + xy.year AS nearest_local_road_data_year, + xy.daily_traffic AS nearest_local_road_daily_traffic, + xy.speed_limit AS nearest_local_road_speed_limit, + xy.surface_type AS nearest_local_road_surface_type, + xy.lanes AS nearest_local_road_lanes, + xy.year +FROM distinct_pins AS pcl +INNER JOIN ( + {{ dist_to_nearest_geometry('joined_traffic_township') }} +) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 + AND pcl.town_code = xy.township_code From 4a1b30343d7af4731af1d63706bf1707f6a6a24d Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 13 Nov 2024 17:16:35 +0000 Subject: [PATCH 065/166] Revert to working --- .../proximity/proximity.dist_pin_to_local.sql | 68 ------------------- .../proximity.dist_pin_to_local_2.sql | 63 ----------------- 2 files changed, 131 deletions(-) delete mode 100644 
dbt/models/proximity/proximity.dist_pin_to_local.sql delete mode 100644 dbt/models/proximity/proximity.dist_pin_to_local_2.sql diff --git a/dbt/models/proximity/proximity.dist_pin_to_local.sql b/dbt/models/proximity/proximity.dist_pin_to_local.sql deleted file mode 100644 index c57571091..000000000 --- a/dbt/models/proximity/proximity.dist_pin_to_local.sql +++ /dev/null @@ -1,68 +0,0 @@ -{{ - config( - materialized='table', - partitioned_by=['year'], - bucketed_by=['pin10'], - bucket_count=1 - ) -}} - -WITH traffic_local AS ( -- noqa: ST03 - SELECT - *, - NTILE(20) OVER () AS partition_num - FROM spatial.traffic - WHERE road_type = 'Local Road or Street' -), - -distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435, - pin10 - FROM {{ source('spatial', 'parcel') }} -), - -ranked_nearest_local AS ( - {% for partition_num in range(1, 21) %} - SELECT - pcl.pin10, - xy.year, - xy.road_name AS nearest_local_road_name, - xy.dist_ft AS nearest_local_road_dist_ft, - xy.year AS nearest_local_road_data_year, - xy.daily_traffic AS nearest_local_road_daily_traffic, - xy.speed_limit AS nearest_local_road_speed_limit, - xy.surface_type AS nearest_local_road_surface_type, - xy.lanes AS nearest_local_road_lanes, - ROW_NUMBER() - OVER (PARTITION BY pcl.pin10, xy.year ORDER BY xy.dist_ft) - AS row_num - FROM distinct_pins AS pcl - INNER JOIN ( - {{ dist_to_nearest_geometry('traffic_local') }} - ) AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - WHERE xy.partition_num = {{ partition_num }} - - {% if not loop.last %} UNION ALL {% endif %} - {% endfor %} -), - -nearest_local AS ( - SELECT - pin10, - nearest_local_road_name, - nearest_local_road_dist_ft, - nearest_local_road_data_year, - nearest_local_road_daily_traffic, - nearest_local_road_speed_limit, - nearest_local_road_surface_type, - nearest_local_road_lanes, - year - FROM ranked_nearest_local - WHERE row_num = 1 -) - -SELECT * FROM nearest_local diff --git a/dbt/models/proximity/proximity.dist_pin_to_local_2.sql 
b/dbt/models/proximity/proximity.dist_pin_to_local_2.sql deleted file mode 100644 index f54d6b9b1..000000000 --- a/dbt/models/proximity/proximity.dist_pin_to_local_2.sql +++ /dev/null @@ -1,63 +0,0 @@ -{{ - config( - materialized='table', - partitioned_by=['year'], - bucketed_by=['pin10'], - bucket_count=1 - ) -}} - -WITH traffic_local AS ( -- noqa: ST03 - SELECT - *, - -- Cast traffic geometry to the correct type - ST_GEOMFROMBINARY(geometry_3435) AS geometry_cast - FROM {{ source('spatial', 'traffic') }} - WHERE road_type = 'Local Road or Street' -), - -township_geometry AS ( -- noqa: ST03 - SELECT - *, - -- Cast township geometry to the correct type - ST_GEOMFROMBINARY(geometry_3435) AS geometry_cast - FROM {{ source('spatial', 'township') }} -), - -joined_traffic_township AS ( -- noqa: ST03 - SELECT - tl.*, - town.township_code, - ST_DISTANCE(tl.geometry_cast, town.geometry_cast) - AS distance_to_township - FROM traffic_local AS tl - INNER JOIN township_geometry AS town - ON ST_INTERSECTS(tl.geometry_cast, town.geometry_cast) -), - -distinct_pins AS ( -- noqa: ST03 - SELECT DISTINCT - x_3435, - y_3435, - town_code, - pin10 - FROM {{ source('spatial', 'parcel') }} -) - -SELECT - pcl.pin10, - xy.road_name AS nearest_local_road_name, - xy.dist_ft AS nearest_local_road_dist_ft, - xy.year AS nearest_local_road_data_year, - xy.daily_traffic AS nearest_local_road_daily_traffic, - xy.speed_limit AS nearest_local_road_speed_limit, - xy.surface_type AS nearest_local_road_surface_type, - xy.lanes AS nearest_local_road_lanes, - xy.year -FROM distinct_pins AS pcl -INNER JOIN ( - {{ dist_to_nearest_geometry('joined_traffic_township') }} -) AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - AND pcl.town_code = xy.township_code From a4277e84779b14144bbe734b67babf9d4fe2424f Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 13 Nov 2024 19:29:13 +0000 Subject: [PATCH 066/166] Add _road to macro --- dbt/macros/nearest_feature_aggregation.sql | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/macros/nearest_feature_aggregation.sql b/dbt/macros/nearest_feature_aggregation.sql index 82125083e..7fa914ce4 100644 --- a/dbt/macros/nearest_feature_aggregation.sql +++ b/dbt/macros/nearest_feature_aggregation.sql @@ -6,6 +6,6 @@ then {{ base.replace("dist_ft", characteristic) }} {%- endfor %} else null - end as closest_{{ characteristic }}, + end as closest_road_{{ characteristic }}, {%- endfor %} {% endmacro %} From cdf419c96a341cef975f561de4d6a78cd0844068 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 13 Nov 2024 19:30:03 +0000 Subject: [PATCH 067/166] Add _road --- dbt/macros/process_data.sql | 54 +++++++++++++++++++++++++++++++++++++ dbt/macros/split_data.sql | 7 +++++ 2 files changed, 61 insertions(+) create mode 100644 dbt/macros/process_data.sql create mode 100644 dbt/macros/split_data.sql diff --git a/dbt/macros/process_data.sql b/dbt/macros/process_data.sql new file mode 100644 index 000000000..815fe69ac --- /dev/null +++ b/dbt/macros/process_data.sql @@ -0,0 +1,54 @@ +{% macro process_partitioned_data(partitions) %} + {% set union_all_parts = [] %} + + with + distinct_pins as ( + select distinct x_3435, y_3435, pin10 from {{ source("spatial", "parcel") }} + ) + + {% for partition in partitions %} + , + nearest_for_{{ partition }} as ( + select + pcl.pin10, + xy.year, + xy.road_name as nearest_local_road_name, + xy.dist_ft as nearest_local_road_dist_ft, + xy.year as nearest_local_road_data_year, + xy.daily_traffic as nearest_local_road_daily_traffic, + xy.speed_limit as nearest_local_road_speed_limit, + xy.surface_type as nearest_local_road_surface_type, + xy.lanes as nearest_local_road_lanes, + row_number() over ( + partition by pcl.pin10, xy.year order by xy.dist_ft + ) as row_num + from distinct_pins as pcl + inner join + {{ dist_to_nearest_geometry(ref(partition)) }} as xy + on pcl.x_3435 = xy.x_3435 + and pcl.y_3435 = xy.y_3435 + ) + + {% do union_all_parts.append("nearest_for_" ~ 
partition) %} + {% endfor %}, + ranked_nearest_local as ( + select * from {{ union_all_parts | join(" union all\n") }} + ), + nearest_local as ( + select + pin10, + nearest_local_road_name, + nearest_local_road_dist_ft, + nearest_local_road_data_year, + nearest_local_road_daily_traffic, + nearest_local_road_speed_limit, + nearest_local_road_surface_type, + nearest_local_road_lanes, + year + from ranked_nearest_local + where row_num = 1 + ) + + select * + from nearest_local +{% endmacro %} diff --git a/dbt/macros/split_data.sql b/dbt/macros/split_data.sql new file mode 100644 index 000000000..f13e05649 --- /dev/null +++ b/dbt/macros/split_data.sql @@ -0,0 +1,7 @@ +{% macro create_dist_to_nearest_cte(township_code) %} + filtered_joined_traffic_township_{{ township_code }} as ( + select * + from {{ dist_to_nearest_geometry("filtered_joined_traffic_township") }} + where township_code = {{ township_code }} + ) +{% endmacro %} From 1a3f32ebd8628506aa7492194ad5f9726d10a096 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 13 Nov 2024 19:30:52 +0000 Subject: [PATCH 068/166] Remove test macros --- dbt/macros/process_data.sql | 54 ------------------------------------- dbt/macros/split_data.sql | 7 ----- 2 files changed, 61 deletions(-) delete mode 100644 dbt/macros/process_data.sql delete mode 100644 dbt/macros/split_data.sql diff --git a/dbt/macros/process_data.sql b/dbt/macros/process_data.sql deleted file mode 100644 index 815fe69ac..000000000 --- a/dbt/macros/process_data.sql +++ /dev/null @@ -1,54 +0,0 @@ -{% macro process_partitioned_data(partitions) %} - {% set union_all_parts = [] %} - - with - distinct_pins as ( - select distinct x_3435, y_3435, pin10 from {{ source("spatial", "parcel") }} - ) - - {% for partition in partitions %} - , - nearest_for_{{ partition }} as ( - select - pcl.pin10, - xy.year, - xy.road_name as nearest_local_road_name, - xy.dist_ft as nearest_local_road_dist_ft, - xy.year as nearest_local_road_data_year, - xy.daily_traffic as 
nearest_local_road_daily_traffic, - xy.speed_limit as nearest_local_road_speed_limit, - xy.surface_type as nearest_local_road_surface_type, - xy.lanes as nearest_local_road_lanes, - row_number() over ( - partition by pcl.pin10, xy.year order by xy.dist_ft - ) as row_num - from distinct_pins as pcl - inner join - {{ dist_to_nearest_geometry(ref(partition)) }} as xy - on pcl.x_3435 = xy.x_3435 - and pcl.y_3435 = xy.y_3435 - ) - - {% do union_all_parts.append("nearest_for_" ~ partition) %} - {% endfor %}, - ranked_nearest_local as ( - select * from {{ union_all_parts | join(" union all\n") }} - ), - nearest_local as ( - select - pin10, - nearest_local_road_name, - nearest_local_road_dist_ft, - nearest_local_road_data_year, - nearest_local_road_daily_traffic, - nearest_local_road_speed_limit, - nearest_local_road_surface_type, - nearest_local_road_lanes, - year - from ranked_nearest_local - where row_num = 1 - ) - - select * - from nearest_local -{% endmacro %} diff --git a/dbt/macros/split_data.sql b/dbt/macros/split_data.sql deleted file mode 100644 index f13e05649..000000000 --- a/dbt/macros/split_data.sql +++ /dev/null @@ -1,7 +0,0 @@ -{% macro create_dist_to_nearest_cte(township_code) %} - filtered_joined_traffic_township_{{ township_code }} as ( - select * - from {{ dist_to_nearest_geometry("filtered_joined_traffic_township") }} - where township_code = {{ township_code }} - ) -{% endmacro %} From 3647eed78fffe6d2e0db5ffd82b5a43c6beb26f8 Mon Sep 17 00:00:00 2001 From: Damonamajor <56321109+Damonamajor@users.noreply.github.com> Date: Thu, 14 Nov 2024 14:00:56 -0600 Subject: [PATCH 069/166] Update dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql Co-authored-by: William Ridgeway <10358980+wrridgeway@users.noreply.github.com> --- dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql 
b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql index 04f18448d..90a81b977 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql @@ -269,7 +269,8 @@ final_aggregation AS ( local_road.year, collector.year, other.year ) - >= (SELECT MIN(year) FROM distinct_years) + >= (SELECT MIN(year) FROM "awsdatacatalog"."spatial"."traffic" + WHERE road_type IS NOT NULL) ) From 9cd97aef7bc58db9e2c0d00d29db97eb1c857c92 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 14 Nov 2024 20:26:33 +0000 Subject: [PATCH 070/166] nearest highway --- .../proximity_dist_pin_to_highway.sql | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 dbt/models/proximity/proximity_dist_pin_to_highway.sql diff --git a/dbt/models/proximity/proximity_dist_pin_to_highway.sql b/dbt/models/proximity/proximity_dist_pin_to_highway.sql new file mode 100644 index 000000000..0c384e77f --- /dev/null +++ b/dbt/models/proximity/proximity_dist_pin_to_highway.sql @@ -0,0 +1,31 @@ +{{ + config( + materialized='table', + partitioned_by=['year'], + bucketed_by=['pin10'], + bucket_count=1 + ) +}} + +WITH highway AS ( -- noqa: ST03 + SELECT * + FROM {{ source('spatial', 'traffic') }} + WHERE road_type = 'Interstate' + OR road_type = 'Freeway and Expressway' +), + +SELECT + pcl.pin10, + xy.year, + ARBITRARY(xy.road_name) AS nearest_highway_road_name, + ARBITRARY(xy.dist_ft) AS nearest_highway_road_dist_ft, + ARBITRARY(xy.year) AS nearest_highway_road_data_year, + ARBITRARY(xy.daily_traffic) AS nearest_highway_road_daily_traffic, + ARBITRARY(xy.speed_limit) AS nearest_highway_road_speed_limit, + ARBITRARY(xy.surface_type) AS nearest_highway_road_surface_type, + ARBITRARY(xy.lanes) AS nearest_highway_road_lanes +FROM distinct_pins AS pcl +INNER JOIN ( {{ dist_to_nearest_geometry('highway') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 +GROUP BY pcl.pin10, xy.year From 
e415509f498d361d369377f1078a14ed281972f6 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 14 Nov 2024 20:46:42 +0000 Subject: [PATCH 071/166] Add highways/collector --- .../proximity.dist_pin_to_collector.sql | 31 +++++++++++++++++++ ....sql => proximity.dist_pin_to_highway.sql} | 0 2 files changed, 31 insertions(+) create mode 100644 dbt/models/proximity/proximity.dist_pin_to_collector.sql rename dbt/models/proximity/{proximity_dist_pin_to_highway.sql => proximity.dist_pin_to_highway.sql} (100%) diff --git a/dbt/models/proximity/proximity.dist_pin_to_collector.sql b/dbt/models/proximity/proximity.dist_pin_to_collector.sql new file mode 100644 index 000000000..65f0808ac --- /dev/null +++ b/dbt/models/proximity/proximity.dist_pin_to_collector.sql @@ -0,0 +1,31 @@ +{{ + config( + materialized='table', + partitioned_by=['year'], + bucketed_by=['pin10'], + bucket_count=1 + ) +}} + +WITH collector AS ( -- noqa: ST03 + SELECT * + FROM {{ source('spatial', 'traffic') }} + WHERE road_type = 'Major Collector' + OR road_type = 'Minor Collector' +), + +SELECT + pcl.pin10, + xy.year, + ARBITRARY(xy.road_name) AS nearest_collector_road_name, + ARBITRARY(xy.dist_ft) AS nearest_collector_road_dist_ft, + ARBITRARY(xy.year) AS nearest_collector_road_data_year, + ARBITRARY(xy.daily_traffic) AS nearest_collector_road_daily_traffic, + ARBITRARY(xy.speed_limit) AS nearest_collector_road_speed_limit, + ARBITRARY(xy.surface_type) AS nearest_collector_road_surface_type, + ARBITRARY(xy.lanes) AS nearest_collector_road_lanes +FROM distinct_pins AS pcl +INNER JOIN ( {{ dist_to_nearest_geometry('collector') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 +GROUP BY pcl.pin10, xy.year diff --git a/dbt/models/proximity/proximity_dist_pin_to_highway.sql b/dbt/models/proximity/proximity.dist_pin_to_highway.sql similarity index 100% rename from dbt/models/proximity/proximity_dist_pin_to_highway.sql rename to dbt/models/proximity/proximity.dist_pin_to_highway.sql From 
d9035177ccf8989dd872d500b23f5bc78f15ee54 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 14 Nov 2024 20:53:48 +0000 Subject: [PATCH 072/166] Add arterial, delete master --- .../proximity/proximity.dist_pin_to_arterial | 31 ++ .../proximity.dist_pin_to_traffic_master.sql | 341 ------------------ 2 files changed, 31 insertions(+), 341 deletions(-) create mode 100644 dbt/models/proximity/proximity.dist_pin_to_arterial delete mode 100644 dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql diff --git a/dbt/models/proximity/proximity.dist_pin_to_arterial b/dbt/models/proximity/proximity.dist_pin_to_arterial new file mode 100644 index 000000000..e3dfa6717 --- /dev/null +++ b/dbt/models/proximity/proximity.dist_pin_to_arterial @@ -0,0 +1,31 @@ +{{ + config( + materialized='table', + partitioned_by=['year'], + bucketed_by=['pin10'], + bucket_count=1 + ) +}} + +WITH arterial AS ( -- noqa: ST03 + SELECT * + FROM {{ source('spatial', 'traffic') }} + WHERE road_type = 'Major Arterial' + OR road_type = 'Minor Arterial' +), + +SELECT + pcl.pin10, + xy.year, + ARBITRARY(xy.road_name) AS nearest_arterial_road_name, + ARBITRARY(xy.dist_ft) AS nearest_arterial_road_dist_ft, + ARBITRARY(xy.year) AS nearest_arterial_road_data_year, + ARBITRARY(xy.daily_traffic) AS nearest_arterial_road_daily_traffic, + ARBITRARY(xy.speed_limit) AS nearest_arterial_road_speed_limit, + ARBITRARY(xy.surface_type) AS nearest_arterial_road_surface_type, + ARBITRARY(xy.lanes) AS nearest_arterial_road_lanes +FROM distinct_pins AS pcl +INNER JOIN ( {{ dist_to_nearest_geometry('arterial') }} ) AS xy + ON pcl.x_3435 = xy.x_3435 + AND pcl.y_3435 = xy.y_3435 +GROUP BY pcl.pin10, xy.year diff --git a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql b/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql deleted file mode 100644 index 90a81b977..000000000 --- a/dbt/models/proximity/proximity.dist_pin_to_traffic_master.sql +++ /dev/null @@ -1,341 +0,0 @@ -{{ - config( - 
materialized='table', - partitioned_by=['year'], - bucketed_by=['pin10'], - bucket_count=1 - ) -}} - -WITH traffic_minor AS ( -- noqa: ST03 - SELECT * - FROM {{ source('spatial', 'traffic') }} - WHERE road_type = 'Minor Arterial' -), - -traffic_interstate AS ( -- noqa: ST03 - SELECT * - FROM {{ source('spatial', 'traffic') }} - WHERE road_type = 'Interstate' -), - -traffic_freeway AS ( -- noqa: ST03 - SELECT * - FROM {{ source('spatial', 'traffic') }} - WHERE road_type = 'Freeway and Expressway' -), - -traffic_local AS ( -- noqa: ST03 - SELECT * - FROM {{ source('spatial', 'traffic') }} - WHERE road_type = 'Local Road or Street' - AND daily_traffic IS NOT NULL -), - -traffic_collector AS ( -- noqa: ST03 - SELECT * - FROM {{ source('spatial', 'traffic') }} - WHERE road_type = 'Major Collector' - OR road_type = 'Minor Collector' -), - -traffic_other AS ( -- noqa: ST03 - SELECT * - FROM {{ source('spatial', 'traffic') }} - WHERE road_type = 'Other Principal Arterial' -), - -distinct_pins AS ( - SELECT DISTINCT - x_3435, - y_3435, - pin10 - FROM {{ source('spatial', 'parcel') }} -), - -distinct_years AS ( - SELECT DISTINCT year - FROM {{ source('spatial', 'traffic') }} - WHERE road_type IS NOT NULL -), - --- Calculate nearest Minor Arterial road per pin -nearest_minor AS ( - SELECT - pcl.pin10, - xy.year, - ARBITRARY(xy.road_name) AS nearest_minor_road_name, - ARBITRARY(xy.dist_ft) AS nearest_minor_road_dist_ft, - ARBITRARY(xy.year) AS nearest_minor_road_data_year, - ARBITRARY(xy.daily_traffic) AS nearest_minor_road_daily_traffic, - ARBITRARY(xy.speed_limit) AS nearest_minor_road_speed_limit, - ARBITRARY(xy.surface_type) AS nearest_minor_road_surface_type, - ARBITRARY(xy.lanes) AS nearest_minor_road_lanes - FROM distinct_pins AS pcl - INNER JOIN ( {{ dist_to_nearest_geometry('traffic_minor') }} ) AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - GROUP BY pcl.pin10, xy.year -), - --- Calculate nearest Interstate road per pin -nearest_interstate AS ( - 
SELECT - pcl.pin10, - xy.year, - ARBITRARY(xy.road_name) AS nearest_interstate_road_name, - ARBITRARY(xy.dist_ft) AS nearest_interstate_road_dist_ft, - ARBITRARY(xy.year) AS nearest_interstate_road_data_year, - ARBITRARY(xy.daily_traffic) AS nearest_interstate_road_daily_traffic, - ARBITRARY(xy.speed_limit) AS nearest_interstate_road_speed_limit, - ARBITRARY(xy.surface_type) AS nearest_interstate_road_surface_type, - ARBITRARY(xy.lanes) AS nearest_interstate_road_lanes - FROM distinct_pins AS pcl - INNER JOIN ( {{ dist_to_nearest_geometry('traffic_interstate') }} ) AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - GROUP BY pcl.pin10, xy.year -), - --- Calculate nearest Freeway or Expressway road per pin -nearest_freeway AS ( - SELECT - pcl.pin10, - xy.year, - ARBITRARY(xy.road_name) AS nearest_freeway_road_name, - ARBITRARY(xy.dist_ft) AS nearest_freeway_road_dist_ft, - ARBITRARY(xy.year) AS nearest_freeway_road_data_year, - ARBITRARY(xy.daily_traffic) AS nearest_freeway_road_daily_traffic, - ARBITRARY(xy.speed_limit) AS nearest_freeway_road_speed_limit, - ARBITRARY(xy.surface_type) AS nearest_freeway_road_surface_type, - ARBITRARY(xy.lanes) AS nearest_freeway_road_lanes - FROM distinct_pins AS pcl - INNER JOIN ( {{ dist_to_nearest_geometry('traffic_freeway') }} ) AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - GROUP BY pcl.pin10, xy.year -), - -nearest_local AS ( - SELECT - pcl.pin10, - xy.year, - ARBITRARY(xy.road_name) AS nearest_local_road_name, - ARBITRARY(xy.dist_ft) AS nearest_local_road_dist_ft, - ARBITRARY(xy.year) AS nearest_local_road_data_year, - ARBITRARY(xy.daily_traffic) AS nearest_local_road_daily_traffic, - ARBITRARY(xy.speed_limit) AS nearest_local_road_speed_limit, - ARBITRARY(xy.surface_type) AS nearest_local_road_surface_type, - ARBITRARY(xy.lanes) AS nearest_local_road_lanes - FROM distinct_pins AS pcl - INNER JOIN ( {{ dist_to_nearest_geometry('traffic_local') }} ) AS xy - ON pcl.x_3435 = xy.x_3435 - AND 
pcl.y_3435 = xy.y_3435 - GROUP BY pcl.pin10, xy.year -), - --- Calculate nearest Major Collector road per pin -nearest_collector AS ( - SELECT - pcl.pin10, - xy.year, - ARBITRARY(xy.road_name) AS nearest_collector_road_name, - ARBITRARY(xy.dist_ft) AS nearest_collector_road_dist_ft, - ARBITRARY(xy.year) AS nearest_collector_road_data_year, - ARBITRARY(xy.speed_limit) - AS nearest_collector_road_speed_limit, - ARBITRARY(xy.daily_traffic) AS nearest_collector_road_daily_traffic, - ARBITRARY(xy.surface_type) AS nearest_collector_road_surface_type, - ARBITRARY(xy.lanes) AS nearest_collector_road_lanes - FROM distinct_pins AS pcl - INNER JOIN - ( {{ dist_to_nearest_geometry('traffic_collector') }} ) AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - GROUP BY pcl.pin10, xy.year -), - --- Calculate nearest Other Principal Arterial road per pin -nearest_other AS ( - SELECT - pcl.pin10, - xy.year, - ARBITRARY(xy.road_name) AS nearest_other_road_name, - ARBITRARY(xy.dist_ft) AS nearest_other_road_dist_ft, - ARBITRARY(xy.year) AS nearest_other_road_data_year, - ARBITRARY(xy.daily_traffic) AS nearest_other_road_daily_traffic, - ARBITRARY(xy.speed_limit) AS nearest_other_road_speed_limit, - ARBITRARY(xy.surface_type) AS nearest_other_road_surface_type, - ARBITRARY(xy.lanes) AS nearest_other_road_lanes - FROM distinct_pins AS pcl - INNER JOIN ( {{ dist_to_nearest_geometry('traffic_other') }} ) AS xy - ON pcl.x_3435 = xy.x_3435 - AND pcl.y_3435 = xy.y_3435 - GROUP BY pcl.pin10, xy.year -), - --- Join all nearest roads by pin10 and year -final_aggregation AS ( - SELECT - COALESCE( - minor.pin10, interstate.pin10, freeway.pin10, - local_road.pin10, collector.pin10 - ) AS pin10, - minor.nearest_minor_road_name, - minor.nearest_minor_road_dist_ft, - minor.nearest_minor_road_data_year, - minor.nearest_minor_road_daily_traffic, - minor.nearest_minor_road_speed_limit, - minor.nearest_minor_road_surface_type, - minor.nearest_minor_road_lanes, - 
interstate.nearest_interstate_road_name, - interstate.nearest_interstate_road_dist_ft, - interstate.nearest_interstate_road_data_year, - interstate.nearest_interstate_road_daily_traffic, - interstate.nearest_interstate_road_speed_limit, - interstate.nearest_interstate_road_surface_type, - interstate.nearest_interstate_road_lanes, - freeway.nearest_freeway_road_name, - freeway.nearest_freeway_road_dist_ft, - freeway.nearest_freeway_road_data_year, - freeway.nearest_freeway_road_daily_traffic, - freeway.nearest_freeway_road_speed_limit, - freeway.nearest_freeway_road_surface_type, - freeway.nearest_freeway_road_lanes, - local_road.nearest_local_road_name, - local_road.nearest_local_road_dist_ft, - local_road.nearest_local_road_data_year, - local_road.nearest_local_road_daily_traffic, - local_road.nearest_local_road_speed_limit, - local_road.nearest_local_road_surface_type, - local_road.nearest_local_road_lanes, - collector.nearest_collector_road_name, - collector.nearest_collector_road_dist_ft, - collector.nearest_collector_road_data_year, - collector.nearest_collector_road_daily_traffic, - collector.nearest_collector_road_speed_limit, - collector.nearest_collector_road_surface_type, - collector.nearest_collector_road_lanes, - other.nearest_other_road_name, - other.nearest_other_road_dist_ft, - other.nearest_other_road_data_year, - other.nearest_other_road_daily_traffic, - other.nearest_other_road_speed_limit, - other.nearest_other_road_surface_type, - other.nearest_other_road_lanes, - COALESCE( - minor.year, interstate.year, freeway.year, - local_road.year, collector.year, other.year) - AS year - FROM nearest_minor AS minor - FULL OUTER JOIN nearest_interstate AS interstate - ON minor.pin10 = interstate.pin10 - AND minor.year = interstate.year - FULL OUTER JOIN nearest_freeway AS freeway - ON COALESCE(minor.pin10, interstate.pin10) = freeway.pin10 - AND COALESCE(minor.year, interstate.year) = freeway.year - FULL OUTER JOIN nearest_local AS local_road - ON 
COALESCE(minor.pin10, interstate.pin10, freeway.pin10) - = local_road.pin10 - AND COALESCE(minor.year, interstate.year, freeway.year) - = local_road.year - FULL OUTER JOIN nearest_collector AS collector - ON COALESCE( - minor.pin10, interstate.pin10, - freeway.pin10, local_road.pin10 - ) - = collector.pin10 - AND COALESCE(minor.year, interstate.year, freeway.year, local_road.year) - = collector.year - FULL OUTER JOIN nearest_other AS other - ON COALESCE( - minor.pin10, - interstate.pin10, - freeway.pin10, - local_road.pin10, - collector.pin10 - ) - = other.pin10 - AND COALESCE( - minor.year, - interstate.year, - freeway.year, - local_road.year, - collector.year - ) - = other.year - WHERE COALESCE( - minor.year, interstate.year, freeway.year, - local_road.year, collector.year, - other.year - ) - >= (SELECT MIN(year) FROM "awsdatacatalog"."spatial"."traffic" - WHERE road_type IS NOT NULL) -) - - -SELECT - pin10, - nearest_minor_road_name, - nearest_minor_road_dist_ft, - nearest_minor_road_data_year, - nearest_minor_road_daily_traffic, - nearest_minor_road_speed_limit, - nearest_minor_road_surface_type, - nearest_minor_road_lanes, - nearest_interstate_road_name, - nearest_interstate_road_dist_ft, - nearest_interstate_road_data_year, - nearest_interstate_road_daily_traffic, - nearest_interstate_road_speed_limit, - nearest_interstate_road_surface_type, - nearest_interstate_road_lanes, - nearest_freeway_road_name, - nearest_freeway_road_dist_ft, - nearest_freeway_road_data_year, - nearest_freeway_road_daily_traffic, - nearest_freeway_road_speed_limit, - nearest_freeway_road_surface_type, - nearest_freeway_road_lanes, - nearest_local_road_name, - nearest_local_road_dist_ft, - nearest_local_road_data_year, - nearest_local_road_daily_traffic, - nearest_local_road_speed_limit, - nearest_local_road_surface_type, - nearest_local_road_lanes, - nearest_collector_road_name, - nearest_collector_road_dist_ft, - nearest_collector_road_data_year, - 
nearest_collector_road_daily_traffic, - nearest_collector_road_speed_limit, - nearest_collector_road_surface_type, - nearest_collector_road_lanes, - nearest_other_road_name, - nearest_other_road_dist_ft, - nearest_other_road_data_year, - nearest_other_road_daily_traffic, - nearest_other_road_speed_limit, - nearest_other_road_surface_type, - nearest_other_road_lanes, - {{ nearest_feature_aggregation( - [ - "nearest_minor_road_dist_ft", - "nearest_interstate_road_dist_ft", - "nearest_other_road_dist_ft", - "nearest_freeway_road_dist_ft", - "nearest_local_road_dist_ft", - "nearest_collector_road_dist_ft" - ], - [ - "name", - "dist_ft", - "data_year", - "daily_traffic", - "speed_limit", - "surface_type", - "lanes" - ] - ) }} - year -FROM final_aggregation From f17707431069cb0f024835c9d176a4615b478265 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 14 Nov 2024 21:00:49 +0000 Subject: [PATCH 073/166] Add spatial.parcel --- ....dist_pin_to_arterial => proximity.dist_pin_to_arterial.sql} | 2 +- dbt/models/proximity/proximity.dist_pin_to_collector.sql | 2 +- dbt/models/proximity/proximity.dist_pin_to_highway.sql | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) rename dbt/models/proximity/{proximity.dist_pin_to_arterial => proximity.dist_pin_to_arterial.sql} (95%) diff --git a/dbt/models/proximity/proximity.dist_pin_to_arterial b/dbt/models/proximity/proximity.dist_pin_to_arterial.sql similarity index 95% rename from dbt/models/proximity/proximity.dist_pin_to_arterial rename to dbt/models/proximity/proximity.dist_pin_to_arterial.sql index e3dfa6717..672a34407 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_arterial +++ b/dbt/models/proximity/proximity.dist_pin_to_arterial.sql @@ -24,7 +24,7 @@ SELECT ARBITRARY(xy.speed_limit) AS nearest_arterial_road_speed_limit, ARBITRARY(xy.surface_type) AS nearest_arterial_road_surface_type, ARBITRARY(xy.lanes) AS nearest_arterial_road_lanes -FROM distinct_pins AS pcl +FROM {{ source('spatial', 'parcel') }} AS pcl 
INNER JOIN ( {{ dist_to_nearest_geometry('arterial') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 diff --git a/dbt/models/proximity/proximity.dist_pin_to_collector.sql b/dbt/models/proximity/proximity.dist_pin_to_collector.sql index 65f0808ac..bcba5c8e3 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_collector.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_collector.sql @@ -24,7 +24,7 @@ SELECT ARBITRARY(xy.speed_limit) AS nearest_collector_road_speed_limit, ARBITRARY(xy.surface_type) AS nearest_collector_road_surface_type, ARBITRARY(xy.lanes) AS nearest_collector_road_lanes -FROM distinct_pins AS pcl +FROM {{ source('spatial', 'parcel') }} AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('collector') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 diff --git a/dbt/models/proximity/proximity.dist_pin_to_highway.sql b/dbt/models/proximity/proximity.dist_pin_to_highway.sql index 0c384e77f..97d62b383 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_highway.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_highway.sql @@ -24,7 +24,7 @@ SELECT ARBITRARY(xy.speed_limit) AS nearest_highway_road_speed_limit, ARBITRARY(xy.surface_type) AS nearest_highway_road_surface_type, ARBITRARY(xy.lanes) AS nearest_highway_road_lanes -FROM distinct_pins AS pcl +FROM {{ source('spatial', 'parcel') }} AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('highway') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 From c03c32046ad38ede2316ed9d884ef08a59b20be8 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 14 Nov 2024 21:04:13 +0000 Subject: [PATCH 074/166] Add year to join --- dbt/models/proximity/proximity.dist_pin_to_arterial.sql | 1 + dbt/models/proximity/proximity.dist_pin_to_collector.sql | 1 + dbt/models/proximity/proximity.dist_pin_to_highway.sql | 1 + 3 files changed, 3 insertions(+) diff --git a/dbt/models/proximity/proximity.dist_pin_to_arterial.sql b/dbt/models/proximity/proximity.dist_pin_to_arterial.sql 
index 672a34407..16cd1de9c 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_arterial.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_arterial.sql @@ -28,4 +28,5 @@ FROM {{ source('spatial', 'parcel') }} AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('arterial') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year GROUP BY pcl.pin10, xy.year diff --git a/dbt/models/proximity/proximity.dist_pin_to_collector.sql b/dbt/models/proximity/proximity.dist_pin_to_collector.sql index bcba5c8e3..5446851de 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_collector.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_collector.sql @@ -28,4 +28,5 @@ FROM {{ source('spatial', 'parcel') }} AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('collector') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year GROUP BY pcl.pin10, xy.year diff --git a/dbt/models/proximity/proximity.dist_pin_to_highway.sql b/dbt/models/proximity/proximity.dist_pin_to_highway.sql index 97d62b383..8aea38ee1 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_highway.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_highway.sql @@ -28,4 +28,5 @@ FROM {{ source('spatial', 'parcel') }} AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('highway') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 + AND pcl.year = xy.pin_year GROUP BY pcl.pin10, xy.year From 48603c09c2b6f2e04dfc908025078a7ee6651737 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 14 Nov 2024 21:20:09 +0000 Subject: [PATCH 075/166] Fix query --- .../proximity/proximity.dist_pin_to_arterial.sql | 10 +++++----- .../proximity/proximity.dist_pin_to_collector.sql | 10 +++++----- dbt/models/proximity/proximity.dist_pin_to_highway.sql | 10 +++++----- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_arterial.sql b/dbt/models/proximity/proximity.dist_pin_to_arterial.sql index 
16cd1de9c..44311dce1 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_arterial.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_arterial.sql @@ -12,21 +12,21 @@ WITH arterial AS ( -- noqa: ST03 FROM {{ source('spatial', 'traffic') }} WHERE road_type = 'Major Arterial' OR road_type = 'Minor Arterial' -), +) SELECT pcl.pin10, - xy.year, ARBITRARY(xy.road_name) AS nearest_arterial_road_name, ARBITRARY(xy.dist_ft) AS nearest_arterial_road_dist_ft, - ARBITRARY(xy.year) AS nearest_arterial_road_data_year, ARBITRARY(xy.daily_traffic) AS nearest_arterial_road_daily_traffic, ARBITRARY(xy.speed_limit) AS nearest_arterial_road_speed_limit, ARBITRARY(xy.surface_type) AS nearest_arterial_road_surface_type, - ARBITRARY(xy.lanes) AS nearest_arterial_road_lanes + ARBITRARY(xy.lanes) AS nearest_arterial_road_lanes, + ARBITRARY(xy.year) AS nearest_arterial_road_data_year, + pcl.year FROM {{ source('spatial', 'parcel') }} AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('arterial') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 AND pcl.year = xy.pin_year -GROUP BY pcl.pin10, xy.year +GROUP BY pcl.pin10, pcl.year diff --git a/dbt/models/proximity/proximity.dist_pin_to_collector.sql b/dbt/models/proximity/proximity.dist_pin_to_collector.sql index 5446851de..f49c4e2ae 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_collector.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_collector.sql @@ -12,21 +12,21 @@ WITH collector AS ( -- noqa: ST03 FROM {{ source('spatial', 'traffic') }} WHERE road_type = 'Major Collector' OR road_type = 'Minor Collector' -), +) SELECT pcl.pin10, - xy.year, ARBITRARY(xy.road_name) AS nearest_collector_road_name, ARBITRARY(xy.dist_ft) AS nearest_collector_road_dist_ft, - ARBITRARY(xy.year) AS nearest_collector_road_data_year, ARBITRARY(xy.daily_traffic) AS nearest_collector_road_daily_traffic, ARBITRARY(xy.speed_limit) AS nearest_collector_road_speed_limit, ARBITRARY(xy.surface_type) AS nearest_collector_road_surface_type, - 
ARBITRARY(xy.lanes) AS nearest_collector_road_lanes + ARBITRARY(xy.lanes) AS nearest_collector_road_lanes, + ARBITRARY(xy.year) AS nearest_collector_road_data_year, + pcl.year FROM {{ source('spatial', 'parcel') }} AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('collector') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 AND pcl.year = xy.pin_year -GROUP BY pcl.pin10, xy.year +GROUP BY pcl.pin10, pcl.year diff --git a/dbt/models/proximity/proximity.dist_pin_to_highway.sql b/dbt/models/proximity/proximity.dist_pin_to_highway.sql index 8aea38ee1..a39675b05 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_highway.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_highway.sql @@ -12,21 +12,21 @@ WITH highway AS ( -- noqa: ST03 FROM {{ source('spatial', 'traffic') }} WHERE road_type = 'Interstate' OR road_type = 'Freeway and Expressway' -), +) SELECT pcl.pin10, - xy.year, ARBITRARY(xy.road_name) AS nearest_highway_road_name, ARBITRARY(xy.dist_ft) AS nearest_highway_road_dist_ft, - ARBITRARY(xy.year) AS nearest_highway_road_data_year, ARBITRARY(xy.daily_traffic) AS nearest_highway_road_daily_traffic, ARBITRARY(xy.speed_limit) AS nearest_highway_road_speed_limit, ARBITRARY(xy.surface_type) AS nearest_highway_road_surface_type, - ARBITRARY(xy.lanes) AS nearest_highway_road_lanes + ARBITRARY(xy.lanes) AS nearest_highway_road_lanes, + ARBITRARY(xy.year) AS nearest_highway_road_data_year, + pcl.year FROM {{ source('spatial', 'parcel') }} AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('highway') }} ) AS xy ON pcl.x_3435 = xy.x_3435 AND pcl.y_3435 = xy.y_3435 AND pcl.year = xy.pin_year -GROUP BY pcl.pin10, xy.year +GROUP BY pcl.pin10, pcl.year From 8544f802c97ec56345e70a62e317f5d3083d946c Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 14 Nov 2024 21:54:41 +0000 Subject: [PATCH 076/166] Add to proximity --- ...> proximity.dist_pin_to_arterial_road.sql} | 0 ... 
proximity.dist_pin_to_collector_road.sql} | 0 ...=> proximity.dist_pin_to_highway_road.sql} | 0 .../proximity.vw_pin10_proximity.sql | 40 +++++++++++++++++++ 4 files changed, 40 insertions(+) rename dbt/models/proximity/{proximity.dist_pin_to_arterial.sql => proximity.dist_pin_to_arterial_road.sql} (100%) rename dbt/models/proximity/{proximity.dist_pin_to_collector.sql => proximity.dist_pin_to_collector_road.sql} (100%) rename dbt/models/proximity/{proximity.dist_pin_to_highway.sql => proximity.dist_pin_to_highway_road.sql} (100%) diff --git a/dbt/models/proximity/proximity.dist_pin_to_arterial.sql b/dbt/models/proximity/proximity.dist_pin_to_arterial_road.sql similarity index 100% rename from dbt/models/proximity/proximity.dist_pin_to_arterial.sql rename to dbt/models/proximity/proximity.dist_pin_to_arterial_road.sql diff --git a/dbt/models/proximity/proximity.dist_pin_to_collector.sql b/dbt/models/proximity/proximity.dist_pin_to_collector_road.sql similarity index 100% rename from dbt/models/proximity/proximity.dist_pin_to_collector.sql rename to dbt/models/proximity/proximity.dist_pin_to_collector_road.sql diff --git a/dbt/models/proximity/proximity.dist_pin_to_highway.sql b/dbt/models/proximity/proximity.dist_pin_to_highway_road.sql similarity index 100% rename from dbt/models/proximity/proximity.dist_pin_to_highway.sql rename to dbt/models/proximity/proximity.dist_pin_to_highway_road.sql diff --git a/dbt/models/proximity/proximity.vw_pin10_proximity.sql b/dbt/models/proximity/proximity.vw_pin10_proximity.sql index 9085b479d..b09e50d34 100644 --- a/dbt/models/proximity/proximity.vw_pin10_proximity.sql +++ b/dbt/models/proximity/proximity.vw_pin10_proximity.sql @@ -19,6 +19,15 @@ SELECT cnt_pin_num_school.num_school_rating_data_year, dist_pin_to_airport.airport_dnl_total, + + dist_pin_to_arterial_road.nearest_arterial_road_name, + dist_pin_to_arterial_road.nearest_arterial_road_daily_traffic, + dist_pin_to_arterial_road.nearest_arterial_road_speed_limit, + 
dist_pin_to_arterial_road.nearest_arterial_road_surface_type, + dist_pin_to_arterial_road.nearest_arterial_road_lanes, + dist_pin_to_arterial_road.nearest_arterial_road_dist_ft, + dist_pin_to_arterial_road.nearest_arterial_road_data_year, + dist_pin_to_bike_trail.nearest_bike_trail_id, dist_pin_to_bike_trail.nearest_bike_trail_name, dist_pin_to_bike_trail.nearest_bike_trail_dist_ft, @@ -29,6 +38,14 @@ SELECT dist_pin_to_cemetery.nearest_cemetery_dist_ft, dist_pin_to_cemetery.nearest_cemetery_data_year, + dist_pin_to_collector_road.nearest_collector_road_name, + dist_pin_to_collector_road.nearest_collector_road_daily_traffic, + dist_pin_to_collector_road.nearest_collector_road_speed_limit, + dist_pin_to_collector_road.nearest_collector_road_surface_type, + dist_pin_to_collector_road.nearest_collector_road_lanes, + dist_pin_to_collector_road.nearest_collector_road_dist_ft, + dist_pin_to_collector_road.nearest_collector_road_data_year, + dist_pin_to_cta_route.nearest_cta_route_id, dist_pin_to_cta_route.nearest_cta_route_name, dist_pin_to_cta_route.nearest_cta_route_dist_ft, @@ -47,6 +64,14 @@ SELECT dist_pin_to_grocery_store.nearest_grocery_store_dist_ft, dist_pin_to_grocery_store.nearest_grocery_store_data_year, + dist_pin_to_highway_road.nearest_highway_road_name, + dist_pin_to_highway_road.nearest_highway_road_daily_traffic, + dist_pin_to_highway_road.nearest_highway_road_speed_limit, + dist_pin_to_highway_road.nearest_highway_road_surface_type, + dist_pin_to_highway_road.nearest_highway_road_lanes, + dist_pin_to_highway_road.nearest_highway_road_dist_ft, + dist_pin_to_highway_road.nearest_highway_road_data_year, + dist_pin_to_hospital.nearest_hospital_gnis_code, dist_pin_to_hospital.nearest_hospital_name, dist_pin_to_hospital.nearest_hospital_dist_ft, @@ -130,6 +155,11 @@ LEFT JOIN {{ ref('proximity.dist_pin_to_airport') }} AS dist_pin_to_airport ON pin.pin10 = dist_pin_to_airport.pin10 AND pin.year = dist_pin_to_airport.year +LEFT JOIN + {{ 
ref('proximity.dist_pin_to_arterial_road') }} + AS dist_pin_to_arterial_road + ON pin.pin10 = dist_pin_to_arterial_road.pin10 + AND pin.year = dist_pin_to_arterial_road.year LEFT JOIN {{ ref('proximity.dist_pin_to_bike_trail') }} AS dist_pin_to_bike_trail ON pin.pin10 = dist_pin_to_bike_trail.pin10 @@ -138,6 +168,11 @@ LEFT JOIN {{ ref('proximity.dist_pin_to_cemetery') }} AS dist_pin_to_cemetery ON pin.pin10 = dist_pin_to_cemetery.pin10 AND pin.year = dist_pin_to_cemetery.year +LEFT JOIN + {{ ref('proximity.dist_pin_to_collector_road') }} + AS dist_pin_to_collector_road + ON pin.pin10 = dist_pin_to_collector_road.pin10 + AND pin.year = dist_pin_to_collector_road.year LEFT JOIN {{ ref('proximity.dist_pin_to_cta_route') }} AS dist_pin_to_cta_route ON pin.pin10 = dist_pin_to_cta_route.pin10 @@ -155,6 +190,11 @@ LEFT JOIN AS dist_pin_to_grocery_store ON pin.pin10 = dist_pin_to_grocery_store.pin10 AND pin.year = dist_pin_to_grocery_store.year +LEFT JOIN + {{ ref('proximity.dist_pin_to_highway_road') }} + AS dist_pin_to_highway_road + ON pin.pin10 = dist_pin_to_highway_road.pin10 + AND pin.year = dist_pin_to_highway_road.year LEFT JOIN {{ ref('proximity.dist_pin_to_hospital') }} AS dist_pin_to_hospital ON pin.pin10 = dist_pin_to_hospital.pin10 From f93fefde32770b13dfcebbe18f5ca050975a5d17 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 14 Nov 2024 22:11:08 +0000 Subject: [PATCH 077/166] Add to fill --- .../proximity.vw_pin10_proximity_fill.sql | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/dbt/models/proximity/proximity.vw_pin10_proximity_fill.sql b/dbt/models/proximity/proximity.vw_pin10_proximity_fill.sql index e45f1a4da..cd7eb235e 100644 --- a/dbt/models/proximity/proximity.vw_pin10_proximity_fill.sql +++ b/dbt/models/proximity/proximity.vw_pin10_proximity_fill.sql @@ -22,6 +22,15 @@ SELECT dist_pin_to_airport.airport_midway_dist_ft, dist_pin_to_airport.airport_dnl_total, dist_pin_to_airport.airport_data_year, + + 
dist_pin_to_arterial_road.nearest_arterial_road_name, + dist_pin_to_arterial_road.nearest_arterial_road_dist_ft, + dist_pin_to_arterial_road.nearest_arterial_road_daily_traffic, + dist_pin_to_arterial_road.nearest_arterial_road_speed_limit, + dist_pin_to_arterial_road.nearest_arterial_road_surface_type, + dist_pin_to_arterial_road.nearest_arterial_road_lanes, + dist_pin_to_arterial_road.nearest_arterial_road_data_year, + dist_pin_to_bike_trail.nearest_bike_trail_id, dist_pin_to_bike_trail.nearest_bike_trail_name, dist_pin_to_bike_trail.nearest_bike_trail_dist_ft, @@ -32,6 +41,14 @@ SELECT dist_pin_to_cemetery.nearest_cemetery_dist_ft, dist_pin_to_cemetery.nearest_cemetery_data_year, + dist_pin_to_collector_road.nearest_collector_road_name, + dist_pin_to_collector_road.nearest_collector_road_dist_ft, + dist_pin_to_collector_road.nearest_collector_road_daily_traffic, + dist_pin_to_collector_road.nearest_collector_road_speed_limit, + dist_pin_to_collector_road.nearest_collector_road_surface_type, + dist_pin_to_collector_road.nearest_collector_road_lanes, + dist_pin_to_collector_road.nearest_collector_road_data_year, + dist_pin_to_cta_route.nearest_cta_route_id, dist_pin_to_cta_route.nearest_cta_route_name, dist_pin_to_cta_route.nearest_cta_route_dist_ft, @@ -50,6 +67,13 @@ SELECT dist_pin_to_grocery_store.nearest_grocery_store_dist_ft, dist_pin_to_grocery_store.nearest_grocery_store_data_year, + dist_pin_to_highway_road.nearest_highway_road_name, + dist_pin_to_highway_road.nearest_highway_road_dist_ft, + dist_pin_to_highway_road.nearest_highway_road_daily_traffic, + dist_pin_to_highway_road.nearest_highway_road_speed_limit, + dist_pin_to_highway_road.nearest_highway_road_surface_type, + dist_pin_to_highway_road.nearest_highway_road_lanes, + dist_pin_to_hospital.nearest_hospital_gnis_code, dist_pin_to_hospital.nearest_hospital_name, dist_pin_to_hospital.nearest_hospital_dist_ft, @@ -138,6 +162,11 @@ LEFT JOIN {{ ref('proximity.dist_pin_to_airport') }} AS 
dist_pin_to_airport ON pin.pin10 = dist_pin_to_airport.pin10 AND cyf.airport_data_year = dist_pin_to_airport.year +LEFT JOIN + {{ ref('proximity.dist_pin_to_arterial_road') }} + AS dist_pin_to_arterial_road + ON pin.pin10 = dist_pin_to_arterial_road.pin10 + AND cyf.nearest_arterial_road_data_year = dist_pin_to_arterial_road.year LEFT JOIN {{ ref('proximity.dist_pin_to_bike_trail') }} AS dist_pin_to_bike_trail ON pin.pin10 = dist_pin_to_bike_trail.pin10 @@ -146,6 +175,11 @@ LEFT JOIN {{ ref('proximity.dist_pin_to_cemetery') }} AS dist_pin_to_cemetery ON pin.pin10 = dist_pin_to_cemetery.pin10 AND cyf.nearest_cemetery_data_year = dist_pin_to_cemetery.year +LEFT JOIN + {{ ref('proximity.dist_pin_to_collector_road') }} + AS dist_pin_to_collector_road + ON pin.pin10 = dist_pin_to_collector_road.pin10 + AND cyf.nearest_collector_road_data_year = dist_pin_to_collector_road.year LEFT JOIN {{ ref('proximity.dist_pin_to_cta_route') }} AS dist_pin_to_cta_route ON pin.pin10 = dist_pin_to_cta_route.pin10 @@ -163,6 +197,10 @@ LEFT JOIN AS dist_pin_to_grocery_store ON pin.pin10 = dist_pin_to_grocery_store.pin10 AND cyf.nearest_grocery_store_data_year = dist_pin_to_grocery_store.year +LEFT JOIN + {{ ref('proximity.dist_pin_to_highway_road') }} AS dist_pin_to_highway_road + ON pin.pin10 = dist_pin_to_highway_road.pin10 + AND cyf.nearest_highway_road_data_year = dist_pin_to_highway_road.year LEFT JOIN {{ ref('proximity.dist_pin_to_hospital') }} AS dist_pin_to_hospital ON pin.pin10 = dist_pin_to_hospital.pin10 From 53ef3be92373e3973d31677704dc2f554e0906ea Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 14 Nov 2024 22:55:36 +0000 Subject: [PATCH 078/166] Improve docs --- dbt/models/proximity/columns.md | 145 ++++++++++++++++++++++++++++++++ dbt/models/proximity/docs.md | 40 +++++++++ dbt/models/proximity/schema.yml | 9 ++ 3 files changed, 194 insertions(+) diff --git a/dbt/models/proximity/columns.md b/dbt/models/proximity/columns.md index 8f9ad3a1b..7858effdd 100644 --- 
a/dbt/models/proximity/columns.md +++ b/dbt/models/proximity/columns.md @@ -49,6 +49,54 @@ Distance to centroid of Midway airport (feet) Distance to centroid of O'Hare airport (feet) {% enddocs %} +## nearest_arterial_road_name + +{% docs column_nearest_arterial_road_name %} +Nearest arterial road name. + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_arterial_road_dist_ft + +{% docs column_nearest_arterial_road_dist_ft %} +Distance to nearest arterial road. + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_arterial_road_daily_traffic + +{% docs column_nearest_arterial_road_daily_traffic %} +Daily traffic of nearest arterial road. + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_arterial_road_lanes + +{% docs column_nearest_arterial_road_daily_traffic %} +Number of lanes for the nearest arterial road. + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_arterial_road_surface type + +{% docs column_nearest_arterial_road_daily_traffic %} +Surface type for the nearest arterial road (for example brick, stone, etc.). + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_arterial_road_speed_limit + +{% docs column_nearest_arterial_road_daily_traffic %} +Speed limit for the nearest arterial road + +Road data sourced from Illinois Department of Transportation +{% enddocs %} + ## avg_school_rating_in_half_mile {% docs column_avg_school_rating_in_half_mile %} @@ -114,6 +162,53 @@ Nearest cemetery name. Cemetery data sourced from Cook County GIS {% enddocs %} +## nearest_collector_road_name + +{% docs column_nearest_collector_road_name %} +Nearest collector road name. + +Road data sourced from Illinois Department of Transportation. 
+{% enddocs %} + +## nearest_collector_road_dist_ft + +{% docs column_nearest_collector_road_dist_ft %} +Distance to nearest collector road. + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_collector_road_daily_traffic + +{% docs column_nearest_collector_road_daily_traffic %} +Daily traffic of nearest collector road. + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_collector_road_lanes + +{% docs column_nearest_collector_road_daily_traffic %} +Number of lanes for the nearest collector road. + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_collector_road_surface type + +{% docs column_nearest_collector_road_daily_traffic %} +Surface type for the nearest collector road (for example brick, stone, etc.). + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_collector_road_speed_limit + +{% docs column_nearest_collector_road_daily_traffic %} +Speed limit for the nearest collector road + +Road data sourced from Illinois Department of Transportation +{% enddocs %} ## nearest_cta_route_dist_ft {% docs column_nearest_cta_route_dist_ft %} @@ -199,6 +294,56 @@ Nearest grocery store distance (feet) Nearest grocery store name via OSM {% enddocs %} + +## nearest_highway_road_name + +{% docs column_nearest_highway_road_name %} +Nearest highway road name. + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_highway_road_dist_ft + +{% docs column_nearest_highway_road_dist_ft %} +Distance to nearest highway road. + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_highway_road_daily_traffic + +{% docs column_nearest_highway_road_daily_traffic %} +Daily traffic of nearest highway road. + +Road data sourced from Illinois Department of Transportation. 
+{% enddocs %} + +## nearest_highway_road_lanes + +{% docs column_nearest_arterial_road_daily_traffic %} +Number of lanes for the nearest highway road. + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_highway_road_surface type + +{% docs column_nearest_highway_road_daily_traffic %} +Surface type for the nearest highway road (for example brick, stone, etc.). + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_highway_road_speed_limit + +{% docs column_nearest_highway_road_daily_traffic %} +Speed limit for the nearest highway road + +Road data sourced from Illinois Department of Transportation +{% enddocs %} + + ## nearest_hospital_dist_ft {% docs column_nearest_hospital_dist_ft %} diff --git a/dbt/models/proximity/docs.md b/dbt/models/proximity/docs.md index 55c5430e8..5220a28d9 100644 --- a/dbt/models/proximity/docs.md +++ b/dbt/models/proximity/docs.md @@ -38,6 +38,19 @@ as a result of contributions from both airports plus baseline DNL of 50. **Primary Key**: `pin10`, `year` {% enddocs %} +# dist_pin_to_arterial_road + +{% docs table_dist_pin_to_arterial_road %} +Distance from each PIN to the nearest arterial road. Data is derived from Illinois Department of Transportation + Added features include + - lanes + - average daily traffic + - speed limit + - road surface + +**Primary Key**: `pin10`, `year` +{% enddocs %} + # dist_pin_to_bike_trail {% docs table_dist_pin_to_bike_trail %} @@ -54,6 +67,20 @@ Distance from each PIN to the nearest cemetery. **Primary Key**: `pin10`, `year` {% enddocs %} +# dist_pin_to_collector_road + +{% docs table_dist_pin_to_collector_road %} +Distance from each PIN to the nearest collector road. 
Data is derived from Illinois Department of Transportation + Added features include + - lanes + - average daily traffic + - speed limit + - road surface + +**Primary Key**: `pin10`, `year` +{% enddocs %} + + # dist_pin_to_cta_route {% docs table_dist_pin_to_cta_route %} @@ -100,6 +127,19 @@ Distance from each PIN to the nearest grocery store. Locations sourced from Open **Primary Key**: `pin10`, `year` {% enddocs %} +# dist_pin_to_highway_road + +{% docs table_dist_pin_to_highway_road %} +Distance from each PIN to the nearest highway. Data is derived from Illinois Department of Transportation + Added features include + - lanes + - average daily traffic + - speed limit + - road surface + +**Primary Key**: `pin10`, `year` +{% enddocs %} + # dist_pin_to_hospital {% docs table_dist_pin_to_hospital %} diff --git a/dbt/models/proximity/schema.yml b/dbt/models/proximity/schema.yml index 14608d813..f27f6f639 100644 --- a/dbt/models/proximity/schema.yml +++ b/dbt/models/proximity/schema.yml @@ -14,12 +14,18 @@ models: - name: proximity.dist_pin_to_airport description: '{{ doc("table_dist_pin_to_airport") }}' + - name: proximity.dist_pin_to_arterial_road + description: '{{ doc("table_dist_pin_to_arterial_road") }}' + - name: proximity.dist_pin_to_bike_trail description: '{{ doc("table_dist_pin_to_bike_trail") }}' - name: proximity.dist_pin_to_cemetery description: '{{ doc("table_dist_pin_to_cemetery") }}' + -name: proximity.dist_pin_to_collector_road + description: '{{ doc("table_dist_pin_to_collector_road") }}' + - name: proximity.dist_pin_to_cta_route description: '{{ doc("table_dist_pin_to_cta_route") }}' @@ -29,6 +35,9 @@ models: - name: proximity.dist_pin_to_golf_course description: '{{ doc("table_dist_pin_to_golf_course") }}' + -name: proximity.dist_pin_to_highway_road + description: '{{ doc("table_dist_pin_to_highway_road") }}' + - name: proximity.dist_pin_to_hospital description: '{{ doc("table_dist_pin_to_hospital") }}' From bddc0c860253947f6d20938c5e7796ae3b3f0ca4 
Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 15 Nov 2024 16:40:59 +0000 Subject: [PATCH 079/166] Reorder columns --- dbt/models/proximity/columns.md | 82 +++++++++++++++++---------------- 1 file changed, 42 insertions(+), 40 deletions(-) diff --git a/dbt/models/proximity/columns.md b/dbt/models/proximity/columns.md index 7858effdd..08e8df0bb 100644 --- a/dbt/models/proximity/columns.md +++ b/dbt/models/proximity/columns.md @@ -49,10 +49,10 @@ Distance to centroid of Midway airport (feet) Distance to centroid of O'Hare airport (feet) {% enddocs %} -## nearest_arterial_road_name +## nearest_arterial_road_daily_traffic -{% docs column_nearest_arterial_road_name %} -Nearest arterial road name. +{% docs column_nearest_arterial_road_daily_traffic %} +Daily traffic of nearest arterial road. Road data sourced from Illinois Department of Transportation. {% enddocs %} @@ -65,14 +65,6 @@ Distance to nearest arterial road. Road data sourced from Illinois Department of Transportation. {% enddocs %} -## nearest_arterial_road_daily_traffic - -{% docs column_nearest_arterial_road_daily_traffic %} -Daily traffic of nearest arterial road. - -Road data sourced from Illinois Department of Transportation. -{% enddocs %} - ## nearest_arterial_road_lanes {% docs column_nearest_arterial_road_daily_traffic %} @@ -81,10 +73,10 @@ Number of lanes for the nearest arterial road. Road data sourced from Illinois Department of Transportation. {% enddocs %} -## nearest_arterial_road_surface type +## nearest_arterial_road_name -{% docs column_nearest_arterial_road_daily_traffic %} -Surface type for the nearest arterial road (for example brick, stone, etc.). +{% docs column_nearest_arterial_road_name %} +Nearest arterial road name. Road data sourced from Illinois Department of Transportation. 
{% enddocs %} @@ -97,6 +89,16 @@ Speed limit for the nearest arterial road Road data sourced from Illinois Department of Transportation {% enddocs %} + +## nearest_arterial_road_surface type + +{% docs column_nearest_arterial_road_daily_traffic %} +Surface type for the nearest arterial road (for example brick, stone, etc.). + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + + ## avg_school_rating_in_half_mile {% docs column_avg_school_rating_in_half_mile %} @@ -162,10 +164,10 @@ Nearest cemetery name. Cemetery data sourced from Cook County GIS {% enddocs %} -## nearest_collector_road_name +## nearest_collector_road_daily_traffic -{% docs column_nearest_collector_road_name %} -Nearest collector road name. +{% docs column_nearest_collector_road_daily_traffic %} +Daily traffic of nearest collector road. Road data sourced from Illinois Department of Transportation. {% enddocs %} @@ -178,14 +180,6 @@ Distance to nearest collector road. Road data sourced from Illinois Department of Transportation. {% enddocs %} -## nearest_collector_road_daily_traffic - -{% docs column_nearest_collector_road_daily_traffic %} -Daily traffic of nearest collector road. - -Road data sourced from Illinois Department of Transportation. -{% enddocs %} - ## nearest_collector_road_lanes {% docs column_nearest_collector_road_daily_traffic %} @@ -194,10 +188,10 @@ Number of lanes for the nearest collector road. Road data sourced from Illinois Department of Transportation. {% enddocs %} -## nearest_collector_road_surface type +## nearest_collector_road_name -{% docs column_nearest_collector_road_daily_traffic %} -Surface type for the nearest collector road (for example brick, stone, etc.). +{% docs column_nearest_collector_road_name %} +Nearest collector road name. Road data sourced from Illinois Department of Transportation. 
{% enddocs %} @@ -209,6 +203,15 @@ Speed limit for the nearest collector road Road data sourced from Illinois Department of Transportation {% enddocs %} + +## nearest_collector_road_surface_type + +{% docs column_nearest_collector_road_daily_traffic %} +Surface type for the nearest collector road (for example brick, stone, etc.). + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + ## nearest_cta_route_dist_ft {% docs column_nearest_cta_route_dist_ft %} @@ -294,11 +297,10 @@ Nearest grocery store distance (feet) Nearest grocery store name via OSM {% enddocs %} +## nearest_highway_road_daily_traffic -## nearest_highway_road_name - -{% docs column_nearest_highway_road_name %} -Nearest highway road name. +{% docs column_nearest_highway_road_daily_traffic %} +Daily traffic of nearest highway road. Road data sourced from Illinois Department of Transportation. {% enddocs %} @@ -311,18 +313,18 @@ Distance to nearest highway road. Road data sourced from Illinois Department of Transportation. {% enddocs %} -## nearest_highway_road_daily_traffic +## nearest_highway_road_lanes -{% docs column_nearest_highway_road_daily_traffic %} -Daily traffic of nearest highway road. +{% docs column_nearest_arterial_road_daily_traffic %} +Number of lanes for the nearest highway road. Road data sourced from Illinois Department of Transportation. {% enddocs %} -## nearest_highway_road_lanes +## nearest_highway_road_name -{% docs column_nearest_arterial_road_daily_traffic %} -Number of lanes for the nearest highway road. +{% docs column_nearest_highway_road_name %} +Nearest highway road name. Road data sourced from Illinois Department of Transportation. {% enddocs %} @@ -338,9 +340,9 @@ Road data sourced from Illinois Department of Transportation. ## nearest_highway_road_speed_limit {% docs column_nearest_highway_road_daily_traffic %} -Speed limit for the nearest highway road +Speed limit for the nearest highway road. 
-Road data sourced from Illinois Department of Transportation +Road data sourced from Illinois Department of Transportation. {% enddocs %} From ed414a9c83296ec0fa9c934529667af12c1ec731 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 15 Nov 2024 16:55:06 +0000 Subject: [PATCH 080/166] Reorder columns --- dbt/models/proximity/columns.md | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/dbt/models/proximity/columns.md b/dbt/models/proximity/columns.md index 08e8df0bb..06e99e991 100644 --- a/dbt/models/proximity/columns.md +++ b/dbt/models/proximity/columns.md @@ -49,6 +49,15 @@ Distance to centroid of Midway airport (feet) Distance to centroid of O'Hare airport (feet) {% enddocs %} +## avg_school_rating_in_half_mile + +{% docs column_avg_school_rating_in_half_mile %} +Average school rating of schools within half mile. + +Schools of any type (elementary, secondary, etc.) are included. +School ratings sourced from [GreatSchools](https://www.greatschools.org/) +{% enddocs %} + ## nearest_arterial_road_daily_traffic {% docs column_nearest_arterial_road_daily_traffic %} @@ -89,7 +98,6 @@ Speed limit for the nearest arterial road Road data sourced from Illinois Department of Transportation {% enddocs %} - ## nearest_arterial_road_surface type {% docs column_nearest_arterial_road_daily_traffic %} @@ -98,16 +106,6 @@ Surface type for the nearest arterial road (for example brick, stone, etc.). Road data sourced from Illinois Department of Transportation. {% enddocs %} - -## avg_school_rating_in_half_mile - -{% docs column_avg_school_rating_in_half_mile %} -Average school rating of schools within half mile. - -Schools of any type (elementary, secondary, etc.) are included. 
-School ratings sourced from [GreatSchools](https://www.greatschools.org/) -{% enddocs %} - ## lake_michigan_dist_ft {% docs column_lake_michigan_dist_ft %} From 623a80189ed29731f58efbaec05d72f0443b7551 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 15 Nov 2024 17:01:09 +0000 Subject: [PATCH 081/166] Fix schema --- dbt/models/proximity/schema.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbt/models/proximity/schema.yml b/dbt/models/proximity/schema.yml index f27f6f639..0e4db4764 100644 --- a/dbt/models/proximity/schema.yml +++ b/dbt/models/proximity/schema.yml @@ -23,7 +23,7 @@ models: - name: proximity.dist_pin_to_cemetery description: '{{ doc("table_dist_pin_to_cemetery") }}' - -name: proximity.dist_pin_to_collector_road + - name: proximity.dist_pin_to_collector_road description: '{{ doc("table_dist_pin_to_collector_road") }}' - name: proximity.dist_pin_to_cta_route @@ -35,7 +35,7 @@ models: - name: proximity.dist_pin_to_golf_course description: '{{ doc("table_dist_pin_to_golf_course") }}' - -name: proximity.dist_pin_to_highway_road + - name: proximity.dist_pin_to_highway_road description: '{{ doc("table_dist_pin_to_highway_road") }}' - name: proximity.dist_pin_to_hospital From cc24ca4944a424a392dae22d2506e37aaaf98f99 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 15 Nov 2024 17:04:50 +0000 Subject: [PATCH 082/166] Fix columns --- dbt/models/proximity/columns.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbt/models/proximity/columns.md b/dbt/models/proximity/columns.md index 06e99e991..80ac06a24 100644 --- a/dbt/models/proximity/columns.md +++ b/dbt/models/proximity/columns.md @@ -76,7 +76,7 @@ Road data sourced from Illinois Department of Transportation. ## nearest_arterial_road_lanes -{% docs column_nearest_arterial_road_daily_traffic %} +{% docs column_nearest_arterial_road_lanes %} Number of lanes for the nearest arterial road. 
Road data sourced from Illinois Department of Transportation. @@ -92,7 +92,7 @@ Road data sourced from Illinois Department of Transportation. ## nearest_arterial_road_speed_limit -{% docs column_nearest_arterial_road_daily_traffic %} +{% docs column_nearest_arterial_road_speed_limit %} Speed limit for the nearest arterial road Road data sourced from Illinois Department of Transportation @@ -100,7 +100,7 @@ Road data sourced from Illinois Department of Transportation ## nearest_arterial_road_surface type -{% docs column_nearest_arterial_road_daily_traffic %} +{% docs column_nearest_arterial_road_surface_type %} Surface type for the nearest arterial road (for example brick, stone, etc.). Road data sourced from Illinois Department of Transportation. @@ -313,7 +313,7 @@ Road data sourced from Illinois Department of Transportation. ## nearest_highway_road_lanes -{% docs column_nearest_arterial_road_daily_traffic %} +{% docs column_nearest_highway_road_lanes %} Number of lanes for the nearest highway road. Road data sourced from Illinois Department of Transportation. From ca3ab540a30d033d3d7a89a41f243a447cb0df11 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 15 Nov 2024 17:12:02 +0000 Subject: [PATCH 083/166] Reorder columns --- dbt/models/proximity/columns.md | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/dbt/models/proximity/columns.md b/dbt/models/proximity/columns.md index 80ac06a24..da943c387 100644 --- a/dbt/models/proximity/columns.md +++ b/dbt/models/proximity/columns.md @@ -58,6 +58,14 @@ Schools of any type (elementary, secondary, etc.) are included. School ratings sourced from [GreatSchools](https://www.greatschools.org/) {% enddocs %} +## lake_michigan_dist_ft + +{% docs column_lake_michigan_dist_ft %} +Distance to Lake Michigan shoreline (feet). 
+ +Shoreline sourced from Census hydrography files +{% enddocs %} + ## nearest_arterial_road_daily_traffic {% docs column_nearest_arterial_road_daily_traffic %} @@ -106,14 +114,6 @@ Surface type for the nearest arterial road (for example brick, stone, etc.). Road data sourced from Illinois Department of Transportation. {% enddocs %} -## lake_michigan_dist_ft - -{% docs column_lake_michigan_dist_ft %} -Distance to Lake Michigan shoreline (feet). - -Shoreline sourced from Census hydrography files -{% enddocs %} - ## nearest_bike_trail_dist_ft {% docs column_nearest_bike_trail_dist_ft %} @@ -327,23 +327,22 @@ Nearest highway road name. Road data sourced from Illinois Department of Transportation. {% enddocs %} -## nearest_highway_road_surface type +## nearest_highway_road_speed_limit {% docs column_nearest_highway_road_daily_traffic %} -Surface type for the nearest highway road (for example brick, stone, etc.). +Speed limit for the nearest highway road. Road data sourced from Illinois Department of Transportation. {% enddocs %} -## nearest_highway_road_speed_limit +## nearest_highway_road_surface type {% docs column_nearest_highway_road_daily_traffic %} -Speed limit for the nearest highway road. +Surface type for the nearest highway road (for example brick, stone, etc.). Road data sourced from Illinois Department of Transportation. {% enddocs %} - ## nearest_hospital_dist_ft {% docs column_nearest_hospital_dist_ft %} From 159df1c4a0607af0f1079e4115ae612fa6b9fef2 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 15 Nov 2024 17:13:35 +0000 Subject: [PATCH 084/166] Rename columns --- dbt/models/proximity/columns.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dbt/models/proximity/columns.md b/dbt/models/proximity/columns.md index da943c387..7aa6164da 100644 --- a/dbt/models/proximity/columns.md +++ b/dbt/models/proximity/columns.md @@ -180,7 +180,7 @@ Road data sourced from Illinois Department of Transportation. 
## nearest_collector_road_lanes -{% docs column_nearest_collector_road_daily_traffic %} +{% docs column_nearest_collector_road_lanes %} Number of lanes for the nearest collector road. Road data sourced from Illinois Department of Transportation. @@ -196,7 +196,7 @@ Road data sourced from Illinois Department of Transportation. ## nearest_collector_road_speed_limit -{% docs column_nearest_collector_road_daily_traffic %} +{% docs column_nearest_collector_road_speed_limit %} Speed limit for the nearest collector road Road data sourced from Illinois Department of Transportation @@ -204,7 +204,7 @@ Road data sourced from Illinois Department of Transportation ## nearest_collector_road_surface_type -{% docs column_nearest_collector_road_daily_traffic %} +{% docs column_nearest_collector_road_surface_type %} Surface type for the nearest collector road (for example brick, stone, etc.). Road data sourced from Illinois Department of Transportation. @@ -329,7 +329,7 @@ Road data sourced from Illinois Department of Transportation. ## nearest_highway_road_speed_limit -{% docs column_nearest_highway_road_daily_traffic %} +{% docs column_nearest_highway_speed_limit %} Speed limit for the nearest highway road. Road data sourced from Illinois Department of Transportation. @@ -337,7 +337,7 @@ Road data sourced from Illinois Department of Transportation. ## nearest_highway_road_surface type -{% docs column_nearest_highway_road_daily_traffic %} +{% docs column_nearest_highway_road_surface_type %} Surface type for the nearest highway road (for example brick, stone, etc.). Road data sourced from Illinois Department of Transportation. 
From 5d8ee596044418560f1934ddf3f99f9dbcf2b1cb Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 15 Nov 2024 17:57:52 +0000 Subject: [PATCH 085/166] Add shared input --- dbt/models/model/model.vw_pin_shared_input.sql | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dbt/models/model/model.vw_pin_shared_input.sql b/dbt/models/model/model.vw_pin_shared_input.sql index a7d68626e..c513b1e1d 100644 --- a/dbt/models/model/model.vw_pin_shared_input.sql +++ b/dbt/models/model/model.vw_pin_shared_input.sql @@ -253,12 +253,15 @@ SELECT -- PIN proximity distance variables vwpf.airport_dnl_total AS prox_airport_dnl_total, --new + vwpf.arterial_road_dist_ft AS prox_arterial_road_dist_ft, vwpf.nearest_bike_trail_dist_ft AS prox_nearest_bike_trail_dist_ft, vwpf.nearest_cemetery_dist_ft AS prox_nearest_cemetery_dist_ft, + vwpf.nearest_collector_dist_ft AS prox_nearest_collector_dist_ft, vwpf.nearest_cta_route_dist_ft AS prox_nearest_cta_route_dist_ft, vwpf.nearest_cta_stop_dist_ft AS prox_nearest_cta_stop_dist_ft, vwpf.nearest_golf_course_dist_ft AS prox_nearest_golf_course_dist_ft, vwpf.nearest_grocery_store_dist_ft AS prox_nearest_grocery_store_dist_ft, + vwpf.nearest_highway_road_dist_ft AS prox_nearest_highway_road_dist_ft, vwpf.nearest_hospital_dist_ft AS prox_nearest_hospital_dist_ft, vwpf.lake_michigan_dist_ft AS prox_lake_michigan_dist_ft, vwpf.nearest_major_road_dist_ft AS prox_nearest_major_road_dist_ft, From ecb5fe041e1659e715f64f99f365c320fdec8859 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 15 Nov 2024 18:24:22 +0000 Subject: [PATCH 086/166] Add crosswalk year fill --- .../proximity.crosswalk_year_fill.sql | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/dbt/models/proximity/proximity.crosswalk_year_fill.sql b/dbt/models/proximity/proximity.crosswalk_year_fill.sql index 872d05b1f..fac4b9428 100644 --- a/dbt/models/proximity/proximity.crosswalk_year_fill.sql +++ b/dbt/models/proximity/proximity.crosswalk_year_fill.sql @@ -22,10 
+22,14 @@ WITH unfilled AS ( AS num_school_rating_data_year, MAX(dist_pin_to_airport.airport_data_year) AS airport_data_year, + MAX(dist_pin_to_arterial_road.arterial_road_data_year) + AS arterial_road_data_year, MAX(dist_pin_to_bike_trail.nearest_bike_trail_data_year) AS nearest_bike_trail_data_year, MAX(dist_pin_to_cemetery.nearest_cemetery_data_year) AS nearest_cemetery_data_year, + MAX(dist_pin_to_collector_road.collector_road_data_year) + AS collector_road_data_year, MAX(dist_pin_to_cta_route.nearest_cta_route_data_year) AS nearest_cta_route_data_year, MAX(dist_pin_to_cta_stop.nearest_cta_stop_data_year) @@ -34,6 +38,8 @@ WITH unfilled AS ( AS nearest_golf_course_data_year, MAX(dist_pin_to_grocery_store.nearest_grocery_store_data_year) AS nearest_grocery_store_data_year, + MAX(dist_pin_to_highway_road.highway_road_data_year) + AS highway_road_data_year, MAX(dist_pin_to_hospital.nearest_hospital_data_year) AS nearest_hospital_data_year, MAX(dist_pin_to_lake_michigan.lake_michigan_data_year) @@ -90,6 +96,12 @@ WITH unfilled AS ( airport_data_year FROM {{ ref('proximity.dist_pin_to_airport' ) }} ) AS dist_pin_to_airport ON pin.year = dist_pin_to_airport.year + LEFT JOIN ( + SELECT DISTINCT + year, + arterial_road_data_year + FROM {{ ref('proximity.dist_pin_to_arterial_road' ) }} + ) AS dist_pin_to_arterial_road ON pin.year = dist_pin_to_arterial_road.year LEFT JOIN ( SELECT DISTINCT year, @@ -102,6 +114,13 @@ WITH unfilled AS ( nearest_cemetery_data_year FROM {{ ref('proximity.dist_pin_to_cemetery') }} ) AS dist_pin_to_cemetery ON pin.year = dist_pin_to_cemetery.year + LEFT JOIN ( + SELECT DISTINCT + year, + collector_road_data_year + FROM {{ ref('proximity.dist_pin_to_collector_road' ) }} + ) AS dist_pin_to_collector_road + ON pin.year = dist_pin_to_collector_road.year LEFT JOIN ( SELECT DISTINCT year, @@ -126,6 +145,12 @@ WITH unfilled AS ( nearest_grocery_store_data_year FROM {{ ref('proximity.dist_pin_to_grocery_store') }} ) AS dist_pin_to_grocery_store ON 
pin.year = dist_pin_to_grocery_store.year + LEFT JOIN ( + SELECT DISTINCT + year, + highway_road_data_year + FROM {{ ref('proximity.dist_pin_to_highway_road' ) }} + ) AS dist_pin_to_highway_road ON pin.year = dist_pin_to_highway_road.year LEFT JOIN ( SELECT DISTINCT year, From 15e6c3b102e92de9234ab4c68cc6471afbe38845 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 15 Nov 2024 19:59:49 +0000 Subject: [PATCH 087/166] add nearest_ --- .../proximity/proximity.crosswalk_year_fill.sql | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dbt/models/proximity/proximity.crosswalk_year_fill.sql b/dbt/models/proximity/proximity.crosswalk_year_fill.sql index fac4b9428..b678253c1 100644 --- a/dbt/models/proximity/proximity.crosswalk_year_fill.sql +++ b/dbt/models/proximity/proximity.crosswalk_year_fill.sql @@ -22,14 +22,14 @@ WITH unfilled AS ( AS num_school_rating_data_year, MAX(dist_pin_to_airport.airport_data_year) AS airport_data_year, - MAX(dist_pin_to_arterial_road.arterial_road_data_year) - AS arterial_road_data_year, + MAX(dist_pin_to_arterial_road.nearest_arterial_road_data_year) + AS nearest_arterial_road_data_year, MAX(dist_pin_to_bike_trail.nearest_bike_trail_data_year) AS nearest_bike_trail_data_year, MAX(dist_pin_to_cemetery.nearest_cemetery_data_year) AS nearest_cemetery_data_year, - MAX(dist_pin_to_collector_road.collector_road_data_year) - AS collector_road_data_year, + MAX(dist_pin_to_collector_road.nearest_collector_road_data_year) + AS nearest_collector_road_data_year, MAX(dist_pin_to_cta_route.nearest_cta_route_data_year) AS nearest_cta_route_data_year, MAX(dist_pin_to_cta_stop.nearest_cta_stop_data_year) @@ -38,8 +38,8 @@ WITH unfilled AS ( AS nearest_golf_course_data_year, MAX(dist_pin_to_grocery_store.nearest_grocery_store_data_year) AS nearest_grocery_store_data_year, - MAX(dist_pin_to_highway_road.highway_road_data_year) - AS highway_road_data_year, + MAX(dist_pin_to_highway_road.nearest_highway_road_data_year) + AS 
nearest_highway_road_data_year, MAX(dist_pin_to_hospital.nearest_hospital_data_year) AS nearest_hospital_data_year, MAX(dist_pin_to_lake_michigan.lake_michigan_data_year) From 88d62ea8e4b2accaa0f33e63b625ef740d47821c Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 15 Nov 2024 20:19:17 +0000 Subject: [PATCH 088/166] Add nearest --- dbt/models/proximity/proximity.crosswalk_year_fill.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbt/models/proximity/proximity.crosswalk_year_fill.sql b/dbt/models/proximity/proximity.crosswalk_year_fill.sql index b678253c1..b5abe6e0f 100644 --- a/dbt/models/proximity/proximity.crosswalk_year_fill.sql +++ b/dbt/models/proximity/proximity.crosswalk_year_fill.sql @@ -99,7 +99,7 @@ WITH unfilled AS ( LEFT JOIN ( SELECT DISTINCT year, - arterial_road_data_year + nearest_arterial_road_data_year FROM {{ ref('proximity.dist_pin_to_arterial_road' ) }} ) AS dist_pin_to_arterial_road ON pin.year = dist_pin_to_arterial_road.year LEFT JOIN ( @@ -117,7 +117,7 @@ WITH unfilled AS ( LEFT JOIN ( SELECT DISTINCT year, - collector_road_data_year + nearest_collector_road_data_year FROM {{ ref('proximity.dist_pin_to_collector_road' ) }} ) AS dist_pin_to_collector_road ON pin.year = dist_pin_to_collector_road.year @@ -148,7 +148,7 @@ WITH unfilled AS ( LEFT JOIN ( SELECT DISTINCT year, - highway_road_data_year + nearest_highway_road_data_year FROM {{ ref('proximity.dist_pin_to_highway_road' ) }} ) AS dist_pin_to_highway_road ON pin.year = dist_pin_to_highway_road.year LEFT JOIN ( From b3d3e5ff6c833ae1584a1d21a775112a4787d3f7 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 15 Nov 2024 20:45:19 +0000 Subject: [PATCH 089/166] add to cyf --- .../proximity.crosswalk_year_fill.sql | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/dbt/models/proximity/proximity.crosswalk_year_fill.sql b/dbt/models/proximity/proximity.crosswalk_year_fill.sql index b5abe6e0f..45658d2a5 100644 --- 
a/dbt/models/proximity/proximity.crosswalk_year_fill.sql +++ b/dbt/models/proximity/proximity.crosswalk_year_fill.sql @@ -267,6 +267,12 @@ SELECT IGNORE NULLS OVER (ORDER BY year DESC) ) AS airport_data_year, + COALESCE( + nearest_arterial_road_data_year, + LAST_VALUE(nearest_arterial_road_data_year) + IGNORE NULLS + OVER (ORDER BY year DESC) + ) AS nearest_arterial_road_data_year, COALESCE( nearest_bike_trail_data_year, LAST_VALUE(nearest_bike_trail_data_year) @@ -291,6 +297,12 @@ SELECT IGNORE NULLS OVER (ORDER BY year DESC) ) AS nearest_cta_stop_data_year, + COALESCE( + nearest_collector_road_data_year, + LAST_VALUE(nearest_collector_road_data_year) + IGNORE NULLS + OVER (ORDER BY year DESC) + ) AS nearest_collector_road_data_year, COALESCE( nearest_golf_course_data_year, LAST_VALUE(nearest_golf_course_data_year) @@ -303,6 +315,12 @@ SELECT IGNORE NULLS OVER (ORDER BY year DESC) ) AS nearest_grocery_store_data_year, + COALESCE( + nearest_highway_road_data_year, + LAST_VALUE(nearest_highway_road_data_year) + IGNORE NULLS + OVER (ORDER BY year DESC) + ) AS nearest_highway_road_data_year, COALESCE( nearest_hospital_data_year, LAST_VALUE(nearest_hospital_data_year) From fae063591e7bedb71db5cc188395c86a5795d851 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 15 Nov 2024 21:29:39 +0000 Subject: [PATCH 090/166] Add nearest --- dbt/models/model/model.vw_pin_shared_input.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/model/model.vw_pin_shared_input.sql b/dbt/models/model/model.vw_pin_shared_input.sql index c513b1e1d..d7351b403 100644 --- a/dbt/models/model/model.vw_pin_shared_input.sql +++ b/dbt/models/model/model.vw_pin_shared_input.sql @@ -253,7 +253,7 @@ SELECT -- PIN proximity distance variables vwpf.airport_dnl_total AS prox_airport_dnl_total, --new - vwpf.arterial_road_dist_ft AS prox_arterial_road_dist_ft, + vwpf.nearest_arterial_road_dist_ft AS prox_nearest_arterial_road_dist_ft, vwpf.nearest_bike_trail_dist_ft AS 
prox_nearest_bike_trail_dist_ft, vwpf.nearest_cemetery_dist_ft AS prox_nearest_cemetery_dist_ft, vwpf.nearest_collector_dist_ft AS prox_nearest_collector_dist_ft, From dadc1001497c77530e51f4003ed0fb4982992ca9 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 15 Nov 2024 22:32:00 +0000 Subject: [PATCH 091/166] Add to schema --- dbt/models/model/schema.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/dbt/models/model/schema.yml b/dbt/models/model/schema.yml index a4501d9a0..411c1ccd5 100644 --- a/dbt/models/model/schema.yml +++ b/dbt/models/model/schema.yml @@ -396,12 +396,18 @@ models: - &prox_lake_michigan_dist_ft name: prox_lake_michigan_dist_ft description: '{{ doc("column_lake_michigan_dist_ft") }}' + - &prox_nearest_arterial_road_dist_ft + name: prox_nearest_arterial_road_dist_ft + description: '{{ doc("column_prox_nearest_arterial_road_dist_ft") }}' - &prox_nearest_bike_trail_dist_ft name: prox_nearest_bike_trail_dist_ft description: '{{ doc("column_nearest_bike_trail_dist_ft") }}' - &prox_nearest_cemetery_dist_ft name: prox_nearest_cemetery_dist_ft description: '{{ doc("column_nearest_cemetery_dist_ft") }}' + - &prox_nearest_connector_road_dist_ft + name: prox_nearest_connector_road_dist_ft + description: '{{ doc("column_prox_nearest_connector_road_dist_ft") }}' - &prox_nearest_cta_route_dist_ft name: prox_nearest_cta_route_dist_ft description: '{{ doc("column_nearest_cta_route_dist_ft") }}' @@ -411,6 +417,9 @@ models: - &prox_nearest_golf_course_dist_ft name: prox_nearest_golf_course_dist_ft description: '{{ doc("column_nearest_golf_course_dist_ft") }}' + - &prox_nearest_highway_road_dist_ft + name: prox_nearest_highway_road_dist_ft + description: '{{ doc("column_prox_nearest_highway_road_dist_ft") }}' - &prox_nearest_hospital_dist_ft name: prox_nearest_hospital_dist_ft description: '{{ doc("column_nearest_hospital_dist_ft") }}' From 72a244a5d088dcde189ca645e5a07d1972a136b9 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Fri, 15 Nov 2024 
22:49:52 +0000 Subject: [PATCH 092/166] Add to models schema --- dbt/models/model/schema.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dbt/models/model/schema.yml b/dbt/models/model/schema.yml index 411c1ccd5..03f691b81 100644 --- a/dbt/models/model/schema.yml +++ b/dbt/models/model/schema.yml @@ -595,12 +595,15 @@ models: - *prox_airport_dnl_total - *prox_avg_school_rating_in_half_mile - *prox_lake_michigan_dist_ft + - *prox_nearest_arterial_road_dist_ft - *prox_nearest_bike_trail_dist_ft - *prox_nearest_cemetery_dist_ft + - *prox_nearest_connector_road_dist_ft - *prox_nearest_cta_route_dist_ft - *prox_nearest_cta_stop_dist_ft - *prox_nearest_golf_course_dist_ft - *prox_nearest_hospital_dist_ft + - *prox_nearest_highway_road_dist_ft - *prox_nearest_major_road_dist_ft - *prox_nearest_metra_route_dist_ft - *prox_nearest_metra_stop_dist_ft From b8963040c9cab7544fc4c8358687870c3888a70c Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Sun, 17 Nov 2024 00:19:25 +0000 Subject: [PATCH 093/166] Rename to roads --- dbt/models/spatial/docs.md | 32 +++++++++---------- dbt/models/spatial/schema.yml | 4 +-- ...-traffic.R => spatial-environment_roads.R} | 2 +- ...-traffic.R => spatial-environment-roads.R} | 2 +- 4 files changed, 20 insertions(+), 20 deletions(-) rename etl/scripts-ccao-data-raw-us-east-1/spatial/{spatial-environment-traffic.R => spatial-environment_roads.R} (98%) rename etl/scripts-ccao-data-warehouse-us-east-1/spatial/{spatial-environment-traffic.R => spatial-environment-roads.R} (99%) diff --git a/dbt/models/spatial/docs.md b/dbt/models/spatial/docs.md index 95bda9c5c..e5bac097e 100644 --- a/dbt/models/spatial/docs.md +++ b/dbt/models/spatial/docs.md @@ -391,6 +391,22 @@ Rail locations sourced from Cook County GIS. **Geometry:** `MULTILINESTRING` {% enddocs %} +# roads + +{% docs table_roads %} + +Illinois Department of Transportation data source from +[https://apps1.dot.illinois.gov/gist2/](https://apps1.dot.illinois.gov/gist2/). 
+Data focuses on five features; lanes, speed limits, traffic count, road type, +and surface type. Some columns are not present in all years of data (for example +speed limit in 2012) Data for columns is not universally present so we average +numeric values for roads which overlap and have a matching name. For example, +if segment B touches segment A and C with speed limits of 25 and 30, the speed +limit for segment B will be 27.5. + +**Geometry:** `MULTILINESTRING` +{% enddocs %} + # sanitation_district {% docs table_sanitation_district %} @@ -493,22 +509,6 @@ Includes townships within the City of Chicago, which are technically defunct. **Geometry:** `MULTIPOLYGON` {% enddocs %} -# traffic - -{% docs table_traffic %} - -Illinois Department of Transportation data source from -[https://apps1.dot.illinois.gov/gist2/](https://apps1.dot.illinois.gov/gist2/). -Data focuses on five features; lanes, speed limits, traffic count, road type, -and surface type. Some columns are not present in all years of data (for example -speed limit in 2012) Data for columns is not universally present so we average -numeric values for roads which overlap and have a matching name. For example, -if segment B touches segment A and C with speed limits of 25 and 30, the speed -limit for segment B will be 27.5. 
- -**Geometry:** `MULTILINESTRING` -{% enddocs %} - # transit_dict {% docs table_transit_dict %} diff --git a/dbt/models/spatial/schema.yml b/dbt/models/spatial/schema.yml index 513900073..9539f0db2 100644 --- a/dbt/models/spatial/schema.yml +++ b/dbt/models/spatial/schema.yml @@ -174,8 +174,8 @@ sources: - name: township description: '{{ doc("table_township") }}' - - name: traffic - description: '{{ doc("table_traffic") }}' + - name: roads + description: '{{ doc("table_roads") }}' - name: transit_dict description: '{{ doc("table_transit_dict") }}' diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_roads.R similarity index 98% rename from etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R rename to etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_roads.R index bc69fe4b2..4d14b0499 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_roads.R @@ -10,7 +10,7 @@ library(arrow) AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") output_bucket <- file.path( AWS_S3_RAW_BUCKET, - "spatial", "environment", "traffic" + "spatial", "environment", "roads" ) # Get list of available files diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-roads.R similarity index 99% rename from etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R rename to etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-roads.R index d98ed2c8a..e63cbfa9a 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-roads.R @@ -8,7 +8,7 @@ library(stringr) # Define the S3 bucket and folder path 
AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET") -s3_folder <- "spatial/environment/traffic" +s3_folder <- "spatial/environment/roads" output_bucket <- sub("/$", "", file.path(AWS_S3_WAREHOUSE_BUCKET, s3_folder)) # Re-coding of road type From 546cfefcdcd1f1b528c4446a695732a5c6f9825a Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Sun, 17 Nov 2024 00:24:33 +0000 Subject: [PATCH 094/166] Revert to traffic --- dbt/models/spatial/docs.md | 32 +++++++++---------- dbt/models/spatial/schema.yml | 4 +-- ..._roads.R => spatial-environment_traffic.R} | 2 +- ...-roads.R => spatial-environment-traffic.R} | 2 +- 4 files changed, 20 insertions(+), 20 deletions(-) rename etl/scripts-ccao-data-raw-us-east-1/spatial/{spatial-environment_roads.R => spatial-environment_traffic.R} (98%) rename etl/scripts-ccao-data-warehouse-us-east-1/spatial/{spatial-environment-roads.R => spatial-environment-traffic.R} (99%) diff --git a/dbt/models/spatial/docs.md b/dbt/models/spatial/docs.md index e5bac097e..95bda9c5c 100644 --- a/dbt/models/spatial/docs.md +++ b/dbt/models/spatial/docs.md @@ -391,22 +391,6 @@ Rail locations sourced from Cook County GIS. **Geometry:** `MULTILINESTRING` {% enddocs %} -# roads - -{% docs table_roads %} - -Illinois Department of Transportation data source from -[https://apps1.dot.illinois.gov/gist2/](https://apps1.dot.illinois.gov/gist2/). -Data focuses on five features; lanes, speed limits, traffic count, road type, -and surface type. Some columns are not present in all years of data (for example -speed limit in 2012) Data for columns is not universally present so we average -numeric values for roads which overlap and have a matching name. For example, -if segment B touches segment A and C with speed limits of 25 and 30, the speed -limit for segment B will be 27.5. 
- -**Geometry:** `MULTILINESTRING` -{% enddocs %} - # sanitation_district {% docs table_sanitation_district %} @@ -509,6 +493,22 @@ Includes townships within the City of Chicago, which are technically defunct. **Geometry:** `MULTIPOLYGON` {% enddocs %} +# traffic + +{% docs table_traffic %} + +Illinois Department of Transportation data source from +[https://apps1.dot.illinois.gov/gist2/](https://apps1.dot.illinois.gov/gist2/). +Data focuses on five features; lanes, speed limits, traffic count, road type, +and surface type. Some columns are not present in all years of data (for example +speed limit in 2012) Data for columns is not universally present so we average +numeric values for roads which overlap and have a matching name. For example, +if segment B touches segment A and C with speed limits of 25 and 30, the speed +limit for segment B will be 27.5. + +**Geometry:** `MULTILINESTRING` +{% enddocs %} + # transit_dict {% docs table_transit_dict %} diff --git a/dbt/models/spatial/schema.yml b/dbt/models/spatial/schema.yml index 9539f0db2..513900073 100644 --- a/dbt/models/spatial/schema.yml +++ b/dbt/models/spatial/schema.yml @@ -174,8 +174,8 @@ sources: - name: township description: '{{ doc("table_township") }}' - - name: roads - description: '{{ doc("table_roads") }}' + - name: traffic + description: '{{ doc("table_traffic") }}' - name: transit_dict description: '{{ doc("table_transit_dict") }}' diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_roads.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_traffic.R similarity index 98% rename from etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_roads.R rename to etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_traffic.R index 4d14b0499..bc69fe4b2 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_roads.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_traffic.R @@ -10,7 +10,7 @@ library(arrow) 
AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") output_bucket <- file.path( AWS_S3_RAW_BUCKET, - "spatial", "environment", "roads" + "spatial", "environment", "traffic" ) # Get list of available files diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-roads.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R similarity index 99% rename from etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-roads.R rename to etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R index e63cbfa9a..d98ed2c8a 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-roads.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R @@ -8,7 +8,7 @@ library(stringr) # Define the S3 bucket and folder path AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET") -s3_folder <- "spatial/environment/roads" +s3_folder <- "spatial/environment/traffic" output_bucket <- sub("/$", "", file.path(AWS_S3_WAREHOUSE_BUCKET, s3_folder)) # Re-coding of road type From 7d98745ecaecb5641a8083386ac4a68597e046e7 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Sun, 17 Nov 2024 02:35:23 +0000 Subject: [PATCH 095/166] Rename --- dbt/models/proximity/columns.md | 2 +- ...tial-environment-traffic.R => spatial-environment_traffic.R} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename etl/scripts-ccao-data-warehouse-us-east-1/spatial/{spatial-environment-traffic.R => spatial-environment_traffic.R} (100%) diff --git a/dbt/models/proximity/columns.md b/dbt/models/proximity/columns.md index 7aa6164da..581befd94 100644 --- a/dbt/models/proximity/columns.md +++ b/dbt/models/proximity/columns.md @@ -106,7 +106,7 @@ Speed limit for the nearest arterial road Road data sourced from Illinois Department of Transportation {% enddocs %} -## nearest_arterial_road_surface type +## 
nearest_arterial_road_surface_type {% docs column_nearest_arterial_road_surface_type %} Surface type for the nearest arterial road (for example brick, stone, etc.). diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment_traffic.R similarity index 100% rename from etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-traffic.R rename to etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment_traffic.R From f333fa1f0e19e42dd6d1078ee39e026ea5e08b63 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Sun, 17 Nov 2024 02:36:21 +0000 Subject: [PATCH 096/166] Delete file --- dbt/macros/nearest_feature_aggregation.sql | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 dbt/macros/nearest_feature_aggregation.sql diff --git a/dbt/macros/nearest_feature_aggregation.sql b/dbt/macros/nearest_feature_aggregation.sql deleted file mode 100644 index 7fa914ce4..000000000 --- a/dbt/macros/nearest_feature_aggregation.sql +++ /dev/null @@ -1,11 +0,0 @@ -{% macro nearest_feature_aggregation(base_columns, characteristics) %} - {%- for characteristic in characteristics %} - case - {%- for base in base_columns %} - when {{ base }} = least({{ base_columns | join(", ") }}) - then {{ base.replace("dist_ft", characteristic) }} - {%- endfor %} - else null - end as closest_road_{{ characteristic }}, - {%- endfor %} -{% endmacro %} From 8daaffd7868d7172f9604a9ddc5f8c66ea252a34 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Sun, 17 Nov 2024 02:52:08 +0000 Subject: [PATCH 097/166] Add to schema --- dbt/models/model/schema.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dbt/models/model/schema.yml b/dbt/models/model/schema.yml index 03f691b81..10bc42cc8 100644 --- a/dbt/models/model/schema.yml +++ b/dbt/models/model/schema.yml @@ -852,11 +852,14 @@ models: - *prox_airport_dnl_total - *prox_avg_school_rating_in_half_mile - 
*prox_lake_michigan_dist_ft + - *prox_nearest_arterial_road_dist_ft - *prox_nearest_bike_trail_dist_ft - *prox_nearest_cemetery_dist_ft + - *prox_nearest_connector_road_dist_ft - *prox_nearest_cta_route_dist_ft - *prox_nearest_cta_stop_dist_ft - *prox_nearest_golf_course_dist_ft + - *prox_nearest_highway_road_dist_ft - *prox_nearest_hospital_dist_ft - *prox_nearest_major_road_dist_ft - *prox_nearest_metra_route_dist_ft From 2dd70d7f4727a3dad1e7feba4661db96baf92cfb Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Sun, 17 Nov 2024 02:54:14 +0000 Subject: [PATCH 098/166] Fix schema --- dbt/models/model/schema.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbt/models/model/schema.yml b/dbt/models/model/schema.yml index 10bc42cc8..d888eb308 100644 --- a/dbt/models/model/schema.yml +++ b/dbt/models/model/schema.yml @@ -398,7 +398,7 @@ models: description: '{{ doc("column_lake_michigan_dist_ft") }}' - &prox_nearest_arterial_road_dist_ft name: prox_nearest_arterial_road_dist_ft - description: '{{ doc("column_prox_nearest_arterial_road_dist_ft") }}' + description: '{{ doc("column_nearest_arterial_road_dist_ft") }}' - &prox_nearest_bike_trail_dist_ft name: prox_nearest_bike_trail_dist_ft description: '{{ doc("column_nearest_bike_trail_dist_ft") }}' @@ -407,7 +407,7 @@ models: description: '{{ doc("column_nearest_cemetery_dist_ft") }}' - &prox_nearest_connector_road_dist_ft name: prox_nearest_connector_road_dist_ft - description: '{{ doc("column_prox_nearest_connector_road_dist_ft") }}' + description: '{{ doc("column_nearest_connector_road_dist_ft") }}' - &prox_nearest_cta_route_dist_ft name: prox_nearest_cta_route_dist_ft description: '{{ doc("column_nearest_cta_route_dist_ft") }}' @@ -419,7 +419,7 @@ models: description: '{{ doc("column_nearest_golf_course_dist_ft") }}' - &prox_nearest_highway_road_dist_ft name: prox_nearest_highway_road_dist_ft - description: '{{ doc("column_prox_nearest_highway_road_dist_ft") }}' + description: '{{ 
doc("column_nearest_highway_road_dist_ft") }}' - &prox_nearest_hospital_dist_ft name: prox_nearest_hospital_dist_ft description: '{{ doc("column_nearest_hospital_dist_ft") }}' From 64bbf9628aa629f2e3ef9b902485636cf4c98bb8 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Sun, 17 Nov 2024 04:47:14 +0000 Subject: [PATCH 099/166] Add to inputs --- dbt/models/model/model.vw_card_res_input.sql | 33 +++++++++++++++++++ dbt/models/model/model.vw_pin_condo_input.sql | 3 ++ dbt/models/model/schema.yml | 2 +- 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/dbt/models/model/model.vw_card_res_input.sql b/dbt/models/model/model.vw_card_res_input.sql index e071943f3..59d8c0b8b 100644 --- a/dbt/models/model/model.vw_card_res_input.sql +++ b/dbt/models/model/model.vw_card_res_input.sql @@ -438,6 +438,17 @@ SELECT nn1.prox_airport_dnl_total IS NULL THEN nn2.prox_airport_dnl_total END AS prox_airport_dnl_total, + CASE + WHEN + f1.prox_nearest_arterial_road_dist_ft IS NOT NULL + THEN f1.prox_nearest_arterial_road_dist_ft + WHEN + f1.prox_nearest_arterial_road_dist_ft IS NULL + THEN nn1.prox_nearest_arterial_road_dist_ft + WHEN + nn1.prox_nearest_arterial_road_dist_ft IS NULL + THEN nn2.prox_nearest_arterial_road_dist_ft + END AS prox_nearest_arterial_road_dist_ft, CASE WHEN f1.prox_nearest_bike_trail_dist_ft IS NOT NULL @@ -462,8 +473,30 @@ SELECT END AS prox_nearest_cemetery_dist_ft, f1.prox_nearest_cta_route_dist_ft, f1.prox_nearest_cta_stop_dist_ft, + CASE + WHEN + f1.prox_nearest_collector_road_dist_ft IS NOT NULL + THEN f1.prox_nearest_collector_road_dist_ft + WHEN + f1.prox_nearest_collector_road_dist_ft IS NULL + THEN nn1.prox_nearest_collector_road_dist_ft + WHEN + nn1.prox_nearest_collector_road_dist_ft IS NULL + THEN nn2.prox_nearest_collector_road_dist_ft + END AS prox_nearest_collector_road_dist_ft, f1.prox_nearest_golf_course_dist_ft, f1.prox_nearest_grocery_store_dist_ft, + CASE + WHEN + f1.prox_nearest_highway_road_dist_ft IS NOT NULL + THEN 
f1.prox_nearest_highway_road_dist_ft + WHEN + f1.prox_nearest_highway_road_dist_ft IS NULL + THEN nn1.prox_nearest_highway_road_dist_ft + WHEN + nn1.prox_nearest_highway_road_dist_ft IS NULL + THEN nn2.prox_nearest_highway_road_dist_ft + END AS prox_nearest_highway_road_dist_ft, CASE WHEN f1.prox_nearest_hospital_dist_ft IS NOT NULL diff --git a/dbt/models/model/model.vw_pin_condo_input.sql b/dbt/models/model/model.vw_pin_condo_input.sql index 0071e2c71..eff2d284e 100644 --- a/dbt/models/model/model.vw_pin_condo_input.sql +++ b/dbt/models/model/model.vw_pin_condo_input.sql @@ -84,12 +84,15 @@ WITH uni AS ( vpsi.prox_num_school_with_rating_in_half_mile, vpsi.prox_avg_school_rating_in_half_mile, vpsi.prox_airport_dnl_total, + vpsi.prox_nearest_arterial_road_dist_ft, vpsi.prox_nearest_bike_trail_dist_ft, vpsi.prox_nearest_cemetery_dist_ft, + vpsi.prox_nearest_collector_road_dist_ft, vpsi.prox_nearest_cta_route_dist_ft, vpsi.prox_nearest_cta_stop_dist_ft, vpsi.prox_nearest_golf_course_dist_ft, vpsi.prox_nearest_grocery_store_dist_ft, + vpsi.prox_nearest_highway_road_dist_ft, vpsi.prox_nearest_hospital_dist_ft, vpsi.prox_lake_michigan_dist_ft, vpsi.prox_nearest_major_road_dist_ft, diff --git a/dbt/models/model/schema.yml b/dbt/models/model/schema.yml index d888eb308..c029b3bdd 100644 --- a/dbt/models/model/schema.yml +++ b/dbt/models/model/schema.yml @@ -602,8 +602,8 @@ models: - *prox_nearest_cta_route_dist_ft - *prox_nearest_cta_stop_dist_ft - *prox_nearest_golf_course_dist_ft - - *prox_nearest_hospital_dist_ft - *prox_nearest_highway_road_dist_ft + - *prox_nearest_hospital_dist_ft - *prox_nearest_major_road_dist_ft - *prox_nearest_metra_route_dist_ft - *prox_nearest_metra_stop_dist_ft From 40585a7aa5382e218a8a1684771fda460034ccdf Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Sun, 17 Nov 2024 04:52:33 +0000 Subject: [PATCH 100/166] Rename to connector --- dbt/models/model/model.vw_pin_shared_input.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/dbt/models/model/model.vw_pin_shared_input.sql b/dbt/models/model/model.vw_pin_shared_input.sql index d7351b403..4123b4fe8 100644 --- a/dbt/models/model/model.vw_pin_shared_input.sql +++ b/dbt/models/model/model.vw_pin_shared_input.sql @@ -256,7 +256,7 @@ SELECT vwpf.nearest_arterial_road_dist_ft AS prox_nearest_arterial_road_dist_ft, vwpf.nearest_bike_trail_dist_ft AS prox_nearest_bike_trail_dist_ft, vwpf.nearest_cemetery_dist_ft AS prox_nearest_cemetery_dist_ft, - vwpf.nearest_collector_dist_ft AS prox_nearest_collector_dist_ft, + vwpf.nearest_connector_road_dist_ft AS prox_nearest_connector_road_dist_ft, vwpf.nearest_cta_route_dist_ft AS prox_nearest_cta_route_dist_ft, vwpf.nearest_cta_stop_dist_ft AS prox_nearest_cta_stop_dist_ft, vwpf.nearest_golf_course_dist_ft AS prox_nearest_golf_course_dist_ft, From 92a459c84ec5f8e4ab05eb724c55dbc93914d467 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Sun, 17 Nov 2024 04:53:09 +0000 Subject: [PATCH 101/166] Rename to collector --- dbt/models/model/model.vw_pin_shared_input.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/model/model.vw_pin_shared_input.sql b/dbt/models/model/model.vw_pin_shared_input.sql index 4123b4fe8..01761b650 100644 --- a/dbt/models/model/model.vw_pin_shared_input.sql +++ b/dbt/models/model/model.vw_pin_shared_input.sql @@ -256,7 +256,7 @@ SELECT vwpf.nearest_arterial_road_dist_ft AS prox_nearest_arterial_road_dist_ft, vwpf.nearest_bike_trail_dist_ft AS prox_nearest_bike_trail_dist_ft, vwpf.nearest_cemetery_dist_ft AS prox_nearest_cemetery_dist_ft, - vwpf.nearest_connector_road_dist_ft AS prox_nearest_connector_road_dist_ft, + vwpf.nearest_collector_road_dist_ft AS prox_nearest_collector_road_dist_ft, vwpf.nearest_cta_route_dist_ft AS prox_nearest_cta_route_dist_ft, vwpf.nearest_cta_stop_dist_ft AS prox_nearest_cta_stop_dist_ft, vwpf.nearest_golf_course_dist_ft AS prox_nearest_golf_course_dist_ft, From 50c8740d695072eb392dae3754e9a84e361cc275 Mon Sep 17 00:00:00 
2001 From: Damonamajor Date: Sun, 17 Nov 2024 04:54:51 +0000 Subject: [PATCH 102/166] Rename to collector --- dbt/models/model/schema.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dbt/models/model/schema.yml b/dbt/models/model/schema.yml index c029b3bdd..b2e91fb17 100644 --- a/dbt/models/model/schema.yml +++ b/dbt/models/model/schema.yml @@ -405,9 +405,9 @@ models: - &prox_nearest_cemetery_dist_ft name: prox_nearest_cemetery_dist_ft description: '{{ doc("column_nearest_cemetery_dist_ft") }}' - - &prox_nearest_connector_road_dist_ft - name: prox_nearest_connector_road_dist_ft - description: '{{ doc("column_nearest_connector_road_dist_ft") }}' + - &prox_nearest_collector_road_dist_ft + name: prox_nearest_collector_road_dist_ft + description: '{{ doc("column_nearest_collector_road_dist_ft") }}' - &prox_nearest_cta_route_dist_ft name: prox_nearest_cta_route_dist_ft description: '{{ doc("column_nearest_cta_route_dist_ft") }}' @@ -598,7 +598,7 @@ models: - *prox_nearest_arterial_road_dist_ft - *prox_nearest_bike_trail_dist_ft - *prox_nearest_cemetery_dist_ft - - *prox_nearest_connector_road_dist_ft + - *prox_nearest_collector_road_dist_ft - *prox_nearest_cta_route_dist_ft - *prox_nearest_cta_stop_dist_ft - *prox_nearest_golf_course_dist_ft @@ -855,7 +855,7 @@ models: - *prox_nearest_arterial_road_dist_ft - *prox_nearest_bike_trail_dist_ft - *prox_nearest_cemetery_dist_ft - - *prox_nearest_connector_road_dist_ft + - *prox_nearest_collector_road_dist_ft - *prox_nearest_cta_route_dist_ft - *prox_nearest_cta_stop_dist_ft - *prox_nearest_golf_course_dist_ft From 42f6815cc442c07f47707d1fabd45b2448e8d625 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Sun, 17 Nov 2024 15:43:56 +0000 Subject: [PATCH 103/166] Add other features --- dbt/models/model/model.vw_card_res_input.sql | 132 ++++++++++++++++++ dbt/models/model/model.vw_pin_condo_input.sql | 12 ++ .../model/model.vw_pin_shared_input.sql | 21 +++ dbt/models/model/schema.yml | 37 +++++ 4 
files changed, 202 insertions(+) diff --git a/dbt/models/model/model.vw_card_res_input.sql b/dbt/models/model/model.vw_card_res_input.sql index 59d8c0b8b..465264f48 100644 --- a/dbt/models/model/model.vw_card_res_input.sql +++ b/dbt/models/model/model.vw_card_res_input.sql @@ -438,6 +438,17 @@ SELECT nn1.prox_airport_dnl_total IS NULL THEN nn2.prox_airport_dnl_total END AS prox_airport_dnl_total, + CASE + WHEN + f1.prox_nearest_arterial_road_daily_traffic IS NOT NULL + THEN f1.prox_nearest_arterial_road_daily_traffic + WHEN + f1.prox_nearest_arterial_road_daily_traffic IS NULL + THEN nn1.prox_nearest_arterial_road_daily_traffic + WHEN + nn1.prox_nearest_arterial_road_daily_traffic IS NULL + THEN nn2.prox_nearest_arterial_road_daily_traffic + END AS prox_nearest_arterial_road_daily_traffic, CASE WHEN f1.prox_nearest_arterial_road_dist_ft IS NOT NULL @@ -449,6 +460,39 @@ SELECT nn1.prox_nearest_arterial_road_dist_ft IS NULL THEN nn2.prox_nearest_arterial_road_dist_ft END AS prox_nearest_arterial_road_dist_ft, + CASE + WHEN + f1.prox_nearest_arterial_road_lanes IS NOT NULL + THEN f1.prox_nearest_arterial_road_lanes + WHEN + f1.prox_nearest_arterial_road_lanes IS NULL + THEN nn1.prox_nearest_arterial_road_lanes + WHEN + nn1.prox_nearest_arterial_road_lanes IS NULL + THEN nn2.prox_nearest_arterial_road_lanes + END AS prox_nearest_arterial_road_lanes, + CASE + WHEN + f1.prox_nearest_arterial_road_speed_limit IS NOT NULL + THEN f1.prox_nearest_arterial_road_speed_limit + WHEN + f1.prox_nearest_arterial_road_speed_limit IS NULL + THEN nn1.prox_nearest_arterial_road_speed_limit + WHEN + nn1.prox_nearest_arterial_road_speed_limit IS NULL + THEN nn2.prox_nearest_arterial_road_speed_limit + END AS prox_nearest_arterial_road_speed_limit, + CASE + WHEN + f1.prox_nearest_arterial_road_surface_type IS NOT NULL + THEN f1.prox_nearest_arterial_road_surface_type + WHEN + f1.prox_nearest_arterial_road_surface_type IS NULL + THEN nn1.prox_nearest_arterial_road_surface_type + WHEN + 
nn1.prox_nearest_arterial_road_surface_type IS NULL + THEN nn2.prox_nearest_arterial_road_surface_type + END AS prox_nearest_arterial_road_surface_type, CASE WHEN f1.prox_nearest_bike_trail_dist_ft IS NOT NULL @@ -473,6 +517,17 @@ SELECT END AS prox_nearest_cemetery_dist_ft, f1.prox_nearest_cta_route_dist_ft, f1.prox_nearest_cta_stop_dist_ft, + CASE + WHEN + f1.prox_nearest_collector_road_daily_traffic IS NOT NULL + THEN f1.prox_nearest_collector_road_daily_traffic + WHEN + f1.prox_nearest_collector_road_daily_traffic IS NULL + THEN nn1.prox_nearest_collector_road_daily_traffic + WHEN + nn1.prox_nearest_collector_road_daily_traffic IS NULL + THEN nn2.prox_nearest_collector_road_daily_traffic + END AS prox_nearest_collector_road_daily_traffic, CASE WHEN f1.prox_nearest_collector_road_dist_ft IS NOT NULL @@ -484,8 +539,52 @@ SELECT nn1.prox_nearest_collector_road_dist_ft IS NULL THEN nn2.prox_nearest_collector_road_dist_ft END AS prox_nearest_collector_road_dist_ft, + CASE + WHEN + f1.prox_nearest_collector_road_lanes IS NOT NULL + THEN f1.prox_nearest_collector_road_lanes + WHEN + f1.prox_nearest_collector_road_lanes IS NULL + THEN nn1.prox_nearest_collector_road_lanes + WHEN + nn1.prox_nearest_collector_road_lanes IS NULL + THEN nn2.prox_nearest_collector_road_lanes + END AS prox_nearest_collector_road_lanes, + CASE + WHEN + f1.prox_nearest_collector_road_speed_limit IS NOT NULL + THEN f1.prox_nearest_collector_road_speed_limit + WHEN + f1.prox_nearest_collector_road_speed_limit IS NULL + THEN nn1.prox_nearest_collector_road_speed_limit + WHEN + nn1.prox_nearest_collector_road_speed_limit IS NULL + THEN nn2.prox_nearest_collector_road_speed_limit + END AS prox_nearest_collector_road_speed_limit, + CASE + WHEN + f1.prox_nearest_collector_road_surface_type IS NOT NULL + THEN f1.prox_nearest_collector_road_surface_type + WHEN + f1.prox_nearest_collector_road_surface_type IS NULL + THEN nn1.prox_nearest_collector_road_surface_type + WHEN + 
nn1.prox_nearest_collector_road_surface_type IS NULL + THEN nn2.prox_nearest_collector_road_surface_type + END AS prox_nearest_collector_road_surface_type, f1.prox_nearest_golf_course_dist_ft, f1.prox_nearest_grocery_store_dist_ft, + CASE + WHEN + f1.prox_nearest_highway_road_daily_traffic IS NOT NULL + THEN f1.prox_nearest_highway_road_daily_traffic + WHEN + f1.prox_nearest_highway_road_daily_traffic IS NULL + THEN nn1.prox_nearest_highway_road_daily_traffic + WHEN + nn1.prox_nearest_highway_road_daily_traffic IS NULL + THEN nn2.prox_nearest_highway_road_daily_traffic + END AS prox_nearest_highway_road_daily_traffic, CASE WHEN f1.prox_nearest_highway_road_dist_ft IS NOT NULL @@ -497,6 +596,39 @@ SELECT nn1.prox_nearest_highway_road_dist_ft IS NULL THEN nn2.prox_nearest_highway_road_dist_ft END AS prox_nearest_highway_road_dist_ft, + CASE + WHEN + f1.prox_nearest_highway_road_lanes IS NOT NULL + THEN f1.prox_nearest_highway_road_lanes + WHEN + f1.prox_nearest_highway_road_lanes IS NULL + THEN nn1.prox_nearest_highway_road_lanes + WHEN + nn1.prox_nearest_highway_road_lanes IS NULL + THEN nn2.prox_nearest_highway_road_lanes + END AS prox_nearest_highway_road_lanes, + CASE + WHEN + f1.prox_nearest_highway_road_speed_limit IS NOT NULL + THEN f1.prox_nearest_highway_road_speed_limit + WHEN + f1.prox_nearest_highway_road_speed_limit IS NULL + THEN nn1.prox_nearest_highway_road_speed_limit + WHEN + nn1.prox_nearest_highway_road_speed_limit IS NULL + THEN nn2.prox_nearest_highway_road_speed_limit + END AS prox_nearest_highway_road_speed_limit, + CASE + WHEN + f1.prox_nearest_highway_road_surface_type IS NOT NULL + THEN f1.prox_nearest_highway_road_surface_type + WHEN + f1.prox_nearest_highway_road_surface_type IS NULL + THEN nn1.prox_nearest_highway_road_surface_type + WHEN + nn1.prox_nearest_highway_road_surface_type IS NULL + THEN nn2.prox_nearest_highway_road_surface_type + END AS prox_nearest_highway_road_surface_type, CASE WHEN f1.prox_nearest_hospital_dist_ft IS NOT 
NULL diff --git a/dbt/models/model/model.vw_pin_condo_input.sql b/dbt/models/model/model.vw_pin_condo_input.sql index eff2d284e..9625a3e05 100644 --- a/dbt/models/model/model.vw_pin_condo_input.sql +++ b/dbt/models/model/model.vw_pin_condo_input.sql @@ -84,15 +84,27 @@ WITH uni AS ( vpsi.prox_num_school_with_rating_in_half_mile, vpsi.prox_avg_school_rating_in_half_mile, vpsi.prox_airport_dnl_total, + vpsi.prox_nearest_arterial_road_daily_traffic, vpsi.prox_nearest_arterial_road_dist_ft, + vpsi.prox_nearest_arterial_road_lanes, + vpsi.prox_nearest_arterial_road_speed_limit, + vpsi.prox_nearest_arterial_road_surface_type, vpsi.prox_nearest_bike_trail_dist_ft, vpsi.prox_nearest_cemetery_dist_ft, + vpsi.prox_nearest_collector_road_daily_traffic, vpsi.prox_nearest_collector_road_dist_ft, + vpsi.prox_nearest_collector_road_lanes, + vpsi.prox_nearest_collector_road_speed_limit, + vpsi.prox_nearest_collector_road_surface_type, vpsi.prox_nearest_cta_route_dist_ft, vpsi.prox_nearest_cta_stop_dist_ft, vpsi.prox_nearest_golf_course_dist_ft, vpsi.prox_nearest_grocery_store_dist_ft, + vpsi.prox_nearest_highway_road_daily_traffic, vpsi.prox_nearest_highway_road_dist_ft, + vpsi.prox_nearest_highway_road_lanes, + vpsi.prox_nearest_highway_road_speed_limit, + vpsi.prox_nearest_highway_road_surface_type, vpsi.prox_nearest_hospital_dist_ft, vpsi.prox_lake_michigan_dist_ft, vpsi.prox_nearest_major_road_dist_ft, diff --git a/dbt/models/model/model.vw_pin_shared_input.sql b/dbt/models/model/model.vw_pin_shared_input.sql index 01761b650..cc9bf02f4 100644 --- a/dbt/models/model/model.vw_pin_shared_input.sql +++ b/dbt/models/model/model.vw_pin_shared_input.sql @@ -253,15 +253,36 @@ SELECT -- PIN proximity distance variables vwpf.airport_dnl_total AS prox_airport_dnl_total, --new + vwpf.nearest_arterial_road_daily_traffic + AS prox_nearest_arterial_road_daily_traffic, vwpf.nearest_arterial_road_dist_ft AS prox_nearest_arterial_road_dist_ft, + vwpf.nearest_arterial_road_lanes AS 
prox_nearest_arterial_road_lanes, + vwpf.nearest_arterial_road_speed_limit + AS prox_nearest_arterial_road_speed_limit, + vwpf.nearest_arterial_road_surface_type + AS prox_nearest_arterial_road_surface_type, vwpf.nearest_bike_trail_dist_ft AS prox_nearest_bike_trail_dist_ft, vwpf.nearest_cemetery_dist_ft AS prox_nearest_cemetery_dist_ft, + vwpf.nearest_collector_road_daily_traffic + AS prox_nearest_collector_road_daily_traffic, vwpf.nearest_collector_road_dist_ft AS prox_nearest_collector_road_dist_ft, + vwpf.nearest_collector_road_lanes AS prox_nearest_collector_road_lanes, + vwpf.nearest_collector_road_speed_limit + AS prox_nearest_collector_road_speed_limit, + vwpf.nearest_collector_road_surface_type + AS prox_nearest_collector_road_surface_type, vwpf.nearest_cta_route_dist_ft AS prox_nearest_cta_route_dist_ft, vwpf.nearest_cta_stop_dist_ft AS prox_nearest_cta_stop_dist_ft, vwpf.nearest_golf_course_dist_ft AS prox_nearest_golf_course_dist_ft, vwpf.nearest_grocery_store_dist_ft AS prox_nearest_grocery_store_dist_ft, + vwpf.nearest_highway_road_daily_traffic + AS prox_nearest_highway_road_daily_traffic, vwpf.nearest_highway_road_dist_ft AS prox_nearest_highway_road_dist_ft, + vwpf.nearest_highway_road_lanes AS prox_nearest_highway_road_lanes, + vwpf.nearest_highway_road_speed_limit + AS prox_nearest_highway_road_speed_limit, + vwpf.nearest_highway_road_surface_type + AS prox_nearest_highway_road_surface_type, vwpf.nearest_hospital_dist_ft AS prox_nearest_hospital_dist_ft, vwpf.lake_michigan_dist_ft AS prox_lake_michigan_dist_ft, vwpf.nearest_major_road_dist_ft AS prox_nearest_major_road_dist_ft, diff --git a/dbt/models/model/schema.yml b/dbt/models/model/schema.yml index b2e91fb17..f1795d0df 100644 --- a/dbt/models/model/schema.yml +++ b/dbt/models/model/schema.yml @@ -396,18 +396,42 @@ models: - &prox_lake_michigan_dist_ft name: prox_lake_michigan_dist_ft description: '{{ doc("column_lake_michigan_dist_ft") }}' + - &prox_nearest_arterial_road_average_traffic + 
name: prox_nearest_arterial_road_average_traffic + description: '{{ doc("column_nearest_arterial_road_average_traffic") }}' - &prox_nearest_arterial_road_dist_ft name: prox_nearest_arterial_road_dist_ft description: '{{ doc("column_nearest_arterial_road_dist_ft") }}' + - &prox_nearest_arterial_road_lanes + name: prox_nearest_arterial_road_lanes + description: '{{ doc("column_nearest_arterial_road_lanes") }}' + - &prox_nearest_arterial_road_speed_limit + name: prox_nearest_arterial_road_speed_limit + description: '{{ doc("column_nearest_arterial_road_speed_limit") }}' + - &prox_nearest_arterial_road_surface_type + name: prox_nearest_arterial_road_surface_type + description: '{{ doc("column_nearest_arterial_road_surface_type") }}' - &prox_nearest_bike_trail_dist_ft name: prox_nearest_bike_trail_dist_ft description: '{{ doc("column_nearest_bike_trail_dist_ft") }}' - &prox_nearest_cemetery_dist_ft name: prox_nearest_cemetery_dist_ft description: '{{ doc("column_nearest_cemetery_dist_ft") }}' + - &prox_nearest_collector_road_average_traffic + name: prox_nearest_collector_road_average_traffic + description: '{{ doc("column_nearest_collector_road_average_traffic") }}' - &prox_nearest_collector_road_dist_ft name: prox_nearest_collector_road_dist_ft description: '{{ doc("column_nearest_collector_road_dist_ft") }}' + - &prox_nearest_collector_road_lanes + name: prox_nearest_collector_road_lanes + description: '{{ doc("column_nearest_collector_road_lanes") }}' + - &prox_nearest_collector_road_speed_limit + name: prox_nearest_collector_road_speed_limit + description: '{{ doc("column_nearest_collector_road_speed_limit") }}' + - &prox_nearest_collector_road_surface_type + name: prox_nearest_collector_road_surface_type + description: '{{ doc("column_nearest_collector_road_surface_type") }}' - &prox_nearest_cta_route_dist_ft name: prox_nearest_cta_route_dist_ft description: '{{ doc("column_nearest_cta_route_dist_ft") }}' @@ -417,9 +441,21 @@ models: - 
&prox_nearest_golf_course_dist_ft name: prox_nearest_golf_course_dist_ft description: '{{ doc("column_nearest_golf_course_dist_ft") }}' + - &prox_nearest_highway_road_average_traffic + name: prox_nearest_highway_road_average_traffic + description: '{{ doc("column_nearest_highway_road_average_traffic") }}' - &prox_nearest_highway_road_dist_ft name: prox_nearest_highway_road_dist_ft description: '{{ doc("column_nearest_highway_road_dist_ft") }}' + - &prox_nearest_highway_road_lanes + name: prox_nearest_highway_road_lanes + description: '{{ doc("column_nearest_highway_road_lanes") }}' + - &prox_nearest_highway_road_speed_limit + name: prox_nearest_highway_road_speed_limit + description: '{{ doc("column_nearest_highway_road_speed_limit") }}' + - &prox_nearest_highway_road_surface_type + name: prox_nearest_highway_road_surface_type + description: '{{ doc("column_nearest_highway_road_surface_type") }}' - &prox_nearest_hospital_dist_ft name: prox_nearest_hospital_dist_ft description: '{{ doc("column_nearest_hospital_dist_ft") }}' @@ -859,6 +895,7 @@ models: - *prox_nearest_cta_route_dist_ft - *prox_nearest_cta_stop_dist_ft - *prox_nearest_golf_course_dist_ft + - *prox_nearest_highway_road_daily_traffic - *prox_nearest_highway_road_dist_ft - *prox_nearest_hospital_dist_ft - *prox_nearest_major_road_dist_ft From d24d72a20105eaff102512476d572ca51bf30750 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Sun, 17 Nov 2024 15:49:46 +0000 Subject: [PATCH 104/166] rename schema --- dbt/models/model/schema.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/dbt/models/model/schema.yml b/dbt/models/model/schema.yml index f1795d0df..57e5851db 100644 --- a/dbt/models/model/schema.yml +++ b/dbt/models/model/schema.yml @@ -396,9 +396,9 @@ models: - &prox_lake_michigan_dist_ft name: prox_lake_michigan_dist_ft description: '{{ doc("column_lake_michigan_dist_ft") }}' - - &prox_nearest_arterial_road_average_traffic - name: 
prox_nearest_arterial_road_average_traffic - description: '{{ doc("column_nearest_arterial_road_average_traffic") }}' + - &prox_nearest_arterial_road_daily_traffic + name: prox_nearest_arterial_road_daily_traffic + description: '{{ doc("column_nearest_arterial_road_daily_traffic") }}' - &prox_nearest_arterial_road_dist_ft name: prox_nearest_arterial_road_dist_ft description: '{{ doc("column_nearest_arterial_road_dist_ft") }}' @@ -417,9 +417,9 @@ models: - &prox_nearest_cemetery_dist_ft name: prox_nearest_cemetery_dist_ft description: '{{ doc("column_nearest_cemetery_dist_ft") }}' - - &prox_nearest_collector_road_average_traffic - name: prox_nearest_collector_road_average_traffic - description: '{{ doc("column_nearest_collector_road_average_traffic") }}' + - &prox_nearest_collector_road_daily_traffic + name: prox_nearest_collector_road_daily_traffic + description: '{{ doc("column_nearest_collector_road_daily_traffic") }}' - &prox_nearest_collector_road_dist_ft name: prox_nearest_collector_road_dist_ft description: '{{ doc("column_nearest_collector_road_dist_ft") }}' @@ -441,9 +441,9 @@ models: - &prox_nearest_golf_course_dist_ft name: prox_nearest_golf_course_dist_ft description: '{{ doc("column_nearest_golf_course_dist_ft") }}' - - &prox_nearest_highway_road_average_traffic - name: prox_nearest_highway_road_average_traffic - description: '{{ doc("column_nearest_highway_road_average_traffic") }}' + - &prox_nearest_highway_road_daily_traffic + name: prox_nearest_highway_road_daily_traffic + description: '{{ doc("column_nearest_highway_road_daily_traffic") }}' - &prox_nearest_highway_road_dist_ft name: prox_nearest_highway_road_dist_ft description: '{{ doc("column_nearest_highway_road_dist_ft") }}' From 530e843d720cf7752f7c71064615bc2a2be46455 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Sun, 17 Nov 2024 15:51:57 +0000 Subject: [PATCH 105/166] Add road --- dbt/models/proximity/columns.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/dbt/models/proximity/columns.md b/dbt/models/proximity/columns.md index 581befd94..9693743aa 100644 --- a/dbt/models/proximity/columns.md +++ b/dbt/models/proximity/columns.md @@ -329,7 +329,7 @@ Road data sourced from Illinois Department of Transportation. ## nearest_highway_road_speed_limit -{% docs column_nearest_highway_speed_limit %} +{% docs column_nearest_highway_road_speed_limit %} Speed limit for the nearest highway road. Road data sourced from Illinois Department of Transportation. From d8afea68f2f03f47b1d6f4ea03e44be10ade2ba4 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Mon, 18 Nov 2024 22:38:54 +0000 Subject: [PATCH 106/166] Add other category --- dbt/models/proximity/proximity.dist_pin_to_arterial_road.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/dbt/models/proximity/proximity.dist_pin_to_arterial_road.sql b/dbt/models/proximity/proximity.dist_pin_to_arterial_road.sql index 44311dce1..cdb3e8598 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_arterial_road.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_arterial_road.sql @@ -12,6 +12,7 @@ WITH arterial AS ( -- noqa: ST03 FROM {{ source('spatial', 'traffic') }} WHERE road_type = 'Major Arterial' OR road_type = 'Minor Arterial' + OR road_type = 'Other Principal Arterial' ) SELECT From cb4a74be38f20db6a2e7a9895aca22cd174dbb09 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 19 Nov 2024 15:16:36 +0000 Subject: [PATCH 107/166] Remove Major Arterial --- dbt/models/proximity/proximity.dist_pin_to_arterial_road.sql | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_arterial_road.sql b/dbt/models/proximity/proximity.dist_pin_to_arterial_road.sql index cdb3e8598..cd4688a6b 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_arterial_road.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_arterial_road.sql @@ -10,8 +10,7 @@ WITH arterial AS ( -- noqa: ST03 SELECT * FROM {{ source('spatial', 'traffic') }} - WHERE 
road_type = 'Major Arterial' - OR road_type = 'Minor Arterial' + WHERE road_type = 'Minor Arterial' OR road_type = 'Other Principal Arterial' ) From bb84a02ff1783e5d89b9db9043bb19c25a412bce Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Tue, 19 Nov 2024 18:17:05 +0000 Subject: [PATCH 108/166] Rename to roads --- ...patial-environment_traffic.R => spatial-environment_roads.R} | 2 +- ...patial-environment_traffic.R => spatial-environment_roads.R} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename etl/scripts-ccao-data-raw-us-east-1/spatial/{spatial-environment_traffic.R => spatial-environment_roads.R} (98%) rename etl/scripts-ccao-data-warehouse-us-east-1/spatial/{spatial-environment_traffic.R => spatial-environment_roads.R} (99%) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_traffic.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_roads.R similarity index 98% rename from etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_traffic.R rename to etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_roads.R index bc69fe4b2..4d14b0499 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_traffic.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_roads.R @@ -10,7 +10,7 @@ library(arrow) AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") output_bucket <- file.path( AWS_S3_RAW_BUCKET, - "spatial", "environment", "traffic" + "spatial", "environment", "roads" ) # Get list of available files diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment_traffic.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment_roads.R similarity index 99% rename from etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment_traffic.R rename to etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment_roads.R index d98ed2c8a..e63cbfa9a 100644 --- 
a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment_traffic.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment_roads.R @@ -8,7 +8,7 @@ library(stringr) # Define the S3 bucket and folder path AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET") -s3_folder <- "spatial/environment/traffic" +s3_folder <- "spatial/environment/roads" output_bucket <- sub("/$", "", file.path(AWS_S3_WAREHOUSE_BUCKET, s3_folder)) # Re-coding of road type From 0390507eb5a8a30990db5aaec9df4ca6291a2f60 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 20 Nov 2024 16:50:57 +0000 Subject: [PATCH 109/166] Rename to roads --- .../proximity.dist_pin_to_arterial_road.sql | 2 +- .../proximity.dist_pin_to_collector_road.sql | 2 +- .../proximity.dist_pin_to_highway_road.sql | 2 +- dbt/models/spatial/docs.md | 31 ++++++++++--------- dbt/models/spatial/schema.yml | 6 ++-- 5 files changed, 22 insertions(+), 21 deletions(-) diff --git a/dbt/models/proximity/proximity.dist_pin_to_arterial_road.sql b/dbt/models/proximity/proximity.dist_pin_to_arterial_road.sql index cd4688a6b..e73f9b9a4 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_arterial_road.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_arterial_road.sql @@ -9,7 +9,7 @@ WITH arterial AS ( -- noqa: ST03 SELECT * - FROM {{ source('spatial', 'traffic') }} + FROM {{ source('spatial', 'roads') }} WHERE road_type = 'Minor Arterial' OR road_type = 'Other Principal Arterial' ) diff --git a/dbt/models/proximity/proximity.dist_pin_to_collector_road.sql b/dbt/models/proximity/proximity.dist_pin_to_collector_road.sql index f49c4e2ae..40e31da82 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_collector_road.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_collector_road.sql @@ -9,7 +9,7 @@ WITH collector AS ( -- noqa: ST03 SELECT * - FROM {{ source('spatial', 'traffic') }} + FROM {{ source('spatial', 'roads') }} WHERE 
road_type = 'Major Collector' OR road_type = 'Minor Collector' ) diff --git a/dbt/models/proximity/proximity.dist_pin_to_highway_road.sql b/dbt/models/proximity/proximity.dist_pin_to_highway_road.sql index a39675b05..94e382dc1 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_highway_road.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_highway_road.sql @@ -9,7 +9,7 @@ WITH highway AS ( -- noqa: ST03 SELECT * - FROM {{ source('spatial', 'traffic') }} + FROM {{ source('spatial', 'roads') }} WHERE road_type = 'Interstate' OR road_type = 'Freeway and Expressway' ) diff --git a/dbt/models/spatial/docs.md b/dbt/models/spatial/docs.md index 95bda9c5c..e53b29a07 100644 --- a/dbt/models/spatial/docs.md +++ b/dbt/models/spatial/docs.md @@ -391,6 +391,22 @@ Rail locations sourced from Cook County GIS. **Geometry:** `MULTILINESTRING` {% enddocs %} +# roads + +{% docs table_roads %} + +Illinois Department of Transportation data source from +[https://apps1.dot.illinois.gov/gist2/](https://apps1.dot.illinois.gov/gist2/). +Data focuses on five features; lanes, speed limits, traffic count, road type, +and surface type. Some columns are not present in all years of data (for example +speed limit in 2012) Data for columns is not universally present so we average +numeric values for roads which overlap and have a matching name. For example, +if segment B touches segment A and C with speed limits of 25 and 30, the speed +limit for segment B will be 27.5. + +**Geometry:** `MULTILINESTRING` +{% enddocs %} + # sanitation_district {% docs table_sanitation_district %} @@ -493,21 +509,6 @@ Includes townships within the City of Chicago, which are technically defunct. **Geometry:** `MULTIPOLYGON` {% enddocs %} -# traffic - -{% docs table_traffic %} - -Illinois Department of Transportation data source from -[https://apps1.dot.illinois.gov/gist2/](https://apps1.dot.illinois.gov/gist2/). -Data focuses on five features; lanes, speed limits, traffic count, road type, -and surface type. 
Some columns are not present in all years of data (for example -speed limit in 2012) Data for columns is not universally present so we average -numeric values for roads which overlap and have a matching name. For example, -if segment B touches segment A and C with speed limits of 25 and 30, the speed -limit for segment B will be 27.5. - -**Geometry:** `MULTILINESTRING` -{% enddocs %} # transit_dict diff --git a/dbt/models/spatial/schema.yml b/dbt/models/spatial/schema.yml index 513900073..0e1c3eed0 100644 --- a/dbt/models/spatial/schema.yml +++ b/dbt/models/spatial/schema.yml @@ -141,6 +141,9 @@ sources: - name: railroad description: '{{ doc("table_railroad") }}' + - name: roads + description: '{{ doc("table_roads") }}' + - name: sanitation_district description: '{{ doc("table_sanitation_district") }}' @@ -174,9 +177,6 @@ sources: - name: township description: '{{ doc("table_township") }}' - - name: traffic - description: '{{ doc("table_traffic") }}' - - name: transit_dict description: '{{ doc("table_transit_dict") }}' From ce7e29055234cc5134c4da4e4feae6c3a0005732 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 20 Nov 2024 17:22:28 +0000 Subject: [PATCH 110/166] Rename columns --- dbt/models/model/model.vw_card_res_input.sql | 322 +++++++++--------- dbt/models/model/model.vw_pin_condo_input.sql | 30 +- .../model/model.vw_pin_shared_input.sql | 48 +-- dbt/models/proximity/columns.md | 288 ++++++++-------- dbt/models/proximity/docs.md | 79 ++--- .../proximity.crosswalk_year_fill.sql | 62 ++-- .../proximity.vw_pin10_proximity.sql | 78 ++--- .../proximity.vw_pin10_proximity_fill.sql | 74 ++-- 8 files changed, 491 insertions(+), 490 deletions(-) diff --git a/dbt/models/model/model.vw_card_res_input.sql b/dbt/models/model/model.vw_card_res_input.sql index 465264f48..6f33c9ada 100644 --- a/dbt/models/model/model.vw_card_res_input.sql +++ b/dbt/models/model/model.vw_card_res_input.sql @@ -440,253 +440,253 @@ SELECT END AS prox_airport_dnl_total, CASE WHEN - 
f1.prox_nearest_arterial_road_daily_traffic IS NOT NULL - THEN f1.prox_nearest_arterial_road_daily_traffic + f1.prox_nearest_bike_trail_dist_ft IS NOT NULL + THEN f1.prox_nearest_bike_trail_dist_ft WHEN - f1.prox_nearest_arterial_road_daily_traffic IS NULL - THEN nn1.prox_nearest_arterial_road_daily_traffic + f1.prox_nearest_bike_trail_dist_ft IS NULL + THEN nn1.prox_nearest_bike_trail_dist_ft WHEN - nn1.prox_nearest_arterial_road_daily_traffic IS NULL - THEN nn2.prox_nearest_arterial_road_daily_traffic - END AS prox_nearest_arterial_road_daily_traffic, + nn1.prox_nearest_bike_trail_dist_ft IS NULL + THEN nn2.prox_nearest_bike_trail_dist_ft + END AS prox_nearest_bike_trail_dist_ft, CASE WHEN - f1.prox_nearest_arterial_road_dist_ft IS NOT NULL - THEN f1.prox_nearest_arterial_road_dist_ft + f1.prox_nearest_cemetery_dist_ft IS NOT NULL + THEN f1.prox_nearest_cemetery_dist_ft WHEN - f1.prox_nearest_arterial_road_dist_ft IS NULL - THEN nn1.prox_nearest_arterial_road_dist_ft + f1.prox_nearest_cemetery_dist_ft IS NULL + THEN nn1.prox_nearest_cemetery_dist_ft WHEN - nn1.prox_nearest_arterial_road_dist_ft IS NULL - THEN nn2.prox_nearest_arterial_road_dist_ft - END AS prox_nearest_arterial_road_dist_ft, + nn1.prox_nearest_cemetery_dist_ft IS NULL + THEN nn2.prox_nearest_cemetery_dist_ft + END AS prox_nearest_cemetery_dist_ft, + f1.prox_nearest_cta_route_dist_ft, + f1.prox_nearest_cta_stop_dist_ft, + f1.prox_nearest_golf_course_dist_ft, + f1.prox_nearest_grocery_store_dist_ft, CASE WHEN - f1.prox_nearest_arterial_road_lanes IS NOT NULL - THEN f1.prox_nearest_arterial_road_lanes + f1.prox_nearest_hospital_dist_ft IS NOT NULL + THEN f1.prox_nearest_hospital_dist_ft WHEN - f1.prox_nearest_arterial_road_lanes IS NULL - THEN nn1.prox_nearest_arterial_road_lanes + f1.prox_nearest_hospital_dist_ft IS NULL + THEN nn1.prox_nearest_hospital_dist_ft WHEN - nn1.prox_nearest_arterial_road_lanes IS NULL - THEN nn2.prox_nearest_arterial_road_lanes - END AS prox_nearest_arterial_road_lanes, 
+ nn1.prox_nearest_hospital_dist_ft IS NULL + THEN nn2.prox_nearest_hospital_dist_ft + END AS prox_nearest_hospital_dist_ft, CASE WHEN - f1.prox_nearest_arterial_road_speed_limit IS NOT NULL - THEN f1.prox_nearest_arterial_road_speed_limit + f1.prox_lake_michigan_dist_ft IS NOT NULL + THEN f1.prox_lake_michigan_dist_ft WHEN - f1.prox_nearest_arterial_road_speed_limit IS NULL - THEN nn1.prox_nearest_arterial_road_speed_limit + f1.prox_lake_michigan_dist_ft IS NULL + THEN nn1.prox_lake_michigan_dist_ft WHEN - nn1.prox_nearest_arterial_road_speed_limit IS NULL - THEN nn2.prox_nearest_arterial_road_speed_limit - END AS prox_nearest_arterial_road_speed_limit, + nn1.prox_lake_michigan_dist_ft IS NULL + THEN nn2.prox_lake_michigan_dist_ft + END AS prox_lake_michigan_dist_ft, CASE WHEN - f1.prox_nearest_arterial_road_surface_type IS NOT NULL - THEN f1.prox_nearest_arterial_road_surface_type + f1.prox_nearest_major_road_dist_ft IS NOT NULL + THEN f1.prox_nearest_major_road_dist_ft WHEN - f1.prox_nearest_arterial_road_surface_type IS NULL - THEN nn1.prox_nearest_arterial_road_surface_type + f1.prox_nearest_major_road_dist_ft IS NULL + THEN nn1.prox_nearest_major_road_dist_ft WHEN - nn1.prox_nearest_arterial_road_surface_type IS NULL - THEN nn2.prox_nearest_arterial_road_surface_type - END AS prox_nearest_arterial_road_surface_type, + nn1.prox_nearest_major_road_dist_ft IS NULL + THEN nn2.prox_nearest_major_road_dist_ft + END AS prox_nearest_major_road_dist_ft, + f1.prox_nearest_metra_route_dist_ft, + f1.prox_nearest_metra_stop_dist_ft, + f1.prox_nearest_new_construction_dist_ft, CASE WHEN - f1.prox_nearest_bike_trail_dist_ft IS NOT NULL - THEN f1.prox_nearest_bike_trail_dist_ft + f1.prox_nearest_park_dist_ft IS NOT NULL + THEN f1.prox_nearest_park_dist_ft WHEN - f1.prox_nearest_bike_trail_dist_ft IS NULL - THEN nn1.prox_nearest_bike_trail_dist_ft + f1.prox_nearest_park_dist_ft IS NULL + THEN nn1.prox_nearest_park_dist_ft WHEN - nn1.prox_nearest_bike_trail_dist_ft IS NULL - 
THEN nn2.prox_nearest_bike_trail_dist_ft - END AS prox_nearest_bike_trail_dist_ft, + nn1.prox_nearest_park_dist_ft IS NULL + THEN nn2.prox_nearest_park_dist_ft + END AS prox_nearest_park_dist_ft, CASE WHEN - f1.prox_nearest_cemetery_dist_ft IS NOT NULL - THEN f1.prox_nearest_cemetery_dist_ft + f1.prox_nearest_railroad_dist_ft IS NOT NULL + THEN f1.prox_nearest_railroad_dist_ft WHEN - f1.prox_nearest_cemetery_dist_ft IS NULL - THEN nn1.prox_nearest_cemetery_dist_ft + f1.prox_nearest_railroad_dist_ft IS NULL + THEN nn1.prox_nearest_railroad_dist_ft WHEN - nn1.prox_nearest_cemetery_dist_ft IS NULL - THEN nn2.prox_nearest_cemetery_dist_ft - END AS prox_nearest_cemetery_dist_ft, - f1.prox_nearest_cta_route_dist_ft, - f1.prox_nearest_cta_stop_dist_ft, + nn1.prox_nearest_railroad_dist_ft IS NULL + THEN nn2.prox_nearest_railroad_dist_ft + END AS prox_nearest_railroad_dist_ft, CASE WHEN - f1.prox_nearest_collector_road_daily_traffic IS NOT NULL - THEN f1.prox_nearest_collector_road_daily_traffic + f1.prox_nearest_road_arterial_daily_traffic IS NOT NULL + THEN f1.prox_nearest_road_arterial_daily_traffic WHEN - f1.prox_nearest_collector_road_daily_traffic IS NULL - THEN nn1.prox_nearest_collector_road_daily_traffic + f1.prox_nearest_road_arterial_daily_traffic IS NULL + THEN nn1.prox_nearest_road_arterial_daily_traffic WHEN - nn1.prox_nearest_collector_road_daily_traffic IS NULL - THEN nn2.prox_nearest_collector_road_daily_traffic - END AS prox_nearest_collector_road_daily_traffic, + nn1.prox_nearest_road_arterial_daily_traffic IS NULL + THEN nn2.prox_nearest_road_arterial_daily_traffic + END AS prox_nearest_road_arterial_daily_traffic, CASE WHEN - f1.prox_nearest_collector_road_dist_ft IS NOT NULL - THEN f1.prox_nearest_collector_road_dist_ft + f1.prox_nearest_road_arterial_dist_ft IS NOT NULL + THEN f1.prox_nearest_road_arterial_dist_ft WHEN - f1.prox_nearest_collector_road_dist_ft IS NULL - THEN nn1.prox_nearest_collector_road_dist_ft + 
f1.prox_nearest_road_arterial_dist_ft IS NULL + THEN nn1.prox_nearest_road_arterial_dist_ft WHEN - nn1.prox_nearest_collector_road_dist_ft IS NULL - THEN nn2.prox_nearest_collector_road_dist_ft - END AS prox_nearest_collector_road_dist_ft, + nn1.prox_nearest_road_arterial_dist_ft IS NULL + THEN nn2.prox_nearest_road_arterial_dist_ft + END AS prox_nearest_road_arterial_dist_ft, CASE WHEN - f1.prox_nearest_collector_road_lanes IS NOT NULL - THEN f1.prox_nearest_collector_road_lanes + f1.prox_nearest_road_arterial_lanes IS NOT NULL + THEN f1.prox_nearest_road_arterial_lanes WHEN - f1.prox_nearest_collector_road_lanes IS NULL - THEN nn1.prox_nearest_collector_road_lanes + f1.prox_nearest_road_arterial_lanes IS NULL + THEN nn1.prox_nearest_road_arterial_lanes WHEN - nn1.prox_nearest_collector_road_lanes IS NULL - THEN nn2.prox_nearest_collector_road_lanes - END AS prox_nearest_collector_road_lanes, + nn1.prox_nearest_road_arterial_lanes IS NULL + THEN nn2.prox_nearest_road_arterial_lanes + END AS prox_nearest_road_arterial_lanes, CASE WHEN - f1.prox_nearest_collector_road_speed_limit IS NOT NULL - THEN f1.prox_nearest_collector_road_speed_limit + f1.prox_nearest_road_arterial_speed_limit IS NOT NULL + THEN f1.prox_nearest_road_arterial_speed_limit WHEN - f1.prox_nearest_collector_road_speed_limit IS NULL - THEN nn1.prox_nearest_collector_road_speed_limit + f1.prox_nearest_road_arterial_speed_limit IS NULL + THEN nn1.prox_nearest_road_arterial_speed_limit WHEN - nn1.prox_nearest_collector_road_speed_limit IS NULL - THEN nn2.prox_nearest_collector_road_speed_limit - END AS prox_nearest_collector_road_speed_limit, + nn1.prox_nearest_road_arterial_speed_limit IS NULL + THEN nn2.prox_nearest_road_arterial_speed_limit + END AS prox_nearest_road_arterial_speed_limit, CASE WHEN - f1.prox_nearest_collector_road_surface_type IS NOT NULL - THEN f1.prox_nearest_collector_road_surface_type + f1.prox_nearest_road_arterial_surface_type IS NOT NULL + THEN 
f1.prox_nearest_road_arterial_surface_type WHEN - f1.prox_nearest_collector_road_surface_type IS NULL - THEN nn1.prox_nearest_collector_road_surface_type + f1.prox_nearest_road_arterial_surface_type IS NULL + THEN nn1.prox_nearest_road_arterial_surface_type WHEN - nn1.prox_nearest_collector_road_surface_type IS NULL - THEN nn2.prox_nearest_collector_road_surface_type - END AS prox_nearest_collector_road_surface_type, - f1.prox_nearest_golf_course_dist_ft, - f1.prox_nearest_grocery_store_dist_ft, + nn1.prox_nearest_road_arterial_surface_type IS NULL + THEN nn2.prox_nearest_road_arterial_surface_type + END AS prox_nearest_road_arterial_surface_type, CASE WHEN - f1.prox_nearest_highway_road_daily_traffic IS NOT NULL - THEN f1.prox_nearest_highway_road_daily_traffic + f1.prox_nearest_road_collector_daily_traffic IS NOT NULL + THEN f1.prox_nearest_road_collector_daily_traffic WHEN - f1.prox_nearest_highway_road_daily_traffic IS NULL - THEN nn1.prox_nearest_highway_road_daily_traffic + f1.prox_nearest_road_collector_daily_traffic IS NULL + THEN nn1.prox_nearest_road_collector_daily_traffic WHEN - nn1.prox_nearest_highway_road_daily_traffic IS NULL - THEN nn2.prox_nearest_highway_road_daily_traffic - END AS prox_nearest_highway_road_daily_traffic, + nn1.prox_nearest_road_collector_daily_traffic IS NULL + THEN nn2.prox_nearest_road_collector_daily_traffic + END AS prox_nearest_road_collector_daily_traffic, CASE WHEN - f1.prox_nearest_highway_road_dist_ft IS NOT NULL - THEN f1.prox_nearest_highway_road_dist_ft + f1.prox_nearest_road_collector_dist_ft IS NOT NULL + THEN f1.prox_nearest_road_collector_dist_ft WHEN - f1.prox_nearest_highway_road_dist_ft IS NULL - THEN nn1.prox_nearest_highway_road_dist_ft + f1.prox_nearest_road_collector_dist_ft IS NULL + THEN nn1.prox_nearest_road_collector_dist_ft WHEN - nn1.prox_nearest_highway_road_dist_ft IS NULL - THEN nn2.prox_nearest_highway_road_dist_ft - END AS prox_nearest_highway_road_dist_ft, + 
nn1.prox_nearest_road_collector_dist_ft IS NULL + THEN nn2.prox_nearest_road_collector_dist_ft + END AS prox_nearest_road_collector_dist_ft, CASE WHEN - f1.prox_nearest_highway_road_lanes IS NOT NULL - THEN f1.prox_nearest_highway_road_lanes + f1.prox_nearest_road_collector_lanes IS NOT NULL + THEN f1.prox_nearest_road_collector_lanes WHEN - f1.prox_nearest_highway_road_lanes IS NULL - THEN nn1.prox_nearest_highway_road_lanes + f1.prox_nearest_road_collector_lanes IS NULL + THEN nn1.prox_nearest_road_collector_lanes WHEN - nn1.prox_nearest_highway_road_lanes IS NULL - THEN nn2.prox_nearest_highway_road_lanes - END AS prox_nearest_highway_road_lanes, + nn1.prox_nearest_road_collector_lanes IS NULL + THEN nn2.prox_nearest_road_collector_lanes + END AS prox_nearest_road_collector_lanes, CASE WHEN - f1.prox_nearest_highway_road_speed_limit IS NOT NULL - THEN f1.prox_nearest_highway_road_speed_limit + f1.prox_nearest_road_collector_speed_limit IS NOT NULL + THEN f1.prox_nearest_road_collector_speed_limit WHEN - f1.prox_nearest_highway_road_speed_limit IS NULL - THEN nn1.prox_nearest_highway_road_speed_limit + f1.prox_nearest_road_collector_speed_limit IS NULL + THEN nn1.prox_nearest_road_collector_speed_limit WHEN - nn1.prox_nearest_highway_road_speed_limit IS NULL - THEN nn2.prox_nearest_highway_road_speed_limit - END AS prox_nearest_highway_road_speed_limit, + nn1.prox_nearest_road_collector_speed_limit IS NULL + THEN nn2.prox_nearest_road_collector_speed_limit + END AS prox_nearest_road_collector_speed_limit, CASE WHEN - f1.prox_nearest_highway_road_surface_type IS NOT NULL - THEN f1.prox_nearest_highway_road_surface_type + f1.prox_nearest_road_collector_surface_type IS NOT NULL + THEN f1.prox_nearest_road_collector_surface_type WHEN - f1.prox_nearest_highway_road_surface_type IS NULL - THEN nn1.prox_nearest_highway_road_surface_type + f1.prox_nearest_road_collector_surface_type IS NULL + THEN nn1.prox_nearest_road_collector_surface_type WHEN - 
nn1.prox_nearest_highway_road_surface_type IS NULL - THEN nn2.prox_nearest_highway_road_surface_type - END AS prox_nearest_highway_road_surface_type, + nn1.prox_nearest_road_collector_surface_type IS NULL + THEN nn2.prox_nearest_road_collector_surface_type + END AS prox_nearest_road_collector_surface_type, CASE WHEN - f1.prox_nearest_hospital_dist_ft IS NOT NULL - THEN f1.prox_nearest_hospital_dist_ft + f1.prox_nearest_road_highway_daily_traffic IS NOT NULL + THEN f1.prox_nearest_road_highway_daily_traffic WHEN - f1.prox_nearest_hospital_dist_ft IS NULL - THEN nn1.prox_nearest_hospital_dist_ft + f1.prox_nearest_road_highway_daily_traffic IS NULL + THEN nn1.prox_nearest_road_highway_daily_traffic WHEN - nn1.prox_nearest_hospital_dist_ft IS NULL - THEN nn2.prox_nearest_hospital_dist_ft - END AS prox_nearest_hospital_dist_ft, + nn1.prox_nearest_road_highway_daily_traffic IS NULL + THEN nn2.prox_nearest_road_highway_daily_traffic + END AS prox_nearest_road_highway_daily_traffic, CASE WHEN - f1.prox_lake_michigan_dist_ft IS NOT NULL - THEN f1.prox_lake_michigan_dist_ft + f1.prox_nearest_road_highway_dist_ft IS NOT NULL + THEN f1.prox_nearest_road_highway_dist_ft WHEN - f1.prox_lake_michigan_dist_ft IS NULL - THEN nn1.prox_lake_michigan_dist_ft + f1.prox_nearest_road_highway_dist_ft IS NULL + THEN nn1.prox_nearest_road_highway_dist_ft WHEN - nn1.prox_lake_michigan_dist_ft IS NULL - THEN nn2.prox_lake_michigan_dist_ft - END AS prox_lake_michigan_dist_ft, + nn1.prox_nearest_road_highway_dist_ft IS NULL + THEN nn2.prox_nearest_road_highway_dist_ft + END AS prox_nearest_road_highway_dist_ft, CASE WHEN - f1.prox_nearest_major_road_dist_ft IS NOT NULL - THEN f1.prox_nearest_major_road_dist_ft + f1.prox_nearest_road_highway_lanes IS NOT NULL + THEN f1.prox_nearest_road_highway_lanes WHEN - f1.prox_nearest_major_road_dist_ft IS NULL - THEN nn1.prox_nearest_major_road_dist_ft + f1.prox_nearest_road_highway_lanes IS NULL + THEN nn1.prox_nearest_road_highway_lanes WHEN - 
nn1.prox_nearest_major_road_dist_ft IS NULL - THEN nn2.prox_nearest_major_road_dist_ft - END AS prox_nearest_major_road_dist_ft, - f1.prox_nearest_metra_route_dist_ft, - f1.prox_nearest_metra_stop_dist_ft, - f1.prox_nearest_new_construction_dist_ft, + nn1.prox_nearest_road_highway_lanes IS NULL + THEN nn2.prox_nearest_road_highway_lanes + END AS prox_nearest_road_highway_lanes, CASE WHEN - f1.prox_nearest_park_dist_ft IS NOT NULL - THEN f1.prox_nearest_park_dist_ft + f1.prox_nearest_road_highway_speed_limit IS NOT NULL + THEN f1.prox_nearest_road_highway_speed_limit WHEN - f1.prox_nearest_park_dist_ft IS NULL - THEN nn1.prox_nearest_park_dist_ft + f1.prox_nearest_road_highway_speed_limit IS NULL + THEN nn1.prox_nearest_road_highway_speed_limit WHEN - nn1.prox_nearest_park_dist_ft IS NULL - THEN nn2.prox_nearest_park_dist_ft - END AS prox_nearest_park_dist_ft, + nn1.prox_nearest_road_highway_speed_limit IS NULL + THEN nn2.prox_nearest_road_highway_speed_limit + END AS prox_nearest_road_highway_speed_limit, CASE WHEN - f1.prox_nearest_railroad_dist_ft IS NOT NULL - THEN f1.prox_nearest_railroad_dist_ft + f1.prox_nearest_road_highway_surface_type IS NOT NULL + THEN f1.prox_nearest_road_highway_surface_type WHEN - f1.prox_nearest_railroad_dist_ft IS NULL - THEN nn1.prox_nearest_railroad_dist_ft + f1.prox_nearest_road_highway_surface_type IS NULL + THEN nn1.prox_nearest_road_highway_surface_type WHEN - nn1.prox_nearest_railroad_dist_ft IS NULL - THEN nn2.prox_nearest_railroad_dist_ft - END AS prox_nearest_railroad_dist_ft, + nn1.prox_nearest_road_highway_surface_type IS NULL + THEN nn2.prox_nearest_road_highway_surface_type + END AS prox_nearest_road_highway_surface_type, CASE WHEN f1.prox_nearest_secondary_road_dist_ft IS NOT NULL diff --git a/dbt/models/model/model.vw_pin_condo_input.sql b/dbt/models/model/model.vw_pin_condo_input.sql index 9625a3e05..1d144736f 100644 --- a/dbt/models/model/model.vw_pin_condo_input.sql +++ 
b/dbt/models/model/model.vw_pin_condo_input.sql @@ -84,27 +84,12 @@ WITH uni AS ( vpsi.prox_num_school_with_rating_in_half_mile, vpsi.prox_avg_school_rating_in_half_mile, vpsi.prox_airport_dnl_total, - vpsi.prox_nearest_arterial_road_daily_traffic, - vpsi.prox_nearest_arterial_road_dist_ft, - vpsi.prox_nearest_arterial_road_lanes, - vpsi.prox_nearest_arterial_road_speed_limit, - vpsi.prox_nearest_arterial_road_surface_type, vpsi.prox_nearest_bike_trail_dist_ft, vpsi.prox_nearest_cemetery_dist_ft, - vpsi.prox_nearest_collector_road_daily_traffic, - vpsi.prox_nearest_collector_road_dist_ft, - vpsi.prox_nearest_collector_road_lanes, - vpsi.prox_nearest_collector_road_speed_limit, - vpsi.prox_nearest_collector_road_surface_type, vpsi.prox_nearest_cta_route_dist_ft, vpsi.prox_nearest_cta_stop_dist_ft, vpsi.prox_nearest_golf_course_dist_ft, vpsi.prox_nearest_grocery_store_dist_ft, - vpsi.prox_nearest_highway_road_daily_traffic, - vpsi.prox_nearest_highway_road_dist_ft, - vpsi.prox_nearest_highway_road_lanes, - vpsi.prox_nearest_highway_road_speed_limit, - vpsi.prox_nearest_highway_road_surface_type, vpsi.prox_nearest_hospital_dist_ft, vpsi.prox_lake_michigan_dist_ft, vpsi.prox_nearest_major_road_dist_ft, @@ -113,6 +98,21 @@ WITH uni AS ( vpsi.prox_nearest_new_construction_dist_ft, vpsi.prox_nearest_park_dist_ft, vpsi.prox_nearest_railroad_dist_ft, + vpsi.prox_nearest_road_arterial_daily_traffic, + vpsi.prox_nearest_road_arterial_dist_ft, + vpsi.prox_nearest_road_arterial_lanes, + vpsi.prox_nearest_road_arterial_speed_limit, + vpsi.prox_nearest_road_arterial_surface_type, + vpsi.prox_nearest_road_collector_daily_traffic, + vpsi.prox_nearest_road_collector_dist_ft, + vpsi.prox_nearest_road_collector_lanes, + vpsi.prox_nearest_road_collector_speed_limit, + vpsi.prox_nearest_road_collector_surface_type, + vpsi.prox_nearest_road_highway_daily_traffic, + vpsi.prox_nearest_road_highway_dist_ft, + vpsi.prox_nearest_road_highway_lanes, + 
vpsi.prox_nearest_road_highway_speed_limit, + vpsi.prox_nearest_road_highway_surface_type, vpsi.prox_nearest_secondary_road_dist_ft, vpsi.prox_nearest_stadium_dist_ft, vpsi.prox_nearest_university_dist_ft, diff --git a/dbt/models/model/model.vw_pin_shared_input.sql b/dbt/models/model/model.vw_pin_shared_input.sql index cc9bf02f4..466790ac4 100644 --- a/dbt/models/model/model.vw_pin_shared_input.sql +++ b/dbt/models/model/model.vw_pin_shared_input.sql @@ -253,36 +253,12 @@ SELECT -- PIN proximity distance variables vwpf.airport_dnl_total AS prox_airport_dnl_total, --new - vwpf.nearest_arterial_road_daily_traffic - AS prox_nearest_arterial_road_daily_traffic, - vwpf.nearest_arterial_road_dist_ft AS prox_nearest_arterial_road_dist_ft, - vwpf.nearest_arterial_road_lanes AS prox_nearest_arterial_road_lanes, - vwpf.nearest_arterial_road_speed_limit - AS prox_nearest_arterial_road_speed_limit, - vwpf.nearest_arterial_road_surface_type - AS prox_nearest_arterial_road_surface_type, vwpf.nearest_bike_trail_dist_ft AS prox_nearest_bike_trail_dist_ft, vwpf.nearest_cemetery_dist_ft AS prox_nearest_cemetery_dist_ft, - vwpf.nearest_collector_road_daily_traffic - AS prox_nearest_collector_road_daily_traffic, - vwpf.nearest_collector_road_dist_ft AS prox_nearest_collector_road_dist_ft, - vwpf.nearest_collector_road_lanes AS prox_nearest_collector_road_lanes, - vwpf.nearest_collector_road_speed_limit - AS prox_nearest_collector_road_speed_limit, - vwpf.nearest_collector_road_surface_type - AS prox_nearest_collector_road_surface_type, vwpf.nearest_cta_route_dist_ft AS prox_nearest_cta_route_dist_ft, vwpf.nearest_cta_stop_dist_ft AS prox_nearest_cta_stop_dist_ft, vwpf.nearest_golf_course_dist_ft AS prox_nearest_golf_course_dist_ft, vwpf.nearest_grocery_store_dist_ft AS prox_nearest_grocery_store_dist_ft, - vwpf.nearest_highway_road_daily_traffic - AS prox_nearest_highway_road_daily_traffic, - vwpf.nearest_highway_road_dist_ft AS prox_nearest_highway_road_dist_ft, - 
vwpf.nearest_highway_road_lanes AS prox_nearest_highway_road_lanes, - vwpf.nearest_highway_road_speed_limit - AS prox_nearest_highway_road_speed_limit, - vwpf.nearest_highway_road_surface_type - AS prox_nearest_highway_road_surface_type, vwpf.nearest_hospital_dist_ft AS prox_nearest_hospital_dist_ft, vwpf.lake_michigan_dist_ft AS prox_lake_michigan_dist_ft, vwpf.nearest_major_road_dist_ft AS prox_nearest_major_road_dist_ft, @@ -292,6 +268,30 @@ SELECT AS prox_nearest_new_construction_dist_ft, vwpf.nearest_park_dist_ft AS prox_nearest_park_dist_ft, vwpf.nearest_railroad_dist_ft AS prox_nearest_railroad_dist_ft, + vwpf.nearest_road_arterial_daily_traffic + AS prox_nearest_road_arterial_daily_traffic, + vwpf.nearest_road_arterial_dist_ft AS prox_nearest_road_arterial_dist_ft, + vwpf.nearest_road_arterial_lanes AS prox_nearest_road_arterial_lanes, + vwpf.nearest_road_arterial_speed_limit + AS prox_nearest_road_arterial_speed_limit, + vwpf.nearest_road_arterial_surface_type + AS prox_nearest_road_arterial_surface_type, + vwpf.nearest_road_collector_daily_traffic + AS prox_nearest_road_collector_daily_traffic, + vwpf.nearest_road_collector_dist_ft AS prox_nearest_road_collector_dist_ft, + vwpf.nearest_road_collector_lanes AS prox_nearest_road_collector_lanes, + vwpf.nearest_road_collector_speed_limit + AS prox_nearest_road_collector_speed_limit, + vwpf.nearest_road_collector_surface_type + AS prox_nearest_road_collector_surface_type, + vwpf.nearest_road_highway_daily_traffic + AS prox_nearest_road_highway_daily_traffic, + vwpf.nearest_road_highway_dist_ft AS prox_nearest_road_highway_dist_ft, + vwpf.nearest_road_highway_lanes AS prox_nearest_road_highway_lanes, + vwpf.nearest_road_highway_speed_limit + AS prox_nearest_road_highway_speed_limit, + vwpf.nearest_road_highway_surface_type + AS prox_nearest_road_highway_surface_type, vwpf.nearest_secondary_road_dist_ft AS prox_nearest_secondary_road_dist_ft, vwpf.nearest_stadium_dist_ft AS prox_nearest_stadium_dist_ft, 
vwpf.nearest_university_dist_ft AS prox_nearest_university_dist_ft, diff --git a/dbt/models/proximity/columns.md b/dbt/models/proximity/columns.md index 9693743aa..57075dbcb 100644 --- a/dbt/models/proximity/columns.md +++ b/dbt/models/proximity/columns.md @@ -66,54 +66,6 @@ Distance to Lake Michigan shoreline (feet). Shoreline sourced from Census hydrography files {% enddocs %} -## nearest_arterial_road_daily_traffic - -{% docs column_nearest_arterial_road_daily_traffic %} -Daily traffic of nearest arterial road. - -Road data sourced from Illinois Department of Transportation. -{% enddocs %} - -## nearest_arterial_road_dist_ft - -{% docs column_nearest_arterial_road_dist_ft %} -Distance to nearest arterial road. - -Road data sourced from Illinois Department of Transportation. -{% enddocs %} - -## nearest_arterial_road_lanes - -{% docs column_nearest_arterial_road_lanes %} -Number of lanes for the nearest arterial road. - -Road data sourced from Illinois Department of Transportation. -{% enddocs %} - -## nearest_arterial_road_name - -{% docs column_nearest_arterial_road_name %} -Nearest arterial road name. - -Road data sourced from Illinois Department of Transportation. -{% enddocs %} - -## nearest_arterial_road_speed_limit - -{% docs column_nearest_arterial_road_speed_limit %} -Speed limit for the nearest arterial road - -Road data sourced from Illinois Department of Transportation -{% enddocs %} - -## nearest_arterial_road_surface_type - -{% docs column_nearest_arterial_road_surface_type %} -Surface type for the nearest arterial road (for example brick, stone, etc.). - -Road data sourced from Illinois Department of Transportation. -{% enddocs %} - ## nearest_bike_trail_dist_ft {% docs column_nearest_bike_trail_dist_ft %} @@ -162,54 +114,6 @@ Nearest cemetery name. Cemetery data sourced from Cook County GIS {% enddocs %} -## nearest_collector_road_daily_traffic - -{% docs column_nearest_collector_road_daily_traffic %} -Daily traffic of nearest collector road. 
- -Road data sourced from Illinois Department of Transportation. -{% enddocs %} - -## nearest_collector_road_dist_ft - -{% docs column_nearest_collector_road_dist_ft %} -Distance to nearest collector road. - -Road data sourced from Illinois Department of Transportation. -{% enddocs %} - -## nearest_collector_road_lanes - -{% docs column_nearest_collector_road_lanes %} -Number of lanes for the nearest collector road. - -Road data sourced from Illinois Department of Transportation. -{% enddocs %} - -## nearest_collector_road_name - -{% docs column_nearest_collector_road_name %} -Nearest collector road name. - -Road data sourced from Illinois Department of Transportation. -{% enddocs %} - -## nearest_collector_road_speed_limit - -{% docs column_nearest_collector_road_speed_limit %} -Speed limit for the nearest collector road - -Road data sourced from Illinois Department of Transportation -{% enddocs %} - -## nearest_collector_road_surface_type - -{% docs column_nearest_collector_road_surface_type %} -Surface type for the nearest collector road (for example brick, stone, etc.). - -Road data sourced from Illinois Department of Transportation. -{% enddocs %} - ## nearest_cta_route_dist_ft {% docs column_nearest_cta_route_dist_ft %} @@ -295,54 +199,6 @@ Nearest grocery store distance (feet) Nearest grocery store name via OSM {% enddocs %} -## nearest_highway_road_daily_traffic - -{% docs column_nearest_highway_road_daily_traffic %} -Daily traffic of nearest highway road. - -Road data sourced from Illinois Department of Transportation. -{% enddocs %} - -## nearest_highway_road_dist_ft - -{% docs column_nearest_highway_road_dist_ft %} -Distance to nearest highway road. - -Road data sourced from Illinois Department of Transportation. -{% enddocs %} - -## nearest_highway_road_lanes - -{% docs column_nearest_highway_road_lanes %} -Number of lanes for the nearest highway road. - -Road data sourced from Illinois Department of Transportation. 
-{% enddocs %} - -## nearest_highway_road_name - -{% docs column_nearest_highway_road_name %} -Nearest highway road name. - -Road data sourced from Illinois Department of Transportation. -{% enddocs %} - -## nearest_highway_road_speed_limit - -{% docs column_nearest_highway_road_speed_limit %} -Speed limit for the nearest highway road. - -Road data sourced from Illinois Department of Transportation. -{% enddocs %} - -## nearest_highway_road_surface type - -{% docs column_nearest_highway_road_surface_type %} -Surface type for the nearest highway road (for example brick, stone, etc.). - -Road data sourced from Illinois Department of Transportation. -{% enddocs %} - ## nearest_hospital_dist_ft {% docs column_nearest_hospital_dist_ft %} @@ -535,6 +391,150 @@ Railroad locations sourced from Cook County GIS. Inclusive of any rail (CTA, Metra, non-passenger freight, etc.) {% enddocs %} +## nearest_road_arterial_daily_traffic + +{% docs column_nearest_road_arterial_daily_traffic %} +Daily traffic of nearest arterial road. + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_road_arterial_dist_ft + +{% docs column_nearest_road_arterial_dist_ft %} +Distance to nearest arterial road. + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_road_arterial_lanes + +{% docs column_nearest_road_arterial_lanes %} +Number of lanes for the nearest arterial road. + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_road_arterial_name + +{% docs column_nearest_road_arterial_name %} +Nearest arterial road name. + +Road data sourced from Illinois Department of Transportation. 
+{% enddocs %} + +## nearest_road_arterial_speed_limit + +{% docs column_nearest_road_arterial_speed_limit %} +Speed limit for the nearest arterial road. + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_road_arterial_surface_type + +{% docs column_nearest_road_arterial_surface_type %} +Surface type for the nearest arterial road (for example brick, stone, etc.). + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_road_collector_daily_traffic + +{% docs column_nearest_road_collector_daily_traffic %} +Daily traffic of nearest collector road. + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_road_collector_dist_ft + +{% docs column_nearest_road_collector_dist_ft %} +Distance to nearest collector road. + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_road_collector_lanes + +{% docs column_nearest_road_collector_lanes %} +Number of lanes for the nearest collector road. + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_road_collector_name + +{% docs column_nearest_road_collector_name %} +Nearest collector road name. + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_road_collector_speed_limit + +{% docs column_nearest_road_collector_speed_limit %} +Speed limit for the nearest collector road. + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_road_collector_surface_type + +{% docs column_nearest_road_collector_surface_type %} +Surface type for the nearest collector road (for example brick, stone, etc.). + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_road_highway_daily_traffic + +{% docs column_nearest_road_highway_daily_traffic %} +Daily traffic of nearest highway road.
+ +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_road_highway_dist_ft + +{% docs column_nearest_road_highway_dist_ft %} +Distance to nearest highway road. + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_road_highway_lanes + +{% docs column_nearest_road_highway_lanes %} +Number of lanes for the nearest highway road. + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_road_highway_name + +{% docs column_nearest_road_highway_name %} +Nearest highway road name. + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_road_highway_speed_limit + +{% docs column_nearest_road_highway_speed_limit %} +Speed limit for the nearest highway road. + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + +## nearest_road_highway_surface_type + +{% docs column_nearest_road_highway_surface_type %} +Surface type for the nearest highway road (for example brick, stone, etc.). + +Road data sourced from Illinois Department of Transportation. +{% enddocs %} + ## nearest_secondary_road_dist_ft {% docs column_nearest_secondary_road_dist_ft %} diff --git a/dbt/models/proximity/docs.md b/dbt/models/proximity/docs.md index 5220a28d9..4c3c1dce0 100644 --- a/dbt/models/proximity/docs.md +++ b/dbt/models/proximity/docs.md @@ -38,19 +38,6 @@ as a result of contributions from both airports plus baseline DNL of 50. **Primary Key**: `pin10`, `year` {% enddocs %} -# dist_pin_to_arterial_road - -{% docs table_dist_pin_to_arterial_road %} -Distance from each PIN to the nearest arterial road.
Data is derived from Illinois Department of Transportation - Added features include - - lanes - - average daily traffic - - speed limit - - road surface - -**Primary Key**: `pin10`, `year` -{% enddocs %} - # dist_pin_to_bike_trail {% docs table_dist_pin_to_bike_trail %} @@ -67,19 +54,6 @@ Distance from each PIN to the nearest cemetery. **Primary Key**: `pin10`, `year` {% enddocs %} -# dist_pin_to_collector_road - -{% docs table_dist_pin_to_collector_road %} -Distance from each PIN to the nearest collector road. Data is derived from Illinois Department of Transportation - Added features include - - lanes - - average daily traffic - - speed limit - - road surface - -**Primary Key**: `pin10`, `year` -{% enddocs %} - # dist_pin_to_cta_route @@ -127,19 +101,6 @@ Distance from each PIN to the nearest grocery store. Locations sourced from Open **Primary Key**: `pin10`, `year` {% enddocs %} -# dist_pin_to_highway_road - -{% docs table_dist_pin_to_highway_road %} -Distance from each PIN to the nearest highway. Data is derived from Illinois Department of Transportation - Added features include - - lanes - - average daily traffic - - speed limit - - road surface - -**Primary Key**: `pin10`, `year` -{% enddocs %} - # dist_pin_to_hospital {% docs table_dist_pin_to_hospital %} @@ -241,6 +202,46 @@ Distance from each PIN to the nearest rail track of any kind. **Primary Key**: `pin10`, `year` {% enddocs %} +# dist_pin_to_road_arterial + +{% docs table_dist_pin_to_road_arterial %} +Distance from each PIN to the nearest arterial road. Data is derived from Illinois Department of Transportation + Added features include + - lanes + - average daily traffic + - speed limit + - road surface + +**Primary Key**: `pin10`, `year` +{% enddocs %} + +# dist_pin_to_road_collector + +{% docs table_dist_pin_to_road_collector %} +Distance from each PIN to the nearest collector road. 
Data is derived from Illinois Department of Transportation + Added features include + - lanes + - average daily traffic + - speed limit + - road surface + +**Primary Key**: `pin10`, `year` +{% enddocs %} + +# dist_pin_to_road_highway + +{% docs table_dist_pin_to_road_highway %} +Distance from each PIN to the nearest highway. Data is derived from Illinois Department of Transportation + Added features include + - lanes + - average daily traffic + - speed limit + - road surface + +**Primary Key**: `pin10`, `year` +{% enddocs %} + + # dist_pin_to_secondary_road {% docs table_dist_pin_to_secondary_road %} diff --git a/dbt/models/proximity/proximity.crosswalk_year_fill.sql b/dbt/models/proximity/proximity.crosswalk_year_fill.sql index 45658d2a5..047cb8b7e 100644 --- a/dbt/models/proximity/proximity.crosswalk_year_fill.sql +++ b/dbt/models/proximity/proximity.crosswalk_year_fill.sql @@ -22,14 +22,10 @@ WITH unfilled AS ( AS num_school_rating_data_year, MAX(dist_pin_to_airport.airport_data_year) AS airport_data_year, - MAX(dist_pin_to_arterial_road.nearest_arterial_road_data_year) - AS nearest_arterial_road_data_year, MAX(dist_pin_to_bike_trail.nearest_bike_trail_data_year) AS nearest_bike_trail_data_year, MAX(dist_pin_to_cemetery.nearest_cemetery_data_year) AS nearest_cemetery_data_year, - MAX(dist_pin_to_collector_road.nearest_collector_road_data_year) - AS nearest_collector_road_data_year, MAX(dist_pin_to_cta_route.nearest_cta_route_data_year) AS nearest_cta_route_data_year, MAX(dist_pin_to_cta_stop.nearest_cta_stop_data_year) @@ -38,8 +34,6 @@ WITH unfilled AS ( AS nearest_golf_course_data_year, MAX(dist_pin_to_grocery_store.nearest_grocery_store_data_year) AS nearest_grocery_store_data_year, - MAX(dist_pin_to_highway_road.nearest_highway_road_data_year) - AS nearest_highway_road_data_year, MAX(dist_pin_to_hospital.nearest_hospital_data_year) AS nearest_hospital_data_year, MAX(dist_pin_to_lake_michigan.lake_michigan_data_year) @@ -56,6 +50,12 @@ WITH unfilled AS ( AS 
nearest_park_data_year, MAX(dist_pin_to_railroad.nearest_railroad_data_year) AS nearest_railroad_data_year, + MAX(dist_pin_to_road_arterial.nearest_road_arterial_data_year) + AS nearest_road_arterial_data_year, + MAX(dist_pin_to_road_collector.nearest_road_collector_data_year) + AS nearest_road_collector_data_year, + MAX(dist_pin_to_road_highway.nearest_road_highway_data_year) + AS nearest_road_highway_data_year, MAX(dist_pin_to_secondary_road.nearest_secondary_road_data_year) AS nearest_secondary_road_data_year, MAX(dist_pin_to_stadium.nearest_stadium_data_year) @@ -96,12 +96,6 @@ WITH unfilled AS ( airport_data_year FROM {{ ref('proximity.dist_pin_to_airport' ) }} ) AS dist_pin_to_airport ON pin.year = dist_pin_to_airport.year - LEFT JOIN ( - SELECT DISTINCT - year, - nearest_arterial_road_data_year - FROM {{ ref('proximity.dist_pin_to_arterial_road' ) }} - ) AS dist_pin_to_arterial_road ON pin.year = dist_pin_to_arterial_road.year LEFT JOIN ( SELECT DISTINCT year, @@ -114,13 +108,6 @@ WITH unfilled AS ( nearest_cemetery_data_year FROM {{ ref('proximity.dist_pin_to_cemetery') }} ) AS dist_pin_to_cemetery ON pin.year = dist_pin_to_cemetery.year - LEFT JOIN ( - SELECT DISTINCT - year, - nearest_collector_road_data_year - FROM {{ ref('proximity.dist_pin_to_collector_road' ) }} - ) AS dist_pin_to_collector_road - ON pin.year = dist_pin_to_collector_road.year LEFT JOIN ( SELECT DISTINCT year, @@ -145,12 +132,6 @@ WITH unfilled AS ( nearest_grocery_store_data_year FROM {{ ref('proximity.dist_pin_to_grocery_store') }} ) AS dist_pin_to_grocery_store ON pin.year = dist_pin_to_grocery_store.year - LEFT JOIN ( - SELECT DISTINCT - year, - nearest_highway_road_data_year - FROM {{ ref('proximity.dist_pin_to_highway_road' ) }} - ) AS dist_pin_to_highway_road ON pin.year = dist_pin_to_highway_road.year LEFT JOIN ( SELECT DISTINCT year, @@ -201,6 +182,25 @@ WITH unfilled AS ( nearest_railroad_data_year FROM {{ ref('proximity.dist_pin_to_railroad') }} ) AS dist_pin_to_railroad ON 
pin.year = dist_pin_to_railroad.year + LEFT JOIN ( + SELECT DISTINCT + year, + nearest_road_collector_data_year + FROM {{ ref('proximity.dist_pin_to_road_collector' ) }} + ) AS dist_pin_to_road_collector + ON pin.year = dist_pin_to_road_collector.year + LEFT JOIN ( + SELECT DISTINCT + year, + nearest_road_arterial_data_year + FROM {{ ref('proximity.dist_pin_to_road_arterial' ) }} + ) AS dist_pin_to_road_arterial ON pin.year = dist_pin_to_road_arterial.year + LEFT JOIN ( + SELECT DISTINCT + year, + nearest_highway_road_data_year + FROM {{ ref('proximity.dist_pin_to_road_highway' ) }} + ) AS dist_pin_to_road_highway ON pin.year = dist_pin_to_road_highway.year LEFT JOIN ( SELECT DISTINCT year, @@ -267,12 +267,6 @@ SELECT IGNORE NULLS OVER (ORDER BY year DESC) ) AS airport_data_year, - COALESCE( - nearest_arterial_road_data_year, - LAST_VALUE(nearest_arterial_road_data_year) - IGNORE NULLS - OVER (ORDER BY year DESC) - ) AS nearest_arterial_road_data_year, COALESCE( nearest_bike_trail_data_year, LAST_VALUE(nearest_bike_trail_data_year) @@ -315,6 +309,12 @@ SELECT IGNORE NULLS OVER (ORDER BY year DESC) ) AS nearest_grocery_store_data_year, + COALESCE( + nearest_road_arterial_data_year, + LAST_VALUE(nearest_road_arterial_data_year) + IGNORE NULLS + OVER (ORDER BY year DESC) + ) AS nearest_road_arterial_data_year, COALESCE( nearest_highway_road_data_year, LAST_VALUE(nearest_highway_road_data_year) diff --git a/dbt/models/proximity/proximity.vw_pin10_proximity.sql b/dbt/models/proximity/proximity.vw_pin10_proximity.sql index b09e50d34..48b7e86fb 100644 --- a/dbt/models/proximity/proximity.vw_pin10_proximity.sql +++ b/dbt/models/proximity/proximity.vw_pin10_proximity.sql @@ -20,14 +20,6 @@ SELECT dist_pin_to_airport.airport_dnl_total, - dist_pin_to_arterial_road.nearest_arterial_road_name, - dist_pin_to_arterial_road.nearest_arterial_road_daily_traffic, - dist_pin_to_arterial_road.nearest_arterial_road_speed_limit, - 
dist_pin_to_arterial_road.nearest_arterial_road_surface_type, - dist_pin_to_arterial_road.nearest_arterial_road_lanes, - dist_pin_to_arterial_road.nearest_arterial_road_dist_ft, - dist_pin_to_arterial_road.nearest_arterial_road_data_year, - dist_pin_to_bike_trail.nearest_bike_trail_id, dist_pin_to_bike_trail.nearest_bike_trail_name, dist_pin_to_bike_trail.nearest_bike_trail_dist_ft, @@ -38,14 +30,6 @@ SELECT dist_pin_to_cemetery.nearest_cemetery_dist_ft, dist_pin_to_cemetery.nearest_cemetery_data_year, - dist_pin_to_collector_road.nearest_collector_road_name, - dist_pin_to_collector_road.nearest_collector_road_daily_traffic, - dist_pin_to_collector_road.nearest_collector_road_speed_limit, - dist_pin_to_collector_road.nearest_collector_road_surface_type, - dist_pin_to_collector_road.nearest_collector_road_lanes, - dist_pin_to_collector_road.nearest_collector_road_dist_ft, - dist_pin_to_collector_road.nearest_collector_road_data_year, - dist_pin_to_cta_route.nearest_cta_route_id, dist_pin_to_cta_route.nearest_cta_route_name, dist_pin_to_cta_route.nearest_cta_route_dist_ft, @@ -64,14 +48,6 @@ SELECT dist_pin_to_grocery_store.nearest_grocery_store_dist_ft, dist_pin_to_grocery_store.nearest_grocery_store_data_year, - dist_pin_to_highway_road.nearest_highway_road_name, - dist_pin_to_highway_road.nearest_highway_road_daily_traffic, - dist_pin_to_highway_road.nearest_highway_road_speed_limit, - dist_pin_to_highway_road.nearest_highway_road_surface_type, - dist_pin_to_highway_road.nearest_highway_road_lanes, - dist_pin_to_highway_road.nearest_highway_road_dist_ft, - dist_pin_to_highway_road.nearest_highway_road_data_year, - dist_pin_to_hospital.nearest_hospital_gnis_code, dist_pin_to_hospital.nearest_hospital_name, dist_pin_to_hospital.nearest_hospital_dist_ft, @@ -110,6 +86,30 @@ SELECT dist_pin_to_railroad.nearest_railroad_dist_ft, dist_pin_to_railroad.nearest_railroad_data_year, + dist_pin_to_road_arterial.nearest_road_arterial_name, + 
dist_pin_to_road_arterial.nearest_road_arterial_daily_traffic, + dist_pin_to_road_arterial.nearest_road_arterial_speed_limit, + dist_pin_to_road_arterial.nearest_road_arterial_surface_type, + dist_pin_to_road_arterial.nearest_road_arterial_lanes, + dist_pin_to_road_arterial.nearest_road_arterial_dist_ft, + dist_pin_to_road_arterial.nearest_road_arterial_data_year, + + dist_pin_to_road_collector.nearest_road_collector_name, + dist_pin_to_road_collector.nearest_road_collector_daily_traffic, + dist_pin_to_road_collector.nearest_road_collector_speed_limit, + dist_pin_to_road_collector.nearest_road_collector_surface_type, + dist_pin_to_road_collector.nearest_road_collector_lanes, + dist_pin_to_road_collector.nearest_road_collector_dist_ft, + dist_pin_to_road_collector.nearest_road_collector_data_year, + + dist_pin_to_road_highway.nearest_road_highway_name, + dist_pin_to_road_highway.nearest_road_highway_daily_traffic, + dist_pin_to_road_highway.nearest_road_highway_speed_limit, + dist_pin_to_road_highway.nearest_road_highway_surface_type, + dist_pin_to_road_highway.nearest_road_highway_lanes, + dist_pin_to_road_highway.nearest_road_highway_dist_ft, + dist_pin_to_road_highway.nearest_road_highway_data_year, + dist_pin_to_secondary_road.nearest_secondary_road_osm_id, dist_pin_to_secondary_road.nearest_secondary_road_name, dist_pin_to_secondary_road.nearest_secondary_road_dist_ft, @@ -155,11 +155,6 @@ LEFT JOIN {{ ref('proximity.dist_pin_to_airport') }} AS dist_pin_to_airport ON pin.pin10 = dist_pin_to_airport.pin10 AND pin.year = dist_pin_to_airport.year -LEFT JOIN - {{ ref('proximity.dist_pin_to_arterial_road') }} - AS dist_pin_to_arterial_road - ON pin.pin10 = dist_pin_to_arterial_road.pin10 - AND pin.year = dist_pin_to_arterial_road.year LEFT JOIN {{ ref('proximity.dist_pin_to_bike_trail') }} AS dist_pin_to_bike_trail ON pin.pin10 = dist_pin_to_bike_trail.pin10 @@ -168,11 +163,6 @@ LEFT JOIN {{ ref('proximity.dist_pin_to_cemetery') }} AS dist_pin_to_cemetery ON 
pin.pin10 = dist_pin_to_cemetery.pin10 AND pin.year = dist_pin_to_cemetery.year -LEFT JOIN - {{ ref('proximity.dist_pin_to_collector_road') }} - AS dist_pin_to_collector_road - ON pin.pin10 = dist_pin_to_collector_road.pin10 - AND pin.year = dist_pin_to_collector_road.year LEFT JOIN {{ ref('proximity.dist_pin_to_cta_route') }} AS dist_pin_to_cta_route ON pin.pin10 = dist_pin_to_cta_route.pin10 @@ -190,11 +180,6 @@ LEFT JOIN AS dist_pin_to_grocery_store ON pin.pin10 = dist_pin_to_grocery_store.pin10 AND pin.year = dist_pin_to_grocery_store.year -LEFT JOIN - {{ ref('proximity.dist_pin_to_highway_road') }} - AS dist_pin_to_highway_road - ON pin.pin10 = dist_pin_to_highway_road.pin10 - AND pin.year = dist_pin_to_highway_road.year LEFT JOIN {{ ref('proximity.dist_pin_to_hospital') }} AS dist_pin_to_hospital ON pin.pin10 = dist_pin_to_hospital.pin10 @@ -228,6 +213,21 @@ LEFT JOIN {{ ref('proximity.dist_pin_to_railroad') }} AS dist_pin_to_railroad ON pin.pin10 = dist_pin_to_railroad.pin10 AND pin.year = dist_pin_to_railroad.year +LEFT JOIN + {{ ref('proximity.dist_pin_to_road_arterial') }} + AS dist_pin_to_road_arterial + ON pin.pin10 = dist_pin_to_road_arterial.pin10 + AND pin.year = dist_pin_to_road_arterial.year +LEFT JOIN + {{ ref('proximity.dist_pin_to_road_collector') }} + AS dist_pin_to_road_collector + ON pin.pin10 = dist_pin_to_road_collector.pin10 + AND pin.year = dist_pin_to_road_collector.year +LEFT JOIN + {{ ref('proximity.dist_pin_to_road_highway') }} + AS dist_pin_to_road_highway + ON pin.pin10 = dist_pin_to_road_highway.pin10 + AND pin.year = dist_pin_to_road_highway.year LEFT JOIN {{ ref('proximity.dist_pin_to_secondary_road') }} AS dist_pin_to_secondary_road diff --git a/dbt/models/proximity/proximity.vw_pin10_proximity_fill.sql b/dbt/models/proximity/proximity.vw_pin10_proximity_fill.sql index cd7eb235e..c5fb21f7a 100644 --- a/dbt/models/proximity/proximity.vw_pin10_proximity_fill.sql +++ b/dbt/models/proximity/proximity.vw_pin10_proximity_fill.sql @@ 
-23,14 +23,6 @@ SELECT dist_pin_to_airport.airport_dnl_total, dist_pin_to_airport.airport_data_year, - dist_pin_to_arterial_road.nearest_arterial_road_name, - dist_pin_to_arterial_road.nearest_arterial_road_dist_ft, - dist_pin_to_arterial_road.nearest_arterial_road_daily_traffic, - dist_pin_to_arterial_road.nearest_arterial_road_speed_limit, - dist_pin_to_arterial_road.nearest_arterial_road_surface_type, - dist_pin_to_arterial_road.nearest_arterial_road_lanes, - dist_pin_to_arterial_road.nearest_arterial_road_data_year, - dist_pin_to_bike_trail.nearest_bike_trail_id, dist_pin_to_bike_trail.nearest_bike_trail_name, dist_pin_to_bike_trail.nearest_bike_trail_dist_ft, @@ -41,14 +33,6 @@ SELECT dist_pin_to_cemetery.nearest_cemetery_dist_ft, dist_pin_to_cemetery.nearest_cemetery_data_year, - dist_pin_to_collector_road.nearest_collector_road_name, - dist_pin_to_collector_road.nearest_collector_road_dist_ft, - dist_pin_to_collector_road.nearest_collector_road_daily_traffic, - dist_pin_to_collector_road.nearest_collector_road_speed_limit, - dist_pin_to_collector_road.nearest_collector_road_surface_type, - dist_pin_to_collector_road.nearest_collector_road_lanes, - dist_pin_to_collector_road.nearest_collector_road_data_year, - dist_pin_to_cta_route.nearest_cta_route_id, dist_pin_to_cta_route.nearest_cta_route_name, dist_pin_to_cta_route.nearest_cta_route_dist_ft, @@ -67,13 +51,6 @@ SELECT dist_pin_to_grocery_store.nearest_grocery_store_dist_ft, dist_pin_to_grocery_store.nearest_grocery_store_data_year, - dist_pin_to_highway_road.nearest_highway_road_name, - dist_pin_to_highway_road.nearest_highway_road_dist_ft, - dist_pin_to_highway_road.nearest_highway_road_daily_traffic, - dist_pin_to_highway_road.nearest_highway_road_speed_limit, - dist_pin_to_highway_road.nearest_highway_road_surface_type, - dist_pin_to_highway_road.nearest_highway_road_lanes, - dist_pin_to_hospital.nearest_hospital_gnis_code, dist_pin_to_hospital.nearest_hospital_name, 
dist_pin_to_hospital.nearest_hospital_dist_ft, @@ -112,6 +89,29 @@ SELECT dist_pin_to_railroad.nearest_railroad_dist_ft, dist_pin_to_railroad.nearest_railroad_data_year, + dist_pin_to_road_arterial.nearest_road_arterial_name, + dist_pin_to_road_arterial.nearest_road_arterial_dist_ft, + dist_pin_to_road_arterial.nearest_road_arterial_daily_traffic, + dist_pin_to_road_arterial.nearest_road_arterial_speed_limit, + dist_pin_to_road_arterial.nearest_road_arterial_surface_type, + dist_pin_to_road_arterial.nearest_road_arterial_lanes, + dist_pin_to_road_arterial.nearest_road_arterial_data_year, + + dist_pin_to_road_collector.nearest_road_collector_name, + dist_pin_to_road_collector.nearest_road_collector_dist_ft, + dist_pin_to_road_collector.nearest_road_collector_daily_traffic, + dist_pin_to_road_collector.nearest_road_collector_speed_limit, + dist_pin_to_road_collector.nearest_road_collector_surface_type, + dist_pin_to_road_collector.nearest_road_collector_lanes, + dist_pin_to_road_collector.nearest_road_collector_data_year, + + dist_pin_to_road_highway.nearest_road_highway_name, + dist_pin_to_road_highway.nearest_road_highway_dist_ft, + dist_pin_to_road_highway.nearest_road_highway_daily_traffic, + dist_pin_to_road_highway.nearest_road_highway_speed_limit, + dist_pin_to_road_highway.nearest_road_highway_surface_type, + dist_pin_to_road_highway.nearest_road_highway_lanes, + dist_pin_to_secondary_road.nearest_secondary_road_osm_id, dist_pin_to_secondary_road.nearest_secondary_road_name, dist_pin_to_secondary_road.nearest_secondary_road_dist_ft, @@ -162,11 +162,6 @@ LEFT JOIN {{ ref('proximity.dist_pin_to_airport') }} AS dist_pin_to_airport ON pin.pin10 = dist_pin_to_airport.pin10 AND cyf.airport_data_year = dist_pin_to_airport.year -LEFT JOIN - {{ ref('proximity.dist_pin_to_arterial_road') }} - AS dist_pin_to_arterial_road - ON pin.pin10 = dist_pin_to_arterial_road.pin10 - AND cyf.nearest_arterial_road_data_year = dist_pin_to_arterial_road.year LEFT JOIN {{ 
ref('proximity.dist_pin_to_bike_trail') }} AS dist_pin_to_bike_trail ON pin.pin10 = dist_pin_to_bike_trail.pin10 @@ -175,11 +170,6 @@ LEFT JOIN {{ ref('proximity.dist_pin_to_cemetery') }} AS dist_pin_to_cemetery ON pin.pin10 = dist_pin_to_cemetery.pin10 AND cyf.nearest_cemetery_data_year = dist_pin_to_cemetery.year -LEFT JOIN - {{ ref('proximity.dist_pin_to_collector_road') }} - AS dist_pin_to_collector_road - ON pin.pin10 = dist_pin_to_collector_road.pin10 - AND cyf.nearest_collector_road_data_year = dist_pin_to_collector_road.year LEFT JOIN {{ ref('proximity.dist_pin_to_cta_route') }} AS dist_pin_to_cta_route ON pin.pin10 = dist_pin_to_cta_route.pin10 @@ -197,10 +187,6 @@ LEFT JOIN AS dist_pin_to_grocery_store ON pin.pin10 = dist_pin_to_grocery_store.pin10 AND cyf.nearest_grocery_store_data_year = dist_pin_to_grocery_store.year -LEFT JOIN - {{ ref('proximity.dist_pin_to_highway_road') }} AS dist_pin_to_highway_road - ON pin.pin10 = dist_pin_to_highway_road.pin10 - AND cyf.nearest_highway_road_data_year = dist_pin_to_highway_road.year LEFT JOIN {{ ref('proximity.dist_pin_to_hospital') }} AS dist_pin_to_hospital ON pin.pin10 = dist_pin_to_hospital.pin10 @@ -238,6 +224,20 @@ LEFT JOIN {{ ref('proximity.dist_pin_to_railroad') }} AS dist_pin_to_railroad ON pin.pin10 = dist_pin_to_railroad.pin10 AND cyf.nearest_railroad_data_year = dist_pin_to_railroad.year +LEFT JOIN + {{ ref('proximity.dist_pin_to_road_arterial') }} + AS dist_pin_to_road_arterial + ON pin.pin10 = dist_pin_to_road_arterial.pin10 + AND cyf.nearest_road_arterial_data_year = dist_pin_to_road_arterial.year +LEFT JOIN + {{ ref('proximity.dist_pin_to_road_collector') }} + AS dist_pin_to_road_collector + ON pin.pin10 = dist_pin_to_road_collector.pin10 + AND cyf.nearest_road_collector_data_year = dist_pin_to_road_collector.year +LEFT JOIN + {{ ref('proximity.dist_pin_to_road_highway') }} AS dist_pin_to_road_highway + ON pin.pin10 = dist_pin_to_road_highway.pin10 + AND cyf.nearest_road_highway_data_year = 
dist_pin_to_road_highway.year LEFT JOIN {{ ref('proximity.dist_pin_to_secondary_road') }} AS dist_pin_to_secondary_road From 2b2b6c4196a68a6127f6222d3406cff5f90a6626 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 20 Nov 2024 17:29:37 +0000 Subject: [PATCH 111/166] Fix crosswalk --- .../proximity.crosswalk_year_fill.sql | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/dbt/models/proximity/proximity.crosswalk_year_fill.sql b/dbt/models/proximity/proximity.crosswalk_year_fill.sql index 047cb8b7e..85c4a718c 100644 --- a/dbt/models/proximity/proximity.crosswalk_year_fill.sql +++ b/dbt/models/proximity/proximity.crosswalk_year_fill.sql @@ -182,6 +182,12 @@ WITH unfilled AS ( nearest_railroad_data_year FROM {{ ref('proximity.dist_pin_to_railroad') }} ) AS dist_pin_to_railroad ON pin.year = dist_pin_to_railroad.year + LEFT JOIN ( + SELECT DISTINCT + year, + nearest_road_arterial_data_year + FROM {{ ref('proximity.dist_pin_to_road_arterial' ) }} + ) AS dist_pin_to_road_arterial ON pin.year = dist_pin_to_road_arterial.year LEFT JOIN ( SELECT DISTINCT year, @@ -189,12 +195,6 @@ WITH unfilled AS ( FROM {{ ref('proximity.dist_pin_to_road_collector' ) }} ) AS dist_pin_to_road_collector ON pin.year = dist_pin_to_road_collector.year - LEFT JOIN ( - SELECT DISTINCT - year, - nearest_road_arterial_data_year - FROM {{ ref('proximity.dist_pin_to_road_arterial' ) }} - ) AS dist_pin_to_road_arterial ON pin.year = dist_pin_to_road_arterial.year LEFT JOIN ( SELECT DISTINCT year, @@ -291,12 +291,6 @@ SELECT IGNORE NULLS OVER (ORDER BY year DESC) ) AS nearest_cta_stop_data_year, - COALESCE( - nearest_collector_road_data_year, - LAST_VALUE(nearest_collector_road_data_year) - IGNORE NULLS - OVER (ORDER BY year DESC) - ) AS nearest_collector_road_data_year, COALESCE( nearest_golf_course_data_year, LAST_VALUE(nearest_golf_course_data_year) @@ -309,18 +303,6 @@ SELECT IGNORE NULLS OVER (ORDER BY year DESC) ) AS nearest_grocery_store_data_year, - 
COALESCE( - nearest_road_arterial_data_year, - LAST_VALUE(nearest_road_arterial_data_year) - IGNORE NULLS - OVER (ORDER BY year DESC) - ) AS nearest_road_arterial_data_year, - COALESCE( - nearest_highway_road_data_year, - LAST_VALUE(nearest_highway_road_data_year) - IGNORE NULLS - OVER (ORDER BY year DESC) - ) AS nearest_highway_road_data_year, COALESCE( nearest_hospital_data_year, LAST_VALUE(nearest_hospital_data_year) @@ -367,6 +349,24 @@ SELECT IGNORE NULLS OVER (ORDER BY year DESC) ) AS nearest_railroad_data_year, + COALESCE( + nearest_road_arterial_data_year, + LAST_VALUE(nearest_road_arterial_data_year) + IGNORE NULLS + OVER (ORDER BY year DESC) + ) AS nearest_road_arterial_data_year, + COALESCE( + nearest_road_collector_data_year, + LAST_VALUE(nearest_road_collector_data_year) + IGNORE NULLS + OVER (ORDER BY year DESC) + ) AS nearest_road_collector_data_year, + COALESCE( + nearest_road_highway_data_year, + LAST_VALUE(nearest_road_highway_data_year) + IGNORE NULLS + OVER (ORDER BY year DESC) + ) AS nearest_road_highway_data_year, COALESCE( nearest_secondary_road_data_year, LAST_VALUE(nearest_secondary_road_data_year) From 1e92767a97887ea939f88fd329a03460c95f3e6a Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 20 Nov 2024 18:23:44 +0000 Subject: [PATCH 112/166] More renames --- dbt/models/model/schema.yml | 127 +++++++++++------- ... proximity.dist_pin_to_road_collector.sql} | 0 ... => proximity.dist_pin_to_road_highwaysql} | 0 ... 
proximity.dist_pin_to_roads_arterial.sql} | 14 +- dbt/models/proximity/schema.yml | 18 +-- 5 files changed, 91 insertions(+), 68 deletions(-) rename dbt/models/proximity/{proximity.dist_pin_to_collector_road.sql => proximity.dist_pin_to_road_collector.sql} (100%) rename dbt/models/proximity/{proximity.dist_pin_to_highway_road.sql => proximity.dist_pin_to_road_highwaysql} (100%) rename dbt/models/proximity/{proximity.dist_pin_to_arterial_road.sql => proximity.dist_pin_to_roads_arterial.sql} (58%) diff --git a/dbt/models/model/schema.yml b/dbt/models/model/schema.yml index 57e5851db..03b4308c4 100644 --- a/dbt/models/model/schema.yml +++ b/dbt/models/model/schema.yml @@ -396,42 +396,12 @@ models: - &prox_lake_michigan_dist_ft name: prox_lake_michigan_dist_ft description: '{{ doc("column_lake_michigan_dist_ft") }}' - - &prox_nearest_arterial_road_daily_traffic - name: prox_nearest_arterial_road_daily_traffic - description: '{{ doc("column_nearest_arterial_road_daily_traffic") }}' - - &prox_nearest_arterial_road_dist_ft - name: prox_nearest_arterial_road_dist_ft - description: '{{ doc("column_nearest_arterial_road_dist_ft") }}' - - &prox_nearest_arterial_road_lanes - name: prox_nearest_arterial_road_lanes - description: '{{ doc("column_nearest_arterial_road_lanes") }}' - - &prox_nearest_arterial_road_speed_limit - name: prox_nearest_arterial_road_speed_limit - description: '{{ doc("column_nearest_arterial_road_speed_limit") }}' - - &prox_nearest_arterial_road_surface_type - name: prox_nearest_arterial_road_surface_type - description: '{{ doc("column_nearest_arterial_road_surface_type") }}' - &prox_nearest_bike_trail_dist_ft name: prox_nearest_bike_trail_dist_ft description: '{{ doc("column_nearest_bike_trail_dist_ft") }}' - &prox_nearest_cemetery_dist_ft name: prox_nearest_cemetery_dist_ft description: '{{ doc("column_nearest_cemetery_dist_ft") }}' - - &prox_nearest_collector_road_daily_traffic - name: prox_nearest_collector_road_daily_traffic - description: '{{ 
doc("column_nearest_collector_road_daily_traffic") }}' - - &prox_nearest_collector_road_dist_ft - name: prox_nearest_collector_road_dist_ft - description: '{{ doc("column_nearest_collector_road_dist_ft") }}' - - &prox_nearest_collector_road_lanes - name: prox_nearest_collector_road_lanes - description: '{{ doc("column_nearest_collector_road_lanes") }}' - - &prox_nearest_collector_road_speed_limit - name: prox_nearest_collector_road_speed_limit - description: '{{ doc("column_nearest_collector_road_speed_limit") }}' - - &prox_nearest_collector_road_surface_type - name: prox_nearest_collector_road_surface_type - description: '{{ doc("column_nearest_collector_road_surface_type") }}' - &prox_nearest_cta_route_dist_ft name: prox_nearest_cta_route_dist_ft description: '{{ doc("column_nearest_cta_route_dist_ft") }}' @@ -441,21 +411,6 @@ models: - &prox_nearest_golf_course_dist_ft name: prox_nearest_golf_course_dist_ft description: '{{ doc("column_nearest_golf_course_dist_ft") }}' - - &prox_nearest_highway_road_daily_traffic - name: prox_nearest_highway_road_daily_traffic - description: '{{ doc("column_nearest_highway_road_daily_traffic") }}' - - &prox_nearest_highway_road_dist_ft - name: prox_nearest_highway_road_dist_ft - description: '{{ doc("column_nearest_highway_road_dist_ft") }}' - - &prox_nearest_highway_road_lanes - name: prox_nearest_highway_road_lanes - description: '{{ doc("column_nearest_highway_road_lanes") }}' - - &prox_nearest_highway_road_speed_limit - name: prox_nearest_highway_road_speed_limit - description: '{{ doc("column_nearest_highway_road_speed_limit") }}' - - &prox_nearest_highway_road_surface_type - name: prox_nearest_highway_road_surface_type - description: '{{ doc("column_nearest_highway_road_surface_type") }}' - &prox_nearest_hospital_dist_ft name: prox_nearest_hospital_dist_ft description: '{{ doc("column_nearest_hospital_dist_ft") }}' @@ -477,6 +432,51 @@ models: - &prox_nearest_railroad_dist_ft name: prox_nearest_railroad_dist_ft 
description: '{{ doc("column_nearest_railroad_dist_ft") }}' + - &prox_nearest_road_arterial_daily_traffic + name: prox_nearest_road_arterial_daily_traffic + description: '{{ doc("column_nearest_road_arterial_daily_traffic") }}' + - &prox_nearest_road_arterial_dist_ft + name: prox_nearest_road_arterial_dist_ft + description: '{{ doc("column_nearest_road_arterial_dist_ft") }}' + - &prox_nearest_road_arterial_lanes + name: prox_nearest_road_arterial_lanes + description: '{{ doc("column_nearest_road_arterial_lanes") }}' + - &prox_nearest_road_arterial_speed_limit + name: prox_nearest_road_arterial_speed_limit + description: '{{ doc("column_nearest_road_arterial_speed_limit") }}' + - &prox_nearest_road_arterial_surface_type + name: prox_nearest_road_arterial_surface_type + description: '{{ doc("column_nearest_road_arterial_surface_type") }}' + - &prox_nearest_road_collector_daily_traffic + name: prox_nearest_road_collector_daily_traffic + description: '{{ doc("column_nearest_road_collector_daily_traffic") }}' + - &prox_nearest_road_collector_dist_ft + name: prox_nearest_road_collector_dist_ft + description: '{{ doc("column_nearest_road_collector_dist_ft") }}' + - &prox_nearest_road_collector_lanes + name: prox_nearest_road_collector_lanes + description: '{{ doc("column_nearest_road_collector_lanes") }}' + - &prox_nearest_road_collector_speed_limit + name: prox_nearest_road_collector_speed_limit + description: '{{ doc("column_nearest_road_collector_speed_limit") }}' + - &prox_nearest_road_collector_surface_type + name: prox_nearest_road_collector_surface_type + description: '{{ doc("column_nearest_road_collector_surface_type") }}' + - &prox_nearest_road_highway_daily_traffic + name: prox_nearest_road_highway_daily_traffic + description: '{{ doc("column_nearest_road_highway_daily_traffic") }}' + - &prox_nearest_road_highway_dist_ft + name: prox_nearest_road_highway_dist_ft + description: '{{ doc("column_nearest_road_highway_dist_ft") }}' + - 
&prox_nearest_road_highway_lanes + name: prox_nearest_road_highway_lanes + description: '{{ doc("column_nearest_road_highway_lanes") }}' + - &prox_nearest_road_highway_speed_limit + name: prox_nearest_road_highway_speed_limit + description: '{{ doc("column_nearest_road_highway_speed_limit") }}' + - &prox_nearest_road_highway_surface_type + name: prox_nearest_road_highway_surface_type + description: '{{ doc("column_nearest_road_highway_surface_type") }}' - &prox_nearest_secondary_road_dist_ft name: prox_nearest_secondary_road_dist_ft description: '{{ doc("column_nearest_secondary_road_dist_ft") }}' @@ -631,14 +631,11 @@ models: - *prox_airport_dnl_total - *prox_avg_school_rating_in_half_mile - *prox_lake_michigan_dist_ft - - *prox_nearest_arterial_road_dist_ft - *prox_nearest_bike_trail_dist_ft - *prox_nearest_cemetery_dist_ft - - *prox_nearest_collector_road_dist_ft - *prox_nearest_cta_route_dist_ft - *prox_nearest_cta_stop_dist_ft - *prox_nearest_golf_course_dist_ft - - *prox_nearest_highway_road_dist_ft - *prox_nearest_hospital_dist_ft - *prox_nearest_major_road_dist_ft - *prox_nearest_metra_route_dist_ft @@ -646,6 +643,21 @@ models: - *prox_nearest_new_construction_dist_ft - *prox_nearest_park_dist_ft - *prox_nearest_railroad_dist_ft + - *prox_nearest_road_arterial_daily_traffic + - *prox_nearest_road_arterial_dist_ft + - *prox_nearest_road_arterial_lanes + - *prox_nearest_road_arterial_speed_limit + - *prox_nearest_road_arterial_surface_type + - *prox_nearest_road_collector_daily_traffic + - *prox_nearest_road_collector_dist_ft + - *prox_nearest_road_collector_lanes + - *prox_nearest_road_collector_speed_limit + - *prox_nearest_road_collector_surface_type + - *prox_nearest_road_highway_daily_traffic + - *prox_nearest_road_highway_dist_ft + - *prox_nearest_road_highway_lanes + - *prox_nearest_road_highway_speed_limit + - *prox_nearest_road_highway_surface_type - *prox_nearest_secondary_road_dist_ft - *prox_nearest_stadium_dist_ft - 
*prox_nearest_university_dist_ft @@ -888,15 +900,11 @@ models: - *prox_airport_dnl_total - *prox_avg_school_rating_in_half_mile - *prox_lake_michigan_dist_ft - - *prox_nearest_arterial_road_dist_ft - *prox_nearest_bike_trail_dist_ft - *prox_nearest_cemetery_dist_ft - - *prox_nearest_collector_road_dist_ft - *prox_nearest_cta_route_dist_ft - *prox_nearest_cta_stop_dist_ft - *prox_nearest_golf_course_dist_ft - - *prox_nearest_highway_road_daily_traffic - - *prox_nearest_highway_road_dist_ft - *prox_nearest_hospital_dist_ft - *prox_nearest_major_road_dist_ft - *prox_nearest_metra_route_dist_ft @@ -910,6 +918,21 @@ models: - *nearest_neighbor_3_pin10 - *prox_nearest_park_dist_ft - *prox_nearest_railroad_dist_ft + - *prox_nearest_road_arterial_dist_ft + - *prox_nearest_road_arterial_daily_traffic + - *prox_nearest_road_arterial_lanes + - *prox_nearest_road_arterial_speed_limit + - *prox_nearest_road_arterial_surface_type + - *prox_nearest_road_collector_dist_ft + - *prox_nearest_road_collector_daily_traffic + - *prox_nearest_road_collector_lanes + - *prox_nearest_road_collector_speed_limit + - *prox_nearest_road_collector_surface_type + - *prox_nearest_road_highway_daily_traffic + - *prox_nearest_road_highway_dist_ft + - *prox_nearest_road_highway_lanes + - *prox_nearest_road_highway_speed_limit + - *prox_nearest_road_highway_surface_type - *prox_nearest_secondary_road_dist_ft - *prox_nearest_stadium_dist_ft - *prox_nearest_university_dist_ft diff --git a/dbt/models/proximity/proximity.dist_pin_to_collector_road.sql b/dbt/models/proximity/proximity.dist_pin_to_road_collector.sql similarity index 100% rename from dbt/models/proximity/proximity.dist_pin_to_collector_road.sql rename to dbt/models/proximity/proximity.dist_pin_to_road_collector.sql diff --git a/dbt/models/proximity/proximity.dist_pin_to_highway_road.sql b/dbt/models/proximity/proximity.dist_pin_to_road_highwaysql similarity index 100% rename from dbt/models/proximity/proximity.dist_pin_to_highway_road.sql 
rename to dbt/models/proximity/proximity.dist_pin_to_road_highwaysql diff --git a/dbt/models/proximity/proximity.dist_pin_to_arterial_road.sql b/dbt/models/proximity/proximity.dist_pin_to_roads_arterial.sql similarity index 58% rename from dbt/models/proximity/proximity.dist_pin_to_arterial_road.sql rename to dbt/models/proximity/proximity.dist_pin_to_roads_arterial.sql index e73f9b9a4..67178ffcb 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_arterial_road.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_roads_arterial.sql @@ -16,13 +16,13 @@ WITH arterial AS ( -- noqa: ST03 SELECT pcl.pin10, - ARBITRARY(xy.road_name) AS nearest_arterial_road_name, - ARBITRARY(xy.dist_ft) AS nearest_arterial_road_dist_ft, - ARBITRARY(xy.daily_traffic) AS nearest_arterial_road_daily_traffic, - ARBITRARY(xy.speed_limit) AS nearest_arterial_road_speed_limit, - ARBITRARY(xy.surface_type) AS nearest_arterial_road_surface_type, - ARBITRARY(xy.lanes) AS nearest_arterial_road_lanes, - ARBITRARY(xy.year) AS nearest_arterial_road_data_year, + ARBITRARY(xy.road_name) AS nearest_road_arterial_name, + ARBITRARY(xy.dist_ft) AS nearest_road_arterial_dist_ft, + ARBITRARY(xy.daily_traffic) AS nearest_road_arterial_daily_traffic, + ARBITRARY(xy.speed_limit) AS nearest_road_arterial_speed_limit, + ARBITRARY(xy.surface_type) AS nearest_road_arterial_surface_type, + ARBITRARY(xy.lanes) AS nearest_road_arterial_lanes, + ARBITRARY(xy.year) AS nearest_road_arterial_data_year, pcl.year FROM {{ source('spatial', 'parcel') }} AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('arterial') }} ) AS xy diff --git a/dbt/models/proximity/schema.yml b/dbt/models/proximity/schema.yml index 0e4db4764..14de533eb 100644 --- a/dbt/models/proximity/schema.yml +++ b/dbt/models/proximity/schema.yml @@ -14,18 +14,12 @@ models: - name: proximity.dist_pin_to_airport description: '{{ doc("table_dist_pin_to_airport") }}' - - name: proximity.dist_pin_to_arterial_road - description: '{{ 
doc("table_dist_pin_to_arterial_road") }}' - - name: proximity.dist_pin_to_bike_trail description: '{{ doc("table_dist_pin_to_bike_trail") }}' - name: proximity.dist_pin_to_cemetery description: '{{ doc("table_dist_pin_to_cemetery") }}' - - name: proximity.dist_pin_to_collector_road - description: '{{ doc("table_dist_pin_to_collector_road") }}' - - name: proximity.dist_pin_to_cta_route description: '{{ doc("table_dist_pin_to_cta_route") }}' @@ -35,9 +29,6 @@ models: - name: proximity.dist_pin_to_golf_course description: '{{ doc("table_dist_pin_to_golf_course") }}' - - name: proximity.dist_pin_to_highway_road - description: '{{ doc("table_dist_pin_to_highway_road") }}' - - name: proximity.dist_pin_to_hospital description: '{{ doc("table_dist_pin_to_hospital") }}' @@ -77,6 +68,15 @@ models: - name: proximity.dist_pin_to_railroad description: '{{ doc("table_dist_pin_to_railroad") }}' + - name: proximity.dist_pin_to_road_arterial + description: '{{ doc("table_dist_pin_to_road_arterial") }}' + + - name: proximity.dist_pin_to_road_collector + description: '{{ doc("table_dist_pin_to_road_collector") }}' + + - name: proximity.dist_pin_to_road_highway + description: '{{ doc("table_dist_pin_to_road_highway") }}' + - name: proximity.dist_pin_to_secondary_road description: '{{ doc("table_dist_pin_to_secondary_road") }}' From bbef26f01a3d7beb35d61d822a44ac85a37bbbc3 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Wed, 20 Nov 2024 18:27:18 +0000 Subject: [PATCH 113/166] More renames --- .../proximity/proximity.crosswalk_year_fill.sql | 2 +- ...sql => proximity.dist_pin_to_road_arterial.sql} | 0 .../proximity.dist_pin_to_road_collector.sql | 14 +++++++------- ...ysql => proximity.dist_pin_to_road_highway.sql} | 14 +++++++------- 4 files changed, 15 insertions(+), 15 deletions(-) rename dbt/models/proximity/{proximity.dist_pin_to_roads_arterial.sql => proximity.dist_pin_to_road_arterial.sql} (100%) rename dbt/models/proximity/{proximity.dist_pin_to_road_highwaysql => 
proximity.dist_pin_to_road_highway.sql} (58%) diff --git a/dbt/models/proximity/proximity.crosswalk_year_fill.sql b/dbt/models/proximity/proximity.crosswalk_year_fill.sql index 85c4a718c..b876bc501 100644 --- a/dbt/models/proximity/proximity.crosswalk_year_fill.sql +++ b/dbt/models/proximity/proximity.crosswalk_year_fill.sql @@ -198,7 +198,7 @@ WITH unfilled AS ( LEFT JOIN ( SELECT DISTINCT year, - nearest_highway_road_data_year + nearest_road_highway_data_year FROM {{ ref('proximity.dist_pin_to_road_highway' ) }} ) AS dist_pin_to_road_highway ON pin.year = dist_pin_to_road_highway.year LEFT JOIN ( diff --git a/dbt/models/proximity/proximity.dist_pin_to_roads_arterial.sql b/dbt/models/proximity/proximity.dist_pin_to_road_arterial.sql similarity index 100% rename from dbt/models/proximity/proximity.dist_pin_to_roads_arterial.sql rename to dbt/models/proximity/proximity.dist_pin_to_road_arterial.sql diff --git a/dbt/models/proximity/proximity.dist_pin_to_road_collector.sql b/dbt/models/proximity/proximity.dist_pin_to_road_collector.sql index 40e31da82..65d2baca5 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_road_collector.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_road_collector.sql @@ -16,13 +16,13 @@ WITH collector AS ( -- noqa: ST03 SELECT pcl.pin10, - ARBITRARY(xy.road_name) AS nearest_collector_road_name, - ARBITRARY(xy.dist_ft) AS nearest_collector_road_dist_ft, - ARBITRARY(xy.daily_traffic) AS nearest_collector_road_daily_traffic, - ARBITRARY(xy.speed_limit) AS nearest_collector_road_speed_limit, - ARBITRARY(xy.surface_type) AS nearest_collector_road_surface_type, - ARBITRARY(xy.lanes) AS nearest_collector_road_lanes, - ARBITRARY(xy.year) AS nearest_collector_road_data_year, + ARBITRARY(xy.road_name) AS nearest_road_collector_name, + ARBITRARY(xy.dist_ft) AS nearest_road_collector_dist_ft, + ARBITRARY(xy.daily_traffic) AS nearest_road_collector_daily_traffic, + ARBITRARY(xy.speed_limit) AS nearest_road_collector_speed_limit, + 
ARBITRARY(xy.surface_type) AS nearest_road_collector_surface_type, + ARBITRARY(xy.lanes) AS nearest_road_collector_lanes, + ARBITRARY(xy.year) AS nearest_road_collector_data_year, pcl.year FROM {{ source('spatial', 'parcel') }} AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('collector') }} ) AS xy diff --git a/dbt/models/proximity/proximity.dist_pin_to_road_highwaysql b/dbt/models/proximity/proximity.dist_pin_to_road_highway.sql similarity index 58% rename from dbt/models/proximity/proximity.dist_pin_to_road_highwaysql rename to dbt/models/proximity/proximity.dist_pin_to_road_highway.sql index 94e382dc1..ecc1f4ca4 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_road_highwaysql +++ b/dbt/models/proximity/proximity.dist_pin_to_road_highway.sql @@ -16,13 +16,13 @@ WITH highway AS ( -- noqa: ST03 SELECT pcl.pin10, - ARBITRARY(xy.road_name) AS nearest_highway_road_name, - ARBITRARY(xy.dist_ft) AS nearest_highway_road_dist_ft, - ARBITRARY(xy.daily_traffic) AS nearest_highway_road_daily_traffic, - ARBITRARY(xy.speed_limit) AS nearest_highway_road_speed_limit, - ARBITRARY(xy.surface_type) AS nearest_highway_road_surface_type, - ARBITRARY(xy.lanes) AS nearest_highway_road_lanes, - ARBITRARY(xy.year) AS nearest_highway_road_data_year, + ARBITRARY(xy.road_name) AS nearest_road_highway_name, + ARBITRARY(xy.dist_ft) AS nearest_road_highway_dist_ft, + ARBITRARY(xy.daily_traffic) AS nearest_road_highway_daily_traffic, + ARBITRARY(xy.speed_limit) AS nearest_road_highway_speed_limit, + ARBITRARY(xy.surface_type) AS nearest_road_highway_surface_type, + ARBITRARY(xy.lanes) AS nearest_road_highway_lanes, + ARBITRARY(xy.year) AS nearest_road_highway_data_year, pcl.year FROM {{ source('spatial', 'parcel') }} AS pcl INNER JOIN ( {{ dist_to_nearest_geometry('highway') }} ) AS xy From 1875deaa876eb88aaaa4005e78effded16158973 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 21 Nov 2024 16:13:58 +0000 Subject: [PATCH 114/166] Make singular --- 
dbt/models/proximity/proximity.dist_pin_to_road_arterial.sql | 2 +- dbt/models/proximity/proximity.dist_pin_to_road_collector.sql | 2 +- dbt/models/proximity/proximity.dist_pin_to_road_highway.sql | 2 +- dbt/models/spatial/schema.yml | 4 ++-- ...spatial-environment_roads.R => spatial-environment_road.R} | 2 +- ...spatial-environment_roads.R => spatial-environment_road.R} | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) rename etl/scripts-ccao-data-raw-us-east-1/spatial/{spatial-environment_roads.R => spatial-environment_road.R} (98%) rename etl/scripts-ccao-data-warehouse-us-east-1/spatial/{spatial-environment_roads.R => spatial-environment_road.R} (99%) diff --git a/dbt/models/proximity/proximity.dist_pin_to_road_arterial.sql b/dbt/models/proximity/proximity.dist_pin_to_road_arterial.sql index 67178ffcb..a2373f25b 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_road_arterial.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_road_arterial.sql @@ -9,7 +9,7 @@ WITH arterial AS ( -- noqa: ST03 SELECT * - FROM {{ source('spatial', 'roads') }} + FROM {{ source('spatial', 'road') }} WHERE road_type = 'Minor Arterial' OR road_type = 'Other Principal Arterial' ) diff --git a/dbt/models/proximity/proximity.dist_pin_to_road_collector.sql b/dbt/models/proximity/proximity.dist_pin_to_road_collector.sql index 65d2baca5..0572c76e2 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_road_collector.sql +++ b/dbt/models/proximity/proximity.dist_pin_to_road_collector.sql @@ -9,7 +9,7 @@ WITH collector AS ( -- noqa: ST03 SELECT * - FROM {{ source('spatial', 'roads') }} + FROM {{ source('spatial', 'road') }} WHERE road_type = 'Major Collector' OR road_type = 'Minor Collector' ) diff --git a/dbt/models/proximity/proximity.dist_pin_to_road_highway.sql b/dbt/models/proximity/proximity.dist_pin_to_road_highway.sql index ecc1f4ca4..8a6bcfaee 100644 --- a/dbt/models/proximity/proximity.dist_pin_to_road_highway.sql +++ 
b/dbt/models/proximity/proximity.dist_pin_to_road_highway.sql @@ -9,7 +9,7 @@ WITH highway AS ( -- noqa: ST03 SELECT * - FROM {{ source('spatial', 'roads') }} + FROM {{ source('spatial', 'road') }} WHERE road_type = 'Interstate' OR road_type = 'Freeway and Expressway' ) diff --git a/dbt/models/spatial/schema.yml b/dbt/models/spatial/schema.yml index 0e1c3eed0..3636d473e 100644 --- a/dbt/models/spatial/schema.yml +++ b/dbt/models/spatial/schema.yml @@ -141,8 +141,8 @@ sources: - name: railroad description: '{{ doc("table_railroad") }}' - - name: roads - description: '{{ doc("table_roads") }}' + - name: road + description: '{{ doc("table_road") }}' - name: sanitation_district description: '{{ doc("table_sanitation_district") }}' diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_roads.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_road.R similarity index 98% rename from etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_roads.R rename to etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_road.R index 4d14b0499..336dace84 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_roads.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_road.R @@ -10,7 +10,7 @@ library(arrow) AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") output_bucket <- file.path( AWS_S3_RAW_BUCKET, - "spatial", "environment", "roads" + "spatial", "environment", "road" ) # Get list of available files diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment_roads.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment_road.R similarity index 99% rename from etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment_roads.R rename to etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment_road.R index e63cbfa9a..5abdc8581 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment_roads.R +++ 
b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment_road.R @@ -8,7 +8,7 @@ library(stringr) # Define the S3 bucket and folder path AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET") -s3_folder <- "spatial/environment/roads" +s3_folder <- "spatial/environment/road" output_bucket <- sub("/$", "", file.path(AWS_S3_WAREHOUSE_BUCKET, s3_folder)) # Re-coding of road type From a54494e1e366d45c1b486c4611d63846bf10b4f3 Mon Sep 17 00:00:00 2001 From: Damonamajor Date: Thu, 21 Nov 2024 17:11:59 +0000 Subject: [PATCH 115/166] Make singular --- dbt/models/spatial/docs.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbt/models/spatial/docs.md b/dbt/models/spatial/docs.md index e53b29a07..5b1564e62 100644 --- a/dbt/models/spatial/docs.md +++ b/dbt/models/spatial/docs.md @@ -391,9 +391,9 @@ Rail locations sourced from Cook County GIS. **Geometry:** `MULTILINESTRING` {% enddocs %} -# roads +# road -{% docs table_roads %} +{% docs table_road %} Illinois Department of Transportation data source from [https://apps1.dot.illinois.gov/gist2/](https://apps1.dot.illinois.gov/gist2/). 
From f4921141f646ceae5273d7868487f65aa0c17738 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Thu, 21 Nov 2024 22:45:35 +0000 Subject: [PATCH 116/166] Add loaded_at to utils --- etl/utils.R | 1 + 1 file changed, 1 insertion(+) diff --git a/etl/utils.R b/etl/utils.R index 1b429ac04..6136dcb8b 100644 --- a/etl/utils.R +++ b/etl/utils.R @@ -67,6 +67,7 @@ write_partitions_to_s3 <- function(df, warning("Input data must contain grouping vars for partitioning") } + df <- df %>% mutate(loaded_at = as.character(Sys.time())) dplyr::group_walk(df, ~ { partitions_df <- purrr::map_dfr( .y, replace_na, "__HIVE_DEFAULT_PARTITION__" From 5b0d5d7e652929ab8374cf26e70ebe4a567a9c49 Mon Sep 17 00:00:00 2001 From: sweatyhandshake Date: Mon, 25 Nov 2024 16:23:01 -0600 Subject: [PATCH 117/166] Simplify code --- .../sale/sale-foreclosure.R | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/sale/sale-foreclosure.R b/etl/scripts-ccao-data-raw-us-east-1/sale/sale-foreclosure.R index 8fb9baf3f..6c7846ee2 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/sale/sale-foreclosure.R +++ b/etl/scripts-ccao-data-raw-us-east-1/sale/sale-foreclosure.R @@ -16,7 +16,7 @@ output_bucket <- file.path(AWS_S3_RAW_BUCKET, "sale", "foreclosure") files <- list.files("O:/CCAODATA/data/foreclosures", recursive = TRUE) # Function to retrieve data and write to S3 -read_write <- function(x) { +walk(files, \(x) { output_dest <- file.path(output_bucket, glue(parse_number(x), ".parquet")) @@ -32,7 +32,4 @@ read_write <- function(x) { } -} - -# Apply function to foreclosure data -walk(files, read_write) +}) From 3e329453491717a9f47ca0842b4a55eed7245e0e Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Mon, 25 Nov 2024 23:58:15 +0000 Subject: [PATCH 118/166] Minor code cleaning --- .../housing/housing-ihs_index.R | 14 +++++++------- .../sale/sale-mydec.R | 8 ++------ 2 files changed, 9 insertions(+), 13 deletions(-) diff --git 
a/etl/scripts-ccao-data-raw-us-east-1/housing/housing-ihs_index.R b/etl/scripts-ccao-data-raw-us-east-1/housing/housing-ihs_index.R index c12d428c5..f75af2624 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/housing/housing-ihs_index.R +++ b/etl/scripts-ccao-data-raw-us-east-1/housing/housing-ihs_index.R @@ -26,13 +26,13 @@ remote_file <- file.path(output_bucket, paste0("ihs_price_index_data.parquet")) # Grab the data, clean it just a bit, and write if it doesn't already exist data.frame(t( - openxlsx::read.xlsx(most_recent_ihs_data_url, sheet = 2) %>% - dplyr::select(-c("X2", "X3", "X4")) + read.xlsx(most_recent_ihs_data_url, sheet = 2) %>% + select(-c("X2", "X3", "X4")) )) %>% # Names and columns are kind of a mess after the transpose, # shift up first row, shift over column names - janitor::row_to_names(1) %>% - dplyr::mutate(puma = rownames(.)) %>% - dplyr::relocate(puma, .before = "YEARQ") %>% - dplyr::rename(name = "YEARQ") %>% - arrow::write_parquet(remote_file) + row_to_names(1) %>% + mutate(puma = rownames(.)) %>% + relocate(puma, .before = "YEARQ") %>% + rename(name = "YEARQ") %>% + write_parquet(remote_file) diff --git a/etl/scripts-ccao-data-raw-us-east-1/sale/sale-mydec.R b/etl/scripts-ccao-data-raw-us-east-1/sale/sale-mydec.R index 231d2279b..de8ce290f 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/sale/sale-mydec.R +++ b/etl/scripts-ccao-data-raw-us-east-1/sale/sale-mydec.R @@ -22,7 +22,7 @@ files <- xml2::read_html( str_subset("ptax203") # Function to scrape IDOR data and write to S3 -down_up <- function(x) { +walk(files, \(x) { year <- str_extract(x, pattern = "[0-9]{4}") if ( @@ -44,8 +44,4 @@ down_up <- function(x) { readr::read_delim(list.files(tmp2, full.names = TRUE), delim = "\t") %>% write_parquet(file.path(output_bucket, glue("{year}.parquet"))) } -} - - -# Apply function to foreclosure data -walk(files, down_up) +}) From d761d88b76495b046317f493a1825fff5aaf5eab Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Tue, 26 Nov 2024 
16:42:21 +0000 Subject: [PATCH 119/166] Add commenting --- etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-access.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-access.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-access.R index d523f3158..2634eb0e2 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-access.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-access.R @@ -12,6 +12,8 @@ output_bucket <- file.path(AWS_S3_RAW_BUCKET, "spatial", "access") # List APIs from city site sources_list <- bind_rows(list( # INDUSTRIAL CORRIDORS + # See https://data.cityofchicago.org/Community-Economic-Development/IndustrialCorridor_Jan2013/3tu3-iesz/about_data + # for more information "ind_2013" = c( "source" = "https://data.cityofchicago.org/api/geospatial/", "api_url" = "e6xh-nr8w?method=export&format=GeoJSON", From 2b1e903fc1cb4e4dc0823a883d36177d11079eb4 Mon Sep 17 00:00:00 2001 From: William Ridgeway Date: Thu, 5 Dec 2024 12:10:07 -0600 Subject: [PATCH 120/166] Commenting --- etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R | 1 + 1 file changed, 1 insertion(+) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R index 745c4581c..31833e94f 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R @@ -45,6 +45,7 @@ gdb_files <- data.frame("path" = list.files(file_path, full.names = TRUE)) %>% filter( str_detect(path, "Current", negate = TRUE) & str_detect(path, "20") & + # We detect parcel GDBs, but will extract the township layer str_detect(path, "Parcels") ) From 08b765fb2c0f332f861969361315bd5ae1b34e41 Mon Sep 17 00:00:00 2001 From: William Ridgeway Date: Thu, 5 Dec 2024 15:36:13 -0600 Subject: [PATCH 121/166] Correct file path --- etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R index 31833e94f..1bebebda3 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R @@ -11,7 +11,7 @@ output_bucket <- file.path(AWS_S3_RAW_BUCKET, "spatial", "ccao") # Read privileges for the this drive location are limited. # Contact Cook County GIS if permissions need to be changed. -file_path <- "//gisemcv1.ccounty.com/ArchiveServices/" +file_path <- "\\gisemcv1.ccounty.com\ArchiveServices" sources_list <- bind_rows(list( # NEIGHBORHOOD From 073901093e757aa50d1a0da6d7eeda377c584148 Mon Sep 17 00:00:00 2001 From: William Ridgeway Date: Thu, 5 Dec 2024 15:56:18 -0600 Subject: [PATCH 122/166] Add loaded_at columns --- .../spatial/spatial-ccao-neighborhood.R | 1 + .../spatial/spatial-ccao-township.R | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-ccao-neighborhood.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-ccao-neighborhood.R index a3ca54cbe..09ba19984 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-ccao-neighborhood.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-ccao-neighborhood.R @@ -193,6 +193,7 @@ for (year in 2010:2021) { township_name, township_code, triad_name, triad_code, nbhd, town_nbhd, geometry, geometry_3435 ) %>% + mutate(loaded_at = as.character(Sys.time())) %>% write_geoparquet( file.path(output_bucket, paste0("year=", year), "part-0.parquet"), compression = "snappy" diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-ccao-township.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-ccao-township.R index eb95e5ba6..f58dc2a70 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-ccao-township.R +++ 
b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-ccao-township.R @@ -46,5 +46,6 @@ if (!aws.s3::object_exists(remote_file_town_warehouse)) { geometry_3435 = st_transform(geometry, 3435), across(township_code:triad_code, as.character) ) %>% + mutate(loaded_at = as.character(Sys.time())) %>% geoarrow::write_geoparquet(remote_file_town_warehouse) -} \ No newline at end of file +} From d4bddfbeb0efca322f0f5beeffbebf1c882392b8 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Thu, 5 Dec 2024 22:27:03 +0000 Subject: [PATCH 123/166] Add 2024 fema url --- .../spatial/spatial-environment.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment.R index c4e53a17c..464462def 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment.R @@ -22,7 +22,7 @@ remote_file_flood_fema <- file.path( fema_url <- "https://hazards.fema.gov/femaportal/NFHL/Download/ProductsDownLoadServlet?DFIRMID=17031C&state=ILLINOIS&county=COOK%20COUNTY&fileName=17031C_" #noqa -fema_files <- c("2021" = "20210615", "2022" = "20221130", "2023" = "20231006") +fema_files <- c("2021" = "20210615", "2022" = "20221130", "2023" = "20231006", "2024" = "20240625") # Write FEMA floodplains to S3 if they don't exist From 15f480ebc3a3f88096bb8e78d3509dd8406b082c Mon Sep 17 00:00:00 2001 From: William Ridgeway Date: Thu, 5 Dec 2024 16:39:32 -0600 Subject: [PATCH 124/166] Simplify code --- .../spatial/spatial-political.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-political.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-political.R index 2acad2bd3..afefd3af2 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-political.R +++ 
b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-political.R @@ -150,15 +150,15 @@ pwalk(sources_list, function(...) { # MUNICIPALITY # Paths for all relevant geodatabases -gdb_files <- data.frame("path" = list.files(file_path, full.names = TRUE)) %>% +data.frame("path" = list.files(file_path, full.names = TRUE)) %>% filter( str_detect(path, "Current", negate = TRUE) & str_detect(path, "20") & str_detect(path, "Admin") - ) + ) %>% # Function to call referenced API, pull requested data, and write it to S3 -pwalk(gdb_files, function(...) { +pwalk(function(...) { df <- tibble::tibble(...) county_gdb_to_s3( s3_bucket_uri = output_bucket, From 1cb245beb6fd07bb386a5c35241029ce023b586b Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Fri, 6 Dec 2024 15:32:08 +0000 Subject: [PATCH 125/166] Add new cps boundaries --- .../spatial/spatial-school.R | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-school.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-school.R index 15bb97273..7dfef5727 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-school.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-school.R @@ -121,6 +121,12 @@ sources_list <- bind_rows(list( "boundary" = "cps_attendance_elementary", "year" = "2023-2024" ), + "attendance_ele_2025" = c( + "source" = "https://data.cityofchicago.org/api/geospatial/", + "api_url" = "td7k-bjgv?method=export&format=GeoJSON", + "boundary" = "cps_attendance_elementary", + "year" = "2024-2025" + ), # CPS ATTENDANCE - SECONDARY "attendance_sec_0607" = c( @@ -231,6 +237,12 @@ sources_list <- bind_rows(list( "boundary" = "cps_attendance_secondary", "year" = "2023-2024" ), + "attendance_sec_2025" = c( + "source" = "https://data.cityofchicago.org/api/geospatial/", + "api_url" = "cczt-jtaj?method=export&format=GeoJSON", + "boundary" = "cps_attendance_secondary", + "year" = "2024-2025" + ), # LOCATION "locations_all_21" = c( From 
233d3fb2d7c6851d31863f4312b1635ebe5cb849 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Tue, 10 Dec 2024 22:54:13 +0000 Subject: [PATCH 126/166] Temp adjustment to lintr --- .lintr | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.lintr b/.lintr index ef6caf3a3..19baae449 100644 --- a/.lintr +++ b/.lintr @@ -1,5 +1,7 @@ linters: linters_with_defaults( + object_length_linter = NULL, object_name_linter = NULL, - object_usage_linter = NULL + object_usage_linter = NULL, + line_length_linter = NULL ) encoding: "UTF-8" From 3bb9e545408d74a6dc71327935b7ba3f2f2fdd4d Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Wed, 11 Dec 2024 17:35:28 +0000 Subject: [PATCH 127/166] Remove accidental change to .lintr --- .lintr | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.lintr b/.lintr index 19baae449..e2b3682bb 100644 --- a/.lintr +++ b/.lintr @@ -1,7 +1,6 @@ linters: linters_with_defaults( object_length_linter = NULL, object_name_linter = NULL, - object_usage_linter = NULL, - line_length_linter = NULL + object_usage_linter = NULL ) encoding: "UTF-8" From 26a236ba7018b2724fb008b730ac8dc0e4d0b466 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Wed, 11 Dec 2024 17:44:56 +0000 Subject: [PATCH 128/166] Linting --- .../sale/sale-foreclosure.R | 2 -- .../spatial/spatial-ccao.R | 2 +- .../spatial/spatial-political.R | 21 +++++++++---------- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/sale/sale-foreclosure.R b/etl/scripts-ccao-data-raw-us-east-1/sale/sale-foreclosure.R index e2dcc8380..200affe9d 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/sale/sale-foreclosure.R +++ b/etl/scripts-ccao-data-raw-us-east-1/sale/sale-foreclosure.R @@ -17,7 +17,6 @@ files <- list.files("O:/CCAODATA/data/foreclosures", recursive = TRUE) # Function to retrieve data and write to S3 walk(files, \(x) { - output_dest <- file.path(output_bucket, glue(parse_number(x), ".parquet")) if 
(!object_exists(output_dest)) { @@ -29,5 +28,4 @@ walk(files, \(x) { ) %>% write_parquet(output_dest) } - }) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R index 1bebebda3..55dd53f5a 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R @@ -11,7 +11,7 @@ output_bucket <- file.path(AWS_S3_RAW_BUCKET, "spatial", "ccao") # Read privileges for the this drive location are limited. # Contact Cook County GIS if permissions need to be changed. -file_path <- "\\gisemcv1.ccounty.com\ArchiveServices" +file_path <- "\\gisemcv1.ccounty.com\ArchiveServices" # nolint sources_list <- bind_rows(list( # NEIGHBORHOOD diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-political.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-political.R index afefd3af2..83ca0299e 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-political.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-political.R @@ -156,14 +156,13 @@ data.frame("path" = list.files(file_path, full.names = TRUE)) %>% str_detect(path, "20") & str_detect(path, "Admin") ) %>% - -# Function to call referenced API, pull requested data, and write it to S3 -pwalk(function(...) { - df <- tibble::tibble(...) - county_gdb_to_s3( - s3_bucket_uri = output_bucket, - dir_name = "municipality", - file_path = df$path, - layer = "MuniTaxDist" - ) -}) + # Function to call referenced API, pull requested data, and write it to S3 + pwalk(function(...) { + df <- tibble::tibble(...) 
+ county_gdb_to_s3( + s3_bucket_uri = output_bucket, + dir_name = "municipality", + file_path = df$path, + layer = "MuniTaxDist" + ) + }) From 8c2181684e733b926d6182367a01045949d50730 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Wed, 11 Dec 2024 17:54:41 +0000 Subject: [PATCH 129/166] Linting --- .../spatial/spatial-access.R | 2 +- .../spatial/spatial-ccao.R | 22 ++++++++++--------- .../spatial/spatial-political.R | 14 ++++++------ 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-access.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-access.R index e76931c50..6237a16b5 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-access.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-access.R @@ -12,7 +12,7 @@ output_bucket <- file.path(AWS_S3_RAW_BUCKET, "spatial", "access") # List APIs from city site sources_list <- bind_rows(list( # INDUSTRIAL CORRIDORS - # See https://data.cityofchicago.org/Community-Economic-Development/IndustrialCorridor_Jan2013/3tu3-iesz/about_data + # See https://data.cityofchicago.org/Community-Economic-Development/IndustrialCorridor_Jan2013/3tu3-iesz/about_data # nolint # for more information "ind_2013" = c( "source" = "https://data.cityofchicago.org/api/geospatial/", diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R index 55dd53f5a..c6c09fbda 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R @@ -13,17 +13,19 @@ output_bucket <- file.path(AWS_S3_RAW_BUCKET, "spatial", "ccao") # Contact Cook County GIS if permissions need to be changed. 
file_path <- "\\gisemcv1.ccounty.com\ArchiveServices" # nolint -sources_list <- bind_rows(list( - # NEIGHBORHOOD - "neighborhood" = c( - "url" = paste0( - "https://gitlab.com/ccao-data-science---modeling/packages/ccao", - "/-/raw/master/data-raw/nbhd_shp.geojson" - ), - "boundary" = "neighborhood", - "year" = "2021" +sources_list <- bind_rows( + list( + # NEIGHBORHOOD + "neighborhood" = c( + "url" = paste0( + "https://gitlab.com/ccao-data-science---modeling/packages/ccao", + "/-/raw/master/data-raw/nbhd_shp.geojson" + ), + "boundary" = "neighborhood", + "year" = "2021" + ) ) -)) +) # Function to call referenced API, pull requested data, and write it to S3 pwalk(sources_list, function(...) { diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-political.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-political.R index 83ca0299e..fe325595e 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-political.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-political.R @@ -12,7 +12,7 @@ output_bucket <- file.path(AWS_S3_RAW_BUCKET, "spatial", "political") # Read privileges for the this drive location are limited. # Contact Cook County GIS if permissions need to be changed. 
-file_path <- "//gisemcv1.ccounty.com/ArchiveServices/" +file_path <- "//gisemcv1.ccounty.com/ArchiveServices/" # nolint sources_list <- bind_rows(list( # BOARD OF REVIEW @@ -23,7 +23,7 @@ sources_list <- bind_rows(list( "year" = "2012" ), "bor_2023" = c( - "source" = "https://gis.cookcountyil.gov/traditional/rest/services/politicalBoundary/MapServer/", + "source" = "https://gis.cookcountyil.gov/traditional/rest/services/politicalBoundary/MapServer/", # nolint "api_url" = "10/query?outFields=*&where=1%3D1&f=geojson", "boundary" = "board_of_review_district", "year" = "2023" @@ -37,7 +37,7 @@ sources_list <- bind_rows(list( "year" = "2012" ), "cmd_2023" = c( - "source" = "https://gis.cookcountyil.gov/traditional/rest/services/politicalBoundary/MapServer/", + "source" = "https://gis.cookcountyil.gov/traditional/rest/services/politicalBoundary/MapServer/", # nolint "api_url" = "9/query?outFields=*&where=1%3D1&f=geojson", "boundary" = "commissioner_district", "year" = "2023" @@ -51,7 +51,7 @@ sources_list <- bind_rows(list( "year" = "2010" ), "cnd_2023" = c( - "source" = "https://gis.cookcountyil.gov/traditional/rest/services/politicalBoundary/MapServer/", + "source" = "https://gis.cookcountyil.gov/traditional/rest/services/politicalBoundary/MapServer/", # nolint "api_url" = "13/query?outFields=*&where=1%3D1&f=geojson", "boundary" = "congressional_district", "year" = "2023" @@ -65,7 +65,7 @@ sources_list <- bind_rows(list( "year" = "2012" ), "jsd_2022" = c( - "source" = "https://gis.cookcountyil.gov/traditional/rest/services/politicalBoundary/MapServer/", + "source" = "https://gis.cookcountyil.gov/traditional/rest/services/politicalBoundary/MapServer/", # nolint "api_url" = "5/query?outFields=*&where=1%3D1&f=geojson", "boundary" = "judicial_district", "year" = "2022" @@ -79,7 +79,7 @@ sources_list <- bind_rows(list( "year" = "2010" ), "str_2023" = c( - "source" = "https://gis.cookcountyil.gov/traditional/rest/services/politicalBoundary/MapServer/", + "source" = 
"https://gis.cookcountyil.gov/traditional/rest/services/politicalBoundary/MapServer/", # nolint "api_url" = "11/query?outFields=*&where=1%3D1&f=geojson", "boundary" = "state_representative_district", "year" = "2023" @@ -93,7 +93,7 @@ sources_list <- bind_rows(list( "year" = "2010" ), "sts_2023" = c( - "source" = "https://gis.cookcountyil.gov/traditional/rest/services/politicalBoundary/MapServer/", + "source" = "https://gis.cookcountyil.gov/traditional/rest/services/politicalBoundary/MapServer/", # nolint "api_url" = "12/query?outFields=*&where=1%3D1&f=geojson", "boundary" = "state_senate_district", "year" = "2023" From 38f68fe72a34b41ec1d6cc0bd97605be89fc6ab2 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Thu, 12 Dec 2024 15:31:37 +0000 Subject: [PATCH 130/166] Test lintr --- etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R index c6c09fbda..58b1e266f 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R @@ -11,7 +11,7 @@ output_bucket <- file.path(AWS_S3_RAW_BUCKET, "spatial", "ccao") # Read privileges for the this drive location are limited. # Contact Cook County GIS if permissions need to be changed. 
-file_path <- "\\gisemcv1.ccounty.com\ArchiveServices" # nolint +file_path <- "//gisemcv1.ccounty.com/ArchiveServices" # nolint sources_list <- bind_rows( list( From 5e84a1597f7712825cf4e3b3e3b8fb454170bb7e Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Thu, 12 Dec 2024 20:16:09 +0000 Subject: [PATCH 131/166] Final code updates --- .../spatial/spatial-school.R | 4 +- .../spatial/spatial-transit.R | 41 +++++++++++++++---- .../spatial/spatial-environment.R | 4 +- 3 files changed, 36 insertions(+), 13 deletions(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-school.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-school.R index 1eb475936..fd22da874 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-school.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-school.R @@ -123,7 +123,7 @@ sources_list <- bind_rows(list( ), "attendance_ele_2025" = c( "source" = "https://data.cityofchicago.org/api/geospatial/", - "api_url" = "td7k-bjgv?method=export&format=GeoJSON", + "api_url" = "5ihw-cbdn?method=export&format=GeoJSON", "boundary" = "cps_attendance_elementary", "year" = "2024-2025" ), @@ -239,7 +239,7 @@ sources_list <- bind_rows(list( ), "attendance_sec_2025" = c( "source" = "https://data.cityofchicago.org/api/geospatial/", - "api_url" = "cczt-jtaj?method=export&format=GeoJSON", + "api_url" = "4kfz-zr3a?method=export&format=GeoJSON", "boundary" = "cps_attendance_secondary", "year" = "2024-2025" ), diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-transit.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-transit.R index a148a7297..4d8c89569 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-transit.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-transit.R @@ -20,15 +20,25 @@ options(timeout = max(300, getOption("timeout"))) cta_feed_dates_list <- c( "2015-10-29", "2016-09-30", "2017-10-22", "2018-10-06", "2019-10-04", "2020-10-10", "2021-10-09", "2022-10-20", - 
"2023-10-04" + "2023-10-04", "2024-10-17" ) # If missing feed on S3, download and remove .htm file (causes errors) # then rezip and upload get_cta_feed <- function(feed_date) { - feed_url <- paste0( - "https://transitfeeds.com/p/chicago-transit-authority/165/", - str_remove_all(feed_date, "-"), "/download" + feed_url <- ifelse( + substr(feed_date, 1, 4) <= "2023", + paste0( + "https://transitfeeds.com/p/chicago-transit-authority/165/", + str_remove_all(feed_date, "-"), "/download" + ), + paste0( + "https://files.mobilitydatabase.org/mdb-389/mdb-389-", + str_remove_all(feed_date, "-"), + "0023/mdb-389-", + str_remove_all(feed_date, "-"), + "0023.zip" + ) ) s3_uri <- file.path(output_path, "cta", paste0(feed_date, "-gtfs.zip")) @@ -55,14 +65,26 @@ walk(cta_feed_dates_list, get_cta_feed) metra_feed_dates_list <- c( "2015-10-30", "2016-09-30", "2017-10-21", "2018-10-05", "2019-10-04", "2020-10-10", "2021-10-08", "2022-10-21", - "2023-10-14" + "2023-10-14", "2024-04-22" ) get_metra_feed <- function(feed_date) { - feed_url <- paste0( - "https://transitfeeds.com/p/metra/169/", - str_remove_all(feed_date, "-"), "/download" + + feed_url <- ifelse( + substr(feed_date, 1, 4) <= "2023", + paste0( + "https://transitfeeds.com/p/metra/169/", + str_remove_all(feed_date, "-"), "/download" + ), + paste0( + "https://files.mobilitydatabase.org/mdb-1187/mdb-1187-", + str_remove_all(feed_date, "-"), + "0016/mdb-1187-", + str_remove_all(feed_date, "-"), + "0016.zip" + ) ) + s3_uri <- file.path(output_path, "metra", paste0(feed_date, "-gtfs.zip")) if (!aws.s3::object_exists(s3_uri)) { @@ -80,7 +102,8 @@ walk(metra_feed_dates_list, get_metra_feed) ##### Pace ##### pace_feed_dates_list <- c( "2015-10-16", "2016-10-15", "2017-10-16", "2018-10-17", - "2019-10-22", "2020-09-23", "2021-03-15", "2023-09-24" + "2019-10-22", "2020-09-23", "2021-03-15", "2023-09-24", + "2024-02-07" ) get_pace_feed <- function(feed_date) { diff --git 
a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment.R index 69674d57a..1e28ed2e5 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment.R @@ -61,10 +61,10 @@ walk(2013:current_year, function(x) { ##### FEMA FLOODPLAINS ##### flood_fema_raw <- file.path( - input_bucket, "flood_fema", "2023.geojson" + input_bucket, "flood_fema", "2024.geojson" ) flood_fema_warehouse <- file.path( - output_bucket, "flood_fema", "year=2023", "part-0.parquet" + output_bucket, "flood_fema", "year=2024", "part-0.parquet" ) # Write FEMA floodplains to S3 if they don't exist From 27625605db058f8787308e255ecd1f4e12c47b13 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Thu, 12 Dec 2024 22:00:18 +0000 Subject: [PATCH 132/166] Add geoparquet_to_s3 --- .../spatial/spatial-environment-major_road.R | 2 +- .../spatial/spatial-environment-secondary_road.R | 2 +- etl/utils.R | 6 ++++++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-major_road.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-major_road.R index b45fdcb6a..d09523083 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-major_road.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-major_road.R @@ -73,5 +73,5 @@ for (year in years) { paste0("major_road-", year, ".parquet") ) - geoarrow::write_geoparquet(data_to_write, output_file) + geoparquet_to_s3(data_to_write, output_file) } diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-secondary_road.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-secondary_road.R index 7bacf7dcb..3481a1ce1 100644 --- 
a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-secondary_road.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-secondary_road.R @@ -185,5 +185,5 @@ for (year in years) { paste0("secondary_road-", year, ".parquet") ) - geoarrow::write_geoparquet(data_to_write, output_file) + geoparquet_to_s3(data_to_write, output_file) } diff --git a/etl/utils.R b/etl/utils.R index 6136dcb8b..b5468b26b 100644 --- a/etl/utils.R +++ b/etl/utils.R @@ -165,3 +165,9 @@ county_gdb_to_s3 <- function( } } + +geoparquet_to_s3 <- function(spatial_df, s3_uri) { + spatial_df %>% + mutate(loaded_at = as.character(Sys.time())) %>% + geoarrow::write_geoparquet(s3_uri) +} From d095bfb8bc1e7145699c43282a2736ffa1268f94 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Thu, 12 Dec 2024 22:03:47 +0000 Subject: [PATCH 133/166] Switch to utils function --- .../spatial/spatial-ccao-neighborhood.R | 6 ++---- .../spatial/spatial-ccao-township.R | 3 +-- etl/utils.R | 2 +- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-ccao-neighborhood.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-ccao-neighborhood.R index 726a077ee..4ac379db0 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-ccao-neighborhood.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-ccao-neighborhood.R @@ -196,9 +196,7 @@ for (year in 2010:2021) { township_name, township_code, triad_name, triad_code, nbhd, town_nbhd, geometry, geometry_3435 ) %>% - mutate(loaded_at = as.character(Sys.time())) %>% - write_geoparquet( - file.path(output_bucket, paste0("year=", year), "part-0.parquet"), - compression = "snappy" + geoparquet_to_s3( + file.path(output_bucket, paste0("year=", year), "part-0.parquet") ) } diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-ccao-township.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-ccao-township.R index 
cc9ebe184..a05845810 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-ccao-township.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-ccao-township.R @@ -50,6 +50,5 @@ if (!aws.s3::object_exists(remote_file_town_warehouse)) { geometry_3435 = st_transform(geometry, 3435), across(township_code:triad_code, as.character) ) %>% - mutate(loaded_at = as.character(Sys.time())) %>% - geoarrow::write_geoparquet(remote_file_town_warehouse) + geoparquet_to_s3(remote_file_town_warehouse) } diff --git a/etl/utils.R b/etl/utils.R index b5468b26b..a4156acc4 100644 --- a/etl/utils.R +++ b/etl/utils.R @@ -169,5 +169,5 @@ county_gdb_to_s3 <- function( geoparquet_to_s3 <- function(spatial_df, s3_uri) { spatial_df %>% mutate(loaded_at = as.character(Sys.time())) %>% - geoarrow::write_geoparquet(s3_uri) + geoarrow::write_geoparquet(s3_uri, compression = "snappy") } From 11b33fe57525c5280fb87447c19671093810a836 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Thu, 12 Dec 2024 22:06:42 +0000 Subject: [PATCH 134/166] Switch to util function --- .../spatial/spatial-environment.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment.R index 1e28ed2e5..c76880ab4 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment.R @@ -53,7 +53,7 @@ walk(2013:current_year, function(x) { geometry_3435 = st_transform(geometry, 3435) ) %>% rename_with(tolower) %>% - geoarrow::write_geoparquet(remote_file_coastline_warehouse) + geoparquet_to_s3(remote_file_coastline_warehouse) } }) @@ -87,7 +87,7 @@ if ( fema_special_flood_hazard_area = SFHA_TF, geometry, geometry_3435 ) %>% - geoarrow::write_geoparquet(flood_fema_warehouse) + geoparquet_to_s3(flood_fema_warehouse) file.remove(tmp_file) } @@ -113,7 
+113,7 @@ if (!aws.s3::object_exists(remote_file_rail_warehouse)) { mutate( geometry_3435 = st_transform(geometry, 3435) ) %>% - geoarrow::write_geoparquet(remote_file_rail_warehouse) + geoparquet_to_s3(remote_file_rail_warehouse) } @@ -166,7 +166,7 @@ walk(2011:current_year, function(year) { select(id = HYDROID, name = FULLNAME, hydrology_type, geometry) ) %>% mutate(geometry_3435 = st_transform(geometry, 3435)) %>% - geoarrow::write_geoparquet(remote_file_hydro_warehouse) + geoparquet_to_s3(remote_file_hydro_warehouse) file.remove(tmp_file) } From 253a71d81605ac60d725cd2c0b5e7345543beb29 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Thu, 12 Dec 2024 22:14:43 +0000 Subject: [PATCH 135/166] Swap out all instances --- .../spatial/spatial-environment-major_road.R | 2 +- .../spatial/spatial-environment-secondary_road.R | 2 +- .../spatial/spatial-environment_road.R | 2 +- .../spatial/spatial-access-grocery_store.R | 2 +- .../spatial/spatial-access.R | 12 ++++++------ .../spatial/spatial-building_footprint.R | 11 +++++------ .../spatial/spatial-ccao-corner.R | 2 +- .../spatial/spatial-ccao-county.R | 2 +- .../spatial/spatial-census.R | 2 +- .../spatial/spatial-environment-golf_course.R | 2 +- .../spatial/spatial-environment-midway_noise.R | 2 +- .../spatial/spatial-environment-ohare_noise.R | 4 ++-- .../spatial/spatial-environment_road.R | 2 +- .../spatial/spatial-other.R | 4 ++-- .../spatial/spatial-parcel.R | 2 +- .../spatial/spatial-transit.R | 4 ++-- 16 files changed, 28 insertions(+), 29 deletions(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-major_road.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-major_road.R index dcbb47de5..39a725314 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-major_road.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-major_road.R @@ -41,6 +41,6 @@ for (year in years) { st_transform(4326) %>% mutate(geometry_3435 = 
st_transform(geometry, 3435)) - geoarrow::write_geoparquet(osm_roads, remote_file) + geoparquet_to_s3(osm_roads, remote_file) } } diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-secondary_road.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-secondary_road.R index 3705cbdf5..697f23c92 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-secondary_road.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-secondary_road.R @@ -41,6 +41,6 @@ for (year in years) { st_transform(4326) %>% mutate(geometry_3435 = st_transform(geometry, 3435)) - geoarrow::write_geoparquet(osm_roads, remote_file) + geoparquet_to_s3(osm_roads, remote_file) } } diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_road.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_road.R index 336dace84..099a23979 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_road.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_road.R @@ -77,7 +77,7 @@ walk(years, \(x) { mutate(year = as.character(x)) # Save the shapefile as a GeoParquet file - geoarrow::write_geoparquet(shapefile_data, remote_file_path) + geoparquet_to_s3(shapefile_data, remote_file_path) } else { message(paste("No shapefile found for year", x, ".")) } diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-access-grocery_store.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-access-grocery_store.R index 4fdefd13c..8c07460ae 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-access-grocery_store.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-access-grocery_store.R @@ -54,6 +54,6 @@ for (year in years) { geometry_3435 = st_transform(geometry, 3435) ) %>% select(osm_id, name, category = shop, geometry, geometry_3435) %>% - geoarrow::write_geoparquet(remote_file) + geoparquet_to_s3(remote_file) } } diff --git 
a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-access.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-access.R index 5462fb404..bb8cfb92c 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-access.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-access.R @@ -42,7 +42,7 @@ if (!aws.s3::object_exists(remote_file_bike_warehouse)) { trail_width = trailwdth, trail_type = trailtype, trail_surface = trailsurfa ) %>% - geoarrow::write_geoparquet(remote_file_bike_warehouse) + geoparquet_to_s3(remote_file_bike_warehouse) } @@ -68,7 +68,7 @@ if (!aws.s3::object_exists(remote_file_ceme_warehouse)) { name = cfname, address, gniscode, source, community, comment, mergeid, geometry, geometry_3435 ) %>% - geoarrow::write_geoparquet(remote_file_ceme_warehouse) + geoparquet_to_s3(remote_file_ceme_warehouse) } @@ -94,7 +94,7 @@ if (!aws.s3::object_exists(remote_file_hosp_warehouse)) { name = cfname, address, gniscode, source, community, comment, mergeid, geometry, geometry_3435 ) %>% - geoarrow::write_geoparquet(remote_file_hosp_warehouse) + geoparquet_to_s3(remote_file_hosp_warehouse) } @@ -141,7 +141,7 @@ walk(remote_files_park_warehouse, function(x) { )) ) - geoarrow::write_geoparquet(parks_df, x, compression = "snappy") + geoparquet_to_s3(parks_df, x, compression = "snappy") } }) @@ -171,7 +171,7 @@ if (!aws.s3::object_exists(remote_file_indc_warehouse)) { num = no, hud_qualif, acres, geometry, geometry_3435 ) %>% - geoarrow::write_geoparquet(remote_file_indc_warehouse) + geoparquet_to_s3(remote_file_indc_warehouse) } ##### WALKABILITY ##### @@ -202,5 +202,5 @@ if (!aws.s3::object_exists(remote_file_walk_warehouse)) { standardize_expand_geo() %>% select(-contains("shape")) %>% mutate(year = "2017") %>% - geoarrow::write_geoparquet(remote_file_walk_warehouse) + geoparquet_to_s3(remote_file_walk_warehouse) } diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-building_footprint.R 
b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-building_footprint.R index b7de255e9..1d4cde9db 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-building_footprint.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-building_footprint.R @@ -56,10 +56,9 @@ if (!aws.s3::object_exists(esri_chicago_remote)) { lon = X, lat = Y, x_3435 = X.1, y_3435 = Y.1, geometry, geometry_3435 ) - write_geoparquet( + geoparquet_to_s3( esri_chicago_df_clean, - esri_chicago_remote, - compression = "snappy" + esri_chicago_remote ) } @@ -94,7 +93,7 @@ if (!aws.s3::object_exists(esri_sub_remote)) { lon = X, lat = Y, x_3435 = X.1, y_3435 = Y.1, geometry, geometry_3435 ) - write_geoparquet(esri_sub_df_clean, esri_sub_remote, compression = "snappy") + geoparquet_to_s3(esri_sub_df_clean, esri_sub_remote) } @@ -126,7 +125,7 @@ if (!aws.s3::object_exists(osm_remote)) { lon = X, lat = Y, x_3435 = X.1, y_3435 = Y.1, geometry, geometry_3435 ) - write_geoparquet(osm_df_clean, osm_remote, compression = "snappy") + geoparquet_to_s3(osm_df_clean, osm_remote) } @@ -182,5 +181,5 @@ if (!aws.s3::object_exists(ms_remote)) { lon = X, lat = Y, x_3435 = X.1, y_3435 = Y.1, geometry, geometry_3435 ) - write_geoparquet(ms_df_clean_cook_only, ms_remote, compression = "snappy") + geoparquet_to_s3(ms_df_clean_cook_only, ms_remote) } diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-ccao-corner.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-ccao-corner.R index 74a85e414..fbd21add0 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-ccao-corner.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-ccao-corner.R @@ -237,7 +237,7 @@ for (iter_year in parcel_years) { select(pin10, id) %>% inner_join(cross_final, by = "id") %>% select(-id) %>% - write_geoparquet(remote_file) + geoparquet_to_s3(remote_file) } tictoc::toc() } diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-ccao-county.R 
b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-ccao-county.R index d7089b71c..aea2e6741 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-ccao-county.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-ccao-county.R @@ -34,5 +34,5 @@ if (!aws.s3::object_exists(remote_file_county_warehouse)) { geometry_3435 = st_transform(geometry, 3435), ) %>% select(geometry, geometry_3435) %>% - geoarrow::write_geoparquet(remote_file_county_warehouse) + geoparquet_to_s3(remote_file_county_warehouse) } diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-census.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-census.R index 2d56b3da9..30736ff5c 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-census.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-census.R @@ -81,7 +81,7 @@ normalize_census_geo <- function(key) { geometry, geometry_3435 ) %>% filter(!str_detect(geoid, "Z")) %>% - write_geoparquet(remote_file, compression = "snappy") + geoparquet_to_s3(remote_file) } } diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-golf_course.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-golf_course.R index 58bf06ffd..7f0603aaa 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-golf_course.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-golf_course.R @@ -43,5 +43,5 @@ if (!aws.s3::object_exists(remote_file_golf_course_warehouse)) { geometry_3435 = st_transform(geometry, 3435) ) %>% select(-touches) %>% - geoarrow::write_geoparquet(remote_file_golf_course_warehouse) + geoparquet_to_s3(remote_file_golf_course_warehouse) } diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-midway_noise.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-midway_noise.R index fc562652d..df311a487 100644 --- 
a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-midway_noise.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-midway_noise.R @@ -95,4 +95,4 @@ data.frame( geometry_3435 = st_transform(geometry, 3435), year = str_replace(year, "X", "") ) %>% - write_geoparquet(remote_file) + geoparquet_to_s3(remote_file) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-ohare_noise.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-ohare_noise.R index 14d37b6bf..d188e1420 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-ohare_noise.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-ohare_noise.R @@ -126,7 +126,7 @@ remote_file <- file.path( AWS_S3_WAREHOUSE_BUCKET, "spatial", "environment", "ohare_noise_monitor", "ohare_noise_monitor.parquet" ) -write_geoparquet(noise_addresses_clean, remote_file) +geoparquet_to_s3(noise_addresses_clean, remote_file) file.remove(tmp_file) @@ -153,4 +153,4 @@ ohare_noise_contour <- st_read(tmp_file) %>% geometry_3435 = st_transform(geom, 3435) ) %>% select(airport, decibels, geometry = geom, geometry_3435) %>% - write_geoparquet(remote_file) + geoparquet_to_s3(remote_file) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment_road.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment_road.R index 4a098d798..97699a44c 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment_road.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment_road.R @@ -291,7 +291,7 @@ walk(parquet_files, \(file_key) { select(-year) output_path <- file.path(output_bucket, paste0("year=", tools::file_path_sans_ext(basename(file_key))), "part-0.parquet") - geoarrow::write_geoparquet(shapefile_data, output_path) + geoparquet_to_s3(shapefile_data, output_path) print(paste(file_key, "cleaned and uploaded.")) } diff 
--git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-other.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-other.R index 1c0d15472..721a488bd 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-other.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-other.R @@ -43,7 +43,7 @@ walk(subdivisions_raw, function(shapefile_path) { filter(st_is_valid(geometry) & !is.na(pagesubref)) %>% mutate(geometry_3435 = st_transform(geometry, 3435)) %>% select(pagesubref, geometry, geometry_3435) %>% - geoarrow::write_geoparquet(dest_path) + geoparquet_to_s3(dest_path) } file.remove(tmp_file) @@ -79,7 +79,7 @@ clean_comm_areas <- function(shapefile_path) { area_number = area_numbe, geometry, geometry_3435 ) %>% - geoarrow::write_geoparquet( + geoparquet_to_s3( file.path( AWS_S3_WAREHOUSE_BUCKET, "spatial", "other", "community_area", paste0("year=", str_extract(shapefile_path, "[0-9]{4}")), diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-parcel.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-parcel.R index 6153e8d3d..a61ff8ce2 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-parcel.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-parcel.R @@ -387,7 +387,7 @@ process_parcel_file <- function(s3_bucket_uri, } # Write local backup copy - write_geoparquet(spatial_df_final, local_backup_file) + geoparquet_to_s3(spatial_df_final, local_backup_file) tictoc::toc() } else { message("Loading processed parcels from backup for: ", file_year) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-transit.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-transit.R index 7c6ad7812..68117dc2e 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-transit.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-transit.R @@ -70,7 +70,7 @@ process_gtfs_feed <- function(s3_bucket_uri, date, year, agency, feed_url) { 
any_of(c("location_type", "parent_station", "wheelchair_boarding")), any_of(c("feed_pull_date", "geometry", "geometry_3435")) ) %>% - write_geoparquet(remote_file_stop) + geoparquet_to_s3(remote_file_stop) } # Now create route geometries and save. Skip PACE since they have no geoms @@ -101,7 +101,7 @@ process_gtfs_feed <- function(s3_bucket_uri, date, year, agency, feed_url) { route_color, route_text_color, feed_pull_date, geometry, geometry_3435 ) %>% - write_geoparquet(remote_file_route) + geoparquet_to_s3(remote_file_route) } } } From b54f6f53a739413cd39c58e36a5f3668d421c1e7 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Thu, 12 Dec 2024 22:20:04 +0000 Subject: [PATCH 136/166] Clean ccao script --- .../spatial/spatial-ccao.R | 22 ++++++++----------- .../spatial/spatial-environment-major_road.R | 2 +- .../spatial-environment-secondary_road.R | 2 +- .../spatial/spatial-environment_road.R | 2 +- 4 files changed, 12 insertions(+), 16 deletions(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R index 58b1e266f..8ebf250c5 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R @@ -11,20 +11,16 @@ output_bucket <- file.path(AWS_S3_RAW_BUCKET, "spatial", "ccao") # Read privileges for the this drive location are limited. # Contact Cook County GIS if permissions need to be changed. 
-file_path <- "//gisemcv1.ccounty.com/ArchiveServices" # nolint +file_path <- "//gisemcv1.ccounty.com/ArchiveServices/" # nolint -sources_list <- bind_rows( - list( - # NEIGHBORHOOD - "neighborhood" = c( - "url" = paste0( - "https://gitlab.com/ccao-data-science---modeling/packages/ccao", - "/-/raw/master/data-raw/nbhd_shp.geojson" - ), - "boundary" = "neighborhood", - "year" = "2021" - ) - ) +sources_list <- data.frame( + # NEIGHBORHOOD + "url" = paste0( + "https://gitlab.com/ccao-data-science---modeling/packages/ccao", + "/-/raw/master/data-raw/nbhd_shp.geojson" + ), + "boundary" = "neighborhood", + "year" = "2021" ) # Function to call referenced API, pull requested data, and write it to S3 diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-major_road.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-major_road.R index 39a725314..dcbb47de5 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-major_road.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-major_road.R @@ -41,6 +41,6 @@ for (year in years) { st_transform(4326) %>% mutate(geometry_3435 = st_transform(geometry, 3435)) - geoparquet_to_s3(osm_roads, remote_file) + geoarrow::write_geoparquet(osm_roads, remote_file) } } diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-secondary_road.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-secondary_road.R index 697f23c92..3705cbdf5 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-secondary_road.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-secondary_road.R @@ -41,6 +41,6 @@ for (year in years) { st_transform(4326) %>% mutate(geometry_3435 = st_transform(geometry, 3435)) - geoparquet_to_s3(osm_roads, remote_file) + geoarrow::write_geoparquet(osm_roads, remote_file) } } diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_road.R 
b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_road.R index 099a23979..336dace84 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_road.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_road.R @@ -77,7 +77,7 @@ walk(years, \(x) { mutate(year = as.character(x)) # Save the shapefile as a GeoParquet file - geoparquet_to_s3(shapefile_data, remote_file_path) + geoarrow::write_geoparquet(shapefile_data, remote_file_path) } else { message(paste("No shapefile found for year", x, ".")) } From 0659dcdbe52778acd140287a7ad9a1350e2e3036 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Mon, 16 Dec 2024 19:10:57 +0000 Subject: [PATCH 137/166] Address non-spatial parquets --- .../ccao/ccao-condominium-pin_condo_char.R | 437 +++++++++--------- .../ccao/ccao-condominium_parking.R | 1 + .../census/census-acs.R | 5 +- .../census/census-decennial.R | 3 +- .../environment/environment-airport_noise.R | 1 + .../spatial/spatial-environment_road.R | 6 +- 6 files changed, 231 insertions(+), 222 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-condominium-pin_condo_char.R b/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-condominium-pin_condo_char.R index 8c381b26e..34d2fae98 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-condominium-pin_condo_char.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-condominium-pin_condo_char.R @@ -1,218 +1,219 @@ -# This script cleans and combines raw condo characteristics data for the -# warehouse -library(arrow) -library(aws.s3) -library(DBI) -library(data.table) -library(dplyr) -library(glue) -library(noctua) -library(purrr) -library(stringr) -library(tidyr) -source("utils.R") - -# Declare raw and clean condo data locations -AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") -AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET") -output_bucket <- file.path( - AWS_S3_WAREHOUSE_BUCKET, - "ccao", "condominium", 
"pin_condo_char" -) - -# Connect to Athena -AWS_ATHENA_CONN_NOCTUA <- dbConnect(noctua::athena()) - -# Get S3 file addresses -files <- grep( - ".parquet", - file.path( - AWS_S3_RAW_BUCKET, - aws.s3::get_bucket_df( - AWS_S3_RAW_BUCKET, - prefix = "ccao/condominium/pin_condo_char/" - )$Key - ), - value = TRUE -) - -# Grab sales/spatial data -classes <- dbGetQuery( - conn = AWS_ATHENA_CONN_NOCTUA, " - SELECT DISTINCT - parid AS pin, - class - FROM iasworld.pardat - WHERE taxyr = (SELECT MAX(taxyr) FROM iasworld.pardat) - AND class IN ('299', '399') - " -) - -# Grab all years of previously assembled condo data already present on Athena -years <- dbGetQuery( - conn = AWS_ATHENA_CONN_NOCTUA, " - SELECT DISTINCT year FROM ccao.pin_condo_char - " -) %>% - pull(year) - -# Function to grab chars data from Athena if it's already available -athena_chars <- function(x) { - dbGetQuery( - conn = AWS_ATHENA_CONN_NOCTUA, glue(" - SELECT * FROM ccao.pin_condo_char - WHERE year = '{x}' - ") - ) -} - -# A place to store characteristics data so we can stack it -chars <- list() - -# We use tax year, valuations uses year the work was done -for (i in c("2021", "2022", "2023")) { - if (!("2021" %in% years) && i == "2021") { - # If clean 2021 data is not already in Athena, load and clean it - chars[[i]] <- map( - grep("2022", files, value = TRUE), function(x) { - read_parquet(x) %>% - tibble(.name_repair = "unique") %>% - rename_with(~ tolower(.x)) %>% - mutate(pin = str_pad(parid, 14, side = "left", pad = "0")) %>% - select(contains(c("pin", "sqft", "bed", "source"))) %>% - select(-contains(c("x", "all", "search"))) %>% - rename_with(~"bedrooms", contains("bed")) %>% - rename_with(~"unit_sf", contains("unit")) %>% - rename_with(~"building_sf", contains("building")) - } - ) %>% - rbindlist(fill = TRUE) %>% - inner_join(classes) %>% - mutate(across(c(unit_sf, building_sf), ~ na_if(., "0"))) %>% - mutate(across(c(unit_sf, building_sf), ~ na_if(., "1"))) %>% - mutate( - across(c(building_sf, 
unit_sf, bedrooms), ~ gsub("[^0-9.-]", "", .)) - ) %>% - mutate(across(.cols = everything(), ~ trimws(., which = "both"))) %>% - na_if("") %>% - mutate( - bedrooms = case_when( - is.na(unit_sf) & bedrooms == "0" ~ NA_character_, - TRUE ~ bedrooms - ) - ) %>% - mutate(across(c(building_sf, unit_sf, bedrooms), ~ as.numeric(.))) %>% - mutate( - bedrooms = ceiling(bedrooms), - parking_pin = str_detect(source, "(?i)parking|garage") & - is.na(unit_sf) & is.na(building_sf), - year = "2021" - ) %>% - select(-c(class, source)) %>% - # These are obvious typos - mutate(unit_sf = case_when( - unit_sf == 28002000 ~ 2800, - unit_sf == 20002800 ~ 2000, - unit_sf == 182901 ~ 1829, - TRUE ~ unit_sf - )) - } else if (!("2022" %in% years) && i == "2022") { - # If clean 2022 data is not already in Athena, load and clean it - chars[[i]] <- lapply(grep("2023", files, value = TRUE), function(x) { - raw <- read_parquet(x)[, 1:20] - - names <- tolower(names(raw)) - names(raw) <- make.unique(names) - - raw %>% - select(!contains("pin")) %>% - rename_with(~ str_replace(.x, "iasworold", "iasworld")) %>% - mutate(pin = str_pad(iasworld_parid, 14, side = "left", pad = "0")) %>% - rename_with(~ str_replace_all(.x, "[[:space:]]", "")) %>% - rename_with(~ str_replace_all(.x, "\\.{4}", "")) %>% - select(!contains(c("1", "2", "all"))) %>% - select(contains(c("pin", "sq", "bed", "bath"))) %>% - rename_with(~"bedrooms", contains("bed")) %>% - rename_with(~"unit_sf", contains("unit")) %>% - rename_with(~"building_sf", contains(c("building", "bldg"))) %>% - rename_with(~"half_baths", contains("half")) %>% - rename_with(~"full_baths", contains("full")) %>% - mutate( - across(!contains("pin"), as.numeric), - year = "2022", - # Define a parking pin as a unit with only 0 or NA values for - # characteristics - parking_pin = case_when( - (bedrooms == 0 | unit_sf == 0) & - rowSums( - across(c(unit_sf, bedrooms, full_baths, half_baths)), - na.rm = TRUE - ) == 0 ~ TRUE, - TRUE ~ FALSE - ), - # Really low unit_sf 
should be considered NA - unit_sf = case_when( - unit_sf < 5 & !parking_pin ~ NA_real_, - TRUE ~ unit_sf - ), - # Assume missing half_baths value is 0 if there is full bathroom data - # for PIN - half_baths = case_when( - is.na(half_baths) & !is.na(full_baths) & full_baths > 0 ~ 0, - TRUE ~ half_baths - ), - # Make beds and baths are integers - across(c(half_baths, full_baths, bedrooms), ~ ceiling(.x)), - # Set all characteristics to NA for parking pins - across( - c(bedrooms, unit_sf, half_baths, full_baths), - ~ ifelse(parking_pin, NA, .x) - ) - ) - }) %>% - bind_rows() %>% - group_by(pin) %>% - arrange(unit_sf) %>% - filter(row_number() == 1) %>% - ungroup() %>% - filter(!is.na(pin)) - } else if (!("2023" %in% years) && i == "2023") { - chars[[i]] <- lapply(grep("2024", files, value = TRUE), function(x) { - read_parquet(x) %>% - select( - pin = "14.Digit.PIN", - building_sf = "Building.Square.Footage", - unit_sf = "Unit.Square.Footage", - bedrooms = "Bedrooms", - parking_pin = "Parking.Space.Change", - full_baths = "Full.Baths", - half_baths = "Half.Baths" - ) %>% - mutate( - pin = gsub("[^0-9]", "", pin), - parking_pin = if_all( - c(unit_sf, bedrooms, full_baths, half_baths), is.na - ) & !is.na(parking_pin), - year = "2023", - bedrooms = case_when(bedrooms > 15 ~ NA_real_, TRUE ~ bedrooms), - full_baths = case_when(full_baths > 10 ~ NA_real_, TRUE ~ full_baths), - unit_sf = case_when(unit_sf < 5 ~ NA_real_, TRUE ~ unit_sf) - ) - }) %>% - bind_rows() - } else { - # If data is already in Athena, just take it from there - chars[[i]] <- athena_chars(i) - } -} - -# Upload cleaned data to S3 -chars %>% - bind_rows() %>% - group_by(year) %>% - arrow::write_dataset( - path = output_bucket, - format = "parquet", - hive_style = TRUE, - compression = "snappy" - ) +# This script cleans and combines raw condo characteristics data for the +# warehouse +library(arrow) +library(aws.s3) +library(DBI) +library(data.table) +library(dplyr) +library(glue) +library(noctua) 
+library(purrr) +library(stringr) +library(tidyr) +source("utils.R") + +# Declare raw and clean condo data locations +AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") +AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET") +output_bucket <- file.path( + AWS_S3_WAREHOUSE_BUCKET, + "ccao", "condominium", "pin_condo_char" +) + +# Connect to Athena +AWS_ATHENA_CONN_NOCTUA <- dbConnect(noctua::athena()) + +# Get S3 file addresses +files <- grep( + ".parquet", + file.path( + AWS_S3_RAW_BUCKET, + aws.s3::get_bucket_df( + AWS_S3_RAW_BUCKET, + prefix = "ccao/condominium/pin_condo_char/" + )$Key + ), + value = TRUE +) + +# Grab sales/spatial data +classes <- dbGetQuery( + conn = AWS_ATHENA_CONN_NOCTUA, " + SELECT DISTINCT + parid AS pin, + class + FROM iasworld.pardat + WHERE taxyr = (SELECT MAX(taxyr) FROM iasworld.pardat) + AND class IN ('299', '399') + " +) + +# Grab all years of previously assembled condo data already present on Athena +years <- dbGetQuery( + conn = AWS_ATHENA_CONN_NOCTUA, " + SELECT DISTINCT year FROM ccao.pin_condo_char + " +) %>% + pull(year) + +# Function to grab chars data from Athena if it's already available +athena_chars <- function(x) { + dbGetQuery( + conn = AWS_ATHENA_CONN_NOCTUA, glue(" + SELECT * FROM ccao.pin_condo_char + WHERE year = '{x}' + ") + ) +} + +# A place to store characteristics data so we can stack it +chars <- list() + +# We use tax year, valuations uses year the work was done +for (i in c("2021", "2022", "2023")) { + if (!("2021" %in% years) && i == "2021") { + # If clean 2021 data is not already in Athena, load and clean it + chars[[i]] <- map( + grep("2022", files, value = TRUE), function(x) { + read_parquet(x) %>% + tibble(.name_repair = "unique") %>% + rename_with(~ tolower(.x)) %>% + mutate(pin = str_pad(parid, 14, side = "left", pad = "0")) %>% + select(contains(c("pin", "sqft", "bed", "source"))) %>% + select(-contains(c("x", "all", "search"))) %>% + rename_with(~"bedrooms", contains("bed")) %>% + 
rename_with(~"unit_sf", contains("unit")) %>% + rename_with(~"building_sf", contains("building")) + } + ) %>% + rbindlist(fill = TRUE) %>% + inner_join(classes) %>% + mutate(across(c(unit_sf, building_sf), ~ na_if(., "0"))) %>% + mutate(across(c(unit_sf, building_sf), ~ na_if(., "1"))) %>% + mutate( + across(c(building_sf, unit_sf, bedrooms), ~ gsub("[^0-9.-]", "", .)) + ) %>% + mutate(across(.cols = everything(), ~ trimws(., which = "both"))) %>% + na_if("") %>% + mutate( + bedrooms = case_when( + is.na(unit_sf) & bedrooms == "0" ~ NA_character_, + TRUE ~ bedrooms + ) + ) %>% + mutate(across(c(building_sf, unit_sf, bedrooms), ~ as.numeric(.))) %>% + mutate( + bedrooms = ceiling(bedrooms), + parking_pin = str_detect(source, "(?i)parking|garage") & + is.na(unit_sf) & is.na(building_sf), + year = "2021" + ) %>% + select(-c(class, source)) %>% + # These are obvious typos + mutate(unit_sf = case_when( + unit_sf == 28002000 ~ 2800, + unit_sf == 20002800 ~ 2000, + unit_sf == 182901 ~ 1829, + TRUE ~ unit_sf + )) + } else if (!("2022" %in% years) && i == "2022") { + # If clean 2022 data is not already in Athena, load and clean it + chars[[i]] <- lapply(grep("2023", files, value = TRUE), function(x) { + raw <- read_parquet(x)[, 1:20] + + names <- tolower(names(raw)) + names(raw) <- make.unique(names) + + raw %>% + select(!contains("pin")) %>% + rename_with(~ str_replace(.x, "iasworold", "iasworld")) %>% + mutate(pin = str_pad(iasworld_parid, 14, side = "left", pad = "0")) %>% + rename_with(~ str_replace_all(.x, "[[:space:]]", "")) %>% + rename_with(~ str_replace_all(.x, "\\.{4}", "")) %>% + select(!contains(c("1", "2", "all"))) %>% + select(contains(c("pin", "sq", "bed", "bath"))) %>% + rename_with(~"bedrooms", contains("bed")) %>% + rename_with(~"unit_sf", contains("unit")) %>% + rename_with(~"building_sf", contains(c("building", "bldg"))) %>% + rename_with(~"half_baths", contains("half")) %>% + rename_with(~"full_baths", contains("full")) %>% + mutate( + 
across(!contains("pin"), as.numeric), + year = "2022", + # Define a parking pin as a unit with only 0 or NA values for + # characteristics + parking_pin = case_when( + (bedrooms == 0 | unit_sf == 0) & + rowSums( + across(c(unit_sf, bedrooms, full_baths, half_baths)), + na.rm = TRUE + ) == 0 ~ TRUE, + TRUE ~ FALSE + ), + # Really low unit_sf should be considered NA + unit_sf = case_when( + unit_sf < 5 & !parking_pin ~ NA_real_, + TRUE ~ unit_sf + ), + # Assume missing half_baths value is 0 if there is full bathroom data + # for PIN + half_baths = case_when( + is.na(half_baths) & !is.na(full_baths) & full_baths > 0 ~ 0, + TRUE ~ half_baths + ), + # Make beds and baths are integers + across(c(half_baths, full_baths, bedrooms), ~ ceiling(.x)), + # Set all characteristics to NA for parking pins + across( + c(bedrooms, unit_sf, half_baths, full_baths), + ~ ifelse(parking_pin, NA, .x) + ) + ) + }) %>% + bind_rows() %>% + group_by(pin) %>% + arrange(unit_sf) %>% + filter(row_number() == 1) %>% + ungroup() %>% + filter(!is.na(pin)) + } else if (!("2023" %in% years) && i == "2023") { + chars[[i]] <- lapply(grep("2024", files, value = TRUE), function(x) { + read_parquet(x) %>% + select( + pin = "14.Digit.PIN", + building_sf = "Building.Square.Footage", + unit_sf = "Unit.Square.Footage", + bedrooms = "Bedrooms", + parking_pin = "Parking.Space.Change", + full_baths = "Full.Baths", + half_baths = "Half.Baths" + ) %>% + mutate( + pin = gsub("[^0-9]", "", pin), + parking_pin = if_all( + c(unit_sf, bedrooms, full_baths, half_baths), is.na + ) & !is.na(parking_pin), + year = "2023", + bedrooms = case_when(bedrooms > 15 ~ NA_real_, TRUE ~ bedrooms), + full_baths = case_when(full_baths > 10 ~ NA_real_, TRUE ~ full_baths), + unit_sf = case_when(unit_sf < 5 ~ NA_real_, TRUE ~ unit_sf) + ) + }) %>% + bind_rows() + } else { + # If data is already in Athena, just take it from there + chars[[i]] <- athena_chars(i) + } +} + +# Upload cleaned data to S3 +chars %>% + bind_rows() %>% + 
mutate(loaded_at = as.character(Sys.time())) %>% + group_by(year) %>% + arrow::write_dataset( + path = output_bucket, + format = "parquet", + hive_style = TRUE, + compression = "snappy" + ) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-condominium_parking.R b/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-condominium_parking.R index cee094c33..61b2dc5f6 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-condominium_parking.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-condominium_parking.R @@ -65,6 +65,7 @@ nonlivable[["neg_pred"]] <- map( # Upload all nonlivable spaces to nonlivable table nonlivable %>% bind_rows() %>% + mutate(loaded_at = as.character(Sys.time())) %>% group_by(year) %>% arrow::write_dataset( path = output_bucket, diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/census/census-acs.R b/etl/scripts-ccao-data-warehouse-us-east-1/census/census-acs.R index a17ba3bcf..fa467133c 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/census/census-acs.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/census/census-acs.R @@ -133,10 +133,11 @@ pull_and_write_acs <- function( )) %>% rename(any_of(c("GEOID" = "GEOID...1"))) %>% select(-starts_with("GEOID..."), -starts_with("NAME")) %>% - filter(!str_detect(GEOID, "Z")) + filter(!str_detect(GEOID, "Z")) %>% + mutate(loaded_at = as.character(Sys.time())) # Write to S3 - arrow::write_parquet(df, remote_file) + write_parquet(df, remote_file) } } diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/census/census-decennial.R b/etl/scripts-ccao-data-warehouse-us-east-1/census/census-decennial.R index 50977f0ba..7825c7c70 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/census/census-decennial.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/census/census-decennial.R @@ -110,7 +110,8 @@ pull_and_write_dec <- function(s3_bucket_uri, survey, folder, geography, year) { cache_table = TRUE ) %>% select(-NAME) %>% - rename_with(~ rename_to_2020(.x, year), 
.cols = !GEOID) + rename_with(~ rename_to_2020(.x, year), .cols = !GEOID) %>% + mutate(loaded_at = as.character(Sys.time())) # Write to S3 arrow::write_parquet(df, remote_file) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/environment/environment-airport_noise.R b/etl/scripts-ccao-data-warehouse-us-east-1/environment/environment-airport_noise.R index 3dd104e15..f712ec843 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/environment/environment-airport_noise.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/environment/environment-airport_noise.R @@ -84,6 +84,7 @@ pins %>% select(pin10, airport_noise_dnl) %>% mutate(airport_noise_dnl = replace_na(airport_noise_dnl, 52.5)) %>% st_drop_geometry() %>% + mutate(loaded_at = as.character(Sys.time())) %>% write_parquet( file.path(output_bucket, paste0("year=omp"), "part-0.parquet") ) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment_road.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment_road.R index 97699a44c..fdaba273a 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment_road.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment_road.R @@ -290,7 +290,11 @@ walk(parquet_files, \(file_key) { )) %>% select(-year) - output_path <- file.path(output_bucket, paste0("year=", tools::file_path_sans_ext(basename(file_key))), "part-0.parquet") + output_path <- file.path( + output_bucket, + paste0("year=", tools::file_path_sans_ext(basename(file_key))), + "part-0.parquet" + ) geoparquet_to_s3(shapefile_data, output_path) print(paste(file_key, "cleaned and uploaded.")) From ea3b35c5c1b4d51fa03fd605fae54c83cd84ad13 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Mon, 16 Dec 2024 19:15:23 +0000 Subject: [PATCH 138/166] More replacing --- .../spatial/spatial-environment-major_road.R | 2 +- .../spatial/spatial-environment-secondary_road.R | 2 +- .../spatial/spatial-environment_road.R | 2 +- 
.../spatial/spatial-environment_road.R | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-major_road.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-major_road.R index dcbb47de5..39a725314 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-major_road.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-major_road.R @@ -41,6 +41,6 @@ for (year in years) { st_transform(4326) %>% mutate(geometry_3435 = st_transform(geometry, 3435)) - geoarrow::write_geoparquet(osm_roads, remote_file) + geoparquet_to_s3(osm_roads, remote_file) } } diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-secondary_road.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-secondary_road.R index 3705cbdf5..697f23c92 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-secondary_road.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-secondary_road.R @@ -41,6 +41,6 @@ for (year in years) { st_transform(4326) %>% mutate(geometry_3435 = st_transform(geometry, 3435)) - geoarrow::write_geoparquet(osm_roads, remote_file) + geoparquet_to_s3(osm_roads, remote_file) } } diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_road.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_road.R index 336dace84..099a23979 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_road.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_road.R @@ -77,7 +77,7 @@ walk(years, \(x) { mutate(year = as.character(x)) # Save the shapefile as a GeoParquet file - geoarrow::write_geoparquet(shapefile_data, remote_file_path) + geoparquet_to_s3(shapefile_data, remote_file_path) } else { message(paste("No shapefile found for year", x, ".")) } diff --git 
a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment_road.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment_road.R index fdaba273a..6e94ccd5f 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment_road.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment_road.R @@ -294,7 +294,7 @@ walk(parquet_files, \(file_key) { output_bucket, paste0("year=", tools::file_path_sans_ext(basename(file_key))), "part-0.parquet" - ) + ) geoparquet_to_s3(shapefile_data, output_path) print(paste(file_key, "cleaned and uploaded.")) From c76ca75e3c0ff84ec28541a9d3017a8f3dba661b Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Mon, 16 Dec 2024 19:55:23 +0000 Subject: [PATCH 139/166] Undo raw changes --- .../spatial/spatial-environment-major_road.R | 2 +- .../spatial/spatial-environment-secondary_road.R | 2 +- .../spatial/spatial-environment_road.R | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-major_road.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-major_road.R index 39a725314..dcbb47de5 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-major_road.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-major_road.R @@ -41,6 +41,6 @@ for (year in years) { st_transform(4326) %>% mutate(geometry_3435 = st_transform(geometry, 3435)) - geoparquet_to_s3(osm_roads, remote_file) + geoarrow::write_geoparquet(osm_roads, remote_file) } } diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-secondary_road.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-secondary_road.R index 697f23c92..3705cbdf5 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-secondary_road.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment-secondary_road.R @@ -41,6 +41,6 @@ for (year in years) 
{ st_transform(4326) %>% mutate(geometry_3435 = st_transform(geometry, 3435)) - geoparquet_to_s3(osm_roads, remote_file) + geoarrow::write_geoparquet(osm_roads, remote_file) } } diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_road.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_road.R index 099a23979..336dace84 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_road.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-environment_road.R @@ -77,7 +77,7 @@ walk(years, \(x) { mutate(year = as.character(x)) # Save the shapefile as a GeoParquet file - geoparquet_to_s3(shapefile_data, remote_file_path) + geoarrow::write_geoparquet(shapefile_data, remote_file_path) } else { message(paste("No shapefile found for year", x, ".")) } From 52f5877f060fca3209226652f1d2c20363adb8c3 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Tue, 17 Dec 2024 15:09:51 +0000 Subject: [PATCH 140/166] Test renv fix --- .github/workflows/lint.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 8b0220cbb..033f1a313 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -23,6 +23,10 @@ jobs: # list of changed files within `super-linter` fetch-depth: 0 + - name: Disable renv + shell: bash + run: rm etl/.Rprofile + - name: Lint uses: github/super-linter@v6 env: From 3528bdfe106165a9ebee078a5239c9a5f6aa5827 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Tue, 17 Dec 2024 15:29:04 +0000 Subject: [PATCH 141/166] Linting --- etl/utils.R | 75 ++++++++++++++++++++++------------------------------- 1 file changed, 31 insertions(+), 44 deletions(-) diff --git a/etl/utils.R b/etl/utils.R index a4156acc4..0a69cbc31 100644 --- a/etl/utils.R +++ b/etl/utils.R @@ -7,7 +7,7 @@ library(tools) save_s3_to_local <- function(s3_uri, path, overwrite = FALSE) { - if (!file.exists(path) | overwrite) { + if (!file.exists(path) || overwrite) 
{ message("Saving file: ", s3_uri, " to: ", path) aws.s3::save_object(object = s3_uri, file = path) } @@ -15,14 +15,14 @@ save_s3_to_local <- function(s3_uri, path, overwrite = FALSE) { save_local_to_s3 <- function(s3_uri, path, overwrite = FALSE) { - if (!aws.s3::object_exists(s3_uri) | overwrite) { + if (!aws.s3::object_exists(s3_uri) || overwrite) { message("Saving file: ", path, "to: ", s3_uri) aws.s3::put_object( file = path, object = s3_uri, show_progress = TRUE, multipart = TRUE - ) + ) } } @@ -34,8 +34,7 @@ open_data_to_s3 <- function(s3_bucket_uri, file_year, file_ext, file_prefix = NULL, - overwrite = FALSE - ) { + overwrite = FALSE) { open_data_file <- paste0(base_url, data_url) remote_file <- file.path( s3_bucket_uri, dir_name, @@ -61,8 +60,7 @@ open_data_to_s3 <- function(s3_bucket_uri, write_partitions_to_s3 <- function(df, s3_output_path, is_spatial = TRUE, - overwrite = FALSE - ) { + overwrite = FALSE) { if (!dplyr::is.grouped_df(df)) { warning("Input data must contain grouping vars for partitioning") } @@ -80,7 +78,7 @@ write_partitions_to_s3 <- function(df, remote_path <- file.path( s3_output_path, partition_path, "part-0.parquet" ) - if (!object_exists(remote_path) | overwrite) { + if (!object_exists(remote_path) || overwrite) { message("Now uploading: ", partition_path) tmp_file <- tempfile(fileext = ".parquet") if (is_spatial) { @@ -95,32 +93,30 @@ write_partitions_to_s3 <- function(df, standardize_expand_geo <- function(spatial_df, make_valid = FALSE, polygon = TRUE) { - return( - spatial_df %>% st_transform(4326) %>% - { if (make_valid) st_make_valid(.) else .} %>% + { + if (make_valid) st_make_valid(.) else . 
+ } %>% mutate(geometry_3435 = st_transform(geometry, 3435)) %>% - { if (polygon) { - - mutate(., centroid = st_centroid(st_transform(geometry, 3435))) %>% - cbind(., - st_coordinates(st_transform(.$centroid, 4326)), - st_coordinates(.$centroid) - ) %>% - select(!contains("centroid"), - lon = X, lat = Y, x_3435 = `X.1`, y_3435 = `Y.1`, geometry, geometry_3435) - - } else { - - select(., dplyr::everything(), geometry, geometry_3435) - - } + { + if (polygon) { + mutate(., centroid = st_centroid(st_transform(geometry, 3435))) %>% + cbind( + ., + st_coordinates(st_transform(.$centroid, 4326)), + st_coordinates(.$centroid) + ) %>% + select(!contains("centroid"), + lon = X, lat = Y, x_3435 = `X.1`, y_3435 = `Y.1`, + geometry, geometry_3435 + ) + } else { + select(., dplyr::everything(), geometry, geometry_3435) } - + } ) - } county_gdb_to_s3 <- function( @@ -128,9 +124,7 @@ county_gdb_to_s3 <- function( dir_name, file_path, layer, - overwrite = FALSE -) { - + overwrite = FALSE) { remote_file <- file.path( s3_bucket_uri, dir_name, @@ -138,31 +132,24 @@ county_gdb_to_s3 <- function( ) if (!aws.s3::object_exists(remote_file)) { - message(paste0("Reading ", basename(file_path))) if (layer %in% st_layers(file_path)$name) { - try({ - tmp_file <- tempfile(fileext = ".geojson") st_read(file_path, layer) %>% st_write(tmp_file) save_local_to_s3(remote_file, tmp_file, overwrite = overwrite) file.remove(tmp_file) cat(paste0("File successfully written to ", remote_file, "\n")) - }) - } else { - - cat(paste0("Layer '", layer, - "' not present in ", - basename(file_path), - "... skipping.\n") - ) - + cat(paste0( + "Layer '", layer, + "' not present in ", + basename(file_path), + "... 
skipping.\n" + )) } - } } From 459fc8d656e1cd2d11a1ef9fc445d22776fd5b73 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Tue, 17 Dec 2024 15:33:37 +0000 Subject: [PATCH 142/166] Linting --- etl/utils.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/etl/utils.R b/etl/utils.R index 0a69cbc31..267b0232f 100644 --- a/etl/utils.R +++ b/etl/utils.R @@ -92,7 +92,9 @@ write_partitions_to_s3 <- function(df, } -standardize_expand_geo <- function(spatial_df, make_valid = FALSE, polygon = TRUE) { +standardize_expand_geo <- function( + spatial_df, make_valid = FALSE, polygon = TRUE + ) { return( spatial_df %>% st_transform(4326) %>% From 166701fe679f2baae81e8442f54fcc26fed30a74 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Tue, 17 Dec 2024 16:02:22 +0000 Subject: [PATCH 143/166] Linting --- etl/utils.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/etl/utils.R b/etl/utils.R index 267b0232f..a069cc571 100644 --- a/etl/utils.R +++ b/etl/utils.R @@ -93,8 +93,7 @@ write_partitions_to_s3 <- function(df, standardize_expand_geo <- function( - spatial_df, make_valid = FALSE, polygon = TRUE - ) { + spatial_df, make_valid = FALSE, polygon = TRUE) { return( spatial_df %>% st_transform(4326) %>% From 67771aab6bb197b0e0f2f2f07e66b35103b4908b Mon Sep 17 00:00:00 2001 From: William Ridgeway Date: Tue, 17 Dec 2024 14:07:43 -0600 Subject: [PATCH 144/166] More loaded_at --- .../ccao/ccao-other-hie.R | 209 +++++++++--------- 1 file changed, 105 insertions(+), 104 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-other-hie.R b/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-other-hie.R index 0b01ab1a5..e0036ade3 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-other-hie.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-other-hie.R @@ -1,104 +1,105 @@ -library(arrow) -library(aws.s3) -library(DBI) -library(dplyr) -library(lubridate) -library(odbc) -library(purrr) -library(stringr) 
-library(tidyr) -source("utils.R") - -# This script retrieves and cleans home improvement exemption data stored in -# the CCAO's legacy AS/400 system -AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET") -output_bucket <- file.path(AWS_S3_WAREHOUSE_BUCKET, "ccao", "other", "hie") - -# Connect to legacy CCAO SQL server -CCAODATA <- dbConnect( - odbc(), - .connection_string = Sys.getenv("DB_CONFIG_CCAODATA") -) - -# Grab all legacy HIE data from the ADDCHARS table -hie <- DBI::dbGetQuery( - CCAODATA, - " - SELECT - QU_PIN, - QU_TOWN, - QU_MLT_CD, - QU_HOME_IMPROVEMENT, - QU_USE, - QU_EXTERIOR_WALL, - QU_ROOF, - QU_BASEMENT_TYPE, - QU_BASEMENT_FINISH, - QU_HEAT, - QU_AIR, - QU_ATTIC_TYPE, - QU_ATTIC_FINISH, - QU_TYPE_PLAN, - QU_TYPE_DESIGN, - QU_CONSTRUCT_QUALITY, - QU_PORCH, - QU_GARAGE_SIZE, - QU_GARAGE_CONST, - QU_GARAGE_ATTACHED, - QU_GARAGE_AREA, - QU_NUM_APTS, - QU_SQFT_BLD, - QU_LND_SQFT, - QU_CLASS, - QU_ROOMS, - QU_BEDS, - QU_FULL_BATH, - QU_HALF_BATH, - QU_FIRE_PLACE, - QU_NO__COM_UNIT, - QU_TYPE_OF_RES, - QU_UPLOAD_DATE, - TAX_YEAR - FROM ADDCHARS - WHERE QU_HOME_IMPROVEMENT = 1 - " -) - -# Clean up raw ADDCHARS data -hie_clean <- hie %>% - mutate( - QU_CLASS = as.numeric(stringr::str_sub(QU_CLASS, 1, 3)), - QU_PIN = str_pad(QU_PIN, 14, "left", "0"), - hie_last_year_active = map_chr( - ccao::chars_288_active(TAX_YEAR, as.character(QU_TOWN)), - ~ tail(.x, n = 1) - ), - QU_NO__COM_UNIT = as.numeric(QU_NO__COM_UNIT), - QU_NO__COM_UNIT = replace_na(QU_NO__COM_UNIT, 0), - across( - c(QU_TOWN:QU_NUM_APTS, QU_CLASS, QU_TYPE_OF_RES, TAX_YEAR), - as.character - ), - across(everything(), na_if, " "), - QU_CLASS = na_if(QU_CLASS, "0"), - # Convert upload date to date format and if missing, set as the earliest - # date for the year - QU_UPLOAD_DATE = lubridate::ymd(QU_UPLOAD_DATE), - QU_UPLOAD_DATE = lubridate::as_date(ifelse( - is.na(QU_UPLOAD_DATE), - lubridate::make_date(as.numeric(TAX_YEAR), 1, 1), - QU_UPLOAD_DATE - )), - ) %>% - rename_with(tolower) 
%>% - rename(pin = qu_pin, year = tax_year, qu_no_com_unit = qu_no__com_unit) - -# Save HIE data to warehouse, partitioned by year -hie_clean %>% - group_by(year) %>% - arrow::write_dataset( - path = output_bucket, - format = "parquet", - hive_style = TRUE, - compression = "snappy" - ) +library(arrow) +library(aws.s3) +library(DBI) +library(dplyr) +library(lubridate) +library(odbc) +library(purrr) +library(stringr) +library(tidyr) +source("utils.R") + +# This script retrieves and cleans home improvement exemption data stored in +# the CCAO's legacy AS/400 system +AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET") +output_bucket <- file.path(AWS_S3_WAREHOUSE_BUCKET, "ccao", "other", "hie") + +# Connect to legacy CCAO SQL server +CCAODATA <- dbConnect( + odbc(), + .connection_string = Sys.getenv("DB_CONFIG_CCAODATA") +) + +# Grab all legacy HIE data from the ADDCHARS table +hie <- DBI::dbGetQuery( + CCAODATA, + " + SELECT + QU_PIN, + QU_TOWN, + QU_MLT_CD, + QU_HOME_IMPROVEMENT, + QU_USE, + QU_EXTERIOR_WALL, + QU_ROOF, + QU_BASEMENT_TYPE, + QU_BASEMENT_FINISH, + QU_HEAT, + QU_AIR, + QU_ATTIC_TYPE, + QU_ATTIC_FINISH, + QU_TYPE_PLAN, + QU_TYPE_DESIGN, + QU_CONSTRUCT_QUALITY, + QU_PORCH, + QU_GARAGE_SIZE, + QU_GARAGE_CONST, + QU_GARAGE_ATTACHED, + QU_GARAGE_AREA, + QU_NUM_APTS, + QU_SQFT_BLD, + QU_LND_SQFT, + QU_CLASS, + QU_ROOMS, + QU_BEDS, + QU_FULL_BATH, + QU_HALF_BATH, + QU_FIRE_PLACE, + QU_NO__COM_UNIT, + QU_TYPE_OF_RES, + QU_UPLOAD_DATE, + TAX_YEAR + FROM ADDCHARS + WHERE QU_HOME_IMPROVEMENT = 1 + " +) + +# Clean up raw ADDCHARS data +hie_clean <- hie %>% + mutate( + QU_CLASS = as.numeric(stringr::str_sub(QU_CLASS, 1, 3)), + QU_PIN = str_pad(QU_PIN, 14, "left", "0"), + hie_last_year_active = map_chr( + ccao::chars_288_active(TAX_YEAR, as.character(QU_TOWN)), + ~ tail(.x, n = 1) + ), + QU_NO__COM_UNIT = as.numeric(QU_NO__COM_UNIT), + QU_NO__COM_UNIT = replace_na(QU_NO__COM_UNIT, 0), + across( + c(QU_TOWN:QU_NUM_APTS, QU_CLASS, QU_TYPE_OF_RES, 
TAX_YEAR), + as.character + ), + across(where(is.character), na_if, " "), + QU_CLASS = na_if(QU_CLASS, "0"), + # Convert upload date to date format and if missing, set as the earliest + # date for the year + QU_UPLOAD_DATE = lubridate::ymd(QU_UPLOAD_DATE), + QU_UPLOAD_DATE = lubridate::as_date(ifelse( + is.na(QU_UPLOAD_DATE), + lubridate::make_date(as.numeric(TAX_YEAR), 1, 1), + QU_UPLOAD_DATE + )), + ) %>% + rename_with(tolower) %>% + rename(pin = qu_pin, year = tax_year, qu_no_com_unit = qu_no__com_unit) + +# Save HIE data to warehouse, partitioned by year +hie_clean %>% + mutate(loaded_at = as.character(Sys.time())) %>% + group_by(year) %>% + arrow::write_dataset( + path = output_bucket, + format = "parquet", + hive_style = TRUE, + compression = "snappy" + ) From c5e7818abbea0d0f40a2a0c418226a6496c0b21e Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Tue, 17 Dec 2024 20:08:39 +0000 Subject: [PATCH 145/166] More loaded at --- .../ccao/ccao-land-land_nbhd_rate.R | 299 +++++++++--------- .../ccao/ccao-land-land_site_rate.R | 119 +++---- .../ccao/ccao-legacy.R | 4 + .../ccao/ccao-other-hie.R | 209 ++++++------ 4 files changed, 319 insertions(+), 312 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-land-land_nbhd_rate.R b/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-land-land_nbhd_rate.R index 9aa97fa86..5c9ee49bd 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-land-land_nbhd_rate.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-land-land_nbhd_rate.R @@ -1,149 +1,150 @@ -library(arrow) -library(aws.s3) -library(dplyr) -library(noctua) -library(openxlsx) -library(purrr) -library(readr) -library(snakecase) -library(stringr) -library(tidyr) -source("utils.R") - -# This script retrieves and cleans land value spreadsheets provided by -# the Valuations department and formats them for use in Athena -AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") -AWS_S3_WAREHOUSE_BUCKET <- 
Sys.getenv("AWS_S3_WAREHOUSE_BUCKET") -input_bucket <- file.path(AWS_S3_RAW_BUCKET, "ccao", "land") -output_bucket <- file.path(AWS_S3_WAREHOUSE_BUCKET, "ccao", "land") - -AWS_ATHENA_CONN_NOCTUA <- dbConnect(noctua::athena(), rstudio_conn_tab = FALSE) - -# Location of remote files -remote_file_raw_nbhd_rate_2022 <- file.path( - input_bucket, "nbhd_rate", "2022.xlsx" -) -remote_file_raw_nbhd_rate_2023 <- file.path( - input_bucket, "nbhd_rate", "2023.xlsx" -) -remote_file_raw_nbhd_rate_2024 <- file.path( - input_bucket, "nbhd_rate", "2024.xlsx" -) -remote_file_warehouse_nbhd_rate <- file.path( - output_bucket, "land_nbhd_rate" -) - - -# Temp file to download workbook -tmp_file_nbhd_rate_2022 <- tempfile(fileext = ".xlsx") -tmp_file_nbhd_rate_2023 <- tempfile(fileext = ".xlsx") -tmp_file_nbhd_rate_2024 <- tempfile(fileext = ".xlsx") - -# Grab the workbook from the raw S3 bucket -aws.s3::save_object( - object = remote_file_raw_nbhd_rate_2022, - file = tmp_file_nbhd_rate_2022 -) -aws.s3::save_object( - object = remote_file_raw_nbhd_rate_2023, - file = tmp_file_nbhd_rate_2023 -) -aws.s3::save_object( - object = remote_file_raw_nbhd_rate_2024, - file = tmp_file_nbhd_rate_2024 -) - -# List of regression classes -class <- dbGetQuery( - AWS_ATHENA_CONN_NOCTUA, - "SELECT class_code FROM ccao.class_dict WHERE regression_class" -) %>% - pull(class_code) - -# Load the raw workbooks, rename and clean up columns -land_nbhd_rate_2022 <- openxlsx::read.xlsx(tmp_file_nbhd_rate_2022) %>% - set_names(snakecase::to_snake_case(names(.))) %>% - select( - township_code = twp_number, - township_name = twp_name, - town_nbhd = twp_nbhd, - `2019` = `2019_rate`, - `2022` = `2022_rate` - ) %>% - pivot_longer( - c(`2019`, `2022`), - names_to = "year", values_to = "land_rate_per_sqft" - ) %>% - mutate( - across(c(township_code:town_nbhd, year), as.character), - town_nbhd = str_remove_all(town_nbhd, "-"), - land_rate_per_sqft = parse_number(land_rate_per_sqft) - ) %>% - expand_grid(class) - 
-land_nbhd_rate_2023 <- openxlsx::read.xlsx(tmp_file_nbhd_rate_2023) %>% - set_names(snakecase::to_snake_case(names(.))) %>% - select( - town_nbhd = neighborhood_id, - `2020` = `2020_2_00_class_unit_price`, - `2023` = `2023_2_00_class_unit_price` - ) %>% - mutate( - town_nbhd = gsub("\\D", "", town_nbhd), - township_code = substr(town_nbhd, 1, 2), - township_name = ccao::town_convert(township_code) - ) %>% - relocate(c(township_code, township_name)) %>% - pivot_longer( - c(`2020`, `2023`), - names_to = "year", values_to = "land_rate_per_sqft" - ) %>% - mutate(across(c(township_code:town_nbhd, year), as.character)) %>% - expand_grid(class) - -land_nbhd_rate_2024 <- openxlsx::read.xlsx(tmp_file_nbhd_rate_2024) %>% - set_names(snakecase::to_snake_case(names(.))) %>% - mutate( - town_nbhd = paste0( - township_code, str_pad(neighborhood, 3, side = "left", pad = "0") - ) - ) %>% - select( - town_nbhd, - classes, - `2021` = `2021_unit_price`, - `2024` = `2024_unit_price` - ) %>% - mutate( - town_nbhd = gsub("\\D", "", town_nbhd), - township_code = substr(town_nbhd, 1, 2), - township_name = ccao::town_convert(township_code) - ) %>% - relocate(c(township_code, township_name)) %>% - pivot_longer( - c(`2021`, `2024`), - names_to = "year", values_to = "land_rate_per_sqft" - ) %>% - mutate(across(c(township_code:town_nbhd, year), as.character)) %>% - expand_grid(class) %>% - # 2024 contains bifurcated neighborhood land rates across class - filter( - !(classes == "all other regression classes" & class %in% c("210", "295")), - !(classes == "2-10s/2-95s" & !(class %in% c("210", "295"))) - ) %>% - select(-classes) - -# Write the rates to S3, partitioned by year -bind_rows( - land_nbhd_rate_2022, - land_nbhd_rate_2023, - land_nbhd_rate_2024 -) %>% - relocate(land_rate_per_sqft, .after = last_col()) %>% - group_by(year) %>% - arrow::write_dataset( - path = remote_file_warehouse_nbhd_rate, - format = "parquet", - hive_style = TRUE, - compression = "snappy" - ) +library(arrow) 
+library(aws.s3) +library(dplyr) +library(noctua) +library(openxlsx) +library(purrr) +library(readr) +library(snakecase) +library(stringr) +library(tidyr) +source("utils.R") + +# This script retrieves and cleans land value spreadsheets provided by +# the Valuations department and formats them for use in Athena +AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") +AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET") +input_bucket <- file.path(AWS_S3_RAW_BUCKET, "ccao", "land") +output_bucket <- file.path(AWS_S3_WAREHOUSE_BUCKET, "ccao", "land") + +AWS_ATHENA_CONN_NOCTUA <- dbConnect(noctua::athena(), rstudio_conn_tab = FALSE) + +# Location of remote files +remote_file_raw_nbhd_rate_2022 <- file.path( + input_bucket, "nbhd_rate", "2022.xlsx" +) +remote_file_raw_nbhd_rate_2023 <- file.path( + input_bucket, "nbhd_rate", "2023.xlsx" +) +remote_file_raw_nbhd_rate_2024 <- file.path( + input_bucket, "nbhd_rate", "2024.xlsx" +) +remote_file_warehouse_nbhd_rate <- file.path( + output_bucket, "land_nbhd_rate" +) + + +# Temp file to download workbook +tmp_file_nbhd_rate_2022 <- tempfile(fileext = ".xlsx") +tmp_file_nbhd_rate_2023 <- tempfile(fileext = ".xlsx") +tmp_file_nbhd_rate_2024 <- tempfile(fileext = ".xlsx") + +# Grab the workbook from the raw S3 bucket +aws.s3::save_object( + object = remote_file_raw_nbhd_rate_2022, + file = tmp_file_nbhd_rate_2022 +) +aws.s3::save_object( + object = remote_file_raw_nbhd_rate_2023, + file = tmp_file_nbhd_rate_2023 +) +aws.s3::save_object( + object = remote_file_raw_nbhd_rate_2024, + file = tmp_file_nbhd_rate_2024 +) + +# List of regression classes +class <- dbGetQuery( + AWS_ATHENA_CONN_NOCTUA, + "SELECT class_code FROM ccao.class_dict WHERE regression_class" +) %>% + pull(class_code) + +# Load the raw workbooks, rename and clean up columns +land_nbhd_rate_2022 <- openxlsx::read.xlsx(tmp_file_nbhd_rate_2022) %>% + set_names(snakecase::to_snake_case(names(.))) %>% + select( + township_code = twp_number, + township_name = 
twp_name, + town_nbhd = twp_nbhd, + `2019` = `2019_rate`, + `2022` = `2022_rate` + ) %>% + pivot_longer( + c(`2019`, `2022`), + names_to = "year", values_to = "land_rate_per_sqft" + ) %>% + mutate( + across(c(township_code:town_nbhd, year), as.character), + town_nbhd = str_remove_all(town_nbhd, "-"), + land_rate_per_sqft = parse_number(land_rate_per_sqft) + ) %>% + expand_grid(class) + +land_nbhd_rate_2023 <- openxlsx::read.xlsx(tmp_file_nbhd_rate_2023) %>% + set_names(snakecase::to_snake_case(names(.))) %>% + select( + town_nbhd = neighborhood_id, + `2020` = `2020_2_00_class_unit_price`, + `2023` = `2023_2_00_class_unit_price` + ) %>% + mutate( + town_nbhd = gsub("\\D", "", town_nbhd), + township_code = substr(town_nbhd, 1, 2), + township_name = ccao::town_convert(township_code) + ) %>% + relocate(c(township_code, township_name)) %>% + pivot_longer( + c(`2020`, `2023`), + names_to = "year", values_to = "land_rate_per_sqft" + ) %>% + mutate(across(c(township_code:town_nbhd, year), as.character)) %>% + expand_grid(class) + +land_nbhd_rate_2024 <- openxlsx::read.xlsx(tmp_file_nbhd_rate_2024) %>% + set_names(snakecase::to_snake_case(names(.))) %>% + mutate( + town_nbhd = paste0( + township_code, str_pad(neighborhood, 3, side = "left", pad = "0") + ) + ) %>% + select( + town_nbhd, + classes, + `2021` = `2021_unit_price`, + `2024` = `2024_unit_price` + ) %>% + mutate( + town_nbhd = gsub("\\D", "", town_nbhd), + township_code = substr(town_nbhd, 1, 2), + township_name = ccao::town_convert(township_code) + ) %>% + relocate(c(township_code, township_name)) %>% + pivot_longer( + c(`2021`, `2024`), + names_to = "year", values_to = "land_rate_per_sqft" + ) %>% + mutate(across(c(township_code:town_nbhd, year), as.character)) %>% + expand_grid(class) %>% + # 2024 contains bifurcated neighborhood land rates across class + filter( + !(classes == "all other regression classes" & class %in% c("210", "295")), + !(classes == "2-10s/2-95s" & !(class %in% c("210", "295"))) + ) %>% + 
select(-classes) + +# Write the rates to S3, partitioned by year +bind_rows( + land_nbhd_rate_2022, + land_nbhd_rate_2023, + land_nbhd_rate_2024 +) %>% + relocate(land_rate_per_sqft, .after = last_col()) %>% + mutate(loaded_at = as.character(Sys.time())) %>% + group_by(year) %>% + arrow::write_dataset( + path = remote_file_warehouse_nbhd_rate, + format = "parquet", + hive_style = TRUE, + compression = "snappy" + ) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-land-land_site_rate.R b/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-land-land_site_rate.R index 55c114c33..194aa7cb9 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-land-land_site_rate.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-land-land_site_rate.R @@ -1,59 +1,60 @@ -library(arrow) -library(aws.s3) -library(dplyr) -library(openxlsx) -library(purrr) -library(readr) -library(snakecase) -library(stringr) -library(tidyr) -source("utils.R") - -# This script retrieves and cleans land value spreadsheets provided by -# the Valuations department and formats them for use in Athena -AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") -AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET") -input_bucket <- file.path(AWS_S3_RAW_BUCKET, "ccao", "land") -output_bucket <- file.path(AWS_S3_WAREHOUSE_BUCKET, "ccao", "land") - -# Location of remote files -remote_file_raw_site_rate_2022 <- file.path( - input_bucket, "site_rate", "2022.xlsx" -) -remote_file_warehouse_site_rate <- file.path( - output_bucket, "land_site_rate" -) - -# Temp file to download workbook -tmp_file_site_rate_2022 <- tempfile(fileext = ".xlsx") - -# Grab the workbook from the raw S3 bucket -aws.s3::save_object( - object = remote_file_raw_site_rate_2022, - file = tmp_file_site_rate_2022 -) - -# Load the raw workbook, rename and clean up columns, then write to S3 -# partitioned by year -land_site_rate <- openxlsx::read.xlsx(tmp_file_site_rate_2022) %>% - 
set_names(snakecase::to_snake_case(names(.))) %>% - select( - pin = parid, - class, - town_nbhd = nbhd, - land_rate_per_pin = flat_townhome_value_2022, - land_rate_per_sqft = rate_sf_2022, - land_pct_tot_fmv = flat_tot_mv - ) %>% - mutate( - year = "2022", - across(c(town_nbhd, class), str_remove_all, "-"), - land_rate_per_pin = as.integer(land_rate_per_pin) - ) %>% - drop_na(pin, land_rate_per_pin) %>% - group_by(year) %>% - write_partitions_to_s3( - remote_file_warehouse_site_rate, - is_spatial = FALSE, - overwrite = TRUE - ) +library(arrow) +library(aws.s3) +library(dplyr) +library(openxlsx) +library(purrr) +library(readr) +library(snakecase) +library(stringr) +library(tidyr) +source("utils.R") + +# This script retrieves and cleans land value spreadsheets provided by +# the Valuations department and formats them for use in Athena +AWS_S3_RAW_BUCKET <- Sys.getenv("AWS_S3_RAW_BUCKET") +AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET") +input_bucket <- file.path(AWS_S3_RAW_BUCKET, "ccao", "land") +output_bucket <- file.path(AWS_S3_WAREHOUSE_BUCKET, "ccao", "land") + +# Location of remote files +remote_file_raw_site_rate_2022 <- file.path( + input_bucket, "site_rate", "2022.xlsx" +) +remote_file_warehouse_site_rate <- file.path( + output_bucket, "land_site_rate" +) + +# Temp file to download workbook +tmp_file_site_rate_2022 <- tempfile(fileext = ".xlsx") + +# Grab the workbook from the raw S3 bucket +aws.s3::save_object( + object = remote_file_raw_site_rate_2022, + file = tmp_file_site_rate_2022 +) + +# Load the raw workbook, rename and clean up columns, then write to S3 +# partitioned by year +land_site_rate <- openxlsx::read.xlsx(tmp_file_site_rate_2022) %>% + set_names(snakecase::to_snake_case(names(.))) %>% + select( + pin = parid, + class, + town_nbhd = nbhd, + land_rate_per_pin = flat_townhome_value_2022, + land_rate_per_sqft = rate_sf_2022, + land_pct_tot_fmv = flat_tot_mv + ) %>% + mutate( + year = "2022", + across(c(town_nbhd, class), 
str_remove_all, "-"), + land_rate_per_pin = as.integer(land_rate_per_pin) + ) %>% + drop_na(pin, land_rate_per_pin) %>% + mutate(loaded_at = as.character(Sys.time())) %>% + group_by(year) %>% + write_partitions_to_s3( + remote_file_warehouse_site_rate, + is_spatial = FALSE, + overwrite = TRUE + ) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-legacy.R b/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-legacy.R index 84673e731..da8d64e95 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-legacy.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-legacy.R @@ -99,6 +99,7 @@ cc_dli_senfrr <- map_dfr(files_cc_dli_senfrr$Key, \(f) { # Write the files to S3, partitioned by year cc_dli_senfrr %>% + mutate(loaded_at = as.character(Sys.time())) %>% group_by(year) %>% arrow::write_dataset( path = file.path(output_bucket, "cc_dli_senfrr"), @@ -180,6 +181,7 @@ cc_pifdb_piexemptre_sted <- map_dfr(files_cc_pifdb_piexemptre_sted$Key, \(f) { # Write the files to S3, partitioned by year cc_pifdb_piexemptre_sted %>% + mutate(loaded_at = as.character(Sys.time())) %>% group_by(year) %>% arrow::write_dataset( path = file.path( @@ -253,6 +255,7 @@ cc_pifdb_piexemptre_dise <- map_dfr(files_cc_pifdb_piexemptre_dise$Key, \(f) { # Write the files to S3, partitioned by year cc_pifdb_piexemptre_dise %>% + mutate(loaded_at = as.character(Sys.time())) %>% group_by(year) %>% arrow::write_dataset( path = file.path( @@ -341,6 +344,7 @@ cc_pifdb_piexemptre_ownr <- map_dfr(files_cc_pifdb_piexemptre_ownr$Key, \(f) { # Write the files to S3, partitioned by year cc_pifdb_piexemptre_ownr %>% + mutate(loaded_at = as.character(Sys.time())) %>% group_by(year) %>% arrow::write_dataset( path = file.path( diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-other-hie.R b/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-other-hie.R index 0b01ab1a5..a7de08a87 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-other-hie.R +++ 
b/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-other-hie.R @@ -1,104 +1,105 @@ -library(arrow) -library(aws.s3) -library(DBI) -library(dplyr) -library(lubridate) -library(odbc) -library(purrr) -library(stringr) -library(tidyr) -source("utils.R") - -# This script retrieves and cleans home improvement exemption data stored in -# the CCAO's legacy AS/400 system -AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET") -output_bucket <- file.path(AWS_S3_WAREHOUSE_BUCKET, "ccao", "other", "hie") - -# Connect to legacy CCAO SQL server -CCAODATA <- dbConnect( - odbc(), - .connection_string = Sys.getenv("DB_CONFIG_CCAODATA") -) - -# Grab all legacy HIE data from the ADDCHARS table -hie <- DBI::dbGetQuery( - CCAODATA, - " - SELECT - QU_PIN, - QU_TOWN, - QU_MLT_CD, - QU_HOME_IMPROVEMENT, - QU_USE, - QU_EXTERIOR_WALL, - QU_ROOF, - QU_BASEMENT_TYPE, - QU_BASEMENT_FINISH, - QU_HEAT, - QU_AIR, - QU_ATTIC_TYPE, - QU_ATTIC_FINISH, - QU_TYPE_PLAN, - QU_TYPE_DESIGN, - QU_CONSTRUCT_QUALITY, - QU_PORCH, - QU_GARAGE_SIZE, - QU_GARAGE_CONST, - QU_GARAGE_ATTACHED, - QU_GARAGE_AREA, - QU_NUM_APTS, - QU_SQFT_BLD, - QU_LND_SQFT, - QU_CLASS, - QU_ROOMS, - QU_BEDS, - QU_FULL_BATH, - QU_HALF_BATH, - QU_FIRE_PLACE, - QU_NO__COM_UNIT, - QU_TYPE_OF_RES, - QU_UPLOAD_DATE, - TAX_YEAR - FROM ADDCHARS - WHERE QU_HOME_IMPROVEMENT = 1 - " -) - -# Clean up raw ADDCHARS data -hie_clean <- hie %>% - mutate( - QU_CLASS = as.numeric(stringr::str_sub(QU_CLASS, 1, 3)), - QU_PIN = str_pad(QU_PIN, 14, "left", "0"), - hie_last_year_active = map_chr( - ccao::chars_288_active(TAX_YEAR, as.character(QU_TOWN)), - ~ tail(.x, n = 1) - ), - QU_NO__COM_UNIT = as.numeric(QU_NO__COM_UNIT), - QU_NO__COM_UNIT = replace_na(QU_NO__COM_UNIT, 0), - across( - c(QU_TOWN:QU_NUM_APTS, QU_CLASS, QU_TYPE_OF_RES, TAX_YEAR), - as.character - ), - across(everything(), na_if, " "), - QU_CLASS = na_if(QU_CLASS, "0"), - # Convert upload date to date format and if missing, set as the earliest - # date for the year - 
QU_UPLOAD_DATE = lubridate::ymd(QU_UPLOAD_DATE), - QU_UPLOAD_DATE = lubridate::as_date(ifelse( - is.na(QU_UPLOAD_DATE), - lubridate::make_date(as.numeric(TAX_YEAR), 1, 1), - QU_UPLOAD_DATE - )), - ) %>% - rename_with(tolower) %>% - rename(pin = qu_pin, year = tax_year, qu_no_com_unit = qu_no__com_unit) - -# Save HIE data to warehouse, partitioned by year -hie_clean %>% - group_by(year) %>% - arrow::write_dataset( - path = output_bucket, - format = "parquet", - hive_style = TRUE, - compression = "snappy" - ) +library(arrow) +library(aws.s3) +library(DBI) +library(dplyr) +library(lubridate) +library(odbc) +library(purrr) +library(stringr) +library(tidyr) +source("utils.R") + +# This script retrieves and cleans home improvement exemption data stored in +# the CCAO's legacy AS/400 system +AWS_S3_WAREHOUSE_BUCKET <- Sys.getenv("AWS_S3_WAREHOUSE_BUCKET") +output_bucket <- file.path(AWS_S3_WAREHOUSE_BUCKET, "ccao", "other", "hie") + +# Connect to legacy CCAO SQL server +CCAODATA <- dbConnect( + odbc(), + .connection_string = Sys.getenv("DB_CONFIG_CCAODATA") +) + +# Grab all legacy HIE data from the ADDCHARS table +hie <- DBI::dbGetQuery( + CCAODATA, + " + SELECT + QU_PIN, + QU_TOWN, + QU_MLT_CD, + QU_HOME_IMPROVEMENT, + QU_USE, + QU_EXTERIOR_WALL, + QU_ROOF, + QU_BASEMENT_TYPE, + QU_BASEMENT_FINISH, + QU_HEAT, + QU_AIR, + QU_ATTIC_TYPE, + QU_ATTIC_FINISH, + QU_TYPE_PLAN, + QU_TYPE_DESIGN, + QU_CONSTRUCT_QUALITY, + QU_PORCH, + QU_GARAGE_SIZE, + QU_GARAGE_CONST, + QU_GARAGE_ATTACHED, + QU_GARAGE_AREA, + QU_NUM_APTS, + QU_SQFT_BLD, + QU_LND_SQFT, + QU_CLASS, + QU_ROOMS, + QU_BEDS, + QU_FULL_BATH, + QU_HALF_BATH, + QU_FIRE_PLACE, + QU_NO__COM_UNIT, + QU_TYPE_OF_RES, + QU_UPLOAD_DATE, + TAX_YEAR + FROM ADDCHARS + WHERE QU_HOME_IMPROVEMENT = 1 + " +) + +# Clean up raw ADDCHARS data +hie_clean <- hie %>% + mutate( + QU_CLASS = as.numeric(stringr::str_sub(QU_CLASS, 1, 3)), + QU_PIN = str_pad(QU_PIN, 14, "left", "0"), + hie_last_year_active = map_chr( + 
ccao::chars_288_active(TAX_YEAR, as.character(QU_TOWN)), + ~ tail(.x, n = 1) + ), + QU_NO__COM_UNIT = as.numeric(QU_NO__COM_UNIT), + QU_NO__COM_UNIT = replace_na(QU_NO__COM_UNIT, 0), + across( + c(QU_TOWN:QU_NUM_APTS, QU_CLASS, QU_TYPE_OF_RES, TAX_YEAR), + as.character + ), + across(everything(), na_if, " "), + QU_CLASS = na_if(QU_CLASS, "0"), + # Convert upload date to date format and if missing, set as the earliest + # date for the year + QU_UPLOAD_DATE = lubridate::ymd(QU_UPLOAD_DATE), + QU_UPLOAD_DATE = lubridate::as_date(ifelse( + is.na(QU_UPLOAD_DATE), + lubridate::make_date(as.numeric(TAX_YEAR), 1, 1), + QU_UPLOAD_DATE + )), + ) %>% + rename_with(tolower) %>% + rename(pin = qu_pin, year = tax_year, qu_no_com_unit = qu_no__com_unit) + +# Save HIE data to warehouse, partitioned by year +hie_clean %>% + mutate(loaded_at = as.character(Sys.time())) %>% + group_by(year) %>% + arrow::write_dataset( + path = output_bucket, + format = "parquet", + hive_style = TRUE, + compression = "snappy" + ) From 952b4b8cf0827cb249ff66b289a8c099d7b29212 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Tue, 17 Dec 2024 22:15:16 +0000 Subject: [PATCH 146/166] Housing --- .../housing/housing-ari.py | 2 ++ .../housing/housing-dci.py | 3 +++ 2 files changed, 5 insertions(+) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/housing/housing-ari.py b/etl/scripts-ccao-data-warehouse-us-east-1/housing/housing-ari.py index 576335ec7..bdefd7181 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/housing/housing-ari.py +++ b/etl/scripts-ccao-data-warehouse-us-east-1/housing/housing-ari.py @@ -1,5 +1,6 @@ import os import tempfile +from datetime import datetime import boto3 import pandas as pd @@ -33,6 +34,7 @@ temp_file.close() # Upload the Parquet file to S3 +data["loaded_at"] = str(datetime.now()) data.to_parquet( os.path.join( os.environ["AWS_S3_WAREHOUSE_BUCKET"], diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/housing/housing-dci.py 
b/etl/scripts-ccao-data-warehouse-us-east-1/housing/housing-dci.py index e62d1c0df..fb8ab7492 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/housing/housing-dci.py +++ b/etl/scripts-ccao-data-warehouse-us-east-1/housing/housing-dci.py @@ -1,5 +1,6 @@ import os import tempfile +from datetime import datetime import boto3 import pandas as pd @@ -39,6 +40,8 @@ ) data["geoid"] = data["geoid"].astype(str) +# Upload the Parquet file to S3 +data["loaded_at"] = str(datetime.now()) data.to_parquet( os.path.join( os.environ["AWS_S3_WAREHOUSE_BUCKET"], From ab52d1206e090e3cd53bb77d4b2bd6c1bd275074 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Wed, 18 Dec 2024 17:23:23 +0000 Subject: [PATCH 147/166] More loaded_at --- etl/renv.lock | 15 ++++++++++++++- .../census/census-dictionary.R | 2 ++ .../environment/environment-airport_noise.R | 4 +++- etl/utils.R | 2 +- 4 files changed, 20 insertions(+), 3 deletions(-) diff --git a/etl/renv.lock b/etl/renv.lock index 4f6f0dc6f..3d32c3624 100644 --- a/etl/renv.lock +++ b/etl/renv.lock @@ -1,6 +1,6 @@ { "R": { - "Version": "4.4.2", + "Version": "4.4.1", "Repositories": [ { "Name": "CRAN", @@ -2808,6 +2808,19 @@ ], "Hash": "ad57b543f7c3fca05213ba78ff63df9b" }, + "sfarrow": { + "Package": "sfarrow", + "Version": "0.4.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "arrow", + "dplyr", + "jsonlite", + "sf" + ], + "Hash": "b320f164b1d7bb7e4582b841e22d15a0" + }, "sfheaders": { "Package": "sfheaders", "Version": "0.4.4", diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/census/census-dictionary.R b/etl/scripts-ccao-data-warehouse-us-east-1/census/census-dictionary.R index a1e3f46f0..dbc5a16e6 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/census/census-dictionary.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/census/census-dictionary.R @@ -61,6 +61,7 @@ census_dec_tables <- # Combine table defs and write to dataset census_tables <- bind_rows(census_acs_tables_df, census_dec_tables) %>% + 
mutate(loaded_at = as.character(Sys.time())) %>% group_by(survey) %>% select(variable_table_code, variable_table_title, survey) remote_path_tables <- file.path(output_bucket, "table_dict") @@ -130,6 +131,7 @@ census_dec_vars <- load_variables(2020, "pl", cache = TRUE) %>% # Combine ACS and decennial census_vars_merged <- bind_rows(census_vars, census_dec_vars) %>% + mutate(loaded_at = as.character(Sys.time())) %>% group_by(survey) # Write final data to S3 diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/environment/environment-airport_noise.R b/etl/scripts-ccao-data-warehouse-us-east-1/environment/environment-airport_noise.R index f712ec843..8508cbca8 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/environment/environment-airport_noise.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/environment/environment-airport_noise.R @@ -3,6 +3,7 @@ library(aws.s3) library(dplyr) library(purrr) library(sf) +library(sfarrow) library(stars) library(stringr) library(tidyr) @@ -40,7 +41,7 @@ merge_pins_with_raster <- function(raw_file) { message("Now processing:", year) rast <- stars::read_stars(tmp_file) - pins <- read_sf_dataset(arrow::open_dataset(paste0( + pins <- sfarrow::read_sf_dataset(arrow::open_dataset(paste0( "s3://ccao-data-warehouse-us-east-1/spatial/parcel/year=", year ))) %>% @@ -55,6 +56,7 @@ merge_pins_with_raster <- function(raw_file) { select(pin10, airport_noise_dnl) %>% mutate(airport_noise_dnl = replace_na(airport_noise_dnl, 52.5)) %>% st_drop_geometry() %>% + mutate(loaded_at = as.character(Sys.time())) %>% write_parquet( file.path(output_bucket, paste0("year=", year), "part-0.parquet") ) diff --git a/etl/utils.R b/etl/utils.R index a069cc571..24313d832 100644 --- a/etl/utils.R +++ b/etl/utils.R @@ -68,7 +68,7 @@ write_partitions_to_s3 <- function(df, df <- df %>% mutate(loaded_at = as.character(Sys.time())) dplyr::group_walk(df, ~ { partitions_df <- purrr::map_dfr( - .y, replace_na, "__HIVE_DEFAULT_PARTITION__" + .y, tidyr::replace_na, 
"__HIVE_DEFAULT_PARTITION__" ) partition_path <- paste0(purrr::map2_chr( names(partitions_df), From 585a0d57a543f0bb7f5df13a6189c711c79ac904 Mon Sep 17 00:00:00 2001 From: William Ridgeway Date: Wed, 18 Dec 2024 14:40:25 -0600 Subject: [PATCH 148/166] More loaded_at --- .../spatial/spatial-access.R | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-access.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-access.R index bb8cfb92c..34286c8aa 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-access.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-access.R @@ -119,7 +119,7 @@ walk(remote_files_park_warehouse, function(x) { add_osm_feature(key = "leisure", value = "park") %>% osmdata_sf() - cook_boundary <- st_read_parquet( + cook_boundary <- geoarrow::read_geoparquet_sf( file.path( AWS_S3_WAREHOUSE_BUCKET, "spatial/ccao/county/2019.parquet" @@ -141,7 +141,7 @@ walk(remote_files_park_warehouse, function(x) { )) ) - geoparquet_to_s3(parks_df, x, compression = "snappy") + geoparquet_to_s3(parks_df, x) } }) @@ -189,16 +189,16 @@ if (!aws.s3::object_exists(remote_file_walk_warehouse)) { tmp_file_walk <- tempfile(fileext = ".geojson") aws.s3::save_object(remote_file_walk_raw, file = tmp_file_walk) - temp <- st_read(tmp_file_walk) %>% + st_read(tmp_file_walk) %>% st_transform(4326) %>% + rename( + walkability_rating = Walkabilit, + amenities_score = Amenities, + transitaccess = TransitAcc + ) %>% rename_with(tolower) %>% rename_with(~ gsub("sc$|sco|scor|score", "_score", .x)) %>% rename_with(~"walk_num", contains("subzone")) %>% - rename( - walkability_rating = walkabilit, - amenities_score = amenities, - transitaccess = transitacc - ) %>% standardize_expand_geo() %>% select(-contains("shape")) %>% mutate(year = "2017") %>% From 0760d2b88a4dbdeb4438840dc5fe647be916969b Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Wed, 18 Dec 2024 
22:13:18 +0000 Subject: [PATCH 149/166] Code cleanup --- .../spatial/spatial-environment-ohare_noise.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-ohare_noise.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-ohare_noise.R index d188e1420..6f82d76b2 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-ohare_noise.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-ohare_noise.R @@ -60,8 +60,10 @@ names(noise_levels) <- columns # Clean NAs and remove inactive sites noise_levels <- noise_levels %>% - na_if("n/a") %>% - na_if("--") %>% + mutate( + across(where(is.character), ~ na_if(.x, "n/a")), + across(where(is.character), ~ na_if(.x, "--")) + ) %>% filter(!Site %in% c("6", "9", "36", "45")) # Grab sensor addresses pdf From fb81594052d2721bb6f0db07461d701c4f451d29 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Thu, 19 Dec 2024 22:00:11 +0000 Subject: [PATCH 150/166] loaded_at --- .../spatial/spatial-access.R | 1 - .../spatial/spatial-transit.R | 7 +++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-access.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-access.R index 34286c8aa..b4b26687e 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-access.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-access.R @@ -201,6 +201,5 @@ if (!aws.s3::object_exists(remote_file_walk_warehouse)) { rename_with(~"walk_num", contains("subzone")) %>% standardize_expand_geo() %>% select(-contains("shape")) %>% - mutate(year = "2017") %>% geoparquet_to_s3(remote_file_walk_warehouse) } diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-transit.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-transit.R index 68117dc2e..85b6712a7 100644 --- 
a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-transit.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-transit.R @@ -117,7 +117,7 @@ pwalk(gtfs_feeds_df, function(...) { agency = df$agency, feed_url = df$raw_feed_url ) -}) +}, .progress = TRUE) # Create dictionary for GTFS numeric codes # See: https://developers.google.com/transit/gtfs/reference @@ -136,7 +136,10 @@ transit_dict <- tribble( "route_type", 12, "monorail", "Monorail. Railway in which the track consists of a single rail or a beam." ) %>% # nolint end - mutate(field_code = as.integer(field_code)) + mutate( + field_code = as.integer(field_code), + loaded_at = as.character(Sys.time()) + ) # Write dict to parquet remote_file_dict <- file.path( From 009d471ed615ff2712412e11879f846f46a4b578 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Thu, 19 Dec 2024 22:25:35 +0000 Subject: [PATCH 151/166] loaded_at --- dbt/models/spatial/spatial.stadium.sql | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dbt/models/spatial/spatial.stadium.sql b/dbt/models/spatial/spatial.stadium.sql index 81c99d1c7..e01067b62 100644 --- a/dbt/models/spatial/spatial.stadium.sql +++ b/dbt/models/spatial/spatial.stadium.sql @@ -1,5 +1,5 @@ -{{ - config(materialized='table') +{{ + config(materialized='table') }} WITH distinct_years AS ( @@ -26,6 +26,7 @@ SELECT DISTINCT CAST(stadium_years.year AS VARCHAR) AS year, ST_ASBINARY(ST_POINT(stadium_years.lon, stadium_years.lat)) AS geometry, ST_ASBINARY(ST_POINT(stadium_years.x_3435, stadium_years.y_3435)) - AS geometry_3435 + AS geometry_3435, + DATE_FORMAT(CURRENT_TIMESTAMP, '%Y-%m-%d %H:%i:%s.%f') AS loaded_at FROM stadium_years ORDER BY year, name From 98a4b96cbf824280516ee91ba1023e6bbcf3657c Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Thu, 19 Dec 2024 22:36:38 +0000 Subject: [PATCH 152/166] Revert foreclosure changes --- .../sale/sale-foreclosure.R | 7 +- .../temp.R | 102 ++++++++++++++++++ 2 files changed, 107 
insertions(+), 2 deletions(-) create mode 100644 etl/scripts-ccao-data-warehouse-us-east-1/temp.R diff --git a/etl/scripts-ccao-data-raw-us-east-1/sale/sale-foreclosure.R b/etl/scripts-ccao-data-raw-us-east-1/sale/sale-foreclosure.R index 200affe9d..177acbab7 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/sale/sale-foreclosure.R +++ b/etl/scripts-ccao-data-raw-us-east-1/sale/sale-foreclosure.R @@ -16,7 +16,7 @@ output_bucket <- file.path(AWS_S3_RAW_BUCKET, "sale", "foreclosure") files <- list.files("O:/CCAODATA/data/foreclosures", recursive = TRUE) # Function to retrieve data and write to S3 -walk(files, \(x) { +read_write <- function(files, x) { output_dest <- file.path(output_bucket, glue(parse_number(x), ".parquet")) if (!object_exists(output_dest)) { @@ -28,4 +28,7 @@ walk(files, \(x) { ) %>% write_parquet(output_dest) } -}) +} + +# Apply function to foreclosure data +walk(files, read_write) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/temp.R b/etl/scripts-ccao-data-warehouse-us-east-1/temp.R new file mode 100644 index 000000000..07b304db2 --- /dev/null +++ b/etl/scripts-ccao-data-warehouse-us-east-1/temp.R @@ -0,0 +1,102 @@ +library(dplyr) +library(purrr) +source("utils.R") +temp <- aws.s3::get_bucket_df("s3://ccao-data-warehouse-us-east-1/", prefix = "spatial/transit") %>% + filter( + stringr::str_detect(Key, "parquet") + ) %>% + pull(Key) + +walk(file.path("s3://ccao-data-warehouse-us-east-1", temp), \(x) { + print(x) + if (x == "s3://ccao-data-warehouse-us-east-1/spatial/transit/transit_dict/transit_dict.parquet") { + read_parquet(x) %>% + mutate(loaded_at = as.character(Sys.time())) %>% + write_parquet(x) + } else { + geoarrow::read_geoparquet_sf(x) %>% + geoparquet_to_s3(x) + } +}, .progress = TRUE) + + +library(DBI) +library(glue) +library(noctua) + +AWS_ATHENA_CONN_NOCTUA <- dbConnect( + noctua::athena(), + # Disable the Connections tab entry for this database. 
Always use this if + # you don't want to browser the tables in the Connections tab, since it + # speeds up instantiating the connection significantly + rstudio_conn_tab = FALSE +) + +tables <- c( + "bike_trail", + "board_of_review_district", + "building_footprint", + "cemetery", + "census", + "central_business_district", + "coastline", + "commissioner_district", + "community_area", + "community_college_district", + "congressional_district", + "coordinated_care", + "corner", + "county", + "enterprise_zone", + "fire_protection_district", + "flood_fema", + #"geojson", + "golf_course", + "grocery_store", + "hospital", + "hydrology", + "industrial_corridor", + "industrial_growth_zone", + "judicial_district", + "library_district", + "major_road", + "midway_noise_monitor", + "municipality", + "neighborhood", + "ohare_noise_contour", + "ohare_noise_monitor", + #"parcel", + "park", + "park_district", + "police_district", + "qualified_opportunity_zone", + "railroad", + "road", + "sanitation_district", + "school_district", + "school_location", + "secondary_road", + "special_service_area", + "stadium", + #"stadium_raw", + "state_representative_district", + "state_senate_district", + "subdivision", + "tif_district", + "township", + "transit_dict", + "transit_route", + "transit_stop", + "walkability", + "ward", + "ward_chicago", + "ward_evanston" +) + +walk(tables, \(x) { + print(x) + dbGetQuery( + conn = AWS_ATHENA_CONN_NOCTUA, + glue("SELECT loaded_at FROM spatial.{x}") + ) +}) From b6bd9702e8cf0d002adde382402f5156dac491d9 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Thu, 19 Dec 2024 22:37:16 +0000 Subject: [PATCH 153/166] Remove temp script --- .../temp.R | 102 ------------------ 1 file changed, 102 deletions(-) delete mode 100644 etl/scripts-ccao-data-warehouse-us-east-1/temp.R diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/temp.R b/etl/scripts-ccao-data-warehouse-us-east-1/temp.R deleted file mode 100644 index 07b304db2..000000000 --- 
a/etl/scripts-ccao-data-warehouse-us-east-1/temp.R +++ /dev/null @@ -1,102 +0,0 @@ -library(dplyr) -library(purrr) -source("utils.R") -temp <- aws.s3::get_bucket_df("s3://ccao-data-warehouse-us-east-1/", prefix = "spatial/transit") %>% - filter( - stringr::str_detect(Key, "parquet") - ) %>% - pull(Key) - -walk(file.path("s3://ccao-data-warehouse-us-east-1", temp), \(x) { - print(x) - if (x == "s3://ccao-data-warehouse-us-east-1/spatial/transit/transit_dict/transit_dict.parquet") { - read_parquet(x) %>% - mutate(loaded_at = as.character(Sys.time())) %>% - write_parquet(x) - } else { - geoarrow::read_geoparquet_sf(x) %>% - geoparquet_to_s3(x) - } -}, .progress = TRUE) - - -library(DBI) -library(glue) -library(noctua) - -AWS_ATHENA_CONN_NOCTUA <- dbConnect( - noctua::athena(), - # Disable the Connections tab entry for this database. Always use this if - # you don't want to browser the tables in the Connections tab, since it - # speeds up instantiating the connection significantly - rstudio_conn_tab = FALSE -) - -tables <- c( - "bike_trail", - "board_of_review_district", - "building_footprint", - "cemetery", - "census", - "central_business_district", - "coastline", - "commissioner_district", - "community_area", - "community_college_district", - "congressional_district", - "coordinated_care", - "corner", - "county", - "enterprise_zone", - "fire_protection_district", - "flood_fema", - #"geojson", - "golf_course", - "grocery_store", - "hospital", - "hydrology", - "industrial_corridor", - "industrial_growth_zone", - "judicial_district", - "library_district", - "major_road", - "midway_noise_monitor", - "municipality", - "neighborhood", - "ohare_noise_contour", - "ohare_noise_monitor", - #"parcel", - "park", - "park_district", - "police_district", - "qualified_opportunity_zone", - "railroad", - "road", - "sanitation_district", - "school_district", - "school_location", - "secondary_road", - "special_service_area", - "stadium", - #"stadium_raw", - 
"state_representative_district", - "state_senate_district", - "subdivision", - "tif_district", - "township", - "transit_dict", - "transit_route", - "transit_stop", - "walkability", - "ward", - "ward_chicago", - "ward_evanston" -) - -walk(tables, \(x) { - print(x) - dbGetQuery( - conn = AWS_ATHENA_CONN_NOCTUA, - glue("SELECT loaded_at FROM spatial.{x}") - ) -}) From 6377dccac4246ebe943d6ad84b9d895ab8362dbd Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Thu, 19 Dec 2024 22:37:49 +0000 Subject: [PATCH 154/166] Typo --- etl/scripts-ccao-data-raw-us-east-1/sale/sale-foreclosure.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/sale/sale-foreclosure.R b/etl/scripts-ccao-data-raw-us-east-1/sale/sale-foreclosure.R index 177acbab7..08a396750 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/sale/sale-foreclosure.R +++ b/etl/scripts-ccao-data-raw-us-east-1/sale/sale-foreclosure.R @@ -16,7 +16,7 @@ output_bucket <- file.path(AWS_S3_RAW_BUCKET, "sale", "foreclosure") files <- list.files("O:/CCAODATA/data/foreclosures", recursive = TRUE) # Function to retrieve data and write to S3 -read_write <- function(files, x) { +read_write <- function(x) { output_dest <- file.path(output_bucket, glue(parse_number(x), ".parquet")) if (!object_exists(output_dest)) { From f6fafdb4a5ef2d248e8666093f02d91ff79aa41f Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Thu, 19 Dec 2024 22:38:42 +0000 Subject: [PATCH 155/166] Undo mydec changes --- etl/scripts-ccao-data-raw-us-east-1/sale/sale-mydec.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/sale/sale-mydec.R b/etl/scripts-ccao-data-raw-us-east-1/sale/sale-mydec.R index 6f65f6dd9..fb87794b7 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/sale/sale-mydec.R +++ b/etl/scripts-ccao-data-raw-us-east-1/sale/sale-mydec.R @@ -22,7 +22,7 @@ files <- xml2::read_html( str_subset("ptax203") # Function to scrape IDOR data and write 
to S3 -walk(files, \(x) { +down_up <- function(x) { year <- str_extract(x, pattern = "[0-9]{4}") if ( @@ -44,4 +44,7 @@ walk(files, \(x) { readr::read_delim(list.files(tmp2, full.names = TRUE), delim = "\t") %>% write_parquet(file.path(output_bucket, glue("{year}.parquet"))) } -}) +} + +# Apply function to foreclosure data +walk(files, down_up) From bf894a7fd91d5c3d730bf75c5880929c982a036f Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Thu, 19 Dec 2024 22:41:07 +0000 Subject: [PATCH 156/166] Undo spatial raw changes --- .../spatial/spatial-political.R | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-political.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-political.R index fe325595e..9747af251 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-political.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-political.R @@ -150,19 +150,20 @@ pwalk(sources_list, function(...) { # MUNICIPALITY # Paths for all relevant geodatabases -data.frame("path" = list.files(file_path, full.names = TRUE)) %>% +gdb_files <- data.frame("path" = list.files(file_path, full.names = TRUE)) %>% filter( str_detect(path, "Current", negate = TRUE) & str_detect(path, "20") & str_detect(path, "Admin") - ) %>% - # Function to call referenced API, pull requested data, and write it to S3 - pwalk(function(...) { - df <- tibble::tibble(...) - county_gdb_to_s3( - s3_bucket_uri = output_bucket, - dir_name = "municipality", - file_path = df$path, - layer = "MuniTaxDist" - ) - }) + ) + +# Function to call referenced API, pull requested data, and write it to S3 +pwalk(function(...) { + df <- tibble::tibble(...) 
+ county_gdb_to_s3( + s3_bucket_uri = output_bucket, + dir_name = "municipality", + file_path = df$path, + layer = "MuniTaxDist" + ) +}) From 2716bdb23aebfb269947f27f3c4a9e75ae89742e Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Thu, 19 Dec 2024 22:41:44 +0000 Subject: [PATCH 157/166] Typo --- etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-political.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-political.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-political.R index 9747af251..c77905045 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-political.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-political.R @@ -158,7 +158,7 @@ gdb_files <- data.frame("path" = list.files(file_path, full.names = TRUE)) %>% ) # Function to call referenced API, pull requested data, and write it to S3 -pwalk(function(...) { +pwalk(gdb_files, function(...) { df <- tibble::tibble(...) county_gdb_to_s3( s3_bucket_uri = output_bucket, From 2e2805a6c0a1f814015dbcc241d855552f0ab508 Mon Sep 17 00:00:00 2001 From: William Ridgeway <10358980+wrridgeway@users.noreply.github.com> Date: Mon, 23 Dec 2024 09:25:01 -0600 Subject: [PATCH 158/166] Update etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-ohare_noise.R Co-authored-by: Jean Cochrane --- .../spatial/spatial-environment-ohare_noise.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-ohare_noise.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-ohare_noise.R index 6f82d76b2..cefd82e66 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-ohare_noise.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-environment-ohare_noise.R @@ -63,7 +63,7 @@ noise_levels <- noise_levels %>% mutate( across(where(is.character), ~ na_if(.x, "n/a")), 
across(where(is.character), ~ na_if(.x, "--")) - ) %>% + ) %>% filter(!Site %in% c("6", "9", "36", "45")) # Grab sensor addresses pdf From dbaa8878bae226d6cd447a401609f4896dff25cd Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Mon, 23 Dec 2024 16:01:38 +0000 Subject: [PATCH 159/166] Update neighborhood shapefile url --- etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R index 8ebf250c5..0d0b8c533 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R +++ b/etl/scripts-ccao-data-raw-us-east-1/spatial/spatial-ccao.R @@ -16,8 +16,7 @@ file_path <- "//gisemcv1.ccounty.com/ArchiveServices/" # nolint sources_list <- data.frame( # NEIGHBORHOOD "url" = paste0( - "https://gitlab.com/ccao-data-science---modeling/packages/ccao", - "/-/raw/master/data-raw/nbhd_shp.geojson" + "https://github.com/ccao-data/ccao/blob/master/data-raw/nbhd_shp.geojson" ), "boundary" = "neighborhood", "year" = "2021" From 61a4a4efdb8e571e82965eff1e177351c30657f7 Mon Sep 17 00:00:00 2001 From: William Ridgeway <10358980+wrridgeway@users.noreply.github.com> Date: Mon, 23 Dec 2024 10:03:04 -0600 Subject: [PATCH 160/166] Update etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-transit.R Co-authored-by: Jean Cochrane --- .../spatial/spatial-transit.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-transit.R b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-transit.R index 85b6712a7..8efd3c1c7 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-transit.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/spatial/spatial-transit.R @@ -139,7 +139,7 @@ transit_dict <- tribble( mutate( field_code = as.integer(field_code), loaded_at = as.character(Sys.time()) - ) + ) # Write dict to 
parquet remote_file_dict <- file.path( From 3c7fb11a9a2ac0a3ea1ba8376c1c475757260a30 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Mon, 23 Dec 2024 16:23:44 +0000 Subject: [PATCH 161/166] Add new condo chars source --- .../ccao/ccao-condominium-pin_condo_char.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/ccao/ccao-condominium-pin_condo_char.R b/etl/scripts-ccao-data-raw-us-east-1/ccao/ccao-condominium-pin_condo_char.R index 72c93a3a4..355d5761a 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/ccao/ccao-condominium-pin_condo_char.R +++ b/etl/scripts-ccao-data-raw-us-east-1/ccao/ccao-condominium-pin_condo_char.R @@ -19,7 +19,8 @@ output_bucket <- file.path( source_paths <- c( "//fileserver/ocommon/2022 Data Collection/Condo Project/William Approved Layout North Tri Condo Project FINAL COMPLETED/", # nolint "//fileserver/ocommon/2023 Data Collection/South Tri Condo Project COMPLETED", - "//fileserver/ocommon/2024 Data Collection/City Tri Condo Characteristics COMPLETED" # nolint + "//fileserver/ocommon/2024 Data Collection/City Tri Condo Characteristics COMPLETED" # nolint, + "O:/CCAODATA/data/condo_chars/north_condo_review_2024.xlsx" # Local copy of sharepoint file ) source_files <- grep( From c4067833aabb96110aa207bb3cab8492d4dbe356 Mon Sep 17 00:00:00 2001 From: sweatyhandshake Date: Mon, 23 Dec 2024 10:40:46 -0600 Subject: [PATCH 162/166] Update local file path --- .../ccao/ccao-condominium-pin_condo_char.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/ccao/ccao-condominium-pin_condo_char.R b/etl/scripts-ccao-data-raw-us-east-1/ccao/ccao-condominium-pin_condo_char.R index 355d5761a..4ee07a329 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/ccao/ccao-condominium-pin_condo_char.R +++ b/etl/scripts-ccao-data-raw-us-east-1/ccao/ccao-condominium-pin_condo_char.R @@ -19,8 +19,8 @@ output_bucket <- file.path( source_paths <- c( 
"//fileserver/ocommon/2022 Data Collection/Condo Project/William Approved Layout North Tri Condo Project FINAL COMPLETED/", # nolint "//fileserver/ocommon/2023 Data Collection/South Tri Condo Project COMPLETED", - "//fileserver/ocommon/2024 Data Collection/City Tri Condo Characteristics COMPLETED" # nolint, - "O:/CCAODATA/data/condo_chars/north_condo_review_2024.xlsx" # Local copy of sharepoint file + "//fileserver/ocommon/2024 Data Collection/City Tri Condo Characteristics COMPLETED", # nolint + "O:/CCAODATA/data/condo_chars_2025_completed" # Local copy of sharepoint file ) source_files <- grep( From 8a15f6173aa8ab52c47a37887f5cb4ef7d98b5c1 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Mon, 23 Dec 2024 18:29:29 +0000 Subject: [PATCH 163/166] Add condo char updates --- .../ccao/ccao-condominium-pin_condo_char.R | 35 ++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-condominium-pin_condo_char.R b/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-condominium-pin_condo_char.R index 34d2fae98..3b84f069e 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-condominium-pin_condo_char.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-condominium-pin_condo_char.R @@ -206,9 +206,42 @@ for (i in c("2021", "2022", "2023")) { } } +# At the end of 2024 valuations revisted some old condos and updated their +# characteristics +updates <- read_parquet( + file.path( + AWS_S3_RAW_BUCKET, + "ccao/condominium/pin_condo_char/2025/wheeling.parquet" + ) +) %>% + rename_with(~gsub("\\.", "_", tolower(.x)), .cols = everything()) %>% + select( + "pin", + starts_with("new") + ) %>% + mutate( + pin = gsub("-", "", pin), + across(starts_with("new"), as.numeric), + # Three units with 100 for unit sqft + new_unit_sf = ifelse(new_unit_sf == 100, 1000, new_unit_sf) + ) %>% + filter(!if_all(starts_with("new"), is.na)) + +# Update parcels with new column values +chars <- chars %>% + 
bind_rows() %>% + left_join(updates, by = "pin") %>% + mutate( + building_sf = coalesce(new_building_sf, building_sf), + unit_sf = coalesce(new_unit_sf, unit_sf), + bedrooms = coalesce(new_bedrooms, bedrooms), + full_baths = coalesce(new_full_baths, full_baths), + half_baths = coalesce(new_half_baths, half_baths) + ) %>% + select(!starts_with("new")) + # Upload cleaned data to S3 chars %>% - bind_rows() %>% mutate(loaded_at = as.character(Sys.time())) %>% group_by(year) %>% arrow::write_dataset( From d4f624131de7f6b4b191744ade003854c637bdf9 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Mon, 23 Dec 2024 18:45:12 +0000 Subject: [PATCH 164/166] Include all changes --- .../ccao/ccao-condominium-pin_condo_char.R | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-condominium-pin_condo_char.R b/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-condominium-pin_condo_char.R index 3b84f069e..2a000b617 100644 --- a/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-condominium-pin_condo_char.R +++ b/etl/scripts-ccao-data-warehouse-us-east-1/ccao/ccao-condominium-pin_condo_char.R @@ -206,19 +206,22 @@ for (i in c("2021", "2022", "2023")) { } } -# At the end of 2024 valuations revisted some old condos and updated their +# At the end of 2024 valuations revisited some old condos and updated their # characteristics -updates <- read_parquet( +updates <- map( file.path( - AWS_S3_RAW_BUCKET, - "ccao/condominium/pin_condo_char/2025/wheeling.parquet" - ) -) %>% + "s3://ccao-data-raw-us-east-1", + aws.s3::get_bucket_df( + AWS_S3_RAW_BUCKET, + prefix = "ccao/condominium/pin_condo_char/2025" + )$Key), + \(x) { + read_parquet(x) %>% + mutate(across(.cols = everything(), as.character)) + }) %>% + bind_rows() %>% rename_with(~gsub("\\.", "_", tolower(.x)), .cols = everything()) %>% - select( - "pin", - starts_with("new") - ) %>% + select("pin", starts_with("new")) %>% mutate( pin = gsub("-", 
"", pin), across(starts_with("new"), as.numeric), From 0149fb11dd4f27df5cce5dab3c77bda8552e4e28 Mon Sep 17 00:00:00 2001 From: William Ridgeway <10358980+wrridgeway@users.noreply.github.com> Date: Tue, 24 Dec 2024 21:13:44 -0600 Subject: [PATCH 165/166] Update etl/scripts-ccao-data-raw-us-east-1/ccao/ccao-condominium-pin_condo_char.R Co-authored-by: Dan Snow <31494343+dfsnow@users.noreply.github.com> --- .../ccao/ccao-condominium-pin_condo_char.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etl/scripts-ccao-data-raw-us-east-1/ccao/ccao-condominium-pin_condo_char.R b/etl/scripts-ccao-data-raw-us-east-1/ccao/ccao-condominium-pin_condo_char.R index 4ee07a329..539a6fa2f 100644 --- a/etl/scripts-ccao-data-raw-us-east-1/ccao/ccao-condominium-pin_condo_char.R +++ b/etl/scripts-ccao-data-raw-us-east-1/ccao/ccao-condominium-pin_condo_char.R @@ -20,7 +20,7 @@ source_paths <- c( "//fileserver/ocommon/2022 Data Collection/Condo Project/William Approved Layout North Tri Condo Project FINAL COMPLETED/", # nolint "//fileserver/ocommon/2023 Data Collection/South Tri Condo Project COMPLETED", "//fileserver/ocommon/2024 Data Collection/City Tri Condo Characteristics COMPLETED", # nolint - "O:/CCAODATA/data/condo_chars_2025_completed" # Local copy of sharepoint file + "//fileserver/ocommon/CCAODATA/data/condo_chars_2025_completed" # Local copy of sharepoint file ) source_files <- grep( From 5b1cbcd184e9bf0dbc6f4e5a145c819e33774fc3 Mon Sep 17 00:00:00 2001 From: Sweaty Handshake Date: Wed, 25 Dec 2024 03:22:00 +0000 Subject: [PATCH 166/166] Commenting --- .github/workflows/lint.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 033f1a313..904b1f010 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -23,6 +23,9 @@ jobs: # list of changed files within `super-linter` fetch-depth: 0 + # Superlinter will load renv if .Rprofile is present - it will then fail + # because the 
renv environment doesn't have lintr. Removing the .Rprofile + # file loads the default superlinter R environment. - name: Disable renv shell: bash run: rm etl/.Rprofile