From 2bc9242eb2d997f026cbec7cb738a86006b54c8e Mon Sep 17 00:00:00 2001 From: Daniel Guillamot Date: Fri, 14 Apr 2023 00:54:22 +0000 Subject: [PATCH 1/6] setting up models --- greenery/.gitignore | 4 ++ greenery/README.md | 15 +++++++ greenery/analyses/.gitkeep | 0 greenery/dbt_project.yml | 38 ++++++++++++++++++ greenery/macros/.gitkeep | 0 .../models/example/my_first_dbt_model.sql | 27 +++++++++++++ .../models/example/my_second_dbt_model.sql | 6 +++ greenery/models/example/schema.yml | 20 ++++++++++ .../models/staging/postgres/src_postgres.yml | 19 +++++++++ .../postgres/stg_postgres__addresses.sql | 23 +++++++++++ .../staging/postgres/stg_postgres__events.sql | 29 ++++++++++++++ .../postgres/stg_postgres__order_items.sql | 19 +++++++++ .../staging/postgres/stg_postgres__orders.sql | 39 +++++++++++++++++++ .../postgres/stg_postgres__products.sql | 21 ++++++++++ .../staging/postgres/stg_postgres__promos.sql | 19 +++++++++ .../staging/postgres/stg_postgres__users.sql | 29 ++++++++++++++ greenery/seeds/.gitkeep | 0 greenery/snapshots/.gitkeep | 0 greenery/tests/.gitkeep | 0 19 files changed, 308 insertions(+) create mode 100644 greenery/.gitignore create mode 100644 greenery/README.md create mode 100644 greenery/analyses/.gitkeep create mode 100644 greenery/dbt_project.yml create mode 100644 greenery/macros/.gitkeep create mode 100644 greenery/models/example/my_first_dbt_model.sql create mode 100644 greenery/models/example/my_second_dbt_model.sql create mode 100644 greenery/models/example/schema.yml create mode 100644 greenery/models/staging/postgres/src_postgres.yml create mode 100644 greenery/models/staging/postgres/stg_postgres__addresses.sql create mode 100644 greenery/models/staging/postgres/stg_postgres__events.sql create mode 100644 greenery/models/staging/postgres/stg_postgres__order_items.sql create mode 100644 greenery/models/staging/postgres/stg_postgres__orders.sql create mode 100644 greenery/models/staging/postgres/stg_postgres__products.sql create mode 
100644 greenery/models/staging/postgres/stg_postgres__promos.sql create mode 100644 greenery/models/staging/postgres/stg_postgres__users.sql create mode 100644 greenery/seeds/.gitkeep create mode 100644 greenery/snapshots/.gitkeep create mode 100644 greenery/tests/.gitkeep diff --git a/greenery/.gitignore b/greenery/.gitignore new file mode 100644 index 000000000..49f147cb9 --- /dev/null +++ b/greenery/.gitignore @@ -0,0 +1,4 @@ + +target/ +dbt_packages/ +logs/ diff --git a/greenery/README.md b/greenery/README.md new file mode 100644 index 000000000..7874ac842 --- /dev/null +++ b/greenery/README.md @@ -0,0 +1,15 @@ +Welcome to your new dbt project! + +### Using the starter project + +Try running the following commands: +- dbt run +- dbt test + + +### Resources: +- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) +- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers +- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support +- Find [dbt events](https://events.getdbt.com) near you +- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices diff --git a/greenery/analyses/.gitkeep b/greenery/analyses/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/dbt_project.yml b/greenery/dbt_project.yml new file mode 100644 index 000000000..1439d0bc9 --- /dev/null +++ b/greenery/dbt_project.yml @@ -0,0 +1,38 @@ + +# Name your project! Project names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: 'greenery' +version: '1.0.0' +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. +profile: 'greenery' + +# These configurations specify where dbt should look for different types of files. 
+# The `model-paths` config, for example, states that models in this project can be +# found in the "models/" directory. You probably won't need to change these! +model-paths: ["models"] +analysis-paths: ["analyses"] +test-paths: ["tests"] +seed-paths: ["seeds"] +macro-paths: ["macros"] +snapshot-paths: ["snapshots"] + +target-path: "target" # directory which will store compiled SQL files +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_packages" + + +# Configuring models +# Full documentation: https://docs.getdbt.com/docs/configuring-models + +# In this example config, we tell dbt to build all models in the example/ +# directory as views. These settings can be overridden in the individual model +# files using the `{{ config(...) }}` macro. +models: + greenery: + # Config indicated by + and applies to all files under models/example/ + example: + +materialized: view diff --git a/greenery/macros/.gitkeep b/greenery/macros/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/models/example/my_first_dbt_model.sql b/greenery/models/example/my_first_dbt_model.sql new file mode 100644 index 000000000..f31a12d94 --- /dev/null +++ b/greenery/models/example/my_first_dbt_model.sql @@ -0,0 +1,27 @@ + +/* + Welcome to your first dbt model! + Did you know that you can also configure models directly within SQL files? 
+ This will override configurations stated in dbt_project.yml + + Try changing "table" to "view" below +*/ + +{{ config(materialized='table') }} + +with source_data as ( + + select 1 as id + union all + select null as id + +) + +select * +from source_data + +/* + Uncomment the line below to remove records with null `id` values +*/ + +-- where id is not null diff --git a/greenery/models/example/my_second_dbt_model.sql b/greenery/models/example/my_second_dbt_model.sql new file mode 100644 index 000000000..c91f8793a --- /dev/null +++ b/greenery/models/example/my_second_dbt_model.sql @@ -0,0 +1,6 @@ + +-- Use the `ref` function to select from other models + +select * +from {{ ref('my_first_dbt_model') }} +where id = 1 diff --git a/greenery/models/example/schema.yml b/greenery/models/example/schema.yml new file mode 100644 index 000000000..e2aef43ca --- /dev/null +++ b/greenery/models/example/schema.yml @@ -0,0 +1,20 @@ + +version: 2 + +models: + - name: my_first_dbt_model + description: "A starter dbt model" + columns: + - name: id + description: "The primary key for this table" + tests: + - unique + + - name: my_second_dbt_model + description: "A starter dbt model" + columns: + - name: id + description: "The primary key for this table" + tests: + - unique + - not_null diff --git a/greenery/models/staging/postgres/src_postgres.yml b/greenery/models/staging/postgres/src_postgres.yml new file mode 100644 index 000000000..d0f88faa8 --- /dev/null +++ b/greenery/models/staging/postgres/src_postgres.yml @@ -0,0 +1,19 @@ +version: 2 + +sources: + - name: postgres + database: raw + schema: public + tables: + - name: addresses + - name: events + - name: orders + - name: order_items + - name: products + - name: promos + - name: users + + + + + diff --git a/greenery/models/staging/postgres/stg_postgres__addresses.sql b/greenery/models/staging/postgres/stg_postgres__addresses.sql new file mode 100644 index 000000000..ce5641547 --- /dev/null +++ 
b/greenery/models/staging/postgres/stg_postgres__addresses.sql @@ -0,0 +1,23 @@ +{{ + config(materialized = 'table') +}} + +with source as ( + select address_id + , address + , state + , zipcode + , country from {{ source('postgres', 'addresses') }} +) + +, renamed_recast as ( + select + address_id as address_guid + , address + , state + , lpad(zipcode, 5, 0) as zip_code + , country + from source +) + +select * from renamed_recast \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres__events.sql b/greenery/models/staging/postgres/stg_postgres__events.sql new file mode 100644 index 000000000..5c90ab60f --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres__events.sql @@ -0,0 +1,29 @@ +{{ + config(materialized = 'table') +}} + +with source as ( + select event_id + , session_id + , user_id + , page_url + , created_at + , event_type + , order_id + , product_id from {{ source('postgres', 'events') }} +) + +, renamed_recast as ( + select + event_id as event_guid + , session_id as session_guid + , user_id as user_guid + , page_url + , created_at + , event_type + , order_id as order_guid + , product_id as product_guid + from source +) + +select * from renamed_recast \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres__order_items.sql b/greenery/models/staging/postgres/stg_postgres__order_items.sql new file mode 100644 index 000000000..8ee3838ff --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres__order_items.sql @@ -0,0 +1,19 @@ +{{ + config(materialized = 'table') +}} + +with source as ( + select order_id + , product_id + , quantity from {{ source('postgres', 'order_items') }} +) + +, renamed_recast as ( + select + order_id as order_guid + , product_id as product_guid + , quantity + from source +) + +select * from renamed_recast \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres__orders.sql 
b/greenery/models/staging/postgres/stg_postgres__orders.sql new file mode 100644 index 000000000..e47493752 --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres__orders.sql @@ -0,0 +1,39 @@ +{{ + config(materialized = 'table') +}} + +with source as ( + select order_id + , user_id + , promo_id + , address_id + , created_at + , order_cost + , shipping_cost + , order_total + , tracking_id + , shipping_service + , estimated_delivery_at + , delivered_at + , status from {{ source('postgres', 'orders') }} +) + +, renamed_recast as ( + select + order_id as order_guid + , user_id as user_guid + , promo_id as promo_guid + , address_id as address_guid + , created_at + , order_cost + , shipping_cost + , order_total + , tracking_id as tracking_guid + , shipping_service + , estimated_delivery_at + , delivered_at + , status + from source +) + +select * from renamed_recast \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres__products.sql b/greenery/models/staging/postgres/stg_postgres__products.sql new file mode 100644 index 000000000..5931bf439 --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres__products.sql @@ -0,0 +1,21 @@ +{{ + config(materialized = 'table') +}} + +with source as ( + select product_id + , name + , price + , inventory from {{ source('postgres', 'products') }} +) + +, renamed_recast as ( + select + product_id as product_guid + , name + , price + , inventory + from source +) + +select * from renamed_recast \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres__promos.sql b/greenery/models/staging/postgres/stg_postgres__promos.sql new file mode 100644 index 000000000..f020b2eea --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres__promos.sql @@ -0,0 +1,19 @@ +{{ + config(materialized = 'table') +}} + +with source as ( + select promo_id + , discount + , status from {{ source('postgres', 'promos') }} +) + +, renamed_recast as ( + select + promo_id as 
promo_guid + , discount + , status + from source +) + +select * from renamed_recast \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres__users.sql b/greenery/models/staging/postgres/stg_postgres__users.sql new file mode 100644 index 000000000..f2b82e734 --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres__users.sql @@ -0,0 +1,29 @@ +{{ + config(materialized = 'table') +}} + +with source as ( + select user_id + , first_name + , last_name + , email + , phone_number + , created_at + , updated_at + , address_id from {{ source('postgres', 'users') }} +) + +, renamed_recast as ( + select + user_id as user_guid + , first_name + , last_name + , email + , phone_number + , created_at + , updated_at + , address_id as address_guid + from source +) + +select * from renamed_recast \ No newline at end of file diff --git a/greenery/seeds/.gitkeep b/greenery/seeds/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/snapshots/.gitkeep b/greenery/snapshots/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/tests/.gitkeep b/greenery/tests/.gitkeep new file mode 100644 index 000000000..e69de29bb From 64e22646c344455d40775c72149c8d71ffe56236 Mon Sep 17 00:00:00 2001 From: Daniel Guillamot Date: Fri, 14 Apr 2023 01:07:11 +0000 Subject: [PATCH 2/6] Revert "setting up models" This reverts commit 2bc9242eb2d997f026cbec7cb738a86006b54c8e. 
--- greenery/.gitignore | 4 -- greenery/README.md | 15 ------- greenery/analyses/.gitkeep | 0 greenery/dbt_project.yml | 38 ------------------ greenery/macros/.gitkeep | 0 .../models/example/my_first_dbt_model.sql | 27 ------------- .../models/example/my_second_dbt_model.sql | 6 --- greenery/models/example/schema.yml | 20 ---------- .../models/staging/postgres/src_postgres.yml | 19 --------- .../postgres/stg_postgres__addresses.sql | 23 ----------- .../staging/postgres/stg_postgres__events.sql | 29 -------------- .../postgres/stg_postgres__order_items.sql | 19 --------- .../staging/postgres/stg_postgres__orders.sql | 39 ------------------- .../postgres/stg_postgres__products.sql | 21 ---------- .../staging/postgres/stg_postgres__promos.sql | 19 --------- .../staging/postgres/stg_postgres__users.sql | 29 -------------- greenery/seeds/.gitkeep | 0 greenery/snapshots/.gitkeep | 0 greenery/tests/.gitkeep | 0 19 files changed, 308 deletions(-) delete mode 100644 greenery/.gitignore delete mode 100644 greenery/README.md delete mode 100644 greenery/analyses/.gitkeep delete mode 100644 greenery/dbt_project.yml delete mode 100644 greenery/macros/.gitkeep delete mode 100644 greenery/models/example/my_first_dbt_model.sql delete mode 100644 greenery/models/example/my_second_dbt_model.sql delete mode 100644 greenery/models/example/schema.yml delete mode 100644 greenery/models/staging/postgres/src_postgres.yml delete mode 100644 greenery/models/staging/postgres/stg_postgres__addresses.sql delete mode 100644 greenery/models/staging/postgres/stg_postgres__events.sql delete mode 100644 greenery/models/staging/postgres/stg_postgres__order_items.sql delete mode 100644 greenery/models/staging/postgres/stg_postgres__orders.sql delete mode 100644 greenery/models/staging/postgres/stg_postgres__products.sql delete mode 100644 greenery/models/staging/postgres/stg_postgres__promos.sql delete mode 100644 greenery/models/staging/postgres/stg_postgres__users.sql delete mode 100644 
greenery/seeds/.gitkeep delete mode 100644 greenery/snapshots/.gitkeep delete mode 100644 greenery/tests/.gitkeep diff --git a/greenery/.gitignore b/greenery/.gitignore deleted file mode 100644 index 49f147cb9..000000000 --- a/greenery/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ - -target/ -dbt_packages/ -logs/ diff --git a/greenery/README.md b/greenery/README.md deleted file mode 100644 index 7874ac842..000000000 --- a/greenery/README.md +++ /dev/null @@ -1,15 +0,0 @@ -Welcome to your new dbt project! - -### Using the starter project - -Try running the following commands: -- dbt run -- dbt test - - -### Resources: -- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) -- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers -- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support -- Find [dbt events](https://events.getdbt.com) near you -- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices diff --git a/greenery/analyses/.gitkeep b/greenery/analyses/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/greenery/dbt_project.yml b/greenery/dbt_project.yml deleted file mode 100644 index 1439d0bc9..000000000 --- a/greenery/dbt_project.yml +++ /dev/null @@ -1,38 +0,0 @@ - -# Name your project! Project names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: 'greenery' -version: '1.0.0' -config-version: 2 - -# This setting configures which "profile" dbt uses for this project. -profile: 'greenery' - -# These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that models in this project can be -# found in the "models/" directory. You probably won't need to change these! 
-model-paths: ["models"] -analysis-paths: ["analyses"] -test-paths: ["tests"] -seed-paths: ["seeds"] -macro-paths: ["macros"] -snapshot-paths: ["snapshots"] - -target-path: "target" # directory which will store compiled SQL files -clean-targets: # directories to be removed by `dbt clean` - - "target" - - "dbt_packages" - - -# Configuring models -# Full documentation: https://docs.getdbt.com/docs/configuring-models - -# In this example config, we tell dbt to build all models in the example/ -# directory as views. These settings can be overridden in the individual model -# files using the `{{ config(...) }}` macro. -models: - greenery: - # Config indicated by + and applies to all files under models/example/ - example: - +materialized: view diff --git a/greenery/macros/.gitkeep b/greenery/macros/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/greenery/models/example/my_first_dbt_model.sql b/greenery/models/example/my_first_dbt_model.sql deleted file mode 100644 index f31a12d94..000000000 --- a/greenery/models/example/my_first_dbt_model.sql +++ /dev/null @@ -1,27 +0,0 @@ - -/* - Welcome to your first dbt model! - Did you know that you can also configure models directly within SQL files? 
- This will override configurations stated in dbt_project.yml - - Try changing "table" to "view" below -*/ - -{{ config(materialized='table') }} - -with source_data as ( - - select 1 as id - union all - select null as id - -) - -select * -from source_data - -/* - Uncomment the line below to remove records with null `id` values -*/ - --- where id is not null diff --git a/greenery/models/example/my_second_dbt_model.sql b/greenery/models/example/my_second_dbt_model.sql deleted file mode 100644 index c91f8793a..000000000 --- a/greenery/models/example/my_second_dbt_model.sql +++ /dev/null @@ -1,6 +0,0 @@ - --- Use the `ref` function to select from other models - -select * -from {{ ref('my_first_dbt_model') }} -where id = 1 diff --git a/greenery/models/example/schema.yml b/greenery/models/example/schema.yml deleted file mode 100644 index e2aef43ca..000000000 --- a/greenery/models/example/schema.yml +++ /dev/null @@ -1,20 +0,0 @@ - -version: 2 - -models: - - name: my_first_dbt_model - description: "A starter dbt model" - columns: - - name: id - description: "The primary key for this table" - tests: - - unique - - - name: my_second_dbt_model - description: "A starter dbt model" - columns: - - name: id - description: "The primary key for this table" - tests: - - unique - - not_null diff --git a/greenery/models/staging/postgres/src_postgres.yml b/greenery/models/staging/postgres/src_postgres.yml deleted file mode 100644 index d0f88faa8..000000000 --- a/greenery/models/staging/postgres/src_postgres.yml +++ /dev/null @@ -1,19 +0,0 @@ -version: 2 - -sources: - - name: postgres - database: raw - schema: public - tables: - - name: addresses - - name: events - - name: orders - - name: order_items - - name: products - - name: promos - - name: users - - - - - diff --git a/greenery/models/staging/postgres/stg_postgres__addresses.sql b/greenery/models/staging/postgres/stg_postgres__addresses.sql deleted file mode 100644 index ce5641547..000000000 --- 
a/greenery/models/staging/postgres/stg_postgres__addresses.sql +++ /dev/null @@ -1,23 +0,0 @@ -{{ - config(materialized = 'table') -}} - -with source as ( - select address_id - , address - , state - , zipcode - , country from {{ source('postgres', 'addresses') }} -) - -, renamed_recast as ( - select - address_id as address_guid - , address - , state - , lpad(zipcode, 5, 0) as zip_code - , country - from source -) - -select * from renamed_recast \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres__events.sql b/greenery/models/staging/postgres/stg_postgres__events.sql deleted file mode 100644 index 5c90ab60f..000000000 --- a/greenery/models/staging/postgres/stg_postgres__events.sql +++ /dev/null @@ -1,29 +0,0 @@ -{{ - config(materialized = 'table') -}} - -with source as ( - select event_id - , session_id - , user_id - , page_url - , created_at - , event_type - , order_id - , product_id from {{ source('postgres', 'events') }} -) - -, renamed_recast as ( - select - event_id as event_guid - , session_id as session_guid - , user_id as user_guid - , page_url - , created_at - , event_type - , order_id as order_guid - , product_id as product_guid - from source -) - -select * from renamed_recast \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres__order_items.sql b/greenery/models/staging/postgres/stg_postgres__order_items.sql deleted file mode 100644 index 8ee3838ff..000000000 --- a/greenery/models/staging/postgres/stg_postgres__order_items.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ - config(materialized = 'table') -}} - -with source as ( - select order_id - , product_id - , quantity from {{ source('postgres', 'order_items') }} -) - -, renamed_recast as ( - select - order_id as order_guid - , product_id as product_guid - , quantity - from source -) - -select * from renamed_recast \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres__orders.sql 
b/greenery/models/staging/postgres/stg_postgres__orders.sql deleted file mode 100644 index e47493752..000000000 --- a/greenery/models/staging/postgres/stg_postgres__orders.sql +++ /dev/null @@ -1,39 +0,0 @@ -{{ - config(materialized = 'table') -}} - -with source as ( - select order_id - , user_id - , promo_id - , address_id - , created_at - , order_cost - , shipping_cost - , order_total - , tracking_id - , shipping_service - , estimated_delivery_at - , delivered_at - , status from {{ source('postgres', 'orders') }} -) - -, renamed_recast as ( - select - order_id as order_guid - , user_id as user_guid - , promo_id as promo_guid - , address_id as address_guid - , created_at - , order_cost - , shipping_cost - , order_total - , tracking_id as tracking_guid - , shipping_service - , estimated_delivery_at - , delivered_at - , status - from source -) - -select * from renamed_recast \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres__products.sql b/greenery/models/staging/postgres/stg_postgres__products.sql deleted file mode 100644 index 5931bf439..000000000 --- a/greenery/models/staging/postgres/stg_postgres__products.sql +++ /dev/null @@ -1,21 +0,0 @@ -{{ - config(materialized = 'table') -}} - -with source as ( - select product_id - , name - , price - , inventory from {{ source('postgres', 'products') }} -) - -, renamed_recast as ( - select - product_id as product_guid - , name - , price - , inventory - from source -) - -select * from renamed_recast \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres__promos.sql b/greenery/models/staging/postgres/stg_postgres__promos.sql deleted file mode 100644 index f020b2eea..000000000 --- a/greenery/models/staging/postgres/stg_postgres__promos.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ - config(materialized = 'table') -}} - -with source as ( - select promo_id - , discount - , status from {{ source('postgres', 'promos') }} -) - -, renamed_recast as ( - select - promo_id 
as promo_guid - , discount - , status - from source -) - -select * from renamed_recast \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres__users.sql b/greenery/models/staging/postgres/stg_postgres__users.sql deleted file mode 100644 index f2b82e734..000000000 --- a/greenery/models/staging/postgres/stg_postgres__users.sql +++ /dev/null @@ -1,29 +0,0 @@ -{{ - config(materialized = 'table') -}} - -with source as ( - select user_id - , first_name - , last_name - , email - , phone_number - , created_at - , updated_at - , address_id from {{ source('postgres', 'users') }} -) - -, renamed_recast as ( - select - user_id as user_guid - , first_name - , last_name - , email - , phone_number - , created_at - , updated_at - , address_id as address_guid - from source -) - -select * from renamed_recast \ No newline at end of file diff --git a/greenery/seeds/.gitkeep b/greenery/seeds/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/greenery/snapshots/.gitkeep b/greenery/snapshots/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/greenery/tests/.gitkeep b/greenery/tests/.gitkeep deleted file mode 100644 index e69de29bb..000000000 From fd25901d9c6e5a305c3d2c820546110df1e5b676 Mon Sep 17 00:00:00 2001 From: Daniel Guillamot Date: Fri, 14 Apr 2023 00:54:22 +0000 Subject: [PATCH 3/6] setting up models (cherry picked from commit 2bc9242eb2d997f026cbec7cb738a86006b54c8e) --- greenery/.gitignore | 4 ++ greenery/README.md | 15 +++++++ greenery/analyses/.gitkeep | 0 greenery/dbt_project.yml | 38 ++++++++++++++++++ greenery/macros/.gitkeep | 0 .../models/example/my_first_dbt_model.sql | 27 +++++++++++++ .../models/example/my_second_dbt_model.sql | 6 +++ greenery/models/example/schema.yml | 20 ++++++++++ .../models/staging/postgres/src_postgres.yml | 19 +++++++++ .../postgres/stg_postgres__addresses.sql | 23 +++++++++++ .../staging/postgres/stg_postgres__events.sql | 29 ++++++++++++++ 
.../postgres/stg_postgres__order_items.sql | 19 +++++++++ .../staging/postgres/stg_postgres__orders.sql | 39 +++++++++++++++++++ .../postgres/stg_postgres__products.sql | 21 ++++++++++ .../staging/postgres/stg_postgres__promos.sql | 19 +++++++++ .../staging/postgres/stg_postgres__users.sql | 29 ++++++++++++++ greenery/seeds/.gitkeep | 0 greenery/snapshots/.gitkeep | 0 greenery/tests/.gitkeep | 0 19 files changed, 308 insertions(+) create mode 100644 greenery/.gitignore create mode 100644 greenery/README.md create mode 100644 greenery/analyses/.gitkeep create mode 100644 greenery/dbt_project.yml create mode 100644 greenery/macros/.gitkeep create mode 100644 greenery/models/example/my_first_dbt_model.sql create mode 100644 greenery/models/example/my_second_dbt_model.sql create mode 100644 greenery/models/example/schema.yml create mode 100644 greenery/models/staging/postgres/src_postgres.yml create mode 100644 greenery/models/staging/postgres/stg_postgres__addresses.sql create mode 100644 greenery/models/staging/postgres/stg_postgres__events.sql create mode 100644 greenery/models/staging/postgres/stg_postgres__order_items.sql create mode 100644 greenery/models/staging/postgres/stg_postgres__orders.sql create mode 100644 greenery/models/staging/postgres/stg_postgres__products.sql create mode 100644 greenery/models/staging/postgres/stg_postgres__promos.sql create mode 100644 greenery/models/staging/postgres/stg_postgres__users.sql create mode 100644 greenery/seeds/.gitkeep create mode 100644 greenery/snapshots/.gitkeep create mode 100644 greenery/tests/.gitkeep diff --git a/greenery/.gitignore b/greenery/.gitignore new file mode 100644 index 000000000..49f147cb9 --- /dev/null +++ b/greenery/.gitignore @@ -0,0 +1,4 @@ + +target/ +dbt_packages/ +logs/ diff --git a/greenery/README.md b/greenery/README.md new file mode 100644 index 000000000..7874ac842 --- /dev/null +++ b/greenery/README.md @@ -0,0 +1,15 @@ +Welcome to your new dbt project! 
+ +### Using the starter project + +Try running the following commands: +- dbt run +- dbt test + + +### Resources: +- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) +- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers +- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support +- Find [dbt events](https://events.getdbt.com) near you +- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices diff --git a/greenery/analyses/.gitkeep b/greenery/analyses/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/dbt_project.yml b/greenery/dbt_project.yml new file mode 100644 index 000000000..1439d0bc9 --- /dev/null +++ b/greenery/dbt_project.yml @@ -0,0 +1,38 @@ + +# Name your project! Project names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: 'greenery' +version: '1.0.0' +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. +profile: 'greenery' + +# These configurations specify where dbt should look for different types of files. +# The `model-paths` config, for example, states that models in this project can be +# found in the "models/" directory. You probably won't need to change these! +model-paths: ["models"] +analysis-paths: ["analyses"] +test-paths: ["tests"] +seed-paths: ["seeds"] +macro-paths: ["macros"] +snapshot-paths: ["snapshots"] + +target-path: "target" # directory which will store compiled SQL files +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_packages" + + +# Configuring models +# Full documentation: https://docs.getdbt.com/docs/configuring-models + +# In this example config, we tell dbt to build all models in the example/ +# directory as views. 
These settings can be overridden in the individual model +# files using the `{{ config(...) }}` macro. +models: + greenery: + # Config indicated by + and applies to all files under models/example/ + example: + +materialized: view diff --git a/greenery/macros/.gitkeep b/greenery/macros/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/models/example/my_first_dbt_model.sql b/greenery/models/example/my_first_dbt_model.sql new file mode 100644 index 000000000..f31a12d94 --- /dev/null +++ b/greenery/models/example/my_first_dbt_model.sql @@ -0,0 +1,27 @@ + +/* + Welcome to your first dbt model! + Did you know that you can also configure models directly within SQL files? + This will override configurations stated in dbt_project.yml + + Try changing "table" to "view" below +*/ + +{{ config(materialized='table') }} + +with source_data as ( + + select 1 as id + union all + select null as id + +) + +select * +from source_data + +/* + Uncomment the line below to remove records with null `id` values +*/ + +-- where id is not null diff --git a/greenery/models/example/my_second_dbt_model.sql b/greenery/models/example/my_second_dbt_model.sql new file mode 100644 index 000000000..c91f8793a --- /dev/null +++ b/greenery/models/example/my_second_dbt_model.sql @@ -0,0 +1,6 @@ + +-- Use the `ref` function to select from other models + +select * +from {{ ref('my_first_dbt_model') }} +where id = 1 diff --git a/greenery/models/example/schema.yml b/greenery/models/example/schema.yml new file mode 100644 index 000000000..e2aef43ca --- /dev/null +++ b/greenery/models/example/schema.yml @@ -0,0 +1,20 @@ + +version: 2 + +models: + - name: my_first_dbt_model + description: "A starter dbt model" + columns: + - name: id + description: "The primary key for this table" + tests: + - unique + + - name: my_second_dbt_model + description: "A starter dbt model" + columns: + - name: id + description: "The primary key for this table" + tests: + - unique + - not_null diff --git 
a/greenery/models/staging/postgres/src_postgres.yml b/greenery/models/staging/postgres/src_postgres.yml new file mode 100644 index 000000000..d0f88faa8 --- /dev/null +++ b/greenery/models/staging/postgres/src_postgres.yml @@ -0,0 +1,19 @@ +version: 2 + +sources: + - name: postgres + database: raw + schema: public + tables: + - name: addresses + - name: events + - name: orders + - name: order_items + - name: products + - name: promos + - name: users + + + + + diff --git a/greenery/models/staging/postgres/stg_postgres__addresses.sql b/greenery/models/staging/postgres/stg_postgres__addresses.sql new file mode 100644 index 000000000..ce5641547 --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres__addresses.sql @@ -0,0 +1,23 @@ +{{ + config(materialized = 'table') +}} + +with source as ( + select address_id + , address + , state + , zipcode + , country from {{ source('postgres', 'addresses') }} +) + +, renamed_recast as ( + select + address_id as address_guid + , address + , state + , lpad(zipcode, 5, 0) as zip_code + , country + from source +) + +select * from renamed_recast \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres__events.sql b/greenery/models/staging/postgres/stg_postgres__events.sql new file mode 100644 index 000000000..5c90ab60f --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres__events.sql @@ -0,0 +1,29 @@ +{{ + config(materialized = 'table') +}} + +with source as ( + select event_id + , session_id + , user_id + , page_url + , created_at + , event_type + , order_id + , product_id from {{ source('postgres', 'events') }} +) + +, renamed_recast as ( + select + event_id as event_guid + , session_id as session_guid + , user_id as user_guid + , page_url + , created_at + , event_type + , order_id as order_guid + , product_id as product_guid + from source +) + +select * from renamed_recast \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres__order_items.sql 
b/greenery/models/staging/postgres/stg_postgres__order_items.sql new file mode 100644 index 000000000..8ee3838ff --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres__order_items.sql @@ -0,0 +1,19 @@ +{{ + config(materialized = 'table') +}} + +with source as ( + select order_id + , product_id + , quantity from {{ source('postgres', 'order_items') }} +) + +, renamed_recast as ( + select + order_id as order_guid + , product_id as product_guid + , quantity + from source +) + +select * from renamed_recast \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres__orders.sql b/greenery/models/staging/postgres/stg_postgres__orders.sql new file mode 100644 index 000000000..e47493752 --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres__orders.sql @@ -0,0 +1,39 @@ +{{ + config(materialized = 'table') +}} + +with source as ( + select order_id + , user_id + , promo_id + , address_id + , created_at + , order_cost + , shipping_cost + , order_total + , tracking_id + , shipping_service + , estimated_delivery_at + , delivered_at + , status from {{ source('postgres', 'orders') }} +) + +, renamed_recast as ( + select + order_id as order_guid + , user_id as user_guid + , promo_id as promo_guid + , address_id as address_guid + , created_at + , order_cost + , shipping_cost + , order_total + , tracking_id as tracking_guid + , shipping_service + , estimated_delivery_at + , delivered_at + , status + from source +) + +select * from renamed_recast \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres__products.sql b/greenery/models/staging/postgres/stg_postgres__products.sql new file mode 100644 index 000000000..5931bf439 --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres__products.sql @@ -0,0 +1,21 @@ +{{ + config(materialized = 'table') +}} + +with source as ( + select product_id + , name + , price + , inventory from {{ source('postgres', 'products') }} +) + +, renamed_recast as ( + 
select + product_id as product_guid + , name + , price + , inventory + from source +) + +select * from renamed_recast \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres__promos.sql b/greenery/models/staging/postgres/stg_postgres__promos.sql new file mode 100644 index 000000000..f020b2eea --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres__promos.sql @@ -0,0 +1,19 @@ +{{ + config(materialized = 'table') +}} + +with source as ( + select promo_id + , discount + , status from {{ source('postgres', 'promos') }} +) + +, renamed_recast as ( + select + promo_id as promo_guid + , discount + , status + from source +) + +select * from renamed_recast \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres__users.sql b/greenery/models/staging/postgres/stg_postgres__users.sql new file mode 100644 index 000000000..f2b82e734 --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres__users.sql @@ -0,0 +1,29 @@ +{{ + config(materialized = 'table') +}} + +with source as ( + select user_id + , first_name + , last_name + , email + , phone_number + , created_at + , updated_at + , address_id from {{ source('postgres', 'users') }} +) + +, renamed_recast as ( + select + user_id as user_guid + , first_name + , last_name + , email + , phone_number + , created_at + , updated_at + , address_id as address_guid + from source +) + +select * from renamed_recast \ No newline at end of file diff --git a/greenery/seeds/.gitkeep b/greenery/seeds/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/snapshots/.gitkeep b/greenery/snapshots/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/tests/.gitkeep b/greenery/tests/.gitkeep new file mode 100644 index 000000000..e69de29bb From f9921deda35a736f19b6270d177c64b72eb6928f Mon Sep 17 00:00:00 2001 From: Daniel Guillamot Date: Fri, 14 Apr 2023 01:15:41 +0000 Subject: [PATCH 4/6] env setup and initial 7 model setup 
--- scratch/scratch | 1 + 1 file changed, 1 insertion(+) create mode 100644 scratch/scratch diff --git a/scratch/scratch b/scratch/scratch new file mode 100644 index 000000000..a33387d27 --- /dev/null +++ b/scratch/scratch @@ -0,0 +1 @@ +SELECT * FROM dev_db.dbt_danieloutschoolcom.stg_postgres__orders; From 57c851f819a15902265be936fc7d4b4ec981966a Mon Sep 17 00:00:00 2001 From: Daniel Guillamot Date: Fri, 14 Apr 2023 02:09:59 +0000 Subject: [PATCH 5/6] yml model descriptions --- .gitignore | 1 + .../staging/postgres/_postgres__models.yml | 245 ++++++++++++++++++ ...rc_postgres.yml => _postgres__sources.yml} | 2 +- 3 files changed, 247 insertions(+), 1 deletion(-) create mode 100644 greenery/models/staging/postgres/_postgres__models.yml rename greenery/models/staging/postgres/{src_postgres.yml => _postgres__sources.yml} (88%) diff --git a/.gitignore b/.gitignore index b6e47617d..8c8200842 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,4 @@ dmypy.json # Pyre type checker .pyre/ +scratch/ \ No newline at end of file diff --git a/greenery/models/staging/postgres/_postgres__models.yml b/greenery/models/staging/postgres/_postgres__models.yml new file mode 100644 index 000000000..18e3029a8 --- /dev/null +++ b/greenery/models/staging/postgres/_postgres__models.yml @@ -0,0 +1,245 @@ + +version: 2 + +models: + - name: stg_postgres__addresses + description: "Addresses (dbt stage model)" + columns: + - name: address_id + description: "The primary key for this table (PK)" + tests: + - unique + - not_null + - name: address + description: "Street address" + tests: + - not_null + - name: zipcode + description: "Zip / Postal Code" + - name: state + description: "State / Province / Region" + - name: country + description: "Country" + tests: + - not_null + config: + column_types: + address_id: varchar(256) + address: varchar(256) + zipcode: int + state: varchar(256) + country: varchar(256) + + + + - name: stg_postgres__events + description: "Events (Analytics) (dbt stage 
model)" + columns: + - name: event_id + description: "The primary key for this table (PK)" + tests: + - unique + - not_null + - name: session_id + description: "Session this event belongs to (FK)" + tests: + - not_null + - name: user_id + description: "User who triggered event (FK)" + - name: event_type + description: "Type of analytics event" + - name: page_url + description: "URL of page this event occurred on" + - name: created_at + description: "Datetime this event was triggered" + tests: + - not_null + - name: order_id + description: "Order attached to this event (FK)" + - name: product_id + description: "Product attached to this event (FK)" + config: + column_types: + event_id: varchar(256) + session_id: varchar(256) + user_id: varchar(256) + event_type: varchar(256) + page_url: varchar(256) + created_at: timestamp + order_id: varchar(256) + product_id: varchar(256) + + + + - name: stg_postgres__order_items + description: "Line items as part of an order (dbt stage model)" + columns: + - name: order_item_id + description: "The primary key for this table (PK)" + tests: + - unique + - not_null + - name: order_id + description: "The order this order item is attached to (FK)" + tests: + - not_null + - name: product_id + description: "The product this order item is attached to (FK)" + tests: + - not_null + - name: quantity + description: "The quantity of this order item within the order it is attached to" + tests: + - not_null + config: + column_types: + order_item_id: varchar(256) + order_id: varchar(256) + product_id: varchar(256) + quantity: int + + + + - name: stg_postgres__orders + description: "Orders (dbt stage model)" + columns: + - name: order_id + description: "The primary key for this table (PK)" + tests: + - unique + - not_null + - name: promo_id + description: "The promotional campaign this order is attached to (FK)" + - name: user_id + description: "The user this order is attached to (FK)" + tests: + - not_null + - name: address_id + description: "The 
address this order should be delivered to (FK)" + - name: created_at + description: "Datetime this order was created" + tests: + - not_null + - name: order_cost + description: "Cost of the order" + tests: + - not_null + - name: shipping_cost + description: "Shipping Cost of the order" + tests: + - not_null + - name: order_total + description: "Total Cost of the order" + tests: + - not_null + - name: tracking_id + description: "The tracking number attached to the shipment of this order" + - name: shipping_service + description: "The shipping service used for this order" + - name: estimated_delivery_at + description: "Datetime that the order is estimated to be delivered at" + - name: status + description: "The status of the order" + config: + column_types: + order_id: varchar(256) + promo_id: varchar(256) + user_id: varchar(256) + address_id: varchar(256) + created_at: timestamp + order_cost: float + shipping_cost: float + order_total: float + tracking_id: varchar(256) + shipping_service: varchar(256) + estimated_delivery_at: timestamp + delivered_at: timestamp + status: varchar(256) + + - name: stg_postgres__products + description: "Products (dbt stage model)" + columns: + - name: product_id + description: "The primary key for this table (PK)" + tests: + - unique + - not_null + - name: name + description: "Name of the product" + tests: + - not_null + - name: price + description: "The price of the product" + tests: + - not_null + - name: inventory + description: "The current inventory of the product" + tests: + - not_null + config: + column_types: + product_id: varchar(256) + name: varchar(256) + price: float + inventory: int + + + + - name: stg_postgres__promos + description: "Promotional campaigns (dbt stage model)" + columns: + - name: promo_id + description: "The primary key for this table (PK)" + tests: + - unique + - not_null + - name: discount + description: "The discount amount attached to this promotion" + tests: + - not_null + - name: status + description: 
"The current status of the promotional campaign" + tests: + - not_null + config: + column_types: + promo_id: varchar(256) + discount: int + status: varchar(256) + + + - name: stg_postgres__users + description: "Users (dbt stage model)" + columns: + - name: user_id + description: "The primary key for this table (PK)" + tests: + - unique + - not_null + - name: first_name + description: "The user's first name" + tests: + - not_null + - name: last_name + description: "The user's last name" + - name: email + description: "The user's email" + - name: phone_number + description: "The user's phone number" + - name: created_at + description: "Datetime this user signed up" + - name: updated_at + description: "Datetime this user record most recently was updated" + - name: address_id + description: "The address attached to this user (FK)" + config: + column_types: + user_id: varchar(256) + first_name: varchar(256) + last_name: varchar(256) + email: varchar(256) + phone_number: varchar(256) + created_at: timestamp + updated_at: timestamp + address_id: varchar(256) + diff --git a/greenery/models/staging/postgres/src_postgres.yml b/greenery/models/staging/postgres/_postgres__sources.yml similarity index 88% rename from greenery/models/staging/postgres/src_postgres.yml rename to greenery/models/staging/postgres/_postgres__sources.yml index d0f88faa8..54b381f69 100644 --- a/greenery/models/staging/postgres/src_postgres.yml +++ b/greenery/models/staging/postgres/_postgres__sources.yml @@ -7,8 +7,8 @@ sources: tables: - name: addresses - name: events + - name: order_items - name: orders - - name: order_items - name: products - name: promos - name: users From bf5d58f741aaaea488dd3e6576342c36217e7d0a Mon Sep 17 00:00:00 2001 From: Daniel Guillamot Date: Fri, 14 Apr 2023 03:22:58 +0000 Subject: [PATCH 6/6] Week 1 Queries --- Project1/README.md | 102 ++++++++++++++++++ .../snapshots/snap_postgres__products.sql | 16 +++ 2 files changed, 118 insertions(+) create mode 100644 
Project1/README.md create mode 100644 greenery/snapshots/snap_postgres__products.sql diff --git a/Project1/README.md b/Project1/README.md new file mode 100644 index 000000000..9601bab0d --- /dev/null +++ b/Project1/README.md @@ -0,0 +1,102 @@ +# Project 1 + +### How many users do we have? +130 + +```sql +SELECT COUNT(user_guid) FROM dev_db.dbt_danieloutschoolcom.stg_postgres__users; +``` + +### On average, how many orders do we receive per hour? +5.416668 + +```sql +WITH orders_received_hourly as ( +SELECT date_trunc('hour', created_at) as hour_received +, COUNT(*) as num_received_this_hour +FROM dev_db.dbt_danieloutschoolcom.stg_postgres__orders +GROUP BY 1 +) + +SELECT AVG(num_received_this_hour) as average_num_orders_received_hourly +FROM orders_received_hourly; +``` + +### On average, how long does an order take from being placed to being delivered? + +93.4 + +```sql +with delivery_hours as +( + SELECT created_at + , delivered_at + , datediff(hour, created_at, delivered_at) as hours_to_deliver + FROM dev_db.dbt_danieloutschoolcom.stg_postgres__orders + WHERE status = 'delivered' +) + +SELECT round(AVG(hours_to_deliver), 2) +FROM delivery_hours +; +``` + +### How many users have only made one purchase? Two purchases? Three+ purchases? 
+1 purchase = 25 users +2 purchases = 28 users +3 or more purchases = 71 users + +```sql +WITH orders_per_user_table as ( + SELECT user_guid + , COUNT(distinct order_guid) as orders_per_user + FROM dev_db.dbt_danieloutschoolcom.stg_postgres__orders + GROUP BY user_guid +) + +SELECT orders_per_user +, COUNT(distinct user_guid) as users_with_this_many_orders +FROM orders_per_user_table +GROUP BY orders_per_user +; +``` + +```sql +WITH orders_per_user_table as ( + SELECT user_guid + , COUNT(distinct order_guid) as orders_per_user + FROM dev_db.dbt_danieloutschoolcom.stg_postgres__orders + GROUP BY user_guid +), + +user_order_counts as ( + SELECT orders_per_user + , COUNT(distinct user_guid) as num_users_with_this_many_orders + FROM orders_per_user_table + GROUP BY orders_per_user +) + +SELECT SUM(num_users_with_this_many_orders) as num_users_with_three_or_more_orders +FROM user_order_counts +WHERE orders_per_user >= 3 +; +``` + +### Note: you should consider a purchase to be a single order. In other words, if a user places one order for 3 products, they are considered to have made 1 purchase. + +### On average, how many unique sessions do we have per hour? 
+ +16.33 + +```sql +WITH unique_sessions_per_hour as ( + SELECT date_trunc(hour, created_at) as created_hour + , COUNT(distinct session_guid) as sessions_per_hour + FROM dev_db.dbt_danieloutschoolcom.stg_postgres__events + GROUP BY created_hour +) + +SELECT round(AVG(sessions_per_hour), 2) +FROM unique_sessions_per_hour +; +``` diff --git a/greenery/snapshots/snap_postgres__products.sql b/greenery/snapshots/snap_postgres__products.sql new file mode 100644 index 000000000..cff186397 --- /dev/null +++ b/greenery/snapshots/snap_postgres__products.sql @@ -0,0 +1,16 @@ +{% snapshot inventory_snapshot %} + +{{ + config( + target_database = target.database, + target_schema = target.schema, + strategy='check', + unique_key='product_id', + check_cols=['inventory'], + ) +}} + + +select * from {{ source('postgres', 'products') }} + +{% endsnapshot %} \ No newline at end of file