From 8434eba1f2b9fb12f827360408b5ee909f95fb35 Mon Sep 17 00:00:00 2001 From: Felipe Petribu Date: Fri, 15 Sep 2023 20:17:45 +0000 Subject: [PATCH 1/9] week_1_first_project_delivery --- greenery/.gitignore | 4 + greenery/README.md | 21 ++++ greenery/analyses/.gitkeep | 0 greenery/analyses/week_1_project.sql | 90 +++++++++++++ greenery/dbt_project.yml | 37 ++++++ greenery/macros/.gitkeep | 0 .../models/example/my_first_dbt_model.sql | 27 ++++ .../models/example/my_second_dbt_model.sql | 6 + greenery/models/example/schema.yml | 21 ++++ .../staging/postgres/_postgres__models.yaml | 118 ++++++++++++++++++ .../staging/postgres/_postgres__sources.yaml | 14 +++ .../models/staging/postgres/stg_addresses.sql | 7 ++ .../models/staging/postgres/stg_events.sql | 10 ++ .../staging/postgres/stg_order_items.sql | 5 + .../models/staging/postgres/stg_orders.sql | 15 +++ .../models/staging/postgres/stg_products.sql | 6 + .../models/staging/postgres/stg_promos.sql | 5 + .../models/staging/postgres/stg_users.sql | 10 ++ greenery/seeds/.gitkeep | 0 greenery/snapshots/.gitkeep | 0 greenery/snapshots/products_snapshot.sql | 20 +++ greenery/tests/.gitkeep | 0 22 files changed, 416 insertions(+) create mode 100644 greenery/.gitignore create mode 100644 greenery/README.md create mode 100644 greenery/analyses/.gitkeep create mode 100644 greenery/analyses/week_1_project.sql create mode 100644 greenery/dbt_project.yml create mode 100644 greenery/macros/.gitkeep create mode 100644 greenery/models/example/my_first_dbt_model.sql create mode 100644 greenery/models/example/my_second_dbt_model.sql create mode 100644 greenery/models/example/schema.yml create mode 100644 greenery/models/staging/postgres/_postgres__models.yaml create mode 100644 greenery/models/staging/postgres/_postgres__sources.yaml create mode 100644 greenery/models/staging/postgres/stg_addresses.sql create mode 100644 greenery/models/staging/postgres/stg_events.sql create mode 100644 
greenery/models/staging/postgres/stg_order_items.sql create mode 100644 greenery/models/staging/postgres/stg_orders.sql create mode 100644 greenery/models/staging/postgres/stg_products.sql create mode 100644 greenery/models/staging/postgres/stg_promos.sql create mode 100644 greenery/models/staging/postgres/stg_users.sql create mode 100644 greenery/seeds/.gitkeep create mode 100644 greenery/snapshots/.gitkeep create mode 100644 greenery/snapshots/products_snapshot.sql create mode 100644 greenery/tests/.gitkeep diff --git a/greenery/.gitignore b/greenery/.gitignore new file mode 100644 index 000000000..49f147cb9 --- /dev/null +++ b/greenery/.gitignore @@ -0,0 +1,4 @@ + +target/ +dbt_packages/ +logs/ diff --git a/greenery/README.md b/greenery/README.md new file mode 100644 index 000000000..4682927ea --- /dev/null +++ b/greenery/README.md @@ -0,0 +1,21 @@ +# Week 1 + +## Metrics: + +- **Total Users**: The total number of unique users registered on the platform. +- **Average Orders per Hour**: The average number of orders placed every hour. +- **Average Hours to Deliver**: On average, the number of hours it takes from an order being placed to being delivered. +- **Users by Purchase Count**: How many users have made one, two, or three or more purchases. +- **Average Sessions per Hour**: The average number of unique browsing sessions on the platform every hour. 
+ +## Results: + +| Metric | Value | +|---------------------------------|------------| +| Total Users | 130 | +| Average Orders per Hour | 7.520 | +| Average Hours to Deliver | 93.403 | +| Users with One Purchase | 25 | +| Users with Two Purchases | 28 | +| Users with Three+ Purchases | 71 | +| Average Sessions per Hour | 16.327 | \ No newline at end of file diff --git a/greenery/analyses/.gitkeep b/greenery/analyses/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/analyses/week_1_project.sql b/greenery/analyses/week_1_project.sql new file mode 100644 index 000000000..a2751d18f --- /dev/null +++ b/greenery/analyses/week_1_project.sql @@ -0,0 +1,90 @@ +WITH + --1. How many users do we have? + qtt_users AS ( + SELECT + COUNT(DISTINCT user_id) AS distinct_users + FROM {{ ref("stg_users") }} + )--qtt_users + + + + --2. On average, how many orders do we receive per hour? + , orders_per_hour AS ( + SELECT + date_trunc('HOUR', created_at) AS hour + , COUNT(DISTINCT order_id) AS order_count + FROM {{ ref("stg_orders") }} + GROUP BY date_trunc('HOUR', created_at) + )--orders_per_hour + + + , avg_orders_per_hour AS ( + SELECT + AVG(order_count) AS avg_order_per_hour + FROM orders_per_hour + )--avg_orders_per_hour + + + --3. On average, how long does an order take from being placed to being delivered? + , avg_delivery_time AS ( + SELECT + AVG(DATEDIFF(hour, created_at, delivered_at)) AS avg_hours_to_deliver + FROM {{ ref("stg_orders") }} + WHERE delivered_at IS NOT NULL + )--avg_delivery_time + + + + -- 4. How many users have only made one purchase? Two purchases? Three+ purchases?
+ , purchases_by_user AS ( + SELECT + user_id + , COUNT(DISTINCT order_id) AS total_purchases + FROM {{ ref("stg_orders") }} + GROUP BY user_id + )--purchases_by_user + + , purchase_counter AS ( + SELECT + SUM(CASE WHEN total_purchases = 1 THEN 1 ELSE 0 end) AS users_with_one_purchase + , SUM(CASE WHEN total_purchases = 2 THEN 1 ELSE 0 end) AS users_with_two_purchase + , SUM(CASE WHEN total_purchases >= 3 THEN 1 ELSE 0 end) AS users_with_three_purchase + FROM purchases_by_user + )--purchase_counter + + + + -- 5. On average, how many unique sessions do we have per hour? + ,sessions_per_hour AS ( + SELECT + DATE_TRUNC('HOUR', created_at) AS session_hour + , COUNT(DISTINCT session_id) AS session_count + FROM {{ ref("stg_events") }} + GROUP BY DATE_TRUNC('HOUR', created_at) + ) + + , avg_sessions_per_hour AS ( + SELECT + AVG(session_count) AS avg_session_per_hour + FROM sessions_per_hour + ) + + + +SELECT + qtt_users.distinct_users + , avg_orders_per_hour.avg_order_per_hour + , avg_delivery_time.avg_hours_to_deliver + , purchase_counter.users_with_one_purchase + , purchase_counter.users_with_two_purchase + , purchase_counter.users_with_three_purchase + , avg_sessions_per_hour.avg_session_per_hour +FROM qtt_users +CROSS JOIN + avg_orders_per_hour +CROSS JOIN + avg_delivery_time +CROSS JOIN + purchase_counter +CROSS JOIN + avg_sessions_per_hour \ No newline at end of file diff --git a/greenery/dbt_project.yml b/greenery/dbt_project.yml new file mode 100644 index 000000000..a22d6f6e3 --- /dev/null +++ b/greenery/dbt_project.yml @@ -0,0 +1,37 @@ + +# Name your project! Project names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: 'greenery' +version: '1.0.0' +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. +profile: 'greenery' + +# These configurations specify where dbt should look for different types of files.
+# The `model-paths` config, for example, states that models in this project can be +# found in the "models/" directory. You probably won't need to change these! +model-paths: ["models"] +analysis-paths: ["analyses"] +test-paths: ["tests"] +seed-paths: ["seeds"] +macro-paths: ["macros"] +snapshot-paths: ["snapshots"] + +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_packages" + + +# Configuring models +# Full documentation: https://docs.getdbt.com/docs/configuring-models + +# In this example config, we tell dbt to build all models in the example/ +# directory as views. These settings can be overridden in the individual model +# files using the `{{ config(...) }}` macro. +models: + greenery: + # Config indicated by + and applies to all files under models/example/ + example: + +materialized: view diff --git a/greenery/macros/.gitkeep b/greenery/macros/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/models/example/my_first_dbt_model.sql b/greenery/models/example/my_first_dbt_model.sql new file mode 100644 index 000000000..f31a12d94 --- /dev/null +++ b/greenery/models/example/my_first_dbt_model.sql @@ -0,0 +1,27 @@ + +/* + Welcome to your first dbt model! + Did you know that you can also configure models directly within SQL files? 
+ This will override configurations stated in dbt_project.yml + + Try changing "table" to "view" below +*/ + +{{ config(materialized='table') }} + +with source_data as ( + + select 1 as id + union all + select null as id + +) + +select * +from source_data + +/* + Uncomment the line below to remove records with null `id` values +*/ + +-- where id is not null diff --git a/greenery/models/example/my_second_dbt_model.sql b/greenery/models/example/my_second_dbt_model.sql new file mode 100644 index 000000000..c91f8793a --- /dev/null +++ b/greenery/models/example/my_second_dbt_model.sql @@ -0,0 +1,6 @@ + +-- Use the `ref` function to select from other models + +select * +from {{ ref('my_first_dbt_model') }} +where id = 1 diff --git a/greenery/models/example/schema.yml b/greenery/models/example/schema.yml new file mode 100644 index 000000000..2a5308171 --- /dev/null +++ b/greenery/models/example/schema.yml @@ -0,0 +1,21 @@ + +version: 2 + +models: + - name: my_first_dbt_model + description: "A starter dbt model" + columns: + - name: id + description: "The primary key for this table" + tests: + - unique + - not_null + + - name: my_second_dbt_model + description: "A starter dbt model" + columns: + - name: id + description: "The primary key for this table" + tests: + - unique + - not_null diff --git a/greenery/models/staging/postgres/_postgres__models.yaml b/greenery/models/staging/postgres/_postgres__models.yaml new file mode 100644 index 000000000..f0b48b561 --- /dev/null +++ b/greenery/models/staging/postgres/_postgres__models.yaml @@ -0,0 +1,118 @@ +version: 2 + +models: + - name: stg_addresses + description: Staging model for the addresses table + columns: + - name: address_id + description: UUID for each address + - name: address + description: address + - name: zipcode + description: The zipcode of the address + - name: state + description: State/Province + - name: country + description: Country + + - name: stg_events + description: Staging model for events table + 
columns: + - name: event_id + description: UUID for each event + - name: session_id + description: UUID of each browsing session + - name: user_id + description: UUID of the user who performed the event + - name: page_url + description: URL page of the event + - name: created_at + description: Timestamp for the event + - name: event_type + description: Type of event (add_to_cart, checkout, package_shipped, page_view) + - name: order_id + description: UUID of the order (only for checkout and package_shipped events) + - name: product_id + description: UUID of the product for product-specific events (only for checkout and page_view events) + + - name: stg_order_items + description: Staging model for the order items + columns: + - name: order_id + description: UUID of the order + - name: product_id + description: UUID of the product(s) in the order + - name: quantity + description: Number of units of the product in the order + + - name: stg_orders + description: Staging model for orders + columns: + - name: order_id + description: UUID for each order + - name: user_id + description: UUID of the user who placed the order + - name: promo_id + description: UUID of the promo code used in the order (not required) + - name: address_id + description: Delivery address ID for the order + - name: created_at + description: Timestamp for the order + - name: order_cost + description: Dollar amount of the order + - name: shipping_cost + description: Cost of shipping for the order + - name: order_total + description: Total cost of the order (includes shipping) + - name: tracking_id + description: Tracking number of the order + - name: shipping_service + description: Company used for shipping + - name: estimated_delivery_at + description: Estimated delivery date + - name: delivered_at + description: Actual delivery date + - name: status + description: Status of the order + + - name: stg_products + description: Staging model for products + columns: + - name: product_id + description: 
UUID for each product + - name: name + description: Name of the product + - name: price + description: Price of the product in dollars + - name: inventory + description: Inventory for each product + + - name: stg_promos + description: Staging model for promos + columns: + - name: promo_id + description: Every promocode generated + - name: discount + description: Dollar amount given off by using the code + - name: status + description: Status of the promo code (active or inactive) + + - name: stg_users + description: Staging model for users + columns: + - name: user_id + description: UUID for each user + - name: first_name + description: First name of the user + - name: last_name + description: Last name of the user + - name: email + description: Email address of the user + - name: phone_number + description: Phone number of the user + - name: created_at + description: Timestamp when the user was created + - name: updated_at + description: Timestamp when the user was last updated + - name: address_id + description: Default delivery address for the user \ No newline at end of file diff --git a/greenery/models/staging/postgres/_postgres__sources.yaml b/greenery/models/staging/postgres/_postgres__sources.yaml new file mode 100644 index 000000000..3b07c35f3 --- /dev/null +++ b/greenery/models/staging/postgres/_postgres__sources.yaml @@ -0,0 +1,14 @@ +version: 2 + +sources: + - name: postgres + database: RAW + schema: PUBLIC + tables: + - name: addresses + - name: users + - name: promos + - name: products + - name: orders + - name: order_items + - name: events \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_addresses.sql b/greenery/models/staging/postgres/stg_addresses.sql new file mode 100644 index 000000000..6702cf9f3 --- /dev/null +++ b/greenery/models/staging/postgres/stg_addresses.sql @@ -0,0 +1,7 @@ +SELECT + address_id + ,address + ,zipcode + ,state + ,country +FROM {{ source("postgres", "addresses") }} \ No newline at end of file 
diff --git a/greenery/models/staging/postgres/stg_events.sql b/greenery/models/staging/postgres/stg_events.sql new file mode 100644 index 000000000..5d7bae2a0 --- /dev/null +++ b/greenery/models/staging/postgres/stg_events.sql @@ -0,0 +1,10 @@ +SELECT + event_id + ,session_id + ,user_id + ,event_type + ,page_url + ,created_at + ,order_id + ,product_id +FROM {{ source("postgres", "events") }} \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_order_items.sql b/greenery/models/staging/postgres/stg_order_items.sql new file mode 100644 index 000000000..cd380e7c6 --- /dev/null +++ b/greenery/models/staging/postgres/stg_order_items.sql @@ -0,0 +1,5 @@ +SELECT + order_id + ,product_id + ,quantity +FROM {{ source("postgres", "order_items") }} \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_orders.sql b/greenery/models/staging/postgres/stg_orders.sql new file mode 100644 index 000000000..9e1b96373 --- /dev/null +++ b/greenery/models/staging/postgres/stg_orders.sql @@ -0,0 +1,15 @@ +SELECT + order_id + ,promo_id + ,user_id + ,address_id + ,created_at + ,order_cost + ,shipping_cost + ,order_total + ,tracking_id + ,shipping_service + ,estimated_delivery_at + ,delivered_at + ,status +FROM {{ source("postgres", "orders") }} \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_products.sql b/greenery/models/staging/postgres/stg_products.sql new file mode 100644 index 000000000..508187349 --- /dev/null +++ b/greenery/models/staging/postgres/stg_products.sql @@ -0,0 +1,6 @@ +SELECT + product_id + ,name + ,price + ,inventory +FROM {{ source("postgres", "products") }} \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_promos.sql b/greenery/models/staging/postgres/stg_promos.sql new file mode 100644 index 000000000..3b953d26d --- /dev/null +++ b/greenery/models/staging/postgres/stg_promos.sql @@ -0,0 +1,5 @@ +SELECT + promo_id + ,discount + ,status +FROM {{ source("postgres", 
"promos") }} \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_users.sql b/greenery/models/staging/postgres/stg_users.sql new file mode 100644 index 000000000..e7af02ba4 --- /dev/null +++ b/greenery/models/staging/postgres/stg_users.sql @@ -0,0 +1,10 @@ +SELECT + user_id + ,first_name + ,last_name + ,email + ,phone_number + ,created_at + ,updated_at + ,address_id +FROM {{ source("postgres", "users") }} \ No newline at end of file diff --git a/greenery/seeds/.gitkeep b/greenery/seeds/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/snapshots/.gitkeep b/greenery/snapshots/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/snapshots/products_snapshot.sql b/greenery/snapshots/products_snapshot.sql new file mode 100644 index 000000000..30749c9c8 --- /dev/null +++ b/greenery/snapshots/products_snapshot.sql @@ -0,0 +1,20 @@ +{% snapshot products_snapshot %} + + {{ + config( + target_database = target.database, + target_schema = target.schema, + strategy='check', + unique_key='product_id', + check_cols=['inventory'], + ) + }} + + SELECT + product_id + ,name + ,price + ,inventory + FROM {{ source("postgres", "products") }} + +{% endsnapshot %} \ No newline at end of file diff --git a/greenery/tests/.gitkeep b/greenery/tests/.gitkeep new file mode 100644 index 000000000..e69de29bb From d60e020db0bc8b9b201830e9a7eedbe4e77ec4b5 Mon Sep 17 00:00:00 2001 From: Felipe Petribu Date: Sat, 16 Sep 2023 01:27:27 +0000 Subject: [PATCH 2/9] corrections to naming convetions and properly setting up the snapshot --- greenery/README.md | 6 ++--- .../models/example/my_first_dbt_model.sql | 27 ------------------- .../models/example/my_second_dbt_model.sql | 6 ----- greenery/models/example/schema.yml | 21 --------------- ...res__models.yaml => _postgres__models.yml} | 0 ...s__sources.yaml => _postgres__sources.yml} | 0 ...resses.sql => stg_postgres__addresses.sql} | 0 ...tg_events.sql => 
stg_postgres__events.sql} | 0 ...tems.sql => stg_postgres__order_items.sql} | 0 ...tg_orders.sql => stg_postgres__orders.sql} | 0 ...roducts.sql => stg_postgres__products.sql} | 0 ...tg_promos.sql => stg_postgres__promos.sql} | 0 ...{stg_users.sql => stg_postgres__users.sql} | 0 ...ts_snapshot.sql => snapshot__products.sql} | 4 +-- 14 files changed, 5 insertions(+), 59 deletions(-) delete mode 100644 greenery/models/example/my_first_dbt_model.sql delete mode 100644 greenery/models/example/my_second_dbt_model.sql delete mode 100644 greenery/models/example/schema.yml rename greenery/models/staging/postgres/{_postgres__models.yaml => _postgres__models.yml} (100%) rename greenery/models/staging/postgres/{_postgres__sources.yaml => _postgres__sources.yml} (100%) rename greenery/models/staging/postgres/{stg_addresses.sql => stg_postgres__addresses.sql} (100%) rename greenery/models/staging/postgres/{stg_events.sql => stg_postgres__events.sql} (100%) rename greenery/models/staging/postgres/{stg_order_items.sql => stg_postgres__order_items.sql} (100%) rename greenery/models/staging/postgres/{stg_orders.sql => stg_postgres__orders.sql} (100%) rename greenery/models/staging/postgres/{stg_products.sql => stg_postgres__products.sql} (100%) rename greenery/models/staging/postgres/{stg_promos.sql => stg_postgres__promos.sql} (100%) rename greenery/models/staging/postgres/{stg_users.sql => stg_postgres__users.sql} (100%) rename greenery/snapshots/{products_snapshot.sql => snapshot__products.sql} (76%) diff --git a/greenery/README.md b/greenery/README.md index 4682927ea..b87ca17ef 100644 --- a/greenery/README.md +++ b/greenery/README.md @@ -13,9 +13,9 @@ | Metric | Value | |---------------------------------|------------| | Total Users | 130 | -| Average Orders per Hour | 7.520 | -| Average Hours to Deliver | 93.403 | +| Average Orders per Hour | 7.520 | +| Average Hours to Deliver | 93.403 | | Users with One Purchase | 25 | | Users with Two Purchases | 28 | | Users with Three+ 
Purchases | 71 | -| Average Sessions per Hour | 16.327 | \ No newline at end of file +| Average Sessions per Hour | 16.327 | \ No newline at end of file diff --git a/greenery/models/example/my_first_dbt_model.sql b/greenery/models/example/my_first_dbt_model.sql deleted file mode 100644 index f31a12d94..000000000 --- a/greenery/models/example/my_first_dbt_model.sql +++ /dev/null @@ -1,27 +0,0 @@ - -/* - Welcome to your first dbt model! - Did you know that you can also configure models directly within SQL files? - This will override configurations stated in dbt_project.yml - - Try changing "table" to "view" below -*/ - -{{ config(materialized='table') }} - -with source_data as ( - - select 1 as id - union all - select null as id - -) - -select * -from source_data - -/* - Uncomment the line below to remove records with null `id` values -*/ - --- where id is not null diff --git a/greenery/models/example/my_second_dbt_model.sql b/greenery/models/example/my_second_dbt_model.sql deleted file mode 100644 index c91f8793a..000000000 --- a/greenery/models/example/my_second_dbt_model.sql +++ /dev/null @@ -1,6 +0,0 @@ - --- Use the `ref` function to select from other models - -select * -from {{ ref('my_first_dbt_model') }} -where id = 1 diff --git a/greenery/models/example/schema.yml b/greenery/models/example/schema.yml deleted file mode 100644 index 2a5308171..000000000 --- a/greenery/models/example/schema.yml +++ /dev/null @@ -1,21 +0,0 @@ - -version: 2 - -models: - - name: my_first_dbt_model - description: "A starter dbt model" - columns: - - name: id - description: "The primary key for this table" - tests: - - unique - - not_null - - - name: my_second_dbt_model - description: "A starter dbt model" - columns: - - name: id - description: "The primary key for this table" - tests: - - unique - - not_null diff --git a/greenery/models/staging/postgres/_postgres__models.yaml b/greenery/models/staging/postgres/_postgres__models.yml similarity index 100% rename from 
greenery/models/staging/postgres/_postgres__models.yaml rename to greenery/models/staging/postgres/_postgres__models.yml diff --git a/greenery/models/staging/postgres/_postgres__sources.yaml b/greenery/models/staging/postgres/_postgres__sources.yml similarity index 100% rename from greenery/models/staging/postgres/_postgres__sources.yaml rename to greenery/models/staging/postgres/_postgres__sources.yml diff --git a/greenery/models/staging/postgres/stg_addresses.sql b/greenery/models/staging/postgres/stg_postgres__addresses.sql similarity index 100% rename from greenery/models/staging/postgres/stg_addresses.sql rename to greenery/models/staging/postgres/stg_postgres__addresses.sql diff --git a/greenery/models/staging/postgres/stg_events.sql b/greenery/models/staging/postgres/stg_postgres__events.sql similarity index 100% rename from greenery/models/staging/postgres/stg_events.sql rename to greenery/models/staging/postgres/stg_postgres__events.sql diff --git a/greenery/models/staging/postgres/stg_order_items.sql b/greenery/models/staging/postgres/stg_postgres__order_items.sql similarity index 100% rename from greenery/models/staging/postgres/stg_order_items.sql rename to greenery/models/staging/postgres/stg_postgres__order_items.sql diff --git a/greenery/models/staging/postgres/stg_orders.sql b/greenery/models/staging/postgres/stg_postgres__orders.sql similarity index 100% rename from greenery/models/staging/postgres/stg_orders.sql rename to greenery/models/staging/postgres/stg_postgres__orders.sql diff --git a/greenery/models/staging/postgres/stg_products.sql b/greenery/models/staging/postgres/stg_postgres__products.sql similarity index 100% rename from greenery/models/staging/postgres/stg_products.sql rename to greenery/models/staging/postgres/stg_postgres__products.sql diff --git a/greenery/models/staging/postgres/stg_promos.sql b/greenery/models/staging/postgres/stg_postgres__promos.sql similarity index 100% rename from 
greenery/models/staging/postgres/stg_promos.sql rename to greenery/models/staging/postgres/stg_postgres__promos.sql diff --git a/greenery/models/staging/postgres/stg_users.sql b/greenery/models/staging/postgres/stg_postgres__users.sql similarity index 100% rename from greenery/models/staging/postgres/stg_users.sql rename to greenery/models/staging/postgres/stg_postgres__users.sql diff --git a/greenery/snapshots/products_snapshot.sql b/greenery/snapshots/snapshot__products.sql similarity index 76% rename from greenery/snapshots/products_snapshot.sql rename to greenery/snapshots/snapshot__products.sql index 30749c9c8..d04f1cd65 100644 --- a/greenery/snapshots/products_snapshot.sql +++ b/greenery/snapshots/snapshot__products.sql @@ -2,8 +2,8 @@ {{ config( - target_database = target.database, - target_schema = target.schema, + target_database = "DEV_DB", + target_schema = "DBT_FPETRIBUFUNDTHROUGHCOM", strategy='check', unique_key='product_id', check_cols=['inventory'], From bd3a630eeae4455fd77b877fc1d81acbddd2ab00 Mon Sep 17 00:00:00 2001 From: Felipe Petribu Date: Sat, 16 Sep 2023 01:33:14 +0000 Subject: [PATCH 3/9] correcting models yml files names --- .../postgres/{_postgres__models.yml => stg_postgres__models.yml} | 0 .../{_postgres__sources.yml => stg_postgres__sources.yml} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename greenery/models/staging/postgres/{_postgres__models.yml => stg_postgres__models.yml} (100%) rename greenery/models/staging/postgres/{_postgres__sources.yml => stg_postgres__sources.yml} (100%) diff --git a/greenery/models/staging/postgres/_postgres__models.yml b/greenery/models/staging/postgres/stg_postgres__models.yml similarity index 100% rename from greenery/models/staging/postgres/_postgres__models.yml rename to greenery/models/staging/postgres/stg_postgres__models.yml diff --git a/greenery/models/staging/postgres/_postgres__sources.yml b/greenery/models/staging/postgres/stg_postgres__sources.yml similarity index 100% rename from 
greenery/models/staging/postgres/_postgres__sources.yml rename to greenery/models/staging/postgres/stg_postgres__sources.yml From 50107ac745dbe1553c5bd93b6908a92743f41f59 Mon Sep 17 00:00:00 2001 From: Felipe Petribu Date: Sat, 16 Sep 2023 01:42:39 +0000 Subject: [PATCH 4/9] correcting analysis file and stg_postgres__models.yml file --- greenery/analyses/week_1_project.sql | 10 +++++----- .../staging/postgres/stg_postgres__models.yml | 14 +++++++------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/greenery/analyses/week_1_project.sql b/greenery/analyses/week_1_project.sql index a2751d18f..1893576fb 100644 --- a/greenery/analyses/week_1_project.sql +++ b/greenery/analyses/week_1_project.sql @@ -3,7 +3,7 @@ WITH qtt_users AS ( SELECT COUNT(DISTINCT user_id) AS distinct_users - FROM {{ ref("stg_users") }} + FROM {{ ref("stg_postgres_users") }} )--qtt_users @@ -13,7 +13,7 @@ WITH SELECT date_trunc('HOUR', created_at) AS hour , COUNT(DISTINCT order_id) AS order_count - FROM {{ ref("stg_orders") }} + FROM {{ ref("stg_postgres_orders") }} GROUP BY date_trunc('HOUR', created_at) )--orders_per_hour @@ -29,7 +29,7 @@ WITH , avg_delivery_time AS ( SELECT AVG(DATEDIFF(hour, created_at, delivered_at)) AS avg_hours_to_deliver - FROM {{ ref("stg_orders") }} + FROM {{ ref("stg_postgres_orders") }} WHERE delivered_at IS NOT NULL )--avg_delivery_time @@ -40,7 +40,7 @@ WITH SELECT user_id , COUNT(DISTINCT order_id) AS total_purchases - FROM {{ ref("stg_orders") }} + FROM {{ ref("stg_postgres_orders") }} GROUP BY user_id )--purchases_by_user @@ -59,7 +59,7 @@ WITH SELECT DATE_TRUNC('HOUR', created_at) AS session_hour , COUNT(DISTINCT session_id) AS session_count - FROM {{ ref("stg_events") }} + FROM {{ ref("stg_postgres_events") }} GROUP BY DATE_TRUNC('HOUR', created_at) ) diff --git a/greenery/models/staging/postgres/stg_postgres__models.yml b/greenery/models/staging/postgres/stg_postgres__models.yml index f0b48b561..9d7fa88db 100644 --- 
a/greenery/models/staging/postgres/stg_postgres__models.yml +++ b/greenery/models/staging/postgres/stg_postgres__models.yml @@ -1,7 +1,7 @@ version: 2 models: - - name: stg_addresses + - name: stg_postgres_addresses description: Staging model for the addresses table columns: - name: address_id @@ -15,7 +15,7 @@ models: - name: country description: Country - - name: stg_events + - name: stg_postgres_events description: Staging model for events table columns: - name: event_id @@ -35,7 +35,7 @@ models: - name: product_id description: UUID of the product for product-specific events (only for checkout and page_view events) - - name: stg_order_items + - name: stg_postgres_order_items description: Staging model for the order items columns: - name: order_id @@ -45,7 +45,7 @@ models: - name: quantity description: Number of units of the product in the order - - name: stg_orders + - name: stg_postgres_orders description: Staging model for orders columns: - name: order_id @@ -75,7 +75,7 @@ models: - name: status description: Status of the order - - name: stg_products + - name: stg_postgres_products description: Staging model for products columns: - name: product_id @@ -87,7 +87,7 @@ models: - name: inventory description: Inventory for each product - - name: stg_promos + - name: stg_postgres_promos description: Staging model for promos columns: - name: promo_id @@ -97,7 +97,7 @@ models: - name: status description: Status of the promo code (active or inactive) - - name: stg_users + - name: stg_postgres_users description: Staging model for users columns: - name: user_id From da4eca6c65d5f6134f9165ca505e570867725c00 Mon Sep 17 00:00:00 2001 From: Felipe Petribu Date: Sat, 16 Sep 2023 01:46:08 +0000 Subject: [PATCH 5/9] fixing stg_postgres__models file --- .../staging/postgres/stg_postgres__models.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/greenery/models/staging/postgres/stg_postgres__models.yml 
b/greenery/models/staging/postgres/stg_postgres__models.yml index 9d7fa88db..e31f6c291 100644 --- a/greenery/models/staging/postgres/stg_postgres__models.yml +++ b/greenery/models/staging/postgres/stg_postgres__models.yml @@ -1,7 +1,7 @@ version: 2 models: - - name: stg_postgres_addresses + - name: stg_postgres__addresses description: Staging model for the addresses table columns: - name: address_id @@ -15,7 +15,7 @@ models: - name: country description: Country - - name: stg_postgres_events + - name: stg_postgres__events description: Staging model for events table columns: - name: event_id @@ -35,7 +35,7 @@ models: - name: product_id description: UUID of the product for product-specific events (only for checkout and page_view events) - - name: stg_postgres_order_items + - name: stg_postgres__order_items description: Staging model for the order items columns: - name: order_id @@ -45,7 +45,7 @@ models: - name: quantity description: Number of units of the product in the order - - name: stg_postgres_orders + - name: stg_postgres__orders description: Staging model for orders columns: - name: order_id @@ -75,7 +75,7 @@ models: - name: status description: Status of the order - - name: stg_postgres_products + - name: stg_postgres__products description: Staging model for products columns: - name: product_id @@ -87,7 +87,7 @@ models: - name: inventory description: Inventory for each product - - name: stg_postgres_promos + - name: stg_postgres__promos description: Staging model for promos columns: - name: promo_id @@ -97,7 +97,7 @@ models: - name: status description: Status of the promo code (active or inactive) - - name: stg_postgres_users + - name: stg_postgres__users description: Staging model for users columns: - name: user_id From 6acdc83491d048adc3866f4e483f7c42045f73ec Mon Sep 17 00:00:00 2001 From: Felipe Petribu Date: Sat, 16 Sep 2023 01:47:08 +0000 Subject: [PATCH 6/9] final fix version 687 --- greenery/analyses/week_1_project.sql | 10 +++++----- 1 file changed, 5 
insertions(+), 5 deletions(-) diff --git a/greenery/analyses/week_1_project.sql b/greenery/analyses/week_1_project.sql index 1893576fb..c548532d3 100644 --- a/greenery/analyses/week_1_project.sql +++ b/greenery/analyses/week_1_project.sql @@ -3,7 +3,7 @@ WITH qtt_users AS ( SELECT COUNT(DISTINCT user_id) AS distinct_users - FROM {{ ref("stg_postgres_users") }} + FROM {{ ref("stg_postgres__users") }} )--qtt_users @@ -13,7 +13,7 @@ WITH SELECT date_trunc('HOUR', created_at) AS hour , COUNT(DISTINCT order_id) AS order_count - FROM {{ ref("stg_postgres_orders") }} + FROM {{ ref("stg_postgres__orders") }} GROUP BY date_trunc('HOUR', created_at) )--orders_per_hour @@ -29,7 +29,7 @@ WITH , avg_delivery_time AS ( SELECT AVG(DATEDIFF(hour, created_at, delivered_at)) AS avg_hours_to_deliver - FROM {{ ref("stg_postgres_orders") }} + FROM {{ ref("stg_postgres__orders") }} WHERE delivered_at IS NOT NULL )--avg_delivery_time @@ -40,7 +40,7 @@ WITH SELECT user_id , COUNT(DISTINCT order_id) AS total_purchases - FROM {{ ref("stg_postgres_orders") }} + FROM {{ ref("stg_postgres__orders") }} GROUP BY user_id )--purchases_by_user @@ -59,7 +59,7 @@ WITH SELECT DATE_TRUNC('HOUR', created_at) AS session_hour , COUNT(DISTINCT session_id) AS session_count - FROM {{ ref("stg_postgres_events") }} + FROM {{ ref("stg_postgres__events") }} GROUP BY DATE_TRUNC('HOUR', created_at) ) From a95d193b56bfae305f4abfcd845e4d1f96d2c6e9 Mon Sep 17 00:00:00 2001 From: Felipe Petribu Date: Mon, 18 Sep 2023 02:00:21 +0000 Subject: [PATCH 7/9] setting naming convention --- .../postgres/{stg_postgres__models.yml => _postgres__models.yml} | 0 .../{stg_postgres__sources.yml => _postgres__sources.yml} | 0 .../snapshots/{snapshot__products.sql => products__inventory.sql} | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename greenery/models/staging/postgres/{stg_postgres__models.yml => _postgres__models.yml} (100%) rename greenery/models/staging/postgres/{stg_postgres__sources.yml => _postgres__sources.yml} 
(100%) rename greenery/snapshots/{snapshot__products.sql => products__inventory.sql} (100%) diff --git a/greenery/models/staging/postgres/stg_postgres__models.yml b/greenery/models/staging/postgres/_postgres__models.yml similarity index 100% rename from greenery/models/staging/postgres/stg_postgres__models.yml rename to greenery/models/staging/postgres/_postgres__models.yml diff --git a/greenery/models/staging/postgres/stg_postgres__sources.yml b/greenery/models/staging/postgres/_postgres__sources.yml similarity index 100% rename from greenery/models/staging/postgres/stg_postgres__sources.yml rename to greenery/models/staging/postgres/_postgres__sources.yml diff --git a/greenery/snapshots/snapshot__products.sql b/greenery/snapshots/products__inventory.sql similarity index 100% rename from greenery/snapshots/snapshot__products.sql rename to greenery/snapshots/products__inventory.sql From b62b2258ba875e80acb5ae76dbb21a139b59b7f2 Mon Sep 17 00:00:00 2001 From: Felipe Petribu Date: Sun, 24 Sep 2023 05:05:28 +0000 Subject: [PATCH 8/9] Added marts, tests and docs (tests n failing) --- greenery/README.md | 33 +++- greenery/analyses/week_2_project.sql | 25 +++ greenery/dbt_project.yml | 2 + greenery/models/marts/core/_core__models.yml | 38 +++++ greenery/models/marts/core/dim_products.sql | 34 ++++ greenery/models/marts/core/dim_users.sql | 10 ++ greenery/models/marts/core/fact_orders.sql | 45 +++++ greenery/models/marts/core/int_products.sql | 63 +++++++ .../marts/marketing/_marketing_models.yml | 44 +++++ .../marts/marketing/fact_user_orders.sql | 61 +++++++ .../models/marts/product/_product_models.yml | 47 +++++ .../models/marts/product/fact_page_views.sql | 91 ++++++++++ .../marts/product/int_quantity_by_user.sql | 20 +++ .../staging/postgres/_postgres__models.yml | 160 +++++++++++++++++- .../staging/postgres/_postgres__sources.yml | 32 +++- greenery/packages.yml | 3 + greenery/snapshots/products__inventory.sql | 18 +- 17 files changed, 710 insertions(+), 16 
deletions(-) create mode 100644 greenery/analyses/week_2_project.sql create mode 100644 greenery/models/marts/core/_core__models.yml create mode 100644 greenery/models/marts/core/dim_products.sql create mode 100644 greenery/models/marts/core/dim_users.sql create mode 100644 greenery/models/marts/core/fact_orders.sql create mode 100644 greenery/models/marts/core/int_products.sql create mode 100644 greenery/models/marts/marketing/_marketing_models.yml create mode 100644 greenery/models/marts/marketing/fact_user_orders.sql create mode 100644 greenery/models/marts/product/_product_models.yml create mode 100644 greenery/models/marts/product/fact_page_views.sql create mode 100644 greenery/models/marts/product/int_quantity_by_user.sql create mode 100644 greenery/packages.yml diff --git a/greenery/README.md b/greenery/README.md index b87ca17ef..e4de3ccd5 100644 --- a/greenery/README.md +++ b/greenery/README.md @@ -18,4 +18,35 @@ | Users with One Purchase | 25 | | Users with Two Purchases | 28 | | Users with Three+ Purchases | 71 | -| Average Sessions per Hour | 16.327 | \ No newline at end of file +| Average Sessions per Hour | 16.327 | + + +# Week 2 + +## Business objectives: + +- **What is our user repeat rate?**: The ratio of users who made two or more purchases over total users. 
+- **Define good indicators for potential repeat users** +- **Define good indicators for potential non-repeat users** + + +## Results: + +| Metric | Value | +|---------------------------------|------------| +| Repeat rate | 79.84% | + +- **Define good indicators for potential repeat users** + - **Number of items on previous order** + - **Usage of promo codes** + - **Number of sessions until purchase** + - **Average delivery time for the user** + - **Good Reviews on the products** + - **Good Reviews of the platform itself** + +- **Define good indicators for potential non-repeat users** + - **Type of product ordered (one-type purchases)** + - **Email spam rate** + - **Average delivery time for the user** + - **Bad Reviews on the products** + - **Bad Reviews of the platform itself** \ No newline at end of file diff --git a/greenery/analyses/week_2_project.sql b/greenery/analyses/week_2_project.sql new file mode 100644 index 000000000..ebc295bc4 --- /dev/null +++ b/greenery/analyses/week_2_project.sql @@ -0,0 +1,25 @@ +with +purchases_per_user AS ( + SELECT + user_id + ,COUNT(DISTINCT order_id) AS purchase_counter + FROM {{ ref("stg_postgres__orders") }} + GROUP BY user_id +)--purchases_per_user + +, purchase_counter AS ( + SELECT + COUNT(user_id) AS users_who_purchased + , sum(CASE WHEN purchase_counter >= 2 + THEN 1 + ELSE 0 + END + ) AS users_who_purchased_twice_or_more + FROM purchases_per_user +)--purchase_counter + +SELECT + users_who_purchased, + users_who_purchased_twice_or_more, + users_who_purchased_twice_or_more / users_who_purchased as repeat_rate +FROM purchase_counter \ No newline at end of file diff --git a/greenery/dbt_project.yml b/greenery/dbt_project.yml index a22d6f6e3..52703a22f 100644 --- a/greenery/dbt_project.yml +++ b/greenery/dbt_project.yml @@ -35,3 +35,5 @@ models: # Config indicated by + and applies to all files under models/example/ example: +materialized: view + marts: + +materialized: table diff --git
a/greenery/models/marts/core/_core__models.yml b/greenery/models/marts/core/_core__models.yml
new file mode 100644
index 000000000..e405e681e
--- /dev/null
+++ b/greenery/models/marts/core/_core__models.yml
@@ -0,0 +1,38 @@
+version: 2
+
+models:
+  - name: dim_products
+    columns:
+      - name: product_id
+        description: Unique UUID for each product
+        tests:
+          - unique
+          - not_null
+          - relationships:
+              to: ref('stg_postgres__products')
+              field: product_id
+
+  - name: dim_users
+    columns:
+      - name: user_id
+        description: Unique UUID for each user
+        tests:
+          - unique
+          - not_null
+          - relationships:
+              to: ref('stg_postgres__users')
+              field: user_id
+
+  - name: fact_orders
+    columns:
+      - name: order_id
+        description: Unique UUID for each order
+        tests:
+          - unique
+          - not_null
+      - name: user_id
+        description: Unique UUID for each user
+        tests:
+          - relationships:
+              to: ref('stg_postgres__users')
+              field: user_id
\ No newline at end of file
diff --git a/greenery/models/marts/core/dim_products.sql b/greenery/models/marts/core/dim_products.sql
new file mode 100644
index 000000000..c726c97d6
--- /dev/null
+++ b/greenery/models/marts/core/dim_products.sql
@@ -0,0 +1,34 @@
+WITH
+products AS ( -- product attributes read from the staging products model
+SELECT
+    product_id
+    ,name
+    ,price
+    ,inventory
+FROM {{ ref("stg_postgres__products") }}
+)--products
+
+, ordered_products AS ( -- per-product order metrics computed in int_products
+SELECT
+    product_id
+    ,num_orders
+    ,quantity_shipped
+    ,quantity_delivered
+    ,quantity_preparing
+    ,total_quantity
+FROM {{ ref("int_products") }}
+)--ordered_products
+
+SELECT
+    products.product_id
+    ,products.name
+    ,products.price
+    ,products.inventory
+    ,ordered_products.num_orders -- metric columns must use the CTE name declared above
+    ,ordered_products.quantity_shipped
+    ,ordered_products.quantity_delivered
+    ,ordered_products.quantity_preparing
+    ,ordered_products.total_quantity
+
+FROM products
+LEFT JOIN ordered_products USING (product_id) -- LEFT JOIN keeps products without a row in int_products (metrics stay NULL)
\ No newline at end of file
diff --git a/greenery/models/marts/core/dim_users.sql
b/greenery/models/marts/core/dim_users.sql
new file mode 100644
index 000000000..d6b30239f
--- /dev/null
+++ b/greenery/models/marts/core/dim_users.sql
@@ -0,0 +1,10 @@
+SELECT -- straight column pass-through of the staging users model
+    user_id,
+    first_name,
+    last_name,
+    email,
+    phone_number,
+    created_at,
+    updated_at,
+    address_id
+FROM {{ ref("stg_postgres__users") }}
\ No newline at end of file
diff --git a/greenery/models/marts/core/fact_orders.sql b/greenery/models/marts/core/fact_orders.sql
new file mode 100644
index 000000000..e4f7bdcf9
--- /dev/null
+++ b/greenery/models/marts/core/fact_orders.sql
@@ -0,0 +1,45 @@
+WITH
+orders AS ( -- order columns read from staging
+    SELECT
+        order_id,
+        promo_id,
+        user_id,
+        address_id,
+        created_at,
+        order_cost,
+        shipping_cost,
+        order_total,
+        tracking_id,
+        shipping_service,
+        estimated_delivery_at,
+        delivered_at,
+        status
+    FROM {{ ref("stg_postgres__orders") }}
+),
+
+promos AS ( -- promo lookup used to attach the discount to each order
+    SELECT
+        promo_id,
+        discount,
+        status
+    FROM {{ ref("stg_postgres__promos") }}
+)
+
+SELECT
+    orders.order_id,
+    orders.promo_id,
+    orders.user_id,
+    orders.address_id,
+    orders.created_at,
+    orders.order_cost,
+    orders.shipping_cost,
+    orders.order_total,
+    orders.tracking_id,
+    orders.shipping_service,
+    orders.estimated_delivery_at,
+    orders.delivered_at,
+    orders.status,
+    promos.discount AS promo_discount
+FROM orders
+LEFT JOIN promos -- orders with no matching promo keep a NULL promo_discount
+    ON orders.promo_id = promos.promo_id
\ No newline at end of file
diff --git a/greenery/models/marts/core/int_products.sql b/greenery/models/marts/core/int_products.sql
new file mode 100644
index 000000000..c88f6f6ba
--- /dev/null
+++ b/greenery/models/marts/core/int_products.sql
@@ -0,0 +1,63 @@
+WITH
+orders AS (
+SELECT
+    order_id
+    ,promo_id
+    ,user_id
+    ,address_id
+    ,created_at
+    ,order_cost
+    ,shipping_cost
+    ,order_total
+    ,tracking_id
+    ,shipping_service
+    ,estimated_delivery_at
+    ,delivered_at
+    ,status
+FROM {{ ref("stg_postgres__orders") }}
+)--orders
+
+
+, orders_items AS (
+SELECT
+    order_id
+    ,product_id
+    ,quantity
+FROM {{
ref("stg_postgres__order_items") }}
+)--orders_items
+
+
+, ordered_products_summary AS ( -- one row per product_id with quantities split by order status
+SELECT
+    orders_items.product_id
+    ,COUNT(orders_items.product_id) AS num_orders -- counts order-item rows for the product
+    ,SUM(CASE WHEN orders.status = 'shipped'
+            THEN orders_items.quantity
+            ELSE 0
+         END
+        ) AS quantity_shipped
+    ,SUM(CASE WHEN orders.status = 'delivered'
+            THEN orders_items.quantity
+            ELSE 0
+         END
+        ) AS quantity_delivered
+    ,SUM(CASE WHEN orders.status = 'preparing'
+            THEN orders_items.quantity
+            ELSE 0
+         END
+        ) AS quantity_preparing
+    ,SUM(orders_items.quantity) AS total_quantity
+FROM orders_items
+LEFT JOIN orders -- the order row supplies the status used by the CASE buckets above
+    ON orders_items.order_id = orders.order_id
+GROUP BY orders_items.product_id
+)--ordered_products_summary
+
+SELECT
+    product_id
+    ,num_orders
+    ,quantity_shipped
+    ,quantity_delivered
+    ,quantity_preparing
+    ,total_quantity
+FROM ordered_products_summary
\ No newline at end of file
diff --git a/greenery/models/marts/marketing/_marketing_models.yml b/greenery/models/marts/marketing/_marketing_models.yml
new file mode 100644
index 000000000..da3c4e414
--- /dev/null
+++ b/greenery/models/marts/marketing/_marketing_models.yml
@@ -0,0 +1,44 @@
+version: 2
+
+models:
+
+  - name: fact_user_orders
+    columns:
+      - name: user_id
+        description: Unique UUID for each user
+        tests:
+          - unique
+          - not_null
+          - relationships:
+              to: ref('stg_postgres__users')
+              field: user_id
+      - name: first_order_date
+        description: Date of first order for the user
+      - name: last_order_date
+        description: Date of last order for the user
+      - name: total_spent
+        description: Sum of order totals for the user, rounded to 2 decimals
+        tests:
+          - dbt_utils.accepted_range:
+              min_value: 0
+      - name: num_orders
+        description: Number of orders made
+        tests:
+          - dbt_utils.accepted_range:
+              min_value: 0
+      - name: num_shipped
+        description: Number of orders shipped
+        tests:
+          - dbt_utils.accepted_range:
+              min_value: 0
+      - name: num_delivered
+        description: Number of orders delivered
+        tests:
+          - dbt_utils.accepted_range:
+              min_value: 0
+      - name: num_preparing
+
description: Number of orders preparing
+        tests:
+          - dbt_utils.accepted_range:
+              min_value: 0
+
diff --git a/greenery/models/marts/marketing/fact_user_orders.sql b/greenery/models/marts/marketing/fact_user_orders.sql
new file mode 100644
index 000000000..3228a5b2b
--- /dev/null
+++ b/greenery/models/marts/marketing/fact_user_orders.sql
@@ -0,0 +1,61 @@
+WITH
+orders AS (
+
+SELECT
+    order_id
+    ,promo_id
+    ,user_id
+    ,address_id
+    ,created_at
+    ,order_cost
+    ,shipping_cost
+    ,order_total
+    ,tracking_id
+    ,shipping_service
+    ,estimated_delivery_at
+    ,delivered_at
+    ,status
+FROM {{ ref("stg_postgres__orders") }}
+
+)--orders
+
+, user_order_summary AS ( -- one row per user_id with order dates, spend and status counts
+
+SELECT
+    user_id
+    ,MIN(created_at)::date AS first_order_date
+    ,MAX(created_at)::date AS last_order_date
+    ,ROUND(SUM(order_total), 2) AS total_spent
+    ,COUNT(order_id) as num_orders
+    ,SUM(CASE WHEN status = 'shipped'
+            THEN 1
+            ELSE 0
+         END
+        ) AS num_shipped
+    ,SUM(CASE WHEN status = 'delivered'
+            THEN 1
+            ELSE 0
+         END
+        ) AS num_delivered
+    ,SUM(CASE WHEN status = 'preparing'
+            THEN 1
+            ELSE 0
+         END
+        ) AS num_preparing
+
+FROM orders
+GROUP BY user_id
+)--user_order_summary
+
+SELECT
+
+    user_id
+    ,first_order_date
+    ,last_order_date
+    ,total_spent
+    ,num_orders
+    ,num_shipped
+    ,num_delivered
+    ,num_preparing
+
+FROM user_order_summary
\ No newline at end of file
diff --git a/greenery/models/marts/product/_product_models.yml b/greenery/models/marts/product/_product_models.yml
new file mode 100644
index 000000000..fb18a97ba
--- /dev/null
+++ b/greenery/models/marts/product/_product_models.yml
@@ -0,0 +1,47 @@
+version: 2
+
+models:
+
+  - name: fact_page_views
+    columns:
+      - name: event_id
+        description: Unique UUID for each event
+        tests:
+          - unique
+          - not_null
+      - name: session_id
+        description: UUID of the browsing session the page view belongs to
+        tests:
+          # no unique test: the model is one row per page_view event, so a session_id can repeat
+          - not_null
+      - name: user_id
+        description: Unique UUID for each user
+        tests:
+          - relationships:
+              to: ref('stg_postgres__users')
+              field: user_id
+      - name:
product_id
+        description: UUID of the product viewed
+        tests:
+          - relationships:
+              to: ref('stg_postgres__products')
+              field: product_id
+      - name: page_url
+        description: URL where the session and events happened
+        tests:
+          - not_null
+      - name: created_date
+        description: date for the event
+        tests:
+          - not_null
+      - name: product_id
+        description: UUID for the product (if any) added to cart during the session
+      - name: product_name
+        description: Name for the product (if any) added to cart during the session
+      - name: added_to_cart_during_session
+        description: Flag to spot if product was added to cart during the session
+      - name: added_to_cart_by_user
+        description: Flag to spot if product was added to cart by user
+      - name: quantity_purchased_by_user
+        description: Number of products purchased by user
+
diff --git a/greenery/models/marts/product/fact_page_views.sql b/greenery/models/marts/product/fact_page_views.sql
new file mode 100644
index 000000000..daf95ba85
--- /dev/null
+++ b/greenery/models/marts/product/fact_page_views.sql
@@ -0,0 +1,91 @@
+WITH
+page_views AS ( -- page_view events only
+SELECT
+
+    event_id
+    ,session_id
+    ,user_id
+    ,page_url
+    ,created_at::date AS created_date -- staging events expose created_at; NOTE(review): confirm date granularity is intended
+    ,product_id
+
+FROM {{ ref("stg_postgres__events") }}
+WHERE event_type = 'page_view'
+)--page_views
+
+
+, products AS (
+SELECT
+
+    product_id
+    ,name
+    ,price
+    ,inventory
+
+FROM {{ ref("stg_postgres__products") }}
+)--products
+
+
+, products_added_to_cart_during_session AS ( -- distinct (session, product) pairs with an add_to_cart event
+SELECT
+
+    DISTINCT session_id
+    ,product_id
+
+FROM {{ ref("stg_postgres__events") }}
+WHERE event_type = 'add_to_cart'
+)--products_added_to_cart_during_session
+
+
+, products_added_to_cart_by_user AS ( -- distinct (user, product) pairs with an add_to_cart event
+SELECT
+    DISTINCT user_id
+    ,product_id
+
+FROM {{ ref("stg_postgres__events") }}
+WHERE event_type = 'add_to_cart'
+)--products_added_to_cart_by_user
+
+
+, quantities_purchased_by_user AS (
+SELECT
+
+    user_id
+    ,product_id
+    ,quantity_purchased_by_user
+
+from {{ ref("int_quantity_by_user") }}
+)--quantities_purchased_by_user
+
+
+SELECT
+
+
page_views.event_id
+    ,page_views.session_id
+    ,page_views.user_id
+    ,page_views.page_url
+    ,page_views.created_date
+    ,page_views.product_id
+    ,products.name AS product_name -- the products CTE exposes name, not product_name
+    ,CASE WHEN products_added_to_cart_during_session.product_id IS NOT NULL
+        THEN 1
+        ELSE 0
+     END AS added_to_cart_during_session
+    ,CASE WHEN products_added_to_cart_by_user.product_id IS NOT NULL
+        THEN 1
+        ELSE 0
+     END AS added_to_cart_by_user
+    ,quantities_purchased_by_user.quantity_purchased_by_user
+
+FROM page_views
+LEFT JOIN products
+    ON page_views.product_id = products.product_id
+LEFT JOIN products_added_to_cart_during_session
+    ON page_views.session_id = products_added_to_cart_during_session.session_id
+    AND page_views.product_id = products_added_to_cart_during_session.product_id
+LEFT JOIN products_added_to_cart_by_user -- match on this CTE's own user_id/product_id columns
+    ON page_views.user_id = products_added_to_cart_by_user.user_id
+    AND page_views.product_id = products_added_to_cart_by_user.product_id
+LEFT JOIN quantities_purchased_by_user
+    ON page_views.user_id = quantities_purchased_by_user.user_id
+    AND page_views.product_id = quantities_purchased_by_user.product_id
\ No newline at end of file
diff --git a/greenery/models/marts/product/int_quantity_by_user.sql b/greenery/models/marts/product/int_quantity_by_user.sql
new file mode 100644
index 000000000..3849c49d8
--- /dev/null
+++ b/greenery/models/marts/product/int_quantity_by_user.sql
@@ -0,0 +1,20 @@
+WITH
+quantities_purchased_by_user AS ( -- total units per (user, product) across the user's orders
+SELECT
+    orders.user_id
+    ,lines.product_id
+    ,SUM(lines.quantity) AS quantity_purchased_by_user
+
+FROM {{ ref("stg_postgres__orders") }} AS orders
+LEFT JOIN {{ ref("stg_postgres__order_items") }} AS lines
+    ON orders.order_id = lines.order_id -- the staging orders key is order_id
+GROUP BY orders.user_id, lines.product_id
+)--quantities_purchased_by_user
+
+SELECT
+
+    user_id
+    ,product_id
+    ,quantity_purchased_by_user
+
+FROM quantities_purchased_by_user
\ No newline at end of file
diff --git a/greenery/models/staging/postgres/_postgres__models.yml
b/greenery/models/staging/postgres/_postgres__models.yml index e31f6c291..4d2f857d9 100644 --- a/greenery/models/staging/postgres/_postgres__models.yml +++ b/greenery/models/staging/postgres/_postgres__models.yml @@ -6,113 +6,269 @@ models: columns: - name: address_id description: UUID for each address + tests: + - not_null + - unique - name: address - description: address + description: Street name and number + tests: + - not_null - name: zipcode description: The zipcode of the address + tests: + - not_null - name: state description: State/Province + tests: + - not_null - name: country description: Country + tests: + - not_null - name: stg_postgres__events description: Staging model for events table columns: - name: event_id description: UUID for each event + tests: + - not_null + - unique - name: session_id description: UUID of each browsing session + tests: + - not_null - name: user_id description: UUID of the user who performed the event + tests: + - not_null + - relationships: + to: ref('stg_postgres__users') + field: user_id - name: page_url description: URL page of the event + tests: + - not_null - name: created_at description: Timestamp for the event + tests: + - not_null - name: event_type description: Type of event (add_to_cart, checkout, package_shipped, page_view) + tests: + - not_null + - accepted_values: + values: ['add_to_cart', 'checkout', 'package_shipped', 'page_view'] - name: order_id description: UUID of the order (only for checkout and package_shipped events) + tests: + - relationships: + to: ref('stg_postgres__orders') + field: order_id + - not_null: + where: "event_type in ('checkout', 'package_shipped')" - name: product_id description: UUID of the product for product-specific events (only for checkout and page_view events) + tests: + - relationships: + to: ref('stg_postgres__products') + field: product_id + - not_null: + where: "event_type in ('add_to_cart', 'page_view')" - name: stg_postgres__order_items description: Staging model for the 
order items + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - order_id + - product_id columns: - name: order_id description: UUID of the order + tests: + - not_null + - relationships: + to: ref('stg_postgres__orders') + field: order_id - name: product_id description: UUID of the product(s) in the order + tests: + - not_null + - relationships: + to: ref('stg_postgres__products') + field: product_id - name: quantity description: Number of units of the product in the order + tests: + - not_null + - dbt_utils.accepted_range: + min_value: 0 + - name: stg_postgres__orders description: Staging model for orders columns: - name: order_id description: UUID for each order + - name: order_id + description: UUID for each unique order on platform + tests: + - unique + - not_null - name: user_id description: UUID of the user who placed the order + tests: + - not_null + - relationships: + to: ref('stg_postgres__users') + field: user_id - name: promo_id description: UUID of the promo code used in the order (not required) + tests: + - relationships: + to: ref('stg_postgres__promos') + field: promo_id - name: address_id description: Delivery address ID for the order + tests: + - not_null + - relationships: + to: ref('stg_postgres__addresses') + field: address_id - name: created_at description: Timestamp for the order + tests: + - not_null - name: order_cost description: Dollar amount of the order + tests: + - not_null + - dbt_utils.accepted_range: + min_value: 0 + inclusive: false - name: shipping_cost description: Cost of shipping for the order + tests: + - not_null + - dbt_utils.accepted_range: + min_value: 0 + inclusive: false - name: order_total description: Total cost of the order (includes shipping) + tests: + - not_null + - dbt_utils.accepted_range: + min_value: 0 + inclusive: false - name: tracking_id description: Tracking number of the order + tests: + - not_null: + where: "status != 'preparing'" - name: shipping_service description: Company 
used for shipping + tests: + - not_null: + where: "status != 'preparing'" - name: estimated_delivery_at description: Estimated delivery date - name: delivered_at description: Actual delivery date + tests: + - not_null: + where: "status = 'delivered'" - name: status description: Status of the order + tests: + - not_null + - accepted_values: + values: ['delivered', 'preparing', 'shipped'] - name: stg_postgres__products description: Staging model for products columns: - name: product_id description: UUID for each product + tests: + - unique + - not_null - name: name description: Name of the product + tests: + - not_null - name: price description: Price of the product in dollars + tests: + - not_null + - dbt_utils.accepted_range: + min_value: 0 + inclusive: false - name: inventory description: Inventory for each product + tests: + - not_null + - dbt_utils.accepted_range: + min_value: 0 - name: stg_postgres__promos description: Staging model for promos columns: - name: promo_id description: Every promocode generated + tests: + - unique + - not_null - name: discount description: Dollar amount given off by using the code + tests: + - not_null + - dbt_utils.accepted_range: + min_value: 0 + inclusive: false - name: status description: Status of the promo code (active or inactive) + tests: + - not_null + - accepted_values: + values: ['active', 'inactive'] - name: stg_postgres__users description: Staging model for users columns: - name: user_id description: UUID for each user + tests: + - unique + - not_null - name: first_name description: First name of the user + tests: + - not_null - name: last_name description: Last name of the user + tests: + - not_null - name: email description: Email address of the user + tests: + - unique + - not_null - name: phone_number description: Phone number of the user + tests: + - not_null - name: created_at description: Timestamp when the user was created + tests: + - not_null - name: updated_at description: Timestamp when the user was last 
updated + tests: + - not_null - name: address_id - description: Default delivery address for the user \ No newline at end of file + description: Default delivery address for the user + tests: + - relationships: + to: ref('stg_postgres__addresses') + field: address_id \ No newline at end of file diff --git a/greenery/models/staging/postgres/_postgres__sources.yml b/greenery/models/staging/postgres/_postgres__sources.yml index 3b07c35f3..1c5dddaac 100644 --- a/greenery/models/staging/postgres/_postgres__sources.yml +++ b/greenery/models/staging/postgres/_postgres__sources.yml @@ -4,11 +4,35 @@ sources: - name: postgres database: RAW schema: PUBLIC + + quoting: + database: false + schema: false + identifier: false + + freshness: + warn_after: {count: 24, period: hour} + error_after: {count: 48, period: hour} + tables: - name: addresses - - name: users - - name: promos - - name: products + loaded_at_field: created_at + description: Contains information about the addresses of the users + - name: events + loaded_at_field: created_at + description: Contains the events that have happened on the platform (activity on the website) - name: orders + loaded_at_field: created_at + description: Contains information about the orders made on the platform - name: order_items - - name: events \ No newline at end of file + loaded_at_field: created_at + description: Contains information about the items in each order + - name: products + loaded_at_field: created_at + description: Contains information about the products sold on the platform + - name: promos + loaded_at_field: created_at + description: Contains information about all of the promotions run (present and past) + - name: users + loaded_at_field: created_at + description: Contains information about the users of the platform \ No newline at end of file diff --git a/greenery/packages.yml b/greenery/packages.yml new file mode 100644 index 000000000..6152b3309 --- /dev/null +++ b/greenery/packages.yml @@ -0,0 +1,3 @@ 
+packages: +
- package: dbt-labs/dbt_utils + version: 1.1.1 \ No newline at end of file diff --git a/greenery/snapshots/products__inventory.sql b/greenery/snapshots/products__inventory.sql index d04f1cd65..fb7cf646b 100644 --- a/greenery/snapshots/products__inventory.sql +++ b/greenery/snapshots/products__inventory.sql @@ -1,14 +1,14 @@ {% snapshot products_snapshot %} - {{ - config( - target_database = "DEV_DB", - target_schema = "DBT_FPETRIBUFUNDTHROUGHCOM", - strategy='check', - unique_key='product_id', - check_cols=['inventory'], - ) - }} +{{ + config( + target_database = target.database, + target_schema = target.schema, + strategy='check', + unique_key='product_id', + check_cols=['inventory'], + ) +}} SELECT product_id From 6310b8a5ba6a3cd801e7085d7690ab68d971603c Mon Sep 17 00:00:00 2001 From: Felipe Petribu Date: Sun, 24 Sep 2023 18:43:39 +0000 Subject: [PATCH 9/9] corrected product_model.yml --- .../marts/product/{_product_models.yml => _products_model.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename greenery/models/marts/product/{_product_models.yml => _products_model.yml} (100%) diff --git a/greenery/models/marts/product/_product_models.yml b/greenery/models/marts/product/_products_model.yml similarity index 100% rename from greenery/models/marts/product/_product_models.yml rename to greenery/models/marts/product/_products_model.yml