diff --git a/greenery/.gitignore b/greenery/.gitignore new file mode 100644 index 000000000..49f147cb9 --- /dev/null +++ b/greenery/.gitignore @@ -0,0 +1,4 @@ + +target/ +dbt_packages/ +logs/ diff --git a/greenery/README.md b/greenery/README.md new file mode 100644 index 000000000..e4de3ccd5 --- /dev/null +++ b/greenery/README.md @@ -0,0 +1,52 @@ +# Week 1 + +## Metrics: + +- **Total Users**: The total number of unique users registered on the platform. +- **Average Orders per Hour**: The average number of orders placed every hour. +- **Average Hours to Deliver**: On average, the number of hours it takes from an order being placed to being delivered. +- **Users by Purchase Count**: How many users have made one, two, or three or more purchases. +- **Average Sessions per Hour**: The average number of unique browsing sessions on the platform every hour. + +## Results: + +| Metric | Value | +|---------------------------------|------------| +| Total Users | 130 | +| Average Orders per Hour | 7.520 | +| Average Hours to Deliver | 93.403 | +| Users with One Purchase | 25 | +| Users with Two Purchases | 28 | +| Users with Three+ Purchases | 71 | +| Average Sessions per Hour | 16.327 | + + +# Week 2 + +## Business objectives: + +- **What is our user repeat rate?**: The ratio of users who made two or more purchases over users who made at least one purchase. 
+- **Define good indicators for potential repeat users** +- **Define good indicators for potential non-repeat users** + + +## Results: + +| Metric | Value | +|---------------------------------|------------| +| Repeat rate | 79.84% | + +- **Define good indicators for potential repeat users** + - **Number of items on previous order** + - **Usage of promo codes** + - **Number of sessions until purchase** + - **Average delivery time for the user** + - **Good Reviews on the products** + - **Good Reviews of the platform itself** + +- **Define good indicators for potential non-repeat users** + - **Type of product ordered (one-type purchases)** + - **Email spam rate** + - **Average delivery time for the user** + - **Bad Reviews on the products** + - **Bad Reviews of the platform itself** \ No newline at end of file diff --git a/greenery/analyses/.gitkeep b/greenery/analyses/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/analyses/week_1_project.sql b/greenery/analyses/week_1_project.sql new file mode 100644 index 000000000..c548532d3 --- /dev/null +++ b/greenery/analyses/week_1_project.sql @@ -0,0 +1,90 @@ +WITH + --1. How many users do we have? + qtt_users AS ( + SELECT + COUNT(DISTINCT user_id) AS distinct_users + FROM {{ ref("stg_postgres__users") }} + )--qtt_users + + + + --2. On average, how many orders do we receive per hour? + , orders_per_hour AS ( + SELECT + date_trunc('HOUR', created_at) AS hour + , COUNT(DISTINCT order_id) AS order_count + FROM {{ ref("stg_postgres__orders") }} + GROUP BY date_trunc('HOUR', created_at) + )--orders_per_hour + + + , avg_orders_per_hour AS ( + SELECT + AVG(order_count) AS avg_order_per_hour + FROM orders_per_hour + )--avg_orders_per_hour + + + --3. On average, how long does an order take from being placed to being delivered? 
+ , avg_delivery_time AS ( + SELECT + AVG(DATEDIFF(hour, created_at, delivered_at)) AS avg_hours_to_deliver + FROM {{ ref("stg_postgres__orders") }} + WHERE delivered_at IS NOT NULL + )--avg_delivery_time + + + + -- 4. How many users have only made one purchase? Two purchases? Three+ purchases? + , purchases_by_user AS ( + SELECT + user_id + , COUNT(DISTINCT order_id) AS total_purchases + FROM {{ ref("stg_postgres__orders") }} + GROUP BY user_id + )--purchases_by_user + + , purchase_counter AS ( + SELECT + SUM(CASE WHEN total_purchases = 1 THEN 1 ELSE 0 end) AS users_with_one_purchase + , SUM(CASE WHEN total_purchases = 2 THEN 1 ELSE 0 end) AS users_with_two_purchase + , SUM(CASE WHEN total_purchases >= 3 THEN 1 ELSE 0 end) AS users_with_three_purchase + FROM purchases_by_user + )--purchase_counter + + + + -- 5. On average, how many unique sessions do we have per hour? + ,sessions_per_hour AS ( + SELECT + DATE_TRUNC('HOUR', created_at) AS session_hour + , COUNT(DISTINCT session_id) AS session_count + FROM {{ ref("stg_postgres__events") }} + GROUP BY DATE_TRUNC('HOUR', created_at) + )--sessions_per_hour + + , avg_sessions_per_hour AS ( + SELECT + AVG(session_count) AS avg_session_per_hour + FROM sessions_per_hour + )--avg_sessions_per_hour + + + +SELECT + qtt_users.distinct_users + , avg_orders_per_hour.avg_order_per_hour + , avg_delivery_time.avg_hours_to_deliver + , purchase_counter.users_with_one_purchase + , purchase_counter.users_with_two_purchase + , purchase_counter.users_with_three_purchase + , avg_sessions_per_hour.avg_session_per_hour +FROM qtt_users +CROSS JOIN + avg_orders_per_hour +CROSS JOIN + avg_delivery_time +CROSS JOIN + purchase_counter +CROSS JOIN + avg_sessions_per_hour \ No newline at end of file diff --git a/greenery/analyses/week_2_project.sql b/greenery/analyses/week_2_project.sql new file mode 100644 index 000000000..ebc295bc4 --- /dev/null +++ b/greenery/analyses/week_2_project.sql @@ -0,0 +1,25 @@ +with +purchases_per_user AS ( + SELECT + user_id + ,COUNT(DISTINCT order_id) AS 
purchase_counter + FROM {{ ref("stg_postgres__orders") }} + GROUP BY user_id +)--purchases_per_user + +, purchase_counter AS ( + SELECT + COUNT(user_id) AS users_who_purchased + , sum(CASE WHEN purchase_counter >= 2 + THEN 1 + ELSE 0 + END + ) AS users_who_purchased_twice_or_more + FROM purchases_per_user +)--purchase_counter + +SELECT + users_who_purchased, + users_who_purchased_twice_or_more, + users_who_purchased_twice_or_more / NULLIF(users_who_purchased, 0) as repeat_rate --NULL instead of a division error when nobody has purchased +FROM purchase_counter \ No newline at end of file diff --git a/greenery/dbt_project.yml b/greenery/dbt_project.yml new file mode 100644 index 000000000..52703a22f --- /dev/null +++ b/greenery/dbt_project.yml @@ -0,0 +1,39 @@ + +# Name your project! Project names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: 'greenery' +version: '1.0.0' +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. +profile: 'greenery' + +# These configurations specify where dbt should look for different types of files. +# The `model-paths` config, for example, states that models in this project can be +# found in the "models/" directory. You probably won't need to change these! +model-paths: ["models"] +analysis-paths: ["analyses"] +test-paths: ["tests"] +seed-paths: ["seeds"] +macro-paths: ["macros"] +snapshot-paths: ["snapshots"] + +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_packages" + + +# Configuring models +# Full documentation: https://docs.getdbt.com/docs/configuring-models + +# In this config, we tell dbt how to materialize the models in each +# directory. These settings can be overridden in the individual model +# files using the `{{ config(...) }}` macro. 
+models: + greenery: + # Config indicated by + and applies to all files under models/staging/ + staging: + +materialized: view + marts: + +materialized: table diff --git a/greenery/macros/.gitkeep b/greenery/macros/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/models/marts/core/_core__models.yml b/greenery/models/marts/core/_core__models.yml new file mode 100644 index 000000000..e405e681e --- /dev/null +++ b/greenery/models/marts/core/_core__models.yml @@ -0,0 +1,38 @@ +version: 2 + +models: + - name: dim_products + columns: + - name: product_id + description: Unique UUID for each product + tests: + - unique + - not_null + - relationships: + to: ref('stg_postgres__products') + field: product_id + + - name: dim_users + columns: + - name: user_id + description: Unique UUID for each user + tests: + - unique + - not_null + - relationships: + to: ref('stg_postgres__users') + field: user_id + + - name: fact_orders + columns: + - name: order_id + description: Unique UUID for each order + tests: + - unique + - not_null + - name: user_id + description: Unique UUID for each user + tests: + - relationships: + to: ref('stg_postgres__users') + field: user_id \ No newline at end of file diff --git a/greenery/models/marts/core/dim_products.sql b/greenery/models/marts/core/dim_products.sql new file mode 100644 index 000000000..c726c97d6 --- /dev/null +++ b/greenery/models/marts/core/dim_products.sql @@ -0,0 +1,34 @@ +WITH +products AS ( +SELECT + product_id + ,name + ,price + ,inventory +FROM {{ ref("stg_postgres__products") }} +)--products + +, ordered_products_summary AS ( +SELECT + product_id + ,num_orders + ,quantity_shipped + ,quantity_delivered + ,quantity_preparing + ,total_quantity +FROM {{ ref("int_products") }} +)--ordered_products_summary + +SELECT + products.product_id + ,products.name + ,products.price + ,products.inventory + ,ordered_products_summary.num_orders + ,ordered_products_summary.quantity_shipped + 
,ordered_products_summary.quantity_delivered + ,ordered_products_summary.quantity_preparing + ,ordered_products_summary.total_quantity + +FROM products +left join ordered_products_summary using (product_id) \ No newline at end of file diff --git a/greenery/models/marts/core/dim_users.sql b/greenery/models/marts/core/dim_users.sql new file mode 100644 index 000000000..d6b30239f --- /dev/null +++ b/greenery/models/marts/core/dim_users.sql @@ -0,0 +1,10 @@ +select + user_id + ,first_name + ,last_name + ,email + ,phone_number + ,created_at + ,updated_at + ,address_id +FROM {{ ref("stg_postgres__users") }} \ No newline at end of file diff --git a/greenery/models/marts/core/fact_orders.sql b/greenery/models/marts/core/fact_orders.sql new file mode 100644 index 000000000..e4f7bdcf9 --- /dev/null +++ b/greenery/models/marts/core/fact_orders.sql @@ -0,0 +1,45 @@ +WITH +orders AS ( +SELECT + order_id + ,promo_id + ,user_id + ,address_id + ,created_at + ,order_cost + ,shipping_cost + ,order_total + ,tracking_id + ,shipping_service + ,estimated_delivery_at + ,delivered_at + ,status +FROM {{ ref("stg_postgres__orders") }} +)--orders + +, promos AS ( +SELECT + promo_id + ,discount + ,status +FROM {{ ref("stg_postgres__promos") }} +)--promos + +SELECT + orders.order_id + ,orders.promo_id + ,orders.user_id + ,orders.address_id + ,orders.created_at + ,orders.order_cost + ,orders.shipping_cost + ,orders.order_total + ,orders.tracking_id + ,orders.shipping_service + ,orders.estimated_delivery_at + ,orders.delivered_at + ,orders.status + ,promos.discount AS promo_discount +FROM orders +LEFT JOIN promos + ON orders.promo_id = promos.promo_id \ No newline at end of file diff --git a/greenery/models/marts/core/int_products.sql b/greenery/models/marts/core/int_products.sql new file mode 100644 index 000000000..c88f6f6ba --- /dev/null +++ b/greenery/models/marts/core/int_products.sql @@ -0,0 +1,63 @@ +WITH +orders AS ( +SELECT + order_id + ,promo_id + ,user_id + ,address_id + ,created_at + 
,order_cost + ,shipping_cost + ,order_total + ,tracking_id + ,shipping_service + ,estimated_delivery_at + ,delivered_at + ,status +FROM {{ ref("stg_postgres__orders") }} +)--orders + + +, orders_items AS ( +SELECT + order_id + ,product_id + ,quantity +FROM {{ ref("stg_postgres__order_items") }} +)--orders_items + + +, ordered_products_summary AS ( +SELECT + orders_items.product_id + ,COUNT(orders_items.product_id) AS num_orders + ,SUM(CASE WHEN orders.status = 'shipped' + THEN orders_items.quantity + ELSE 0 + END + ) AS quantity_shipped + ,SUM(CASE WHEN orders.status = 'delivered' + THEN orders_items.quantity + ELSE 0 + END + ) AS quantity_delivered + ,SUM(CASE WHEN orders.status = 'preparing' + THEN orders_items.quantity + ELSE 0 + END + ) AS quantity_preparing + ,SUM(orders_items.quantity) AS total_quantity +FROM orders_items +LEFT JOIN orders + ON orders_items.order_id = orders.order_id +GROUP BY orders_items.product_id + )--ordered_products_summary + +SELECT + product_id + ,num_orders + ,quantity_shipped + ,quantity_delivered + ,quantity_preparing + ,total_quantity +FROM ordered_products_summary \ No newline at end of file diff --git a/greenery/models/marts/marketing/_marketing_models.yml b/greenery/models/marts/marketing/_marketing_models.yml new file mode 100644 index 000000000..da3c4e414 --- /dev/null +++ b/greenery/models/marts/marketing/_marketing_models.yml @@ -0,0 +1,44 @@ +version: 2 + +models: + + - name: fact_user_orders + columns: + - name: user_id + description: Unique UUID for each user + tests: + - unique + - not_null + - relationships: + to: ref('stg_postgres__users') + field: user_id + - name: first_order_date + description: Date of first order for the user + - name: last_order_date + description: Date of last order for the user + - name: total_spent + description: Total amount spent by the user across all orders + tests: + - dbt_utils.accepted_range: + min_value: 0 + - name: num_orders + description: Number of orders made + tests: + - dbt_utils.accepted_range: + min_value: 0 + - name: 
num_shipped + description: Number of orders shipped + tests: + - dbt_utils.accepted_range: + min_value: 0 + - name: num_delivered + description: Number of orders delivered + tests: + - dbt_utils.accepted_range: + min_value: 0 + - name: num_preparing + description: Number of orders preparing + tests: + - dbt_utils.accepted_range: + min_value: 0 + diff --git a/greenery/models/marts/marketing/fact_user_orders.sql b/greenery/models/marts/marketing/fact_user_orders.sql new file mode 100644 index 000000000..3228a5b2b --- /dev/null +++ b/greenery/models/marts/marketing/fact_user_orders.sql @@ -0,0 +1,61 @@ +WITH +orders AS ( + +SELECT + order_id + ,promo_id + ,user_id + ,address_id + ,created_at + ,order_cost + ,shipping_cost + ,order_total + ,tracking_id + ,shipping_service + ,estimated_delivery_at + ,delivered_at + ,status +FROM {{ ref("stg_postgres__orders") }} + +)--orders + +, user_order_summary AS ( + +SELECT + user_id + ,MIN(created_at)::date AS first_order_date + ,MAX(created_at)::date AS last_order_date + ,ROUND(SUM(order_total), 2) AS total_spent + ,COUNT(order_id) as num_orders + ,SUM(CASE WHEN status = 'shipped' + THEN 1 + ELSE 0 + END + ) AS num_shipped + ,SUM(CASE WHEN status = 'delivered' + THEN 1 + ELSE 0 + END + ) AS num_delivered + ,SUM(CASE WHEN status = 'preparing' + THEN 1 + ELSE 0 + END + ) AS num_preparing + +FROM orders +GROUP BY user_id +)--user_order_summary + +SELECT + + user_id + ,first_order_date + ,last_order_date + ,total_spent + ,num_orders + ,num_shipped + ,num_delivered + ,num_preparing + +FROM user_order_summary \ No newline at end of file diff --git a/greenery/models/marts/product/_products_model.yml b/greenery/models/marts/product/_products_model.yml new file mode 100644 index 000000000..fb18a97ba --- /dev/null +++ b/greenery/models/marts/product/_products_model.yml @@ -0,0 +1,47 @@ +version: 2 + +models: + + - name: fact_page_views + columns: + - name: event_id + description: Unique UUID for each event + tests: + - unique + - not_null 
+ - name: session_id + description: Unique UUID for each session + tests: + # no unique test: session_id repeats across page_view rows + - not_null + - name: user_id + description: Unique UUID for each user + tests: + - relationships: + to: ref('stg_postgres__users') + field: user_id + - name: product_id + description: UUID of the product viewed + tests: + - relationships: + to: ref('stg_postgres__products') + field: product_id + - name: page_url + description: URL where the session and events happened + tests: + - not_null + - name: created_date + description: date for the event + tests: + - not_null + - name: product_id + description: UUID for the product (if any) added to cart during the session + - name: product_name + description: Name for the product (if any) added to cart during the session + - name: added_to_cart_during_session + description: Flag to spot if product was added to cart during the session + - name: added_to_cart_by_user + description: Flag to spot if product was added to cart by user + - name: quantity_purchased_by_user + description: Number of products purchased by user + diff --git a/greenery/models/marts/product/fact_page_views.sql b/greenery/models/marts/product/fact_page_views.sql new file mode 100644 index 000000000..daf95ba85 --- /dev/null +++ b/greenery/models/marts/product/fact_page_views.sql @@ -0,0 +1,91 @@ +WITH +page_views AS ( +SELECT + + event_id + ,session_id + ,user_id + ,page_url + ,created_at::date AS created_date + ,product_id + +FROM {{ ref("stg_postgres__events") }} +WHERE event_type = 'page_view' +)--page_views + + +, products AS ( +SELECT + + product_id + ,name + ,price + ,inventory + +FROM {{ ref("stg_postgres__products") }} +)--products + + +, products_added_to_cart_during_session AS ( +SELECT + + DISTINCT session_id + ,product_id + +FROM {{ ref("stg_postgres__events") }} +WHERE event_type = 'add_to_cart' +)--products_added_to_cart_during_session + + +, products_added_to_cart_by_user AS ( +SELECT + DISTINCT user_id + ,product_id + +FROM {{ ref("stg_postgres__events") }} +WHERE event_type = 
'add_to_cart' +)--products_added_to_cart_by_user + + +, quantities_purchased_by_user AS ( +SELECT + + user_id + ,product_id + ,quantity_purchased_by_user + +from {{ ref("int_quantity_by_user") }} +)--quantities_purchased_by_user + + +SELECT + + page_views.event_id + ,page_views.session_id + ,page_views.user_id + ,page_views.page_url + ,page_views.created_date + ,page_views.product_id + ,products.name AS product_name + ,CASE WHEN products_added_to_cart_during_session.product_id IS NOT NULL + THEN 1 + ELSE 0 + END AS added_to_cart_during_session + ,CASE WHEN products_added_to_cart_by_user.product_id IS NOT NULL + THEN 1 + ELSE 0 + END AS added_to_cart_by_user + ,quantities_purchased_by_user.quantity_purchased_by_user + +FROM page_views +LEFT JOIN products + ON page_views.product_id = products.product_id +LEFT JOIN products_added_to_cart_during_session + ON page_views.session_id = products_added_to_cart_during_session.session_id + AND page_views.product_id = products_added_to_cart_during_session.product_id +LEFT JOIN products_added_to_cart_by_user + ON page_views.user_id = products_added_to_cart_by_user.user_id + AND page_views.product_id = products_added_to_cart_by_user.product_id +LEFT JOIN quantities_purchased_by_user + ON page_views.user_id = quantities_purchased_by_user.user_id + AND page_views.product_id = quantities_purchased_by_user.product_id \ No newline at end of file diff --git a/greenery/models/marts/product/int_quantity_by_user.sql b/greenery/models/marts/product/int_quantity_by_user.sql new file mode 100644 index 000000000..3849c49d8 --- /dev/null +++ b/greenery/models/marts/product/int_quantity_by_user.sql @@ -0,0 +1,20 @@ +WITH +quantities_purchased_by_user AS ( +SELECT + orders.user_id + ,lines.product_id + ,SUM(lines.quantity) AS quantity_purchased_by_user + +FROM {{ ref("stg_postgres__orders") }} AS orders +LEFT JOIN {{ ref("stg_postgres__order_items") }} AS lines + ON orders.order_id = lines.order_id +GROUP BY orders.user_id, lines.product_id 
+)--quantities_purchased_by_user + +SELECT + + user_id + ,product_id + ,quantity_purchased_by_user + +FROM quantities_purchased_by_user \ No newline at end of file diff --git a/greenery/models/staging/postgres/_postgres__models.yml b/greenery/models/staging/postgres/_postgres__models.yml new file mode 100644 index 000000000..4d2f857d9 --- /dev/null +++ b/greenery/models/staging/postgres/_postgres__models.yml @@ -0,0 +1,274 @@ +version: 2 + +models: + - name: stg_postgres__addresses + description: Staging model for the addresses table + columns: + - name: address_id + description: UUID for each address + tests: + - not_null + - unique + - name: address + description: Street name and number + tests: + - not_null + - name: zipcode + description: The zipcode of the address + tests: + - not_null + - name: state + description: State/Province + tests: + - not_null + - name: country + description: Country + tests: + - not_null + + - name: stg_postgres__events + description: Staging model for events table + columns: + - name: event_id + description: UUID for each event + tests: + - not_null + - unique + - name: session_id + description: UUID of each browsing session + tests: + - not_null + - name: user_id + description: UUID of the user who performed the event + tests: + - not_null + - relationships: + to: ref('stg_postgres__users') + field: user_id + - name: page_url + description: URL page of the event + tests: + - not_null + - name: created_at + description: Timestamp for the event + tests: + - not_null + - name: event_type + description: Type of event (add_to_cart, checkout, package_shipped, page_view) + tests: + - not_null + - accepted_values: + values: ['add_to_cart', 'checkout', 'package_shipped', 'page_view'] + - name: order_id + description: UUID of the order (only for checkout and package_shipped events) + tests: + - relationships: + to: ref('stg_postgres__orders') + field: order_id + - not_null: + where: "event_type in ('checkout', 'package_shipped')" + - name: 
product_id + description: UUID of the product for product-specific events (only for add_to_cart and page_view events) + tests: + - relationships: + to: ref('stg_postgres__products') + field: product_id + - not_null: + where: "event_type in ('add_to_cart', 'page_view')" + + - name: stg_postgres__order_items + description: Staging model for the order items + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - order_id + - product_id + columns: + - name: order_id + description: UUID of the order + tests: + - not_null + - relationships: + to: ref('stg_postgres__orders') + field: order_id + - name: product_id + description: UUID of the product(s) in the order + tests: + - not_null + - relationships: + to: ref('stg_postgres__products') + field: product_id + - name: quantity + description: Number of units of the product in the order + tests: + - not_null + - dbt_utils.accepted_range: + min_value: 0 + + + - name: stg_postgres__orders + description: Staging model for orders + columns: + - name: order_id + description: UUID for each order + - name: order_id + description: UUID for each unique order on platform + tests: + - unique + - not_null + - name: user_id + description: UUID of the user who placed the order + tests: + - not_null + - relationships: + to: ref('stg_postgres__users') + field: user_id + - name: promo_id + description: UUID of the promo code used in the order (not required) + tests: + - relationships: + to: ref('stg_postgres__promos') + field: promo_id + - name: address_id + description: Delivery address ID for the order + tests: + - not_null + - relationships: + to: ref('stg_postgres__addresses') + field: address_id + - name: created_at + description: Timestamp for the order + tests: + - not_null + - name: order_cost + description: Dollar amount of the order + tests: + - not_null + - dbt_utils.accepted_range: + min_value: 0 + inclusive: false + - name: shipping_cost + description: Cost of shipping for the order + tests: + - 
not_null + - dbt_utils.accepted_range: + min_value: 0 + inclusive: false + - name: order_total + description: Total cost of the order (includes shipping) + tests: + - not_null + - dbt_utils.accepted_range: + min_value: 0 + inclusive: false + - name: tracking_id + description: Tracking number of the order + tests: + - not_null: + where: "status != 'preparing'" + - name: shipping_service + description: Company used for shipping + tests: + - not_null: + where: "status != 'preparing'" + - name: estimated_delivery_at + description: Estimated delivery date + - name: delivered_at + description: Actual delivery date + tests: + - not_null: + where: "status = 'delivered'" + - name: status + description: Status of the order + tests: + - not_null + - accepted_values: + values: ['delivered', 'preparing', 'shipped'] + + - name: stg_postgres__products + description: Staging model for products + columns: + - name: product_id + description: UUID for each product + tests: + - unique + - not_null + - name: name + description: Name of the product + tests: + - not_null + - name: price + description: Price of the product in dollars + tests: + - not_null + - dbt_utils.accepted_range: + min_value: 0 + inclusive: false + - name: inventory + description: Inventory for each product + tests: + - not_null + - dbt_utils.accepted_range: + min_value: 0 + + - name: stg_postgres__promos + description: Staging model for promos + columns: + - name: promo_id + description: Every promocode generated + tests: + - unique + - not_null + - name: discount + description: Dollar amount given off by using the code + tests: + - not_null + - dbt_utils.accepted_range: + min_value: 0 + inclusive: false + - name: status + description: Status of the promo code (active or inactive) + tests: + - not_null + - accepted_values: + values: ['active', 'inactive'] + + - name: stg_postgres__users + description: Staging model for users + columns: + - name: user_id + description: UUID for each user + tests: + - unique + - 
not_null + - name: first_name + description: First name of the user + tests: + - not_null + - name: last_name + description: Last name of the user + tests: + - not_null + - name: email + description: Email address of the user + tests: + - unique + - not_null + - name: phone_number + description: Phone number of the user + tests: + - not_null + - name: created_at + description: Timestamp when the user was created + tests: + - not_null + - name: updated_at + description: Timestamp when the user was last updated + tests: + - not_null + - name: address_id + description: Default delivery address for the user + tests: + - relationships: + to: ref('stg_postgres__addresses') + field: address_id \ No newline at end of file diff --git a/greenery/models/staging/postgres/_postgres__sources.yml b/greenery/models/staging/postgres/_postgres__sources.yml new file mode 100644 index 000000000..1c5dddaac --- /dev/null +++ b/greenery/models/staging/postgres/_postgres__sources.yml @@ -0,0 +1,38 @@ +version: 2 + +sources: + - name: postgres + database: RAW + schema: PUBLIC + + quoting: + database: false + schema: false + identifier: false + + freshness: + warn_after: {count: 24, period: hour} + error_after: {count: 48, period: hour} + + tables: + - name: addresses + loaded_at_field: created_at + description: Contains information about the addresses of the users + - name: events + loaded_at_field: created_at + description: Contains the events that have happened on the platform (activity on the website) + - name: orders + loaded_at_field: created_at + description: Contains information about the orders made on the platform + - name: order_items + loaded_at_field: created_at + description: Contains information about the items in each order + - name: products + loaded_at_field: created_at + description: Contains information about the products sold on the platform + - name: promos + loaded_at_field: created_at + description: Contains information about all of the promotions run (present and 
past) + - name: users + loaded_at_field: created_at + description: Contains information about the users of the platform \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres__addresses.sql b/greenery/models/staging/postgres/stg_postgres__addresses.sql new file mode 100644 index 000000000..6702cf9f3 --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres__addresses.sql @@ -0,0 +1,7 @@ +SELECT + address_id + ,address + ,zipcode + ,state + ,country +FROM {{ source("postgres", "addresses") }} \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres__events.sql b/greenery/models/staging/postgres/stg_postgres__events.sql new file mode 100644 index 000000000..5d7bae2a0 --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres__events.sql @@ -0,0 +1,10 @@ +SELECT + event_id + ,session_id + ,user_id + ,event_type + ,page_url + ,created_at + ,order_id + ,product_id +FROM {{ source("postgres", "events") }} \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres__order_items.sql b/greenery/models/staging/postgres/stg_postgres__order_items.sql new file mode 100644 index 000000000..cd380e7c6 --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres__order_items.sql @@ -0,0 +1,5 @@ +SELECT + order_id + ,product_id + ,quantity +FROM {{ source("postgres", "order_items") }} \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres__orders.sql b/greenery/models/staging/postgres/stg_postgres__orders.sql new file mode 100644 index 000000000..9e1b96373 --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres__orders.sql @@ -0,0 +1,15 @@ +SELECT + order_id + ,promo_id + ,user_id + ,address_id + ,created_at + ,order_cost + ,shipping_cost + ,order_total + ,tracking_id + ,shipping_service + ,estimated_delivery_at + ,delivered_at + ,status +FROM {{ source("postgres", "orders") }} \ No newline at end of file diff --git 
a/greenery/models/staging/postgres/stg_postgres__products.sql b/greenery/models/staging/postgres/stg_postgres__products.sql new file mode 100644 index 000000000..508187349 --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres__products.sql @@ -0,0 +1,6 @@ +SELECT + product_id + ,name + ,price + ,inventory +FROM {{ source("postgres", "products") }} \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres__promos.sql b/greenery/models/staging/postgres/stg_postgres__promos.sql new file mode 100644 index 000000000..3b953d26d --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres__promos.sql @@ -0,0 +1,5 @@ +SELECT + promo_id + ,discount + ,status +FROM {{ source("postgres", "promos") }} \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres__users.sql b/greenery/models/staging/postgres/stg_postgres__users.sql new file mode 100644 index 000000000..e7af02ba4 --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres__users.sql @@ -0,0 +1,10 @@ +SELECT + user_id + ,first_name + ,last_name + ,email + ,phone_number + ,created_at + ,updated_at + ,address_id +FROM {{ source("postgres", "users") }} \ No newline at end of file diff --git a/greenery/packages.yml b/greenery/packages.yml new file mode 100644 index 000000000..6152b3309 --- /dev/null +++ b/greenery/packages.yml @@ -0,0 +1,3 @@ +packages: + - package: dbt-labs/dbt_utils + version: 1.1.1 \ No newline at end of file diff --git a/greenery/seeds/.gitkeep b/greenery/seeds/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/snapshots/.gitkeep b/greenery/snapshots/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/snapshots/products__inventory.sql b/greenery/snapshots/products__inventory.sql new file mode 100644 index 000000000..fb7cf646b --- /dev/null +++ b/greenery/snapshots/products__inventory.sql @@ -0,0 +1,20 @@ +{% snapshot products_snapshot %} + +{{ + config( + 
target_database = target.database, + target_schema = target.schema, + strategy='check', + unique_key='product_id', + check_cols=['inventory'], + ) +}} + + SELECT + product_id + ,name + ,price + ,inventory + FROM {{ source("postgres", "products") }} + +{% endsnapshot %} \ No newline at end of file diff --git a/greenery/tests/.gitkeep b/greenery/tests/.gitkeep new file mode 100644 index 000000000..e69de29bb