Skip to content

Commit

Permalink
Add unit tests for manifest
Browse files Browse the repository at this point in the history
  • Loading branch information
agnessnowplow committed Dec 11, 2024
1 parent 683abae commit b46ed1d
Show file tree
Hide file tree
Showing 14 changed files with 219 additions and 12 deletions.
53 changes: 51 additions & 2 deletions integration_tests/.scripts/unit_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,63 @@ fi

# Run the snowplow-utils macro unit tests once per dbt target listed in DATABASES.
# Every dbt invocation aborts the whole script with status 1 on failure.
# The macro tests only run when BRANCH is "release" or "utils_revamp".
for db in "${DATABASES[@]}"; do

  # Run dbt seed to set up the database, this prepares the ground for int tests that come after unit tests

  echo "Snowplow unified unit tests: Seeding data"
  # dbt is called directly instead of through eval so the target name is passed as a single, literal argument
  dbt seed --full-refresh --target "$db" || exit 1

  # In order to test this macro we need a model reference first and also a timestamp column which the macro takes the min and max of
  # We need to make sure that the correct result is returned even if the table is empty and whether they want the output to be a low or a high set date in that case
  # All in the models folder

  if [[ $BRANCH == "release" || $BRANCH == "utils_revamp" ]]; then
    echo "Snowplow-utils unit tests: Run test_return_limits_from_model_macro"
    dbt run --select +test_return_limits_from_model_macro expected_return_limits_from_model_macro --target "$db" --full-refresh || exit 1
    dbt test --select test_return_limits_from_model_macro --store-failures --target "$db" || exit 1
  fi

  # This macro returns different queries for different states which will be used to create the base_new_event_limits table
  # We need to make sure that the correct result is returned from this query depending on different inputs
  # Inputs are given based on the get_incremental_manifest_status macro but we can just fake it as it returns an array
  # Input example: ['9999-01-01 00:00:00', '9999-01-01 00:00:00', 0, false]
  # Inputs are read from a seed file

  if [[ $BRANCH == "release" || $BRANCH == "utils_revamp" ]]; then
    echo "Snowplow-utils unit tests: Run test_get_run_limits_macro"
    dbt run --select test_get_run_limits_macro --target "$db" --full-refresh || exit 1
    dbt test --select test_get_run_limits_macro --store-failures --target "$db" || exit 1
  fi

  # This macro returns different queries for different states which will be used to create the base_new_event_limits table
  # We need to make sure that the correct result is returned from this query depending on different inputs
  # Inputs are given based on the get_incremental_manifest_status macro but we can just fake it as it returns an array
  # Input example: ['9999-01-01 00:00:00', '9999-01-01 00:00:00', 0, 0, false]
  # Inputs are read from a seed file

  if [[ $BRANCH == "release" || $BRANCH == "utils_revamp" ]]; then
    echo "Snowplow-utils unit tests: Run test_get_run_limits_t_macro"
    dbt run --select test_get_run_limits_t_macro --target "$db" --full-refresh || exit 1
    dbt test --select test_get_run_limits_t_macro --store-failures --target "$db" || exit 1
  fi

  # This macro returns the array: [min_last_success, max_last_success, models_matched_from_manifest, has_matched_all_models]
  # Not too important to test, it effectively returns a min/max/count from values in the manifest based on the models in the run
  # Inputs are read from a seed file, we can selectively test the different inputs depending on the models in run array so no need for it to contain exact scenarios upfront

  if [[ $BRANCH == "release" || $BRANCH == "utils_revamp" ]]; then
    echo "Snowplow-utils unit tests: Run test_get_incremental_manifest_status_macro"
    dbt run --select test_get_incremental_manifest_status_macro --target "$db" --full-refresh || exit 1
    dbt test --select test_get_incremental_manifest_status_macro --store-failures --target "$db" || exit 1
  fi

  # This macro returns the array: [min_first_processed_load_tstamp, max_first_processed_load_tstamp, min_last_processed_load_tstamp, max_last_processed_load_tstamp, models_matched_from_manifest, sync_count, has_matched_all_models]
  # Not too important to test, it effectively returns a min/max/count from values in the manifest based on the models in the run
  # Inputs are read from a seed file, we can selectively test the different inputs depending on the models in run array so no need for it to contain exact scenarios upfront

  if [[ $BRANCH == "release" || $BRANCH == "utils_revamp" ]]; then
    echo "Snowplow-utils unit tests: Run test_get_incremental_manifest_status_t_macro"
    dbt run --select test_get_incremental_manifest_status_t_macro --target "$db" --full-refresh || exit 1
    dbt test --select test_get_incremental_manifest_status_t_macro --store-failures --target "$db" || exit 1
  fi

done
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
model,first_processed_load_tstamp,last_processed_load_tstamp
a,2020-01-01 00:00:00,2020-01-02 00:00:00
b,2020-01-02 00:00:00,2020-01-03 00:00:00
c,2020-01-03 00:00:00,2020-01-04 00:00:00
d,2020-01-01 00:00:00,2020-01-02 00:00:00
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
test_case,min_first_processed_load_tstamp,max_first_processed_load_tstamp,min_last_processed_load_tstamp,max_last_processed_load_tstamp,models_matched_from_manifest,sync_count,has_matched_all_models
all model_in_run exist in manifest,2020-01-01 00:00:00,2020-01-03 00:00:00,2020-01-02 00:00:00,2020-01-04 00:00:00,3,3,true
some model_in_run exist in manifest,2020-01-01 00:00:00,2020-01-03 00:00:00,2020-01-02 00:00:00,2020-01-03 00:00:00,2,2,false
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
min_last_processed_load_tstamp,max_last_processed_load_tstamp,models_matched_from_manifest,has_matched_all_models,sync_count,start_date,lower_limit,upper_limit
,,0,FALSE,0,2021-01-01,2021-01-01 00:00:00+00:00,2021-01-31 00:00:00+00:00
2021-03-01 00:00:00+00:00,2021-03-01 00:00:00+00:00,10,FALSE,1,2021-01-01,2021-01-01 00:00:00+00:00,2021-01-31 00:00:00+00:00
2021-03-01 18:00:00+00:00,2021-03-01 18:00:00+00:00,10,TRUE,1,2021-01-01,2021-03-01 18:00:00+00:00,2021-03-31 18:00:00+00:00
2021-03-01 00:00:00+00:00,2021-03-01 00:00:00+00:00,10,FALSE,2,2021-01-01,1999-01-01 00:00:00+00:00,1999-01-02 00:00:00+00:00
2021-03-01 00:00:00+00:00,2021-03-01 00:00:00+00:00,10,FALSE,3,2021-01-01,1999-01-01 00:00:00+00:00,1999-01-02 00:00:00+00:00
2021-03-01 00:00:00+00:00,2021-03-05 00:00:00+00:00,10,TRUE,3,2021-01-01,1999-01-01 00:00:00+00:00,1999-01-02 00:00:00+00:00
2021-03-01 00:00:00+00:00,2021-05-01 00:00:00+00:00,10,TRUE,3,2021-01-01,1999-01-01 00:00:00+00:00,1999-01-02 00:00:00+00:00
2021-03-01 00:00:00+00:00,2021-03-01 00:00:00+00:00,10,TRUE,3,2021-01-01,1999-01-01 00:00:00+00:00,1999-01-02 00:00:00+00:00
18 changes: 18 additions & 0 deletions integration_tests/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ vars:
snowplow__dev_target_name: dev
snowplow__databricks_catalog: 'hive_metastore'
snowplow__query_tag: 'snowplow_dbt'
snowplow__testing: true

models:
snowplow_utils_integration_tests:
Expand Down Expand Up @@ -138,13 +139,30 @@ seeds:
+column_types:
min_last_success: timestamp
max_last_success: timestamp
# Seed passed to get_incremental_manifest_status_t by the test_get_incremental_manifest_status_t_macro model.
# Both load-timestamp columns are forced to the timestamp type.
data_get_incremental_manifest_status_t:
+column_types:
first_processed_load_tstamp: timestamp
last_processed_load_tstamp: timestamp
# Expected output that test_get_incremental_manifest_status_t_macro is compared against (dbt_utils.equality).
# The four min/max load-timestamp columns are forced to the timestamp type.
data_get_incremental_manifest_status_t_expected:
+column_types:
min_first_processed_load_tstamp: timestamp
max_first_processed_load_tstamp: timestamp
min_last_processed_load_tstamp: timestamp
max_last_processed_load_tstamp: timestamp
# Column type overrides for the data_get_run_limits seed.
# NOTE(review): presumably the input/expected data for test_get_run_limits_macro; its CSV is not visible here, confirm the column names match.
data_get_run_limits:
+column_types:
min_last_success: timestamp
max_last_success: timestamp
start_date: date
lower_limit: timestamp
upper_limit: timestamp
# Seed read row by row by the test_get_run_limits_t_macro model.
# The type overrides must use the seed's own column names: its timestamp inputs are
# min/max_last_processed_load_tstamp (not min/max_last_success as in data_get_run_limits).
data_get_run_limits_t:
  +column_types:
    min_last_processed_load_tstamp: timestamp
    max_last_processed_load_tstamp: timestamp
    start_date: date
    lower_limit: timestamp
    upper_limit: timestamp
data_update_incremental_manifest_table:
+column_types:
is_in_manifest: boolean
Expand Down
10 changes: 0 additions & 10 deletions integration_tests/models/incremental_hooks/incremental_hooks.yml
Original file line number Diff line number Diff line change
@@ -1,16 +1,6 @@
version: 2

models:
- name: test_get_incremental_manifest_status
tests:
- dbt_utils.equality:
compare_model: ref('data_get_incremental_manifest_status_expected')
- name: test_get_run_limits
tests:
- dbt_utils.expression_is_true:
expression: "expected_lower_limit = actual_lower_limit"
- dbt_utils.expression_is_true:
expression: "expected_upper_limit = actual_upper_limit"
- name: test_update_incremental_manifest_table
tests:
- dbt_utils.equality:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Schema tests for the test_get_incremental_manifest_status_macro model.
version: 2

models:
- name: test_get_incremental_manifest_status_macro
tests:
# The model output must equal the expected seed.
- dbt_utils.equality:
compare_model: ref('data_get_incremental_manifest_status_expected')
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Schema tests for the test_get_incremental_manifest_status_t_macro model.
version: 2

models:
- name: test_get_incremental_manifest_status_t_macro
tests:
# The model output must equal the expected seed.
- dbt_utils.equality:
compare_model: ref('data_get_incremental_manifest_status_t_expected')
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{#
Copyright (c) 2021-present Snowplow Analytics Ltd. All rights reserved.
This program is licensed to you under the Snowplow Personal and Academic License Version 1.0,
and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0.
You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/
#}

{# Unit test model for snowplow_utils.get_incremental_manifest_status_t.
The macro is called twice against the data_get_incremental_manifest_status_t seed, each time with a different
models-in-run list, and one row per test case is emitted from the returned array. #}

{# First call: models a, b, c. Second call: models b, d, e.
NOTE(review): presumably the first list is fully present in the seed and the second only partially - confirm against the seed file. #}
{%- set all_models = snowplow_utils.get_incremental_manifest_status_t(ref('data_get_incremental_manifest_status_t'), ['a','b','c']) -%}
{%- set partial_models = snowplow_utils.get_incremental_manifest_status_t(ref('data_get_incremental_manifest_status_t'), ['b','d','e']) -%}

{# Array positions 0-3 are cast to timestamps; positions 4-6 are rendered as returned. #}
with prep as (
select
'all model_in_run exist in manifest' as test_case,
{{ snowplow_utils.cast_to_tstamp(all_models[0]) }} as min_first_processed_load_tstamp,
{{ snowplow_utils.cast_to_tstamp(all_models[1]) }} as max_first_processed_load_tstamp,
{{ snowplow_utils.cast_to_tstamp(all_models[2]) }} as min_last_processed_load_tstamp,
{{ snowplow_utils.cast_to_tstamp(all_models[3]) }} as max_last_processed_load_tstamp,
{{all_models[4]}} as models_matched_from_manifest,
{{all_models[5]}} as sync_count,
{{all_models[6]}} as has_matched_all_models

union all

{# NOTE(review): the two first_processed columns below are taken from all_models, while every other column
of this branch is taken from partial_models. This looks like a copy-paste slip; the expected seed must be
updated together with any change here - confirm the intended values before editing. #}
select
'some model_in_run exist in manifest' as test_case,
{{ snowplow_utils.cast_to_tstamp(all_models[0]) }} as min_first_processed_load_tstamp,
{{ snowplow_utils.cast_to_tstamp(all_models[1]) }} as max_first_processed_load_tstamp,
{{ snowplow_utils.cast_to_tstamp(partial_models[2]) }} as min_last_processed_load_tstamp,
{{ snowplow_utils.cast_to_tstamp(partial_models[3]) }} as max_last_processed_load_tstamp,
{{partial_models[4]}} as models_matched_from_manifest,
{{partial_models[5]}} as sync_count,
{{partial_models[6]}} as has_matched_all_models

)

{# Final projection: same columns, with has_matched_all_models cast to the adapter's boolean type. #}
select
test_case,
min_first_processed_load_tstamp,
max_first_processed_load_tstamp,
min_last_processed_load_tstamp,
max_last_processed_load_tstamp,
models_matched_from_manifest,
sync_count,
cast(has_matched_all_models as {{ dbt.type_boolean() }}) as has_matched_all_models

from prep
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Schema tests for the test_get_run_limits_macro model.
version: 2

models:
- name: test_get_run_limits_macro
tests:
# Each row must have matching expected and actual limits, checked separately for the lower and upper bound.
- dbt_utils.expression_is_true:
expression: "expected_lower_limit = actual_lower_limit"
- dbt_utils.expression_is_true:
expression: "expected_upper_limit = actual_upper_limit"
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Schema tests for the test_get_run_limits_t_macro model.
version: 2

models:
- name: test_get_run_limits_t_macro
tests:
# Each row must have matching expected and actual limits, checked separately for the lower and upper bound.
- dbt_utils.expression_is_true:
expression: "expected_lower_limit = actual_lower_limit"
- dbt_utils.expression_is_true:
expression: "expected_upper_limit = actual_upper_limit"
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
{#
Copyright (c) 2021-present Snowplow Analytics Ltd. All rights reserved.
This program is licensed to you under the Snowplow Personal and Academic License Version 1.0,
and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0.
You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/
#}

{# Unit test model for snowplow_utils.get_run_limits_t.
For every row of the data_get_run_limits_t seed, the query returned by the macro is executed and one output row is
emitted holding the expected limits (from the seed) next to the actual limits (from the query result). #}

{%- set data_query -%}
select * from {{ ref('data_get_run_limits_t') }}
{%- endset -%}

{# fetch test data set as dict. dict form {column_name: (tuple_of_results)} #}
{%- set raw_test_data = dbt_utils.get_query_results_as_dict(data_query) -%}

{# Snowflake returns keys as uppercase. Iterate and set to lowercase #}
{% set test_data = {} %}
{% for key, value in raw_test_data.items() %}
{% do test_data.update({key.lower(): value}) %}
{% endfor %}

{# one iteration per seed row; the row count is taken from the length of the first input column #}
{% for i in range(test_data.min_last_processed_load_tstamp|length) %}

{# iteratively pass each row of test data into get_run_limits_t() and execute returned query
min_first_processed_load_tstamp and max_first_processed_load_tstamp are not yet used, placeholder in place #}
{%- set results = run_query(snowplow_utils.get_run_limits_t('9999-01-01 00:00:00',
'9999-01-01 00:00:00',
test_data.min_last_processed_load_tstamp[i],
test_data.max_last_processed_load_tstamp[i],
test_data.models_matched_from_manifest[i],
test_data.sync_count[i],
test_data.has_matched_all_models[i],
test_data.start_date[i])) -%}

{# expected limits taken from test data #}
{%- set expected_lower_limit = test_data.lower_limit[i] -%}
{%- set expected_upper_limit = test_data.upper_limit[i] -%}

{# actual limits taken from get_run_limits_t() results: first column is the lower limit, second the upper limit.
Results are only read when execute is true; otherwise both limits are set to none #}
{%- if execute -%}
{%- set actual_lower_limit = results.columns[0].values()[0] -%}
{%- set actual_upper_limit = results.columns[1].values()[0] -%}
{%- else -%}
{%- set actual_lower_limit = none -%}
{%- set actual_upper_limit = none -%}
{%- endif -%}

{# union expected vs. actual for each test case #}
select
{{snowplow_utils.cast_to_tstamp(expected_lower_limit)}} as expected_lower_limit,
{{snowplow_utils.cast_to_tstamp(expected_upper_limit)}} as expected_upper_limit,
{{snowplow_utils.cast_to_tstamp(actual_lower_limit)}} as actual_lower_limit,
{{snowplow_utils.cast_to_tstamp(actual_upper_limit)}} as actual_upper_limit
{% if not loop.last %} union all {% endif %}

{% endfor %}

0 comments on commit b46ed1d

Please sign in to comment.