-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathdbt_project.yml
160 lines (147 loc) · 6.69 KB
/
dbt_project.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# Package identity and the dbt versions it declares support for.
name: "snowplow_media_player"
version: "0.9.2"
config-version: 2
require-dbt-version:
  - ">=1.4.0"
  - "<2.0.0"

# Profile name used when this project is run on its own.
profile: "default"

# Macro dispatch for the `dbt` namespace: snowplow_utils is searched before dbt.
dispatch:
  - macro_namespace: dbt
    search_order:
      - "snowplow_utils"
      - "dbt"

# Project directory layout.
model-paths:
  - "models"
analysis-paths:
  - "analyses"
test-paths:
  - "tests"
seed-paths:
  - "seeds"
macro-paths:
  - "macros"
docs-paths:
  - "docs"
snapshot-paths:
  - "snapshots"
target-path: "target"

# Directories listed as clean targets.
clean-targets:
  - "target"
  - "dbt_packages"
# Default variable values, scoped to the snowplow_media_player package.
vars:
  snowplow_media_player:
    # ---- Warehouse and tracker ----
    snowplow__percent_progress_boundaries:
      - 10
      - 25
      - 50
      - 75
    snowplow__events: '{{ source("atomic", "events") }}'
    snowplow__dev_target_name: "dev"
    # Only set if not using the 'atomic' schema for Snowplow events data.
    # snowplow__atomic_schema: 'atomic'
    # Only set if not using target.database for Snowplow events data
    # -- WILL BE IGNORED FOR DATABRICKS.
    # snowplow__database:
    # Only set if not using the 'events' table for Snowplow events data.
    # snowplow__events_table: 'events'
    snowplow__grant_select_to: []
    snowplow__grant_schema_usage: true

    # ---- Operation and logic ----
    snowplow__complete_play_rate: 0.99
    snowplow__max_media_pv_window: 10
    snowplow__valid_play_sec: 30
    # Turns on legacy behavior.
    surrogate_key_treat_nulls_as_empty_strings: true
    snowplow__media_event_names:
      - "media_player_event"
    snowplow__start_date: "2020-01-01"
    snowplow__backfill_limit_days: 30
    snowplow__lookback_window_hours: 6
    snowplow__session_lookback_days: 730
    snowplow__days_late_allowed: 3
    snowplow__max_session_days: 3
    snowplow__upsert_lookback_days: 30
    snowplow__allow_refresh: false
    snowplow__app_id: []
    snowplow__session_timestamp: collector_tstamp
    # Default session and user values are defined by the macros in
    # identifiers.sql; refer to them before overriding the lists below.
    snowplow__session_identifiers: []
    # Takes priority over snowplow__session_identifiers when set.
    # snowplow__session_sql: 'sc.session_id'
    snowplow__user_identifiers: []
    # Takes priority over snowplow__user_identifiers when set.
    # snowplow__user_sql: 'sc.user_id'

    # ---- Contexts, filters, and logs ----
    # Set any of the three variables below to true if the related context
    # schema is enabled for your warehouse. Note that they cannot be used to
    # filter the data.
    # YouTube context schema.
    snowplow__enable_youtube: false
    # HTML5 media element context schema.
    snowplow__enable_whatwg_media: false
    # HTML5 video element context schema.
    snowplow__enable_whatwg_video: false
    snowplow__enable_media_player_v1: false
    snowplow__enable_media_player_v2: true
    snowplow__enable_media_session: true
    snowplow__enable_media_ad: false
    snowplow__enable_media_ad_break: false
    snowplow__enable_web_events: true
    snowplow__enable_mobile_events: false
    snowplow__enable_ad_quartile_event: false
    # Extra custom fields to add.
    snowplow__base_passthroughs: []
    snowplow__ad_views_passthroughs: []

    # ---- Warehouse specific ----
    snowplow__media_player_event_context: "com_snowplowanalytics_snowplow_media_player_event_1"
    snowplow__media_player_context: "com_snowplowanalytics_snowplow_media_player_1"
    snowplow__media_player_v2_context: "com_snowplowanalytics_snowplow_media_player_2"
    snowplow__media_session_context: "com_snowplowanalytics_snowplow_media_session_1"
    snowplow__media_ad_context: "com_snowplowanalytics_snowplow_media_ad_1"
    snowplow__media_ad_break_context: "com_snowplowanalytics_snowplow_media_ad_break_1"
    snowplow__media_ad_quartile_event: "com_snowplowanalytics_snowplow_media_ad_quartile_event_1"
    snowplow__youtube_context: "com_youtube_youtube_1"
    snowplow__html5_media_element_context: "org_whatwg_media_element_1"
    snowplow__html5_video_element_context: "org_whatwg_video_element_1"
    snowplow__context_web_page: "com_snowplowanalytics_snowplow_web_page_1"
    snowplow__context_screen: "com_snowplowanalytics_mobile_screen_1"
    snowplow__context_mobile_session: "com_snowplowanalytics_snowplow_client_session_1"
    snowplow__derived_tstamp_partitioned: true
    snowplow__query_tag: "snowplow_dbt"
    snowplow__enable_load_tstamp: true
    snowplow__entities_or_sdes: []

    # ---- Databricks only ----
    # Depending on the use case this should be either the catalog (for Unity
    # Catalog users from databricks connector 1.1.1 onwards) or the same value
    # as your snowplow__atomic_schema (unless changed, that is 'atomic').
    # snowplow__databricks_catalog: 'hive_metastore'
# Completely or partially remove models from the manifest during run start.
# The models to remove are taken from the `models_to_remove` variable, which
# defaults to an empty list.
on-run-start:
  - "{{ snowplow_utils.snowplow_delete_from_manifest(var('models_to_remove',[]), ref('snowplow_media_player_incremental_manifest')) }}"
  # Check inconsistencies within the variable setup.
  - '{{ snowplow_media_player.config_check() }}'

# Update manifest table with last event consumed per successfully executed node/model.
on-run-end:
  # The timestamp argument is read from `snowplow__session_timestamp`, the
  # variable declared in this project's vars. Falls back to "collector_tstamp"
  # if the variable is not set.
  - '{{ snowplow_utils.snowplow_incremental_post_hook("snowplow_media_player", "snowplow_media_player_incremental_manifest", "snowplow_media_player_base_events_this_run", var("snowplow__session_timestamp", "collector_tstamp")) }}'
  # The argument is read from `snowplow__grant_schema_usage`, the variable
  # declared in this project's vars, so setting it to false is passed through
  # to the macro.
  - "{{ snowplow_utils.grant_usage_on_schemas_built_into(var('snowplow__grant_schema_usage', true)) }}"
# Model configuration for the snowplow_media_player package.
# NOTE(review): leading indentation is missing in this view, so the nesting of
# the keys below (in particular whether the per-warehouse keys belong under
# `base` or under `base` -> `scratch`) must be confirmed against the models/
# directory layout before re-indenting.
models:
snowplow_media_player:
# Package-wide settings.
+bind: false
+materialized: table
# Evaluates to 'merge' when target.type is 'spark', otherwise to none.
+incremental_strategy: "{{ none if target.type not in ['spark'] else 'merge' }}"
# Evaluates to 'iceberg' when target.type is 'spark', otherwise to 'delta'.
+file_format: "{{ 'delta' if target.type not in ['spark'] else 'iceberg'}}"
base:
manifest:
+schema: "snowplow_manifest"
scratch:
+schema: 'scratch'
+tags: 'scratch'
# Per-warehouse keys: each `+enabled` value is a Jinja expression that
# compares target.type with the warehouse name(s) shown.
bigquery:
+enabled: '{{ target.type == "bigquery" | as_bool() }}'
databricks:
+enabled: '{{ target.type == "databricks" | as_bool() }}'
# `default` is tied to the redshift and postgres target types.
default:
+enabled: '{{ target.type in ["redshift", "postgres"] | as_bool() }}'
snowflake:
+enabled: '{{ target.type == "snowflake" | as_bool() }}'
spark:
+enabled: '{{ target.type == "spark" | as_bool() }}'
# Derived models: 'derived' schema, tagged 'snowplow_media_player_incremental'.
media_base:
+schema: 'derived'
+tags: 'snowplow_media_player_incremental'
# presumably a sub-key of media_base — confirm
scratch:
+schema: 'scratch'
+tags: 'scratch'
media_plays:
+schema: 'derived'
+tags: 'snowplow_media_player_incremental'
media_stats:
+schema: 'derived'
+tags: 'snowplow_media_player_incremental'
# Configured with the 'scratch' schema and set to disabled here.
custom:
+schema: 'scratch'
+tags: 'snowplow_media_player_incremental'
+enabled: false
media_ad_views:
+schema: 'derived'
+tags: 'snowplow_media_player_incremental'
# presumably a sub-key of media_ad_views — confirm
scratch:
+schema: 'scratch'
+tags: 'scratch'
media_ads:
+schema: 'derived'
+tags: 'snowplow_media_player_incremental'