From 8867bdfc0c045631044b4d80907483bbe80409d5 Mon Sep 17 00:00:00 2001 From: Andy Lu Date: Thu, 23 May 2019 20:51:53 +0000 Subject: [PATCH 1/8] Add selected-by-default in discovery, convert selected-by-default to selected in sync --- tap_heap/__init__.py | 14 +++++++++++++- tap_heap/discover.py | 15 +++++++++++---- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/tap_heap/__init__.py b/tap_heap/__init__.py index c338534..c7277b2 100644 --- a/tap_heap/__init__.py +++ b/tap_heap/__init__.py @@ -17,7 +17,7 @@ def do_discover(config): LOGGER.info("Starting discover") - streams = discover_streams(config['bucket']) + streams = discover_streams(config['bucket'], config.get('selected-by-default', False)) if not streams: raise Exception("No streams found") catalog = {"streams": streams} @@ -31,12 +31,24 @@ def stream_is_selected(mdata): return mdata.get((), {}).get('selected', False) +def convert_selected_by_default_metadata(catalog): + for stream in catalog.streams: + for md in stream.metadata: + is_selected = md.get('metadata',{}).get('selected') + is_selected_by_default = md.get('metadata',{}).get('selected-by-default', False) + if is_selected_by_default and is_selected is None: + md['metadata']['selected'] = True + + def do_sync(config, catalog, state): LOGGER.info('Starting sync.') bucket = config['bucket'] merged_manifests = manifest.generate_merged_manifests(bucket) + # Convert all selected-by-default metadata into selected: True + convert_selected_by_default_metadata(catalog) + for stream in catalog['streams']: stream_name = stream['tap_stream_id'] mdata = metadata.to_map(stream['metadata']) diff --git a/tap_heap/discover.py b/tap_heap/discover.py index 7b711b8..6374b3b 100644 --- a/tap_heap/discover.py +++ b/tap_heap/discover.py @@ -2,14 +2,16 @@ from tap_heap import manifest from tap_heap.schema import generate_fake_schema -def discover_streams(bucket): +def discover_streams(bucket, selected-by-default=False): streams = [] merged_manifests = 
manifest.generate_merged_manifests(bucket) for table_name, manifest_table in merged_manifests.items(): schema = generate_fake_schema(manifest_table) - streams.append({'stream': table_name, 'tap_stream_id': table_name, - 'schema': schema, 'metadata': load_metadata(table_name, schema)}) + streams.append({'stream': table_name, + 'tap_stream_id': table_name, + 'schema': schema, + 'metadata': load_metadata(table_name, schema, selected-by-default)}) return streams @@ -23,7 +25,7 @@ def get_key_properties(table_name): return ['event_id'] -def load_metadata(table_name, schema): +def load_metadata(table_name, schema, selected-by-default=False): mdata = metadata.new() key_properties = get_key_properties(table_name) @@ -34,5 +36,10 @@ def load_metadata(table_name, schema): mdata = metadata.write(mdata, ('properties', field_name), 'inclusion', 'automatic') else: mdata = metadata.write(mdata, ('properties', field_name), 'inclusion', 'available') + if selected-by-default: + mdata = metadata.write(mdata, + ('properties', field_name), + 'selected-by-default', + 'true') return metadata.to_list(mdata) From 4a2c7ed72882e04ae24aa79c7e80e9139fa5336e Mon Sep 17 00:00:00 2001 From: Andy Lu Date: Fri, 24 May 2019 13:18:34 +0000 Subject: [PATCH 2/8] Update pylint disables: fixed no-else-return --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f45d373..1ba5351 100644 --- a/Makefile +++ b/Makefile @@ -7,5 +7,5 @@ check_prereqs: bash -c 'dpkg -l libsnappy-dev >/dev/null 2>&1' test: check_prereqs - pylint tap_heap --disable too-few-public-methods,missing-docstring,protected-access,no-else-return + pylint tap_heap --disable too-few-public-methods,missing-docstring,protected-access python -m unittest discover From 3953cd08eb1c8817aa997c22c303d5f5766c0ca0 Mon Sep 17 00:00:00 2001 From: Andy Lu Date: Fri, 24 May 2019 13:19:49 +0000 Subject: [PATCH 3/8] Pylint doesn't like two char variable names --- tap_heap/__init__.py | 8 ++++---- 1 file 
changed, 4 insertions(+), 4 deletions(-) diff --git a/tap_heap/__init__.py b/tap_heap/__init__.py index c7277b2..cac986f 100644 --- a/tap_heap/__init__.py +++ b/tap_heap/__init__.py @@ -33,11 +33,11 @@ def stream_is_selected(mdata): def convert_selected_by_default_metadata(catalog): for stream in catalog.streams: - for md in stream.metadata: - is_selected = md.get('metadata',{}).get('selected') - is_selected_by_default = md.get('metadata',{}).get('selected-by-default', False) + for med in stream.metadata: + is_selected = med.get('metadata', {}).get('selected') + is_selected_by_default = med.get('metadata', {}).get('selected-by-default', False) if is_selected_by_default and is_selected is None: - md['metadata']['selected'] = True + med['metadata']['selected'] = True def do_sync(config, catalog, state): From f176ff2ef6e72c2a9b71632ed18a93997e4937be Mon Sep 17 00:00:00 2001 From: Andy Lu Date: Fri, 24 May 2019 13:21:01 +0000 Subject: [PATCH 4/8] Hyphens are not valid in variable names --- tap_heap/discover.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tap_heap/discover.py b/tap_heap/discover.py index 6374b3b..883a9ec 100644 --- a/tap_heap/discover.py +++ b/tap_heap/discover.py @@ -2,7 +2,7 @@ from tap_heap import manifest from tap_heap.schema import generate_fake_schema -def discover_streams(bucket, selected-by-default=False): +def discover_streams(bucket, selected_by_default=False): streams = [] merged_manifests = manifest.generate_merged_manifests(bucket) @@ -11,7 +11,7 @@ def discover_streams(bucket, selected-by-default=False): streams.append({'stream': table_name, 'tap_stream_id': table_name, 'schema': schema, - 'metadata': load_metadata(table_name, schema, selected-by-default)}) + 'metadata': load_metadata(table_name, schema, selected_by_default)}) return streams @@ -25,7 +25,7 @@ def get_key_properties(table_name): return ['event_id'] -def load_metadata(table_name, schema, selected-by-default=False): +def load_metadata(table_name, 
schema, selected_by_default=False): mdata = metadata.new() key_properties = get_key_properties(table_name) @@ -36,7 +36,7 @@ def load_metadata(table_name, schema, selected-by-default=False): mdata = metadata.write(mdata, ('properties', field_name), 'inclusion', 'automatic') else: mdata = metadata.write(mdata, ('properties', field_name), 'inclusion', 'available') - if selected-by-default: + if selected_by_default: mdata = metadata.write(mdata, ('properties', field_name), 'selected-by-default', From 44ec2890404552db7f8fa080781034fb59f68270 Mon Sep 17 00:00:00 2001 From: Andy Lu Date: Fri, 24 May 2019 13:22:12 +0000 Subject: [PATCH 5/8] Pylint: elif and else after a return is not needed --- tap_heap/discover.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tap_heap/discover.py b/tap_heap/discover.py index 883a9ec..7f4028b 100644 --- a/tap_heap/discover.py +++ b/tap_heap/discover.py @@ -19,10 +19,9 @@ def discover_streams(bucket, selected_by_default=False): def get_key_properties(table_name): if table_name == 'user_migrations': return ['from_user_id'] - elif table_name == 'users': + if table_name == 'users': return ['user_id'] - else: - return ['event_id'] + return ['event_id'] def load_metadata(table_name, schema, selected_by_default=False): From 1b9b153a29acb6a004da4158aed3cd1e0dc61f5a Mon Sep 17 00:00:00 2001 From: Andy Lu Date: Fri, 24 May 2019 18:44:53 +0000 Subject: [PATCH 6/8] Change dot accessor to proper dictionary accessors --- tap_heap/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tap_heap/__init__.py b/tap_heap/__init__.py index cac986f..5ca4ef7 100644 --- a/tap_heap/__init__.py +++ b/tap_heap/__init__.py @@ -32,8 +32,8 @@ def stream_is_selected(mdata): def convert_selected_by_default_metadata(catalog): - for stream in catalog.streams: - for med in stream.metadata: + for stream in catalog['streams']: + for med in stream.get('metadata'): is_selected = med.get('metadata', {}).get('selected') 
is_selected_by_default = med.get('metadata', {}).get('selected-by-default', False) if is_selected_by_default and is_selected is None: From 670bbb6d89df3f3757ea84ee304dde84da51d2a2 Mon Sep 17 00:00:00 2001 From: Andy Lu Date: Wed, 29 May 2019 20:12:36 +0000 Subject: [PATCH 7/8] Add selected by default to stream, change kebab-case for consistency, swap metadata value for clarity --- tap_heap/__init__.py | 3 ++- tap_heap/discover.py | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tap_heap/__init__.py b/tap_heap/__init__.py index 5ca4ef7..daee4d8 100644 --- a/tap_heap/__init__.py +++ b/tap_heap/__init__.py @@ -17,7 +17,8 @@ def do_discover(config): LOGGER.info("Starting discover") - streams = discover_streams(config['bucket'], config.get('selected-by-default', False)) + + streams = discover_streams(config['bucket'], config.get('selected_by_default', False)) if not streams: raise Exception("No streams found") catalog = {"streams": streams} diff --git a/tap_heap/discover.py b/tap_heap/discover.py index 7f4028b..8c2a8d2 100644 --- a/tap_heap/discover.py +++ b/tap_heap/discover.py @@ -29,6 +29,8 @@ def load_metadata(table_name, schema, selected_by_default=False): key_properties = get_key_properties(table_name) mdata = metadata.write(mdata, (), 'table-key-properties', key_properties) + if selected_by_default: + mdata = metadata.write(mdata, (), 'selected-by-default', True) for field_name in schema.get('properties', {}).keys(): if field_name in key_properties: @@ -39,6 +41,6 @@ def load_metadata(table_name, schema, selected_by_default=False): mdata = metadata.write(mdata, ('properties', field_name), 'selected-by-default', - 'true') + True) return metadata.to_list(mdata) From 7cdb08bbbbb791b8f9f7f5b89dc1cd3fbd0679b6 Mon Sep 17 00:00:00 2001 From: Andy Lu Date: Wed, 29 May 2019 20:13:51 +0000 Subject: [PATCH 8/8] Bump singer-python version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index
1558790..f1b3ada 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ install_requires=[ 'boto3==1.9.57', 'singer-encodings==0.0.3', - 'singer-python==5.1.5', + 'singer-python==5.5.1', 'python-snappy==0.5.3', 'fastavro==0.21.8' ],