From fda478ecea67ea648d66b585b888811e98b9006b Mon Sep 17 00:00:00 2001 From: Hannes Schmidt Date: Thu, 17 Oct 2019 21:28:34 -0700 Subject: [PATCH 1/4] Prepare beta 22 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 4a8a74c..7e410a4 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name="hca-metadata-api", - version="1.0b21.dev1", + version="1.0b22.dev1", license='MIT', install_requires=[ 'dataclasses >= 0.6' From 44c72656075eb1bcda6876b3724e91033a0e26a7 Mon Sep 17 00:00:00 2001 From: Hannes Schmidt Date: Mon, 14 Oct 2019 22:25:52 -0700 Subject: [PATCH 2/4] [1/2] Enforce pep8 and formatting rules Add build infrastructure. --- .pycharm.style.xml | 37 +++++++++++++++++++++++++++++++++++++ .travis.yml | 11 +++++++++++ Makefile | 20 ++++++++++++++++++++ 3 files changed, 68 insertions(+) create mode 100644 .pycharm.style.xml diff --git a/.pycharm.style.xml b/.pycharm.style.xml new file mode 100644 index 0000000..3779db1 --- /dev/null +++ b/.pycharm.style.xml @@ -0,0 +1,37 @@ + + \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index e17fcb8..38c3ebd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,9 @@ language: python cache: pip +services: + - docker + python: - 3.6 @@ -9,6 +12,14 @@ install: - make travis_install script: + # Hack: The use of chrgp compensates for a quirk of Docker. The PyCharm image + # used by make format sets up a user called `developer` and assigns it UID + # 1000. Travis is running as UID 2000. An alternative would be to pass --user + # to `docker run` and bind-mount an /etc/passwd that maps that to `developer`. + # Currently I'm unclear why `make format` works unchanged on Docker + - sudo chgrp -R 1000 . && make format && sudo chgrp -R $(id -g) . + - make check_clean + - make pep8 - make test after_success: diff --git a/Makefile b/Makefile index 6802922..11b28cc 100644 --- a/Makefile +++ b/Makefile @@ -8,6 +8,9 @@ ifneq ($(shell python -c "import sys; print(sys.version_info >= (3,6))"),True) $(error Looks like Python 3.6 is not installed or active in the current virtualenv) endif +install_flake8: + pip install -U flake8==3.7.8 + install: pip install -e .[dss,test,coverage,examples] @@ -18,5 +21,22 @@ travis_install: test: install coverage run -m unittest discover -vs test +sources = src test + +pep8: install_flake8 + flake8 --max-line-length=120 $(sources) + +format: + docker run \ + --rm \ + --volume $(CURDIR):/home/developer/metadata-api \ + --workdir /home/developer/metadata-api rycus86/pycharm:2019.2.3 \ + /opt/pycharm/bin/format.sh -r -settings .pycharm.style.xml -mask '*.py' $(sources) + +check_clean: + git diff --exit-code && git diff --cached --exit-code + examples: install jupyter-notebook + +.PHONY: install_flake8 install travis_install test pep8 format check_clean examples From e2ae3ad2928f214dfa5e3587b265a5ed9143e87c Mon Sep 17 00:00:00 2001 From: Hannes Schmidt Date: Mon, 14 Oct 2019 22:26:30 -0700 Subject: [PATCH 3/4] [2/2] Enforce pep8 and formatting rules Fix formatting violations. --- src/humancellatlas/data/metadata/api.py | 29 +++++++++-- .../data/metadata/helpers/json.py | 6 ++- src/humancellatlas/data/metadata/lookup.py | 6 ++- .../data/metadata/helpers/schema_examples.py | 2 +- test/test.py | 52 ++++++++++++------- 5 files changed, 68 insertions(+), 27 deletions(-) diff --git a/src/humancellatlas/data/metadata/api.py b/src/humancellatlas/data/metadata/api.py index 7d3fb8c..45e2417 100644 --- a/src/humancellatlas/data/metadata/api.py +++ b/src/humancellatlas/data/metadata/api.py @@ -1,17 +1,37 @@ -from abc import ABC, abstractmethod +from abc import ( + ABC, + abstractmethod, +) from collections import defaultdict from itertools import chain -from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Set, Type, TypeVar, Union +from typing import ( + Any, + Iterable, + List, + Mapping, + MutableMapping, + Optional, + Set, + Type, + TypeVar, + Union, +) from uuid import UUID import warnings -from dataclasses import dataclass, field +from dataclasses import ( + dataclass, + field, +) from humancellatlas.data.metadata.age_range import AgeRange # A few helpful type aliases # -from humancellatlas.data.metadata.lookup import lookup, LookupDefault +from humancellatlas.data.metadata.lookup import ( + lookup, + LookupDefault, +) UUID4 = UUID AnyJSON2 = Union[str, int, float, bool, None, Mapping[str, Any], List[Any]] @@ -556,6 +576,7 @@ def __init__(self, json: JSON): content = json.get('content', json) self.target = [ImagingTarget.from_json(target) for target in content['target']] + @dataclass(init=False) class ImagingPreparationProtocol(Protocol): pass diff --git a/src/humancellatlas/data/metadata/helpers/json.py b/src/humancellatlas/data/metadata/helpers/json.py index d4c28f9..03f55f9 100644 --- a/src/humancellatlas/data/metadata/helpers/json.py +++ b/src/humancellatlas/data/metadata/helpers/json.py @@ -1,7 +1,11 @@ import copy from uuid import UUID -from dataclasses import field, fields, is_dataclass +from dataclasses import ( + field, + fields, + is_dataclass, +) from humancellatlas.data.metadata.api import Entity diff --git a/src/humancellatlas/data/metadata/lookup.py b/src/humancellatlas/data/metadata/lookup.py index 0f562af..19b0769 100644 --- a/src/humancellatlas/data/metadata/lookup.py +++ b/src/humancellatlas/data/metadata/lookup.py @@ -1,4 +1,8 @@ -from typing import TypeVar, Mapping, Union +from typing import ( + TypeVar, + Mapping, + Union, +) from enum import Enum K = TypeVar('K') diff --git a/test/humancellatlas/data/metadata/helpers/schema_examples.py b/test/humancellatlas/data/metadata/helpers/schema_examples.py index 9f0535e..69fd123 100644 --- a/test/humancellatlas/data/metadata/helpers/schema_examples.py +++ b/test/humancellatlas/data/metadata/helpers/schema_examples.py @@ -7,7 +7,7 @@ from dcplib.checksumming_io import ChecksummingSink -def download_example_bundle(repo, branch, path='/'): # pragma: no cover (because of canning) +def download_example_bundle(repo, branch, path='/'): # pragma: no cover (because of canning) manifest = [] metadata_files = {} if path.startswith('/') or not path.endswith('/'): diff --git a/test/test.py b/test/test.py index 4ebff0f..7e6e4db 100644 --- a/test/test.py +++ b/test/test.py @@ -1,4 +1,7 @@ -from concurrent.futures import ThreadPoolExecutor, wait +from concurrent.futures import ( + ThreadPoolExecutor, + wait, +) import doctest from itertools import chain from more_itertools import one @@ -6,29 +9,37 @@ import logging import os import re -from unittest import TestCase, skip +from unittest import ( + TestCase, + skip, +) from unittest.mock import Mock from uuid import UUID import warnings from atomicwrites import atomic_write -from humancellatlas.data.metadata.api import (AgeRange, - Biomaterial, - Bundle, - DonorOrganism, - Project, - SequenceFile, - SpecimenFromOrganism, - CellLine, - CellSuspension, - AnalysisProtocol, - ImagingProtocol, - LibraryPreparationProtocol, - SequencingProtocol, - SupplementaryFile, - ImagedSpecimen) -from humancellatlas.data.metadata.helpers.dss import download_bundle_metadata, dss_client +from humancellatlas.data.metadata.api import ( + AgeRange, + Biomaterial, + Bundle, + DonorOrganism, + Project, + SequenceFile, + SpecimenFromOrganism, + CellLine, + CellSuspension, + AnalysisProtocol, + ImagingProtocol, + LibraryPreparationProtocol, + SequencingProtocol, + SupplementaryFile, + ImagedSpecimen, +) +from humancellatlas.data.metadata.helpers.dss import ( + download_bundle_metadata, + dss_client, +) from humancellatlas.data.metadata.helpers.json import as_json from humancellatlas.data.metadata.helpers.schema_examples import download_example_bundle @@ -210,8 +221,8 @@ def test_bad_content_type(self): # noinspection PyTypeChecker download_bundle_metadata(client, 'aws', uuid) self.assertEqual(cm.exception.args[0], - f"Expecting file {file_uuid}.{file_version} " - "to have content type 'application/json', not 'bad'") + f"Expecting file {file_uuid}.{file_version} " + "to have content type 'application/json', not 'bad'") def test_v5_bundle(self): """ @@ -575,6 +586,7 @@ def test_cell_line(self): self.assertEqual(cell_lines[0].biomaterial_id, 'cell_line_at_day_54') self.assertEqual(cell_lines[0].has_input_biomaterial, None) self.assertEqual(cell_lines[0].type, 'stem cell-derived') + # noinspection PyDeprecation self.assertEqual(cell_lines[0].type, cell_lines[0].cell_line_type) self.assertEqual(cell_lines[0].model_organ, 'brain') From 8fb31281d3fdb546e3e989757d4f1704e62061dd Mon Sep 17 00:00:00 2001 From: Hannes Schmidt Date: Mon, 14 Oct 2019 22:54:50 -0700 Subject: [PATCH 4/4] Upgrade to dcp-cli (aka `hca`) version 6.4.0 (#91) --- setup.py | 2 +- .../data/metadata/helpers/dss.py | 17 ++++------ test/test.py | 33 +++++++++---------- 3 files changed, 23 insertions(+), 29 deletions(-) diff --git a/setup.py b/setup.py index 7e410a4..4164419 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ # Not using tests_require because that installs the test requirements into .eggs, not the virtualenv extras_require={ "dss": [ - 'hca == 5.2.0', + 'hca == 6.4.0', 'urllib3 >= 1.23', 'requests >= 2.19.1' ], diff --git a/src/humancellatlas/data/metadata/helpers/dss.py b/src/humancellatlas/data/metadata/helpers/dss.py index c5ac9d1..3eba5b6 100644 --- a/src/humancellatlas/data/metadata/helpers/dss.py +++ b/src/humancellatlas/data/metadata/helpers/dss.py @@ -73,19 +73,14 @@ def download_bundle_metadata(client: DSSClient, replica=replica, directurls=directurls, presignedurls=presignedurls) - url = None manifest = [] - while True: - # We can't use get_file.iterate because it only returns the `bundle.files` part of the response and swallows - # the `bundle.version`. See https://github.com/HumanCellAtlas/dcp-cli/issues/331 - # noinspection PyUnresolvedReferences,PyProtectedMember - response = client.get_bundle._request(kwargs, url=url) - bundle = response.json()['bundle'] + + bundle = None + # noinspection PyUnresolvedReferences + for page in client.get_bundle.paginate(**kwargs): + bundle = page['bundle'] manifest.extend(bundle['files']) - try: - url = response.links['next']['url'] - except KeyError: - break + assert bundle is not None metadata_files = {f['name']: f for f in manifest if f['indexed']} diff --git a/test/test.py b/test/test.py index 7e6e4db..cc1d9dd 100644 --- a/test/test.py +++ b/test/test.py @@ -180,24 +180,23 @@ def _load_bundle(self, uuid, version, replica='aws', deployment='prod'): return manifest, metadata_files def _mock_get_bundle(self, file_uuid, file_version, content_type): - response = Mock() - response.links = {} - response.json.return_value = { - 'bundle': { - 'version': '2018-09-20T232924.687620Z', - 'files': [ - { - 'name': 'name.json', - 'uuid': file_uuid, - 'version': file_version, - 'indexed': True, - 'content-type': content_type - } - ] - } - } client = Mock() - client.get_bundle._request.return_value = response + client.get_bundle.paginate.return_value = [ + { + 'bundle': { + 'version': '2018-09-20T232924.687620Z', + 'files': [ + { + 'name': 'name.json', + 'uuid': file_uuid, + 'version': file_version, + 'indexed': True, + 'content-type': content_type + } + ] + } + } + ] return client def test_bad_content(self):