From 0f5876b4012fb53c8bd90580d6105c9f0f38772e Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 2 Mar 2022 10:26:34 +0100 Subject: [PATCH 1/9] update parent field --- README.md | 4 ++ bioimageio/spec/model/v0_4/converters.py | 22 ++++++- bioimageio/spec/model/v0_4/raw_nodes.py | 2 +- bioimageio/spec/model/v0_4/schema.py | 8 +-- bioimageio/spec/shared/_resolve_source.py | 74 ++++++++++++----------- bioimageio/spec/shared/fields.py | 54 ++++++++++++++--- 6 files changed, 111 insertions(+), 53 deletions(-) diff --git a/README.md b/README.md index 9ab083d4e..3c18129a1 100644 --- a/README.md +++ b/README.md @@ -153,6 +153,10 @@ bioimageio update-format ### RDF Format Versions +#### model RDF 0.4.5 +- Breaking changes that are fully auto-convertible + - `parent` field changed to hold a string that is a BioImage.IO ID, a URL or a local relative path (and not subfields `uri` and `sha256`) + #### model RDF 0.4.4 - Non-breaking changes - new optional field `training_data` diff --git a/bioimageio/spec/model/v0_4/converters.py b/bioimageio/spec/model/v0_4/converters.py index 0ac85dfb7..ee671e1bc 100644 --- a/bioimageio/spec/model/v0_4/converters.py +++ b/bioimageio/spec/model/v0_4/converters.py @@ -38,7 +38,7 @@ def convert_model_from_v0_3_to_0_4_0(data: Dict[str, Any]) -> Dict[str, Any]: return data -def convert_model_from_v0_4_0_to_0_4_4(data: Dict[str, Any]) -> Dict[str, Any]: +def convert_model_from_v0_4_0_to_0_4_1(data: Dict[str, Any]) -> Dict[str, Any]: data = dict(data) # move dependencies from root to pytorch_state_dict weights entry @@ -49,7 +49,17 @@ def convert_model_from_v0_4_0_to_0_4_4(data: Dict[str, Any]) -> Dict[str, Any]: if entry and isinstance(entry, dict): entry["dependencies"] = deps - data["format_version"] = "0.4.4" + data["format_version"] = "0.4.1" + return data + + +def convert_model_from_v0_4_4_to_0_4_5(data: Dict[str, Any]) -> Dict[str, Any]: + data = dict(data) + + parent = data.pop("parent", None) + if parent and "uri" in parent: + data["parent"] = parent["uri"] + return data @@ -60,7 +70,13 @@ def maybe_convert(data: Dict[str, Any]) -> Dict[str, Any]: data = convert_model_from_v0_3_to_0_4_0(data) if data["format_version"] == "0.4.0": - data = convert_model_from_v0_4_0_to_0_4_4(data) + data = convert_model_from_v0_4_0_to_0_4_1(data) + + if data["format_version"] in ("0.4.1", "0.4.2", "0.4.3"): + data["format_version"] = "0.4.4" + + if data["format_version"] == "0.4.4": + data = convert_model_from_v0_4_4_to_0_4_5(data) # remove 'future' from config if no other than the used future entries exist config = data.get("config", {}) diff --git a/bioimageio/spec/model/v0_4/raw_nodes.py b/bioimageio/spec/model/v0_4/raw_nodes.py index 06ae14887..67d3ca8a4 100644 --- a/bioimageio/spec/model/v0_4/raw_nodes.py +++ b/bioimageio/spec/model/v0_4/raw_nodes.py @@ -48,7 +48,7 @@ PreprocessingName = PreprocessingName FormatVersion = Literal[ - "0.4.0", "0.4.1", "0.4.2", "0.4.3", "0.4.4" + "0.4.0", "0.4.1", "0.4.2", "0.4.3", "0.4.4", "0.4.5" ] # newest format needs to be last (used in __init__.py) WeightsFormat = Literal[ "pytorch_state_dict", "torchscript", "keras_hdf5", "tensorflow_js", "tensorflow_saved_model_bundle", "onnx" diff --git a/bioimageio/spec/model/v0_4/schema.py b/bioimageio/spec/model/v0_4/schema.py index e4c23c661..5440a2ea8 100644 --- a/bioimageio/spec/model/v0_4/schema.py +++ b/bioimageio/spec/model/v0_4/schema.py @@ -479,11 +479,9 @@ def get_min_shape(t) -> numpy.ndarray: f"different from `authors` in root or any entry in `weights`.", ) - parent = fields.Nested( - ModelParent(), - bioimageio_description="Parent model from which the trained weights of this model have been derived, e.g. by " - "finetuning the weights of this model on a different dataset. For format changes of the same trained model " - "checkpoint, see `weights`.", + parent = fields.Union( + [fields.BioImageIO_ID(resource_type="model"), fields.URI(), fields.RelativeLocalPath()], + bioimageio_description="BioImage.IO model id or URL or local relative path to a model RDF", ) run_mode = fields.Nested( diff --git a/bioimageio/spec/shared/_resolve_source.py b/bioimageio/spec/shared/_resolve_source.py index e785885d2..1df679a4c 100644 --- a/bioimageio/spec/shared/_resolve_source.py +++ b/bioimageio/spec/shared/_resolve_source.py @@ -33,6 +33,43 @@ def _is_path(s: typing.Any) -> bool: return False +T = typing.TypeVar("T") + + +def _resolve_json_from_url( + url: str, + expected_type: typing.Union[typing.Type[dict], typing.Type[T]] = dict, + warning_msg: str = "Failed to fetch {url}: {error}", +) -> typing.Tuple[typing.Optional[T], typing.Optional[str]]: + try: + p = resolve_source(url) + with p.open() as f: + data = json.load(f) + + assert isinstance(data, expected_type) + except Exception as e: + data = None + error: typing.Optional[str] = str(e) + if warning_msg: + warnings.warn(warning_msg.format(url=url, error=error)) + else: + error = None + + return data, error + + +BIOIMAGEIO_SITE_CONFIG, BIOIMAGEIO_SITE_CONFIG_ERROR = _resolve_json_from_url(BIOIMAGEIO_SITE_CONFIG_URL) +BIOIMAGEIO_COLLECTION, BIOIMAGEIO_COLLECTION_ERROR = _resolve_json_from_url(BIOIMAGEIO_COLLECTION_URL) +if BIOIMAGEIO_COLLECTION is None: + BIOIMAGEIO_COLLECTION_ENTRIES = None +else: + BIOIMAGEIO_COLLECTION_ENTRIES = { + c["id"]: c["rdf_source"] + for i, c in enumerate(BIOIMAGEIO_COLLECTION.get("collection", [])) + if "id" in c and "rdf_source" in c + } + + def resolve_rdf_source( source: typing.Union[dict, os.PathLike, typing.IO, str, bytes, raw_nodes.URI] ) -> typing.Tuple[dict, str, typing.Union[pathlib.Path, raw_nodes.URI, bytes]]: @@ -67,14 +104,12 @@ def resolve_rdf_source( if isinstance(source, str): # source might be bioimageio id, doi, url or file path -> resolve to pathlib.Path + from ._remote_constants import BIOIMAGEIO_COLLECTION + if BIOIMAGEIO_COLLECTION is None: bioimageio_rdf_source = None else: - bioimageio_collection = { - c.get("id", f"missind_id_{i}"): c.get("rdf_source") - for i, c in enumerate(BIOIMAGEIO_COLLECTION.get("collection", [])) - } - bioimageio_rdf_source = bioimageio_collection.get(source) or bioimageio_collection.get(source + "/latest") + bioimageio_rdf_source = BIOIMAGEIO_COLLECTION.get(source) or BIOIMAGEIO_COLLECTION.get(source + "/latest") if bioimageio_rdf_source is not None: # source is bioimageio id @@ -383,32 +418,3 @@ def _download_url(uri: raw_nodes.URI, output: typing.Optional[os.PathLike] = Non raise RuntimeError(f"Failed to download {uri} ({e})") return local_path - - -T = typing.TypeVar("T") - - -def _resolve_json_from_url( - url: str, - expected_type: typing.Union[typing.Type[dict], typing.Type[T]] = dict, - warning_msg: str = "Failed to fetch {url}: {error}", -) -> typing.Tuple[typing.Optional[T], typing.Optional[str]]: - try: - p = resolve_source(url) - with p.open() as f: - data = json.load(f) - - assert isinstance(data, expected_type) - except Exception as e: - data = None - error: typing.Optional[str] = str(e) - if warning_msg: - warnings.warn(warning_msg.format(url=url, error=error)) - else: - error = None - - return data, error - - -BIOIMAGEIO_SITE_CONFIG, BIOIMAGEIO_SITE_CONFIG_ERROR = _resolve_json_from_url(BIOIMAGEIO_SITE_CONFIG_URL) -BIOIMAGEIO_COLLECTION, BIOIMAGEIO_COLLECTION_ERROR = _resolve_json_from_url(BIOIMAGEIO_COLLECTION_URL) diff --git a/bioimageio/spec/shared/fields.py b/bioimageio/spec/shared/fields.py index 4c1be48c1..1dba5f4c0 100644 --- a/bioimageio/spec/shared/fields.py +++ b/bioimageio/spec/shared/fields.py @@ -48,11 +48,6 @@ def __init__( super().__init__(*super_args, **super_kwargs) # type: ignore -################################################# -# fields directly derived from marshmallow fields -################################################# - - class Array(DocumentedField, marshmallow_fields.Field): def __init__(self, inner: marshmallow_fields.Field, **kwargs): self.inner = inner @@ -266,11 +261,6 @@ def _deserialize(self, value, attr=None, data=None, **kwargs): raise ValidationError(message=messages, field_name=attr) from e -######################### -# more specialized fields -######################### - - class Axes(String): def _deserialize(self, *args, **kwargs) -> str: axes_str = super()._deserialize(*args, **kwargs) @@ -436,6 +426,50 @@ def _serialize(self, value, attr, obj, **kwargs) -> typing.Optional[str]: return super()._serialize(value, attr, obj, **kwargs) +class BioImageIO_ID(String): + def __init__( + self, + *super_args, + bioimageio_description: typing.Union[ + str, typing.Callable[[], str] + ] = "ID as shown on resource card on bioimage.io", + resource_type: typing.Optional[str] = None, + validate: typing.Optional[ + typing.Union[ + typing.Callable[[typing.Any], typing.Any], typing.Iterable[typing.Callable[[typing.Any], typing.Any]] + ] + ] = None, + **super_kwargs, + ): + from ._resolve_source import BIOIMAGEIO_COLLECTION_ENTRIES + + if validate is None: + validate = [] + + if isinstance(validate, typing.Iterable): + validate = list(validate) + else: + validate = [validate] + + if BIOIMAGEIO_COLLECTION_ENTRIES is not None: + error_msg = "'{input}' is not a valid BioImage.IO ID" + if resource_type is not None: + error_msg += f" of type {resource_type}" + + validate.append( + field_validators.OneOf( + { + k + for k, v in BIOIMAGEIO_COLLECTION_ENTRIES.items() + if resource_type is None or resource_type == v.get("type") + }, + error=error_msg, + ) + ) + + super().__init__(*super_args, bioimageio_description=bioimageio_description, **super_kwargs) + + class ProcMode(String): all_modes = ("fixed", "per_dataset", "per_sample") explanations = { From 13ce8bc1f12f7c3d19dff28f4466b948b2e06334 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 2 Mar 2022 11:32:11 +0100 Subject: [PATCH 2/9] make model parent more explicit --- bioimageio/spec/model/v0_4/schema.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/bioimageio/spec/model/v0_4/schema.py b/bioimageio/spec/model/v0_4/schema.py index 5440a2ea8..86066c9a8 100644 --- a/bioimageio/spec/model/v0_4/schema.py +++ b/bioimageio/spec/model/v0_4/schema.py @@ -7,7 +7,6 @@ from bioimageio.spec.dataset.v0_2.schema import Dataset as _Dataset from bioimageio.spec.model.v0_3.schema import ( KerasHdf5WeightsEntry as KerasHdf5WeightsEntry03, - ModelParent, OnnxWeightsEntry as OnnxWeightsEntry03, Postprocessing as Postprocessing03, Preprocessing as Preprocessing03, @@ -281,6 +280,19 @@ class LinkedDataset(_BioImageIOSchema): id = fields.String(bioimageio_description="dataset id") +class ModelParent(_BioImageIOSchema): + id = fields.BioImageIO_ID(resource_type="model") + uri = fields.Union( + [fields.URI(), fields.RelativeLocalPath()], bioimageio_description="URL or local relative path of a model RDF" + ) + sha256 = fields.SHA256(bioimageio_description="Hash of the parent model RDF. Note: the hash is not validated") + + @validates_schema() + def id_xor_uri(self, data): + if ("id" in data) == ("uri" in data): + raise ValidationError("Either 'id' or 'uri' are required (not both).") + + class Model(rdf.schema.RDF): raw_nodes = raw_nodes @@ -479,9 +491,9 @@ def get_min_shape(t) -> numpy.ndarray: f"different from `authors` in root or any entry in `weights`.", ) - parent = fields.Union( - [fields.BioImageIO_ID(resource_type="model"), fields.URI(), fields.RelativeLocalPath()], - bioimageio_description="BioImage.IO model id or URL or local relative path to a model RDF", + parent = fields.Nested( + ModelParent(), + bioimageio_description="The model from which this model is derived, e.g. by fine-tuning the weights.", ) run_mode = fields.Nested( From 25b68802ba4d8ace368ef2bd41749ac76e09b0a9 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 2 Mar 2022 11:44:09 +0100 Subject: [PATCH 3/9] add tests --- .../models/unet2d_nuclei_broad/rdf.yaml | 2 +- tests/conftest.py | 8 ++++---- tests/test_schema_model.py | 18 ++++++++++++++++++ 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/example_specs/models/unet2d_nuclei_broad/rdf.yaml b/example_specs/models/unet2d_nuclei_broad/rdf.yaml index f25a0bfaa..c2e0c50f0 100644 --- a/example_specs/models/unet2d_nuclei_broad/rdf.yaml +++ b/example_specs/models/unet2d_nuclei_broad/rdf.yaml @@ -1,5 +1,5 @@ # TODO physical scale of the data -format_version: 0.4.4 +format_version: 0.4.5 name: UNet 2D Nuclei Broad description: A 2d U-Net trained on the nuclei broad dataset. diff --git a/tests/conftest.py b/tests/conftest.py index 7f643c785..177a3dfc8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,7 +11,7 @@ def unet2d_nuclei_broad_base_path(): def get_unet2d_nuclei_broad(unet2d_nuclei_broad_base_path, request) -> dict: - if request.param == "v0_4_4": + if request.param == "v0_4_5": v = "" else: v = f"_{request.param}" @@ -21,7 +21,7 @@ def get_unet2d_nuclei_broad(unet2d_nuclei_broad_base_path, request) -> dict: return yaml.load(path) -@pytest.fixture(params=["v0_3_0", "v0_3_1", "v0_3_2", "v0_3_3", "v0_3_6", "v0_4_0", "v0_4_4"]) +@pytest.fixture(params=["v0_3_0", "v0_3_1", "v0_3_2", "v0_3_3", "v0_3_6", "v0_4_0", "v0_4_5"]) def unet2d_nuclei_broad_any(unet2d_nuclei_broad_base_path, request): yield get_unet2d_nuclei_broad(unet2d_nuclei_broad_base_path, request) @@ -31,12 +31,12 @@ def unet2d_nuclei_broad_before_latest(unet2d_nuclei_broad_base_path, request): yield get_unet2d_nuclei_broad(unet2d_nuclei_broad_base_path, request) -@pytest.fixture(params=["v0_4_4"]) +@pytest.fixture(params=["v0_4_5"]) def unet2d_nuclei_broad_latest(unet2d_nuclei_broad_base_path, request): yield get_unet2d_nuclei_broad(unet2d_nuclei_broad_base_path, request) -@pytest.fixture(params=["v0_3_6", "v0_4_4"]) +@pytest.fixture(params=["v0_3_6", "v0_4_5"]) def unet2d_nuclei_broad_any_minor(unet2d_nuclei_broad_base_path, request): yield get_unet2d_nuclei_broad(unet2d_nuclei_broad_base_path, request) diff --git a/tests/test_schema_model.py b/tests/test_schema_model.py index 92a68b2e4..da76958e9 100644 --- a/tests/test_schema_model.py +++ b/tests/test_schema_model.py @@ -174,3 +174,21 @@ def test_output_ref_shape_too_small(model_dict): assert e.value.messages == { "_schema": ["Minimal shape [128. 256. 9.] of output output_1 is too small for halo [256, 128, 0]."] } + + +def test_model_has_parent_with_uri(model_dict): + from bioimageio.spec.model.schema import Model + + model_dict["parent"] = dict(uri="https://doi.org/10.5281/zenodo.5744489") + + valid_data = Model().load(model_dict) + assert valid_data + + +def test_model_has_parent_with_id(model_dict): + from bioimageio.spec.model.schema import Model + + model_dict["parent"] = dict(id="10.5281/zenodo.5764892") + + valid_data = Model().load(model_dict) + assert valid_data From 65a505a5d5304ff4ffe02d31c79da3081e836187 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 2 Mar 2022 11:44:48 +0100 Subject: [PATCH 4/9] fix resolve_rdf_source --- bioimageio/spec/shared/_resolve_source.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/bioimageio/spec/shared/_resolve_source.py b/bioimageio/spec/shared/_resolve_source.py index 1df679a4c..e659c06eb 100644 --- a/bioimageio/spec/shared/_resolve_source.py +++ b/bioimageio/spec/shared/_resolve_source.py @@ -104,8 +104,6 @@ def resolve_rdf_source( if isinstance(source, str): # source might be bioimageio id, doi, url or file path -> resolve to pathlib.Path - from ._remote_constants import BIOIMAGEIO_COLLECTION - if BIOIMAGEIO_COLLECTION is None: bioimageio_rdf_source = None else: From 71950601142aa7743c06583aac5749b9c21f0f6c Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 2 Mar 2022 11:47:06 +0100 Subject: [PATCH 5/9] fix convert_model_from_v0_4_4_to_0_4_5 --- bioimageio/spec/model/v0_4/converters.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bioimageio/spec/model/v0_4/converters.py b/bioimageio/spec/model/v0_4/converters.py index ee671e1bc..afa373ecb 100644 --- a/bioimageio/spec/model/v0_4/converters.py +++ b/bioimageio/spec/model/v0_4/converters.py @@ -60,6 +60,7 @@ def convert_model_from_v0_4_4_to_0_4_5(data: Dict[str, Any]) -> Dict[str, Any]: if parent and "uri" in parent: data["parent"] = parent["uri"] + data["format_version"] = "0.4.5" return data From 937abca3fd0aaec8ee90f6fcb6e34aa377039911 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 2 Mar 2022 11:48:30 +0100 Subject: [PATCH 6/9] fix id_xor_uri --- bioimageio/spec/model/v0_4/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bioimageio/spec/model/v0_4/schema.py b/bioimageio/spec/model/v0_4/schema.py index 86066c9a8..9c09a9aab 100644 --- a/bioimageio/spec/model/v0_4/schema.py +++ b/bioimageio/spec/model/v0_4/schema.py @@ -288,7 +288,7 @@ class ModelParent(_BioImageIOSchema): sha256 = fields.SHA256(bioimageio_description="Hash of the parent model RDF. Note: the hash is not validated") @validates_schema() - def id_xor_uri(self, data): + def id_xor_uri(self, data, **kwargs): if ("id" in data) == ("uri" in data): raise ValidationError("Either 'id' or 'uri' are required (not both).") From 7131db4d6ba17731341852a01f7253204be7ce97 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 2 Mar 2022 12:01:39 +0100 Subject: [PATCH 7/9] update ModelParent raw node --- bioimageio/spec/model/v0_4/raw_nodes.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/bioimageio/spec/model/v0_4/raw_nodes.py b/bioimageio/spec/model/v0_4/raw_nodes.py index 67d3ca8a4..a5634a802 100644 --- a/bioimageio/spec/model/v0_4/raw_nodes.py +++ b/bioimageio/spec/model/v0_4/raw_nodes.py @@ -11,7 +11,6 @@ from bioimageio.spec.model.v0_3.raw_nodes import ( InputTensor, KerasHdf5WeightsEntry as KerasHdf5WeightsEntry03, - ModelParent, OnnxWeightsEntry as OnnxWeightsEntry03, OutputTensor, Postprocessing, @@ -30,6 +29,7 @@ ImportableModule, ImportableSourceFile, ParametrizedInputShape, + RawNode, URI, ) @@ -108,10 +108,17 @@ class TorchscriptWeightsEntry(_WeightsEntryBase): @dataclass -class LinkedDataset: +class LinkedDataset(RawNode): id: str +@dataclass +class ModelParent(RawNode): + id: Union[_Missing, str] = missing + uri: Union[_Missing, URI, Path] = missing + sha256: Union[_Missing, str] = missing + + @dataclass class Model(_RDF): _include_in_package = ("covers", "documentation", "test_inputs", "test_outputs", "sample_inputs", "sample_outputs") From 59ae94976e04dea1239fa7e51e30eb2061f85d30 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 2 Mar 2022 12:06:03 +0100 Subject: [PATCH 8/9] add resource versions to BIOIMAGEIO_COLLECTION_ENTRIES --- bioimageio/spec/shared/_resolve_source.py | 77 ++++++++++++----------- 1 file changed, 40 insertions(+), 37 deletions(-) diff --git a/bioimageio/spec/shared/_resolve_source.py b/bioimageio/spec/shared/_resolve_source.py index e659c06eb..2a251df70 100644 --- a/bioimageio/spec/shared/_resolve_source.py +++ b/bioimageio/spec/shared/_resolve_source.py @@ -33,43 +33,6 @@ def _is_path(s: typing.Any) -> bool: return False -T = typing.TypeVar("T") - - -def _resolve_json_from_url( - url: str, - expected_type: typing.Union[typing.Type[dict], typing.Type[T]] = dict, - warning_msg: str = "Failed to fetch {url}: {error}", -) -> typing.Tuple[typing.Optional[T], typing.Optional[str]]: - try: - p = resolve_source(url) - with p.open() as f: - data = json.load(f) - - assert isinstance(data, expected_type) - except Exception as e: - data = None - error: typing.Optional[str] = str(e) - if warning_msg: - warnings.warn(warning_msg.format(url=url, error=error)) - else: - error = None - - return data, error - - -BIOIMAGEIO_SITE_CONFIG, BIOIMAGEIO_SITE_CONFIG_ERROR = _resolve_json_from_url(BIOIMAGEIO_SITE_CONFIG_URL) -BIOIMAGEIO_COLLECTION, BIOIMAGEIO_COLLECTION_ERROR = _resolve_json_from_url(BIOIMAGEIO_COLLECTION_URL) -if BIOIMAGEIO_COLLECTION is None: - BIOIMAGEIO_COLLECTION_ENTRIES = None -else: - BIOIMAGEIO_COLLECTION_ENTRIES = { - c["id"]: c["rdf_source"] - for i, c in enumerate(BIOIMAGEIO_COLLECTION.get("collection", [])) - if "id" in c and "rdf_source" in c - } - - def resolve_rdf_source( source: typing.Union[dict, os.PathLike, typing.IO, str, bytes, raw_nodes.URI] ) -> typing.Tuple[dict, str, typing.Union[pathlib.Path, raw_nodes.URI, bytes]]: @@ -416,3 +379,43 @@ def _download_url(uri: raw_nodes.URI, output: typing.Optional[os.PathLike] = Non raise RuntimeError(f"Failed to download {uri} ({e})") return local_path + + +T = typing.TypeVar("T") + + +def _resolve_json_from_url( + url: str, + expected_type: typing.Union[typing.Type[dict], typing.Type[T]] = dict, + warning_msg: str = "Failed to fetch {url}: {error}", +) -> typing.Tuple[typing.Optional[T], typing.Optional[str]]: + try: + p = resolve_source(url) + with p.open() as f: + data = json.load(f) + + assert isinstance(data, expected_type) + except Exception as e: + data = None + error: typing.Optional[str] = str(e) + if warning_msg: + warnings.warn(warning_msg.format(url=url, error=error)) + else: + error = None + + return data, error + + +BIOIMAGEIO_SITE_CONFIG, BIOIMAGEIO_SITE_CONFIG_ERROR = _resolve_json_from_url(BIOIMAGEIO_SITE_CONFIG_URL) +BIOIMAGEIO_COLLECTION, BIOIMAGEIO_COLLECTION_ERROR = _resolve_json_from_url(BIOIMAGEIO_COLLECTION_URL) +if BIOIMAGEIO_COLLECTION is None: + BIOIMAGEIO_COLLECTION_ENTRIES = None +else: + BIOIMAGEIO_COLLECTION_ENTRIES = { + f"{cr['id']}/{cv}": cr["rdf_source"].replace( + f"/{cr['versions'][0]}", cv # todo: improve this replace-version-monkeypatch + ) + for i, cr in enumerate(BIOIMAGEIO_COLLECTION.get("collection", [])) + for cv in cr.get("versions", []) + if "id" in cr and "rdf_source" in cr + } From e1ac5195019d3ba38dfb8f3ea27d6a56768d4a4b Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 2 Mar 2022 12:23:43 +0100 Subject: [PATCH 9/9] add short and long resource ids to BIOIMAGEIO_COLLECTION_ENTRIES --- bioimageio/spec/shared/_resolve_source.py | 30 ++++++++++++++--------- bioimageio/spec/shared/fields.py | 4 +-- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/bioimageio/spec/shared/_resolve_source.py b/bioimageio/spec/shared/_resolve_source.py index 2a251df70..925570934 100644 --- a/bioimageio/spec/shared/_resolve_source.py +++ b/bioimageio/spec/shared/_resolve_source.py @@ -67,10 +67,7 @@ def resolve_rdf_source( if isinstance(source, str): # source might be bioimageio id, doi, url or file path -> resolve to pathlib.Path - if BIOIMAGEIO_COLLECTION is None: - bioimageio_rdf_source = None - else: - bioimageio_rdf_source = BIOIMAGEIO_COLLECTION.get(source) or BIOIMAGEIO_COLLECTION.get(source + "/latest") + bioimageio_rdf_source: typing.Optional[str] = (BIOIMAGEIO_COLLECTION_ENTRIES or {}).get(source, (None, None))[1] if bioimageio_rdf_source is not None: # source is bioimageio id @@ -409,13 +406,24 @@ def _resolve_json_from_url( BIOIMAGEIO_SITE_CONFIG, BIOIMAGEIO_SITE_CONFIG_ERROR = _resolve_json_from_url(BIOIMAGEIO_SITE_CONFIG_URL) BIOIMAGEIO_COLLECTION, BIOIMAGEIO_COLLECTION_ERROR = _resolve_json_from_url(BIOIMAGEIO_COLLECTION_URL) if BIOIMAGEIO_COLLECTION is None: - BIOIMAGEIO_COLLECTION_ENTRIES = None + BIOIMAGEIO_COLLECTION_ENTRIES: typing.Optional[typing.Dict[str, typing.Tuple[str, str]]] = None else: BIOIMAGEIO_COLLECTION_ENTRIES = { - f"{cr['id']}/{cv}": cr["rdf_source"].replace( - f"/{cr['versions'][0]}", cv # todo: improve this replace-version-monkeypatch - ) - for i, cr in enumerate(BIOIMAGEIO_COLLECTION.get("collection", [])) - for cv in cr.get("versions", []) - if "id" in cr and "rdf_source" in cr + cr["id"]: (cr["type"], cr["rdf_source"]) + for cr in BIOIMAGEIO_COLLECTION.get("collection", []) + if "id" in cr and "rdf_source" in cr and "type" in cr } + # add resource versions explicitly + BIOIMAGEIO_COLLECTION_ENTRIES.update( + { + f"{cr['id']}/{cv}": ( + cr["type"], + cr["rdf_source"].replace( + f"/{cr['versions'][0]}", f"/{cv}" + ), # todo: improve this replace-version-monkeypatch + ) + for cr in BIOIMAGEIO_COLLECTION.get("collection", []) + for cv in cr.get("versions", []) + if "id" in cr and "rdf_source" in cr and "type" in cr + } + ) diff --git a/bioimageio/spec/shared/fields.py b/bioimageio/spec/shared/fields.py index 1dba5f4c0..346cbca63 100644 --- a/bioimageio/spec/shared/fields.py +++ b/bioimageio/spec/shared/fields.py @@ -460,8 +460,8 @@ def __init__( field_validators.OneOf( { k - for k, v in BIOIMAGEIO_COLLECTION_ENTRIES.items() - if resource_type is None or resource_type == v.get("type") + for k, (v_type, _) in BIOIMAGEIO_COLLECTION_ENTRIES.items() + if resource_type is None or resource_type == v_type }, error=error_msg, )