From 89e2cb747ec794f2e2bcdf5c4f733d4ea7f302c9 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Fri, 31 Jan 2025 16:05:21 -0800 Subject: [PATCH] fix: improve `metadata.migrate()` This commit improves `migrate()` by removing bugs, making it more readable, adding argument validations, etc. However, it doesn't change the migration of metadata instances this function currently intend to implement, per my best guess. The following are descriptions of the more prominent improvements: 1. Correct the type annotation of `to_version` (Bug fix) 2. Validate that the provided metadata instance and the target version specifies valid DANDI schema versions 3. Ensure the optional validation of the provided instance against its specified schema is done before any migration operation, to achieve greater consistency. 4. Return the instance unchanged when DANDI schema version of the given instance is equal to the target version instead (Bug fix). 5. Consistently use the tuple representations of DANDI schema versions to route the execution flow of migration operations to ensure all cases of the relation between the schema version of the instance and target version are covered. 6. Condition the migration to "0.6.0" and beyond on the target version being at least "0.6.0". (Bug fix) 7. Copy input instance for migration only immediately before migration operations 7. Rename local variables to improve readability 8. Handle the situation in which the input metadata instance not having the "assetsSummary" field Co-authored-by: Yaroslav Halchenko Co-authored-by: John T. Wodder II --- dandischema/metadata.py | 133 +++++++++++++++++++++++------ dandischema/tests/test_metadata.py | 59 +++++++++++-- 2 files changed, 160 insertions(+), 32 deletions(-) diff --git a/dandischema/metadata.py b/dandischema/metadata.py index 9195972f..190e473b 100644 --- a/dandischema/metadata.py +++ b/dandischema/metadata.py @@ -265,45 +265,128 @@ def validate( def migrate( obj: dict, - to_version: Optional[str] = DANDI_SCHEMA_VERSION, + to_version: str = DANDI_SCHEMA_VERSION, skip_validation: bool = False, ) -> dict: - """Migrate dandiset metadata object to new schema""" - obj = deepcopy(obj) + """ + Migrate a Dandiset metadata instance to a newer DANDI schema version + + :param obj: The Dandiset metadata instance to migrate + :param to_version: The target DANDI schema version to migrate to + :param skip_validation: If True, the given instance will not be validated before + migration. Otherwise, the instance will be validated before migration. + :return: The Dandiset metadata instance migrated to the target DANDI schema version + :raises ValueError: If the provided instance doesn't have a `schemaVersion` field + that specifies a valid DANDI schema version + :raises ValueError: If `to_version` is not a valid DANDI schema version or an + allowed target schema + :raises ValueError: If the target DANDI schema version is lower than the DANDI + schema version of the provided instance + """ + + # ATM, we only support the latest schema version as a target. See definition of + # `ALLOWED_TARGET_SCHEMAS` for details if len(ALLOWED_TARGET_SCHEMAS) > 1: - raise NotImplementedError( - "ATM code below supports migration to current version only" + msg = f"Only migration to current version, {DANDI_SCHEMA_VERSION}, is supported" + raise NotImplementedError(msg) + + # -------------------------------------------------------------- + # Validate DANDI schema version provided in the metadata instance + # -------------------------------------------------------------- + # DANDI schema version of the provided instance + obj_ver = obj.get("schemaVersion") + if obj_ver is None: + msg = ( + "The provided Dandiset metadata instance does not have a " + "`schemaVersion` field for specifying the DANDI schema version." + ) + raise ValueError(msg) + if not isinstance(obj_ver, str): + msg = ( + "The provided Dandiset metadata instance has a non-string " + "`schemaVersion` field for specifying the DANDI schema version." + ) + raise ValueError(msg) + # Check if `obj_ver` is a valid DANDI schema version + try: + # DANDI schema version of the provided instance in tuple form + obj_ver_tuple = version2tuple(obj_ver) + except ValueError as e: + msg = ( + "The provided Dandiset metadata instance has an invalid " + "`schemaVersion` field for specifying the DANDI schema version." + ) + raise ValueError(msg) from e + if obj_ver not in ALLOWED_INPUT_SCHEMAS: + msg = ( + f"The DANDI schema version of the provided Dandiset metadata instance, " + f"{obj_ver!r}, is not one of the supported versions for input " + f"Dandiset metadata instances. The supported versions are " + f"{ALLOWED_INPUT_SCHEMAS}." + ) + raise ValueError(msg) + # ---------------------------------------------------------------- + + # -------------------------------------------------------------- + # Validate `to_version` + # -------------------------------------------------------------- + # Check if `to_version` is a valid DANDI schema version + try: + # The target DANDI schema version in tuple form + target_ver_tuple = version2tuple(to_version) + except ValueError as e: + msg = ( + "The provided target version, {to_version!r}, is not a valid DANDI schema " + "version." ) + raise ValueError(msg) from e + + # Permit only allowed target schemas if to_version not in ALLOWED_TARGET_SCHEMAS: - raise ValueError(f"Current target schemas: {ALLOWED_TARGET_SCHEMAS}.") - schema_version = obj.get("schemaVersion") - if schema_version == DANDI_SCHEMA_VERSION: - return obj - if schema_version not in ALLOWED_INPUT_SCHEMAS: - raise ValueError(f"Current input schemas supported: {ALLOWED_INPUT_SCHEMAS}.") - if version2tuple(schema_version) > version2tuple(to_version): - raise ValueError(f"Cannot migrate from {schema_version} to lower {to_version}.") - if not (skip_validation): - schema = _get_schema(schema_version, "dandiset.json") + msg = ( + f"Target version, {to_version!r}, is not among supported target schemas: " + f"{ALLOWED_TARGET_SCHEMAS}" + ) + raise ValueError(msg) + # ---------------------------------------------------------------- + + # Ensure the target DANDI schema version is at least the DANDI schema version + # of the provided instance + if obj_ver_tuple > target_ver_tuple: + raise ValueError(f"Cannot migrate from {obj_ver} to lower {to_version}.") + + # Optionally validate the instance against the DANDI schema it specifies + # before migration + if not skip_validation: + schema = _get_schema(obj_ver, "dandiset.json") _validate_obj_json(obj, schema) - if version2tuple(schema_version) < version2tuple("0.6.0"): - for val in obj.get("about", []): + + obj_migrated = deepcopy(obj) + + if obj_ver_tuple == target_ver_tuple: + return obj_migrated + + if obj_ver_tuple < version2tuple("0.6.0") <= target_ver_tuple: + for val in obj_migrated.get("about", []): if "schemaKey" not in val: if "identifier" in val and "UBERON" in val["identifier"]: val["schemaKey"] = "Anatomy" else: raise ValueError("Cannot auto migrate. SchemaKey missing") - for val in obj.get("access", []): + for val in obj_migrated.get("access", []): if "schemaKey" not in val: val["schemaKey"] = "AccessRequirements" - for resource in obj.get("relatedResource", []): + for resource in obj_migrated.get("relatedResource", []): resource["schemaKey"] = "Resource" - if "schemaKey" not in obj["assetsSummary"]: - obj["assetsSummary"]["schemaKey"] = "AssetsSummary" - if "schemaKey" not in obj: - obj["schemaKey"] = "Dandiset" - obj["schemaVersion"] = to_version - return obj + if ( + "assetsSummary" in obj_migrated + and "schemaKey" not in obj_migrated["assetsSummary"] + ): + obj_migrated["assetsSummary"]["schemaKey"] = "AssetsSummary" + if "schemaKey" not in obj_migrated: + obj_migrated["schemaKey"] = "Dandiset" + obj_migrated["schemaVersion"] = to_version + return obj_migrated _stats_var_type = TypeVar("_stats_var_type", int, list) diff --git a/dandischema/tests/test_metadata.py b/dandischema/tests/test_metadata.py index d2ffe1bb..49262166 100644 --- a/dandischema/tests/test_metadata.py +++ b/dandischema/tests/test_metadata.py @@ -340,19 +340,64 @@ def test_missing_ok_error() -> None: @pytest.mark.parametrize( - "obj, target", + "obj, target, msg", [ - ({}, "0.6.0"), - ({"schemaVersion": "0.4.4"}, None), - ({"schemaVersion": "0.4.4"}, "0.4.6"), - ({"schemaVersion": "0.6.0"}, "0.5.2"), + ({}, "0.6.0", "does not have a `schemaVersion` field"), + # Non-string `schemaVersion` field in the instance + ({"schemaVersion": 42}, DANDI_SCHEMA_VERSION, "has a non-string"), + # `schemaVersion` field in the instance with invalid format + ( + {"schemaVersion": "abc"}, + DANDI_SCHEMA_VERSION, + "has an invalid `schemaVersion` field", + ), + # `schemaVersion` field in the instance is not an allowed input schema + ( + {"schemaVersion": "0.4.5"}, + DANDI_SCHEMA_VERSION, + "is not one of the supported versions " + "for input Dandiset metadata instances", + ), + # target schema with invalid format + ( + {"schemaVersion": "0.4.4"}, + "cba", + "target version.* is not a valid DANDI schema version", + ), + # target schema is not an allowed target schema + ( + {"schemaVersion": "0.4.4"}, + "0.4.5", + "Target version, .*, is not among supported target schemas", + ), ], ) -def test_migrate_errors(obj: Dict[str, Any], target: Optional[str]) -> None: - with pytest.raises(ValueError): +def test_migrate_value_errors(obj: Dict[str, Any], target: Any, msg: str) -> None: + """ + Test cases when `migrate()` is expected to raise a `ValueError` exception + + :param obj: The metadata instance of `Dandiset` to migrate + :param target: The target DANDI schema version to migrate to + :param msg: The expected error message with in the raised exception + """ + with pytest.raises(ValueError, match=msg): migrate(obj, to_version=target, skip_validation=True) +def test_migrate_value_errors_lesser_target(monkeypatch: pytest.MonkeyPatch) -> None: + """ + Test cases when `migrate()` is expected to raise a `ValueError` exception + when the target schema version is lesser than the schema version of the metadata + instance + """ + from dandischema import metadata + + monkeypatch.setattr(metadata, "ALLOWED_TARGET_SCHEMAS", ["0.6.0"]) + + with pytest.raises(ValueError, match="Cannot migrate from .* to lower"): + migrate({"schemaVersion": "0.6.7"}, to_version="0.6.0", skip_validation=True) + + @skipif_no_network def test_migrate_044(schema_dir: Path) -> None: with (METADATA_DIR / "meta_000004old.json").open() as fp: