Skip to content

Commit

Permalink
fix: improve metadata.migrate()
Browse files Browse the repository at this point in the history
This commit improves `migrate()` by removing bugs, making it
more readable, adding argument validations, etc. However,
it doesn't change the migration of metadata instances this
function currently intend to implement, per my best guess.
The following are descriptions of the more prominent
improvements:

1. Correct the type annotation of `to_version` (Bug fix)
2. Validate that the provided metadata instance and the
   target version specifies valid DANDI schema versions
3. Ensure the optional validation of the provided instance
   against its specified schema is done before any migration
   operation, to achieve greater consistency.
4. Return the instance unchanged when DANDI schema version
   of the given instance is equal to the target version instead
   (Bug fix).
5. Consistently use the tuple representations of DANDI schema
   versions to route the execution flow of migration operations
   to ensure all cases of the relation between the schema version
   of the instance and target version are covered.
6. Condition the migration to "0.6.0" and beyond on the target
   version being at least "0.6.0". (Bug fix)
7. Copy input instance for migration only immediately before
   migration operations
7. Rename local variables to improve readability
8. Handle the situation in which the input metadata instance not
   having the "assetsSummary" field

Co-authored-by: Yaroslav Halchenko <[email protected]>
Co-authored-by: John T. Wodder II <[email protected]>
  • Loading branch information
3 people committed Feb 6, 2025
1 parent 6fd04fb commit 89e2cb7
Show file tree
Hide file tree
Showing 2 changed files with 160 additions and 32 deletions.
133 changes: 108 additions & 25 deletions dandischema/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,45 +265,128 @@ def validate(

def migrate(
obj: dict,
to_version: Optional[str] = DANDI_SCHEMA_VERSION,
to_version: str = DANDI_SCHEMA_VERSION,
skip_validation: bool = False,
) -> dict:
"""Migrate dandiset metadata object to new schema"""
obj = deepcopy(obj)
"""
Migrate a Dandiset metadata instance to a newer DANDI schema version
:param obj: The Dandiset metadata instance to migrate
:param to_version: The target DANDI schema version to migrate to
:param skip_validation: If True, the given instance will not be validated before
migration. Otherwise, the instance will be validated before migration.
:return: The Dandiset metadata instance migrated to the target DANDI schema version
:raises ValueError: If the provided instance doesn't have a `schemaVersion` field
that specifies a valid DANDI schema version
:raises ValueError: If `to_version` is not a valid DANDI schema version or an
allowed target schema
:raises ValueError: If the target DANDI schema version is lower than the DANDI
schema version of the provided instance
"""

# ATM, we only support the latest schema version as a target. See definition of
# `ALLOWED_TARGET_SCHEMAS` for details
if len(ALLOWED_TARGET_SCHEMAS) > 1:
raise NotImplementedError(
"ATM code below supports migration to current version only"
msg = f"Only migration to current version, {DANDI_SCHEMA_VERSION}, is supported"
raise NotImplementedError(msg)

Check warning on line 291 in dandischema/metadata.py

View check run for this annotation

Codecov / codecov/patch

dandischema/metadata.py#L290-L291

Added lines #L290 - L291 were not covered by tests

# --------------------------------------------------------------
# Validate DANDI schema version provided in the metadata instance
# --------------------------------------------------------------
# DANDI schema version of the provided instance
obj_ver = obj.get("schemaVersion")
if obj_ver is None:
msg = (
"The provided Dandiset metadata instance does not have a "
"`schemaVersion` field for specifying the DANDI schema version."
)
raise ValueError(msg)
if not isinstance(obj_ver, str):
msg = (
"The provided Dandiset metadata instance has a non-string "
"`schemaVersion` field for specifying the DANDI schema version."
)
raise ValueError(msg)
# Check if `obj_ver` is a valid DANDI schema version
try:
# DANDI schema version of the provided instance in tuple form
obj_ver_tuple = version2tuple(obj_ver)
except ValueError as e:
msg = (
"The provided Dandiset metadata instance has an invalid "
"`schemaVersion` field for specifying the DANDI schema version."
)
raise ValueError(msg) from e
if obj_ver not in ALLOWED_INPUT_SCHEMAS:
msg = (
f"The DANDI schema version of the provided Dandiset metadata instance, "
f"{obj_ver!r}, is not one of the supported versions for input "
f"Dandiset metadata instances. The supported versions are "
f"{ALLOWED_INPUT_SCHEMAS}."
)
raise ValueError(msg)
# ----------------------------------------------------------------

# --------------------------------------------------------------
# Validate `to_version`
# --------------------------------------------------------------
# Check if `to_version` is a valid DANDI schema version
try:
# The target DANDI schema version in tuple form
target_ver_tuple = version2tuple(to_version)
except ValueError as e:
msg = (
"The provided target version, {to_version!r}, is not a valid DANDI schema "
"version."
)
raise ValueError(msg) from e

# Permit only allowed target schemas
if to_version not in ALLOWED_TARGET_SCHEMAS:
raise ValueError(f"Current target schemas: {ALLOWED_TARGET_SCHEMAS}.")
schema_version = obj.get("schemaVersion")
if schema_version == DANDI_SCHEMA_VERSION:
return obj
if schema_version not in ALLOWED_INPUT_SCHEMAS:
raise ValueError(f"Current input schemas supported: {ALLOWED_INPUT_SCHEMAS}.")
if version2tuple(schema_version) > version2tuple(to_version):
raise ValueError(f"Cannot migrate from {schema_version} to lower {to_version}.")
if not (skip_validation):
schema = _get_schema(schema_version, "dandiset.json")
msg = (
f"Target version, {to_version!r}, is not among supported target schemas: "
f"{ALLOWED_TARGET_SCHEMAS}"
)
raise ValueError(msg)
# ----------------------------------------------------------------

# Ensure the target DANDI schema version is at least the DANDI schema version
# of the provided instance
if obj_ver_tuple > target_ver_tuple:
raise ValueError(f"Cannot migrate from {obj_ver} to lower {to_version}.")

# Optionally validate the instance against the DANDI schema it specifies
# before migration
if not skip_validation:
schema = _get_schema(obj_ver, "dandiset.json")
_validate_obj_json(obj, schema)
if version2tuple(schema_version) < version2tuple("0.6.0"):
for val in obj.get("about", []):

obj_migrated = deepcopy(obj)

if obj_ver_tuple == target_ver_tuple:
return obj_migrated

if obj_ver_tuple < version2tuple("0.6.0") <= target_ver_tuple:
for val in obj_migrated.get("about", []):
if "schemaKey" not in val:
if "identifier" in val and "UBERON" in val["identifier"]:
val["schemaKey"] = "Anatomy"
else:
raise ValueError("Cannot auto migrate. SchemaKey missing")
for val in obj.get("access", []):
for val in obj_migrated.get("access", []):
if "schemaKey" not in val:
val["schemaKey"] = "AccessRequirements"
for resource in obj.get("relatedResource", []):
for resource in obj_migrated.get("relatedResource", []):
resource["schemaKey"] = "Resource"
if "schemaKey" not in obj["assetsSummary"]:
obj["assetsSummary"]["schemaKey"] = "AssetsSummary"
if "schemaKey" not in obj:
obj["schemaKey"] = "Dandiset"
obj["schemaVersion"] = to_version
return obj
if (
"assetsSummary" in obj_migrated
and "schemaKey" not in obj_migrated["assetsSummary"]
):
obj_migrated["assetsSummary"]["schemaKey"] = "AssetsSummary"
if "schemaKey" not in obj_migrated:
obj_migrated["schemaKey"] = "Dandiset"

Check warning on line 387 in dandischema/metadata.py

View check run for this annotation

Codecov / codecov/patch

dandischema/metadata.py#L387

Added line #L387 was not covered by tests
obj_migrated["schemaVersion"] = to_version
return obj_migrated


_stats_var_type = TypeVar("_stats_var_type", int, list)
Expand Down
59 changes: 52 additions & 7 deletions dandischema/tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,19 +340,64 @@ def test_missing_ok_error() -> None:


@pytest.mark.parametrize(
"obj, target",
"obj, target, msg",
[
({}, "0.6.0"),
({"schemaVersion": "0.4.4"}, None),
({"schemaVersion": "0.4.4"}, "0.4.6"),
({"schemaVersion": "0.6.0"}, "0.5.2"),
({}, "0.6.0", "does not have a `schemaVersion` field"),
# Non-string `schemaVersion` field in the instance
({"schemaVersion": 42}, DANDI_SCHEMA_VERSION, "has a non-string"),
# `schemaVersion` field in the instance with invalid format
(
{"schemaVersion": "abc"},
DANDI_SCHEMA_VERSION,
"has an invalid `schemaVersion` field",
),
# `schemaVersion` field in the instance is not an allowed input schema
(
{"schemaVersion": "0.4.5"},
DANDI_SCHEMA_VERSION,
"is not one of the supported versions "
"for input Dandiset metadata instances",
),
# target schema with invalid format
(
{"schemaVersion": "0.4.4"},
"cba",
"target version.* is not a valid DANDI schema version",
),
# target schema is not an allowed target schema
(
{"schemaVersion": "0.4.4"},
"0.4.5",
"Target version, .*, is not among supported target schemas",
),
],
)
def test_migrate_errors(obj: Dict[str, Any], target: Optional[str]) -> None:
with pytest.raises(ValueError):
def test_migrate_value_errors(obj: Dict[str, Any], target: Any, msg: str) -> None:
"""
Test cases when `migrate()` is expected to raise a `ValueError` exception
:param obj: The metadata instance of `Dandiset` to migrate
:param target: The target DANDI schema version to migrate to
:param msg: The expected error message with in the raised exception
"""
with pytest.raises(ValueError, match=msg):
migrate(obj, to_version=target, skip_validation=True)


def test_migrate_value_errors_lesser_target(monkeypatch: pytest.MonkeyPatch) -> None:
"""
Test cases when `migrate()` is expected to raise a `ValueError` exception
when the target schema version is lesser than the schema version of the metadata
instance
"""
from dandischema import metadata

monkeypatch.setattr(metadata, "ALLOWED_TARGET_SCHEMAS", ["0.6.0"])

with pytest.raises(ValueError, match="Cannot migrate from .* to lower"):
migrate({"schemaVersion": "0.6.7"}, to_version="0.6.0", skip_validation=True)


@skipif_no_network
def test_migrate_044(schema_dir: Path) -> None:
with (METADATA_DIR / "meta_000004old.json").open() as fp:
Expand Down

0 comments on commit 89e2cb7

Please sign in to comment.