Improve _validate_obj_json() in metadata.py and provide utility funcs for validating against JSON schemas #278

Open · wants to merge 6 commits into base: master · Changes from all commits
54 changes: 31 additions & 23 deletions dandischema/metadata.py
@@ -6,7 +6,6 @@
from pathlib import Path
from typing import Any, Dict, Iterable, Optional, TypeVar, Union, cast, get_args

import jsonschema
import pydantic
import requests

@@ -23,6 +22,7 @@
_ensure_newline,
sanitize_value,
strip_top_level_optional,
validate_json,
version2tuple,
)

@@ -147,27 +147,35 @@ def publish_model_schemata(releasedir: Union[str, Path]) -> Path:
return vdir


def _validate_obj_json(data: dict, schema: dict, missing_ok: bool = False) -> None:
validator: Union[jsonschema.Draft202012Validator, jsonschema.Draft7Validator]

if version2tuple(data["schemaVersion"]) >= version2tuple("0.6.5"):
# schema version 0.7.0 and above is produced with Pydantic V2
# which is compliant with JSON Schema Draft 2020-12
validator = jsonschema.Draft202012Validator(
schema, format_checker=jsonschema.Draft202012Validator.FORMAT_CHECKER
)
else:
validator = jsonschema.Draft7Validator(
schema, format_checker=jsonschema.Draft7Validator.FORMAT_CHECKER
)

error_list = []
for error in sorted(validator.iter_errors(data), key=str):
if missing_ok and "is a required property" in error.message:
continue
error_list.append(error)
if error_list:
raise JsonschemaValidationError(error_list)
def _validate_obj_json(
instance: Any, schema: dict[str, Any], *, missing_ok: bool = False
) -> None:
"""
Validate a metadata instance of a **DANDI model** against the JSON schema of the
model, with an option to filter out errors related to missing required properties

:param instance: The metadata instance to validate
:param schema: The JSON schema of the model
:param missing_ok: Indicates whether to filter out errors related to missing
required properties
:raises JsonschemaValidationError: If the metadata instance is invalid, i.e. if
errors remain after optionally discounting those related to missing required
properties. The raised exception contains a list of
`jsonschema.exceptions.ValidationError` instances representing all the
remaining errors detected in the validation
"""
try:
validate_json(instance, schema)
except JsonschemaValidationError as e:
if missing_ok:
remaining_errs = [
err for err in e.errors if "is a required property" not in err.message
]
# Raise an exception only if there are errors left after filtering
if remaining_errs:
raise JsonschemaValidationError(remaining_errs) from e
else:
raise e


def _validate_dandiset_json(data: dict, schema_dir: Union[str, Path]) -> None:
@@ -250,7 +258,7 @@ def validate(
"using json schema for older versions"
)
schema = _get_schema(schema_version, schema_map[schema_key])
_validate_obj_json(obj, schema, missing_ok)
_validate_obj_json(obj, schema, missing_ok=missing_ok)
klass = getattr(models, schema_key)
try:
klass(**obj)
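
Note: the `validate_json` helper that `_validate_obj_json` now delegates to is imported from the package's utils module but is not shown in this diff. Below is a minimal sketch, not the PR's actual implementation, of a helper consistent with the call site above; the draft-selection rule is carried over from the removed code, and the import path for `JsonschemaValidationError` is an assumption.

from typing import Any, Union

import jsonschema

from dandischema.exceptions import JsonschemaValidationError  # assumed import path
from dandischema.utils import version2tuple


def validate_json(instance: Any, schema: dict) -> None:
    """Validate ``instance`` against ``schema``, collecting every error found."""
    validator: Union[jsonschema.Draft202012Validator, jsonschema.Draft7Validator]
    # Same draft selection as the removed _validate_obj_json(): instances with
    # schemaVersion >= 0.6.5 use schemas produced with Pydantic V2, which are
    # compliant with JSON Schema Draft 2020-12; older instances use Draft 7.
    if version2tuple(instance["schemaVersion"]) >= version2tuple("0.6.5"):
        validator = jsonschema.Draft202012Validator(
            schema, format_checker=jsonschema.Draft202012Validator.FORMAT_CHECKER
        )
    else:
        validator = jsonschema.Draft7Validator(
            schema, format_checker=jsonschema.Draft7Validator.FORMAT_CHECKER
        )
    errors = sorted(validator.iter_errors(instance), key=str)
    if errors:
        # The exception is expected to expose the collected errors via an
        # `errors` attribute, since _validate_obj_json() reads `e.errors`.
        raise JsonschemaValidationError(errors)
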
131 changes: 131 additions & 0 deletions dandischema/tests/test_metadata.py
@@ -1,7 +1,9 @@
from contextlib import nullcontext
from hashlib import md5, sha256
import json
from pathlib import Path
from typing import Any, Dict, Optional, Sequence, Set
from unittest.mock import MagicMock

import pytest

@@ -11,6 +13,7 @@
from ..metadata import (
_validate_asset_json,
_validate_dandiset_json,
_validate_obj_json,
aggregate_assets_summary,
migrate,
publish_model_schemata,
@@ -621,3 +624,131 @@ def test_aggregation_bids() -> None:
sum(_.get("name", "").startswith("OME/NGFF") for _ in summary["dataStandard"])
== 1
) # only a single entry so we do not duplicate them


class TestValidateObjJson:
"""
Tests for `_validate_obj_json()`
"""

@pytest.fixture
def dummy_schema(self) -> dict:
"""Returns a dummy JSON schema."""
return {
"type": "object",
"properties": {"name": {"type": "string"}},
"required": ["name"],
}

@pytest.fixture
def dummy_instance(self) -> dict:
"""Returns a dummy instance"""
return {"name": "Example"}

def test_valid_obj_no_errors(
self, monkeypatch: pytest.MonkeyPatch, dummy_schema: dict, dummy_instance: dict
) -> None:
"""
Test that `_validate_obj_json` does not raise when `validate_json` has no errors
"""

def mock_validate_json(_instance: dict, _schema: dict) -> None:
"""Simulate successful validation with no exceptions."""
return # No error raised

# Patch the validate_json function used inside `_validate_obj_json`
from dandischema import metadata

monkeypatch.setattr(metadata, "validate_json", mock_validate_json)

# `_validate_obj_json` should succeed without raising an exception
_validate_obj_json(dummy_instance, dummy_schema)

def test_raises_error_without_missing_ok(
self, monkeypatch: pytest.MonkeyPatch, dummy_schema: dict, dummy_instance: dict
) -> None:
"""
Test that `_validate_obj_json` forwards JsonschemaValidationError
when `missing_ok=False`.
"""

def mock_validate_json(_instance: dict, _schema: dict) -> None:
"""Simulate validation error."""
# Create a mock error that says a field is invalid
raise JsonschemaValidationError(
errors=[MagicMock(message="`name` is a required property")]
)

from dandischema import metadata

monkeypatch.setattr(metadata, "validate_json", mock_validate_json)

# Since `missing_ok=False`, any error should be re-raised.
with pytest.raises(JsonschemaValidationError) as excinfo:
_validate_obj_json(dummy_instance, dummy_schema, missing_ok=False)
assert "`name` is a required property" == excinfo.value.errors[0].message

@pytest.mark.parametrize(
("validation_errs", "expect_raises", "expected_remaining_errs_count"),
[
pytest.param(
[
MagicMock(message="`name` is a required property"),
MagicMock(message="`title` is a required property ..."),
],
False,
None,
id="no_remaining_errors",
),
pytest.param(
[
MagicMock(message="`name` is a required property"),
MagicMock(message="Some other validation error"),
],
True,
1,
id="one_remaining_error",
),
],
)
def test_raises_only_nonmissing_errors_with_missing_ok(
self,
monkeypatch: pytest.MonkeyPatch,
dummy_schema: dict,
dummy_instance: dict,
validation_errs: list[MagicMock],
expect_raises: bool,
expected_remaining_errs_count: Optional[int],
) -> None:
"""
Test that `_validate_obj_json` filters out 'is a required property' errors
when `missing_ok=True`.
"""

def mock_validate_json(_instance: dict, _schema: dict) -> None:
"""
Simulate multiple validation errors, including missing required property.
"""
raise JsonschemaValidationError(
errors=validation_errs # type: ignore[arg-type]
)

from dandischema import metadata

monkeypatch.setattr(metadata, "validate_json", mock_validate_json)

# If expect_raises is True, we use pytest.raises(JsonschemaValidationError)
# Otherwise, we enter a no-op context
ctx = (
pytest.raises(JsonschemaValidationError) if expect_raises else nullcontext()
)

with ctx as excinfo:
_validate_obj_json(dummy_instance, dummy_schema, missing_ok=True)

if excinfo is not None:
filtered_errors = excinfo.value.errors

# We expect the "required property" error to be filtered out,
# so we should only see the "Some other validation error".
assert len(filtered_errors) == expected_remaining_errs_count
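
For reference, a hypothetical caller-side sketch of the keyword-only `missing_ok` flag, e.g. validating a partially filled metadata record; the schema path, record contents, and the `dandischema.exceptions` import path are illustrative assumptions, not part of this PR.

import json
from pathlib import Path

from dandischema.exceptions import JsonschemaValidationError  # assumed import path
from dandischema.metadata import _validate_obj_json

# Illustrative inputs: a published schema file and an in-progress record that
# is still missing several required properties.
schema = json.loads(Path("schemata/dandiset.json").read_text())
partial_record = {"schemaVersion": "0.6.9", "name": "My draft dandiset"}

try:
    # missing_ok=True filters out "is a required property" errors, so only
    # genuinely invalid values (wrong types, bad formats, ...) are reported.
    _validate_obj_json(partial_record, schema, missing_ok=True)
except JsonschemaValidationError as e:
    for err in e.errors:
        print(err.message)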