From 610ac7774adc3265dac0c4c175f8dd0f3e9acb64 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Mon, 4 Nov 2024 09:35:14 -0500 Subject: [PATCH] RF: replace use of "identifiers" with "alternateIdentifiers" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The situation with "identifiers" is messy. We relied on it, but it was not in the DataCite schema, although it was allowed by the API: https://support.datacite.org/docs/what-is-the-identifiers-attribute-in-the-rest-api > When creating or updating DOI alternateIdentifier metadata, the REST API accepts values in either the alternateIdentifiers or identifiers attributes. Including metadata in either attribute will populate the identifiers and alternateIdentifiers attributes in the REST API response and the alternateIdentifiers property in DataCite XML. And in the jsonschema serialization of 4.5, "identifiers" was removed; see more in https://github.com/inveniosoftware/datacite/issues/81#issuecomment-1921727400 and therein. But I guess the currently used 4.3 schema from datacite (not inveniosoftware) still requires "identifiers", and hence this commit/solution is incomplete since it fails validation (see below). "identifiers" was removed from the required properties only in 4.5 from inveniosoftware. 
❯ python -m pytest -s -v dandischema/tests/test_datacite.py ============================================================= test session starts ============================================================== platform linux -- Python 3.12.6, pytest-8.3.3, pluggy-1.5.0 -- /home/yoh/proj/dandi/dandischema/venv/3/bin/python cachedir: .pytest_cache rootdir: /home/yoh/proj/dandi/dandischema configfile: tox.ini plugins: rerunfailures-14.0, cov-6.0.0 collected 14 items dandischema/tests/test_datacite.py::test_datacite[000004] FAILED dandischema/tests/test_datacite.py::test_datacite[000008] FAILED dandischema/tests/test_datacite.py::test_dandimeta_datacite[additional_meta0-datacite_checks0] FAILED dandischema/tests/test_datacite.py::test_dandimeta_datacite[additional_meta1-datacite_checks1] FAILED dandischema/tests/test_datacite.py::test_dandimeta_datacite[additional_meta2-datacite_checks2] FAILED dandischema/tests/test_datacite.py::test_dandimeta_datacite[additional_meta3-datacite_checks3] FAILED dandischema/tests/test_datacite.py::test_dandimeta_datacite[additional_meta4-datacite_checks4] FAILED dandischema/tests/test_datacite.py::test_dandimeta_datacite[additional_meta5-datacite_checks5] FAILED dandischema/tests/test_datacite.py::test_dandimeta_datacite[additional_meta6-datacite_checks6] FAILED dandischema/tests/test_datacite.py::test_datacite_publish PASSED dandischema/tests/test_datacite.py::test_datacite_related_res_url[related_res_url0-related_ident_exp0] PASSED dandischema/tests/test_datacite.py::test_datacite_related_res_url[related_res_url1-related_ident_exp1] PASSED dandischema/tests/test_datacite.py::test_datacite_related_res_url[related_res_url2-related_ident_exp2] PASSED dandischema/tests/test_datacite.py::test_datacite_related_res_url[related_res_url3-related_ident_exp3] PASSED =================================================================== FAILURES =================================================================== 
____________________________________________________________ test_datacite[000004] _____________________________________________________________ dandischema/tests/test_datacite.py:160: in test_datacite datacite = to_datacite(meta=meta, validate=True) dandischema/datacite.py:238: in to_datacite validate_datacite(datacite_dict) dandischema/datacite.py:258: in validate_datacite validator.validate(datacite_dict["data"]["attributes"]) venv/3/lib/python3.12/site-packages/jsonschema/validators.py:451: in validate raise error E jsonschema.exceptions.ValidationError: 'identifiers' is a required property E E Failed validating 'required' in schema: E {'$schema': 'http://json-schema.org/draft-07/schema#', E 'definitions': {'nameType': {'type': 'string', E 'enum': ['Organizational', 'Personal']}, E 'nameIdentifiers': {'type': 'array', E 'items': {'type': 'object', E 'properties': {'nameIdentifier': {'type': 'string'}, E 'nameIdentifierScheme': {'type': 'string'}, E 'schemeURI': {'type': 'string', E 'format': 'uri'}}, E 'required': ['nameIdentifier', E 'nameIdentifierScheme']}, --- dandischema/datacite.py | 15 +++++++++------ dandischema/tests/test_datacite.py | 14 +++++++------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/dandischema/datacite.py b/dandischema/datacite.py index f8a42469..8833808b 100644 --- a/dandischema/datacite.py +++ b/dandischema/datacite.py @@ -69,16 +69,19 @@ def to_datacite( if publish: attributes["event"] = "publish" - attributes["identifiers"] = [ + attributes["alternateIdentifiers"] = [ # TODO: the first element is ignored, not sure how to fix it... 
- {"identifier": f"https://doi.org/{meta.doi}", "identifierType": "DOI"}, { - "identifier": f"https://identifiers.org/{meta.id}", - "identifierType": "URL", + "alternateIdentifier": f"https://doi.org/{meta.doi}", + "alternateIdentifierType": "DOI", }, { - "identifier": str(meta.url), - "identifierType": "URL", + "alternateIdentifier": f"https://identifiers.org/{meta.id}", + "alternateIdentifierType": "URL", + }, + { + "alternateIdentifier": str(meta.url), + "alternateIdentifierType": "URL", }, ] diff --git a/dandischema/tests/test_datacite.py b/dandischema/tests/test_datacite.py index 7c682b80..4f7e042a 100644 --- a/dandischema/tests/test_datacite.py +++ b/dandischema/tests/test_datacite.py @@ -469,24 +469,24 @@ def test_datacite_publish(metadata_basic: Dict[str, Any]) -> None: {"description": "testing", "descriptionType": "Abstract"} ], "doi": f"10.80507/dandi.{dandi_id_noprefix}/{version}", - "identifiers": [ + "alternateIdentifiers": [ { - "identifier": ( + "alternateIdentifier": ( f"https://doi.org/10.80507" f"/dandi.{dandi_id_noprefix}/{version}" ), - "identifierType": "DOI", + "alternateIdentifierType": "DOI", }, { - "identifier": f"https://identifiers.org/{dandi_id}/{version}", - "identifierType": "URL", + "alternateIdentifier": f"https://identifiers.org/{dandi_id}/{version}", + "alternateIdentifierType": "URL", }, { - "identifier": ( + "alternateIdentifier": ( f"https://dandiarchive.org/dandiset" f"/{dandi_id_noprefix}/{version}" ), - "identifierType": "URL", + "alternateIdentifierType": "URL", }, ], "publicationYear": "1970",