From 5512928ce2145e9f0427ecb17702eeebd6781a40 Mon Sep 17 00:00:00 2001 From: Dorota Jarecka Date: Sat, 18 Jan 2025 19:39:50 -0500 Subject: [PATCH] create a new test that can simulate the life cycle of dandiset and doi updates (not sure if we should always run the etst, so for now I added skip); updates to to_datacite function --- dandischema/datacite/__init__.py | 17 +- dandischema/datacite/tests/test_datacite.py | 170 +++++++++++++++++++- dandischema/tests/utils.py | 2 +- 3 files changed, 178 insertions(+), 11 deletions(-) diff --git a/dandischema/datacite/__init__.py b/dandischema/datacite/__init__.py index 9475c43..d2de9d4 100644 --- a/dandischema/datacite/__init__.py +++ b/dandischema/datacite/__init__.py @@ -14,7 +14,7 @@ from jsonschema import Draft7Validator import requests -from ..models import NAME_PATTERN, Organization, Person, PublishedDandiset, RoleType +from ..models import NAME_PATTERN, Organization, Person, PublishedDandiset, RoleType, Dandiset DATACITE_CONTRTYPE = { "ContactPerson", @@ -71,8 +71,13 @@ def to_datacite( publish: bool = False, ) -> dict: """Convert published Dandiset metadata to Datacite""" - if not isinstance(meta, PublishedDandiset): - meta = PublishedDandiset(**meta) + + # checking the version, create Dandiset for draft version and PublishedDandiset otherwise + if isinstance(meta, dict): + if meta.get("version") == "draft": + meta = Dandiset(**meta) + else: + meta = PublishedDandiset(**meta) attributes: Dict[str, Any] = {} if publish: @@ -89,7 +94,8 @@ def to_datacite( }, ] - attributes["doi"] = meta.doi + if hasattr(meta, "doi"): + attributes["doi"] = meta.doi if meta.version: attributes["version"] = meta.version attributes["titles"] = [{"title": meta.name}] @@ -103,7 +109,8 @@ def to_datacite( "publisherIdentifierScheme": "RRID", "lang": "en", } - attributes["publicationYear"] = str(meta.datePublished.year) + if hasattr(meta, "datePublished"): + attributes["publicationYear"] = str(meta.datePublished.year) # not sure about it dandi-api had "resourceTypeGeneral": "NWB" attributes["types"] = { "resourceType": "Neural Data", diff --git a/dandischema/datacite/tests/test_datacite.py b/dandischema/datacite/tests/test_datacite.py index c9e0d72..9b3dc4d 100644 --- a/dandischema/datacite/tests/test_datacite.py +++ b/dandischema/datacite/tests/test_datacite.py @@ -3,6 +3,7 @@ from pathlib import Path import random from typing import Any, Dict, Tuple +from copy import deepcopy from jsonschema import Draft7Validator import pytest @@ -10,6 +11,7 @@ from dandischema.models import ( LicenseType, + Dandiset, PublishedDandiset, RelationType, ResourceType, @@ -21,7 +23,7 @@ from .. import _get_datacite_schema, to_datacite -def datacite_post(datacite: dict, doi: str) -> None: +def datacite_post(datacite: dict, doi: str, clean: bool = True) -> None: """Post the datacite object and check the status of the request""" # removing doi in case it exists @@ -35,21 +37,179 @@ def datacite_post(datacite: dict, doi: str) -> None: auth=("DARTLIB.DANDI", os.environ["DATACITE_DEV_PASSWORD"]), ) rp.raise_for_status() + print("\n in datacite_post, after posting", doi, rp.status_code) + # checking if i'm able to get the url + rg = requests.get(url=f"https://api.test.datacite.org/dois/{doi}/activities") + rg.raise_for_status() + + if clean: + # cleaning url + _clean_doi(doi) + + +def datacite_update(datacite: dict, doi: str) -> None: + """Update the datacite object and check the status of the request""" + rp = requests.put( + url=f"https://api.test.datacite.org/dois/{doi}", + json=datacite, + headers={"Content-Type": "application/vnd.api+json"}, + auth=("DARTLIB.DANDI", os.environ["DATACITE_DEV_PASSWORD"]), + ) + rp.raise_for_status() # checking if i'm able to get the url rg = requests.get(url=f"https://api.test.datacite.org/dois/{doi}/activities") rg.raise_for_status() - # cleaning url - _clean_doi(doi) def _clean_doi(doi: str) -> None: """Remove doi. Status code is ignored""" - requests.delete( + rq = requests.delete( f"https://api.test.datacite.org/dois/{doi}", auth=("DARTLIB.DANDI", os.environ["DATACITE_DEV_PASSWORD"]), ) + print("\n in _clean_doi", doi, rq.status_code) + return rq.status_code + +@pytest.mark.skip(reason="to not produced too many dois, not sure if we want to keep it as a test") +def test_datacite_lifecycle() -> None: + """testing the lifecycle of a public dandiset and doi (from draft to published)""" + + # checking which doi is available + doi_available = False + while not doi_available: + dandi_id = f"000{random.randrange(500, 999)}" + print(f"searching for available doi, trying dandi_id: {dandi_id}") + doi_root = f'10.80507/dandi.{dandi_id}' + if _clean_doi(doi_root) != 405: + doi_available = True + print(f"found available doi, dandi_id: {dandi_id}") + + dandi_id_prefix = f"DANDI:{dandi_id}" + # creating the main/root doi and url + doi_root = f'10.80507/dandi.{dandi_id}' + url_root = f"https://dandiarchive.org/dandiset/{dandi_id}" + + + # creating draft dandiset with minimal metadata + version = "draft" + meta_dict = { + "identifier": dandi_id_prefix, + "id": f"{dandi_id_prefix}/{version}", + "name": "Testing Dataset: lifecycle", + "description": "testing lifecycle of a dataset and doi: draft", + "version": version, + "contributor": [ + { + "name": "A_last, A_first", + "email": "nemo@example.com", + "roleName": [RoleType("dcite:ContactPerson")], + "schemaKey": "Person", + } + ], + "license": [LicenseType("spdx:CC-BY-4.0")], + "citation": "A_last, A_first 2021", + "manifestLocation": [ + f"https://api.dandiarchive.org/api/dandisets/{dandi_id}/versions/{version}/assets/" + ], + "assetsSummary": { + "schemaKey": "AssetsSummary", + "numberOfBytes": 10, + "numberOfFiles": 1, + }, + } + #in addition to minimal metadata, we need to add doi and url if we want to create draft doi + meta_dict["doi"] = doi_root + meta_dict["url"] = url_root + # creating draft dandiset + dset = Dandiset(**meta_dict) + + # creating datacite object and posting the main doi entry (should be draft) + datacite = to_datacite(dset) + datacite_post(datacite, doi_root, clean=False) + + + # updating the draft but not enough to create PublishDandiset + meta_dict["description"] = "testing lifecycle of a dataset and doi: new draft" + # the dandi workflow should check if we cna create a datacite that can be validated and published + #try: datacite_new = to_datacite(meta_dict, validate=True, publish=True) + # if the metadata is not enough to create a valid datacite, we should update the draft doi + datacite_new = to_datacite(meta_dict) + datacite_update(datacite_new, doi_root) + + + # creating v1.0.0 + version = "1.0.0" + # adding contributors and updating description + meta_dict["contributor"].append({ + "name": "B_last, B_first", + "email": "nemo@example.com", + "roleName": [RoleType("dcite:DataCurator")], + "schemaKey": "Person", + }) + meta_dict["description"] = "testing lifecycle of a dataset and doi: v1.0.0" + # adding mandatory metadata for PublishDandiset + publish_meta = { + "datePublished": "2020", + "publishedBy": { + "id": "urn:uuid:08fffc59-9f1b-44d6-8e02-6729d266d1b6", + "name": "DANDI publish", + "startDate": "2021-05-18T19:58:39.310338-04:00", + "endDate": "2021-05-18T19:58:39.310361-04:00", + "wasAssociatedWith": [ + { + "id": "urn:uuid:9267d2e1-4a37-463b-9b10-dad3c66d8eaa", + "identifier": "RRID:SCR_017571", + "name": "DANDI API", + "version": version, + "schemaKey": "Software", + } + ], + "schemaKey": "PublishActivity", + }, + } + meta_dict.update(publish_meta) + # updating the version, id etc. + meta_dict["version"] = version + meta_dict["id"] =f"{dandi_id_prefix}/{version}" + meta_dict["doi"] = f"{doi_root}/{version}" + meta_dict["url"] = f"https://dandiarchive.org/dandiset/{dandi_id}/{version}" + # creating new published dandiset + dset_v1 = PublishedDandiset(**meta_dict) + # creating datacite object and posting (should be findable) + datacite_v1 = to_datacite(dset_v1, publish=True, validate=True) + datacite_post(datacite_v1, meta_dict["doi"], clean=False) + + # updating the main doi but keeping the root doi and url + datacite = deepcopy(datacite_v1) + datacite["data"]["attributes"]["doi"] = doi_root + datacite["data"]["attributes"]["url"] = url_root + # updating the doi (should change from draft to findable) + datacite_update(datacite, doi_root) + + + # creating v2.0.0 + version = "2.0.0" + # updating description + meta_dict["description"] = "testing lifecycle of a dataset and doi: v2.0.0" + meta_dict["version"] = version + meta_dict["id"] =f"{dandi_id_prefix}/{version}" + meta_dict["doi"] = f"{doi_root}/{version}" + meta_dict["url"] = f"https://dandiarchive.org/dandiset/{dandi_id}/{version}" + # creating new published dandiset + dset_v2 = PublishedDandiset(**meta_dict) + # creating datacite object and posting (should be findable) + datacite_v2 = to_datacite(dset_v2, publish=True, validate=True) + datacite_post(datacite_v2, meta_dict["doi"], clean=False) + + # updating the main doi to v2 but keeping the root doi and url + datacite = deepcopy(datacite_v2) + datacite["data"]["attributes"]["doi"] = doi_root + datacite["data"]["attributes"]["url"] = url_root + # updating the findable doi + datacite_update(datacite, doi_root) + @pytest.fixture(scope="module") @@ -59,7 +219,7 @@ def schema() -> Any: @pytest.fixture(scope="function") def metadata_basic() -> Dict[str, Any]: - dandi_id_noprefix = f"000{random.randrange(100, 999)}" + dandi_id_noprefix = f"000{random.randrange(100, 499)}" dandi_id = f"DANDI:{dandi_id_noprefix}" version = "0.0.0" # meta data without doi, datePublished and publishedBy diff --git a/dandischema/tests/utils.py b/dandischema/tests/utils.py index 5e76d5d..76fad50 100644 --- a/dandischema/tests/utils.py +++ b/dandischema/tests/utils.py @@ -28,7 +28,7 @@ def _basic_publishmeta( "id": "urn:uuid:9267d2e1-4a37-463b-9b10-dad3c66d8eaa", "identifier": "RRID:SCR_017571", "name": "DANDI API", - "version": "0.1.0", + "version": version, "schemaKey": "Software", } ],