Skip to content

Commit

Permalink
create a new test that can simulate the life cycle of dandiset and do…
Browse files Browse the repository at this point in the history
…i updates (not sure if we should always run the test, so for now I added skip); updates to to_datacite function
  • Loading branch information
djarecka committed Jan 19, 2025
1 parent 395bc5b commit 5512928
Show file tree
Hide file tree
Showing 3 changed files with 178 additions and 11 deletions.
17 changes: 12 additions & 5 deletions dandischema/datacite/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from jsonschema import Draft7Validator
import requests

from ..models import NAME_PATTERN, Organization, Person, PublishedDandiset, RoleType
from ..models import NAME_PATTERN, Organization, Person, PublishedDandiset, RoleType, Dandiset

DATACITE_CONTRTYPE = {
"ContactPerson",
Expand Down Expand Up @@ -71,8 +71,13 @@ def to_datacite(
publish: bool = False,
) -> dict:
"""Convert published Dandiset metadata to Datacite"""
if not isinstance(meta, PublishedDandiset):
meta = PublishedDandiset(**meta)

# checking the version, create Dandiset for draft version and PublishedDandiset otherwise
if isinstance(meta, dict):
if meta.get("version") == "draft":
meta = Dandiset(**meta)
else:
meta = PublishedDandiset(**meta)

attributes: Dict[str, Any] = {}
if publish:
Expand All @@ -89,7 +94,8 @@ def to_datacite(
},
]

attributes["doi"] = meta.doi
if hasattr(meta, "doi"):
attributes["doi"] = meta.doi
if meta.version:
attributes["version"] = meta.version
attributes["titles"] = [{"title": meta.name}]
Expand All @@ -103,7 +109,8 @@ def to_datacite(
"publisherIdentifierScheme": "RRID",
"lang": "en",
}
attributes["publicationYear"] = str(meta.datePublished.year)
if hasattr(meta, "datePublished"):
attributes["publicationYear"] = str(meta.datePublished.year)
# not sure about it dandi-api had "resourceTypeGeneral": "NWB"
attributes["types"] = {
"resourceType": "Neural Data",
Expand Down
170 changes: 165 additions & 5 deletions dandischema/datacite/tests/test_datacite.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
from pathlib import Path
import random
from typing import Any, Dict, Tuple
from copy import deepcopy

from jsonschema import Draft7Validator
import pytest
import requests

from dandischema.models import (
LicenseType,
Dandiset,
PublishedDandiset,
RelationType,
ResourceType,
Expand All @@ -21,7 +23,7 @@
from .. import _get_datacite_schema, to_datacite


def datacite_post(datacite: dict, doi: str) -> None:
def datacite_post(datacite: dict, doi: str, clean: bool = True) -> None:
"""Post the datacite object and check the status of the request"""

# removing doi in case it exists
Expand All @@ -35,21 +37,179 @@ def datacite_post(datacite: dict, doi: str) -> None:
auth=("DARTLIB.DANDI", os.environ["DATACITE_DEV_PASSWORD"]),
)
rp.raise_for_status()
print("\n in datacite_post, after posting", doi, rp.status_code)
# checking if i'm able to get the url
rg = requests.get(url=f"https://api.test.datacite.org/dois/{doi}/activities")
rg.raise_for_status()

if clean:
# cleaning url
_clean_doi(doi)


def datacite_update(datacite: dict, doi: str, clean: bool = True) -> None:
    """Update an existing datacite DOI record and check the request status.

    Parameters
    ----------
    datacite : dict
        Datacite payload (JSON:API formatted) to send.
    doi : str
        DOI to update on the test datacite server.
    clean : bool
        If True (default), delete the DOI after verifying it is retrievable.
        Mirrors the ``clean`` flag of ``datacite_post`` so callers can keep
        the DOI alive between lifecycle steps.

    Raises
    ------
    requests.HTTPError
        If the update or the follow-up retrieval request fails.
    """
    rp = requests.put(
        url=f"https://api.test.datacite.org/dois/{doi}",
        json=datacite,
        headers={"Content-Type": "application/vnd.api+json"},
        auth=("DARTLIB.DANDI", os.environ["DATACITE_DEV_PASSWORD"]),
    )
    rp.raise_for_status()

    # checking if the updated DOI activity record is retrievable
    rg = requests.get(url=f"https://api.test.datacite.org/dois/{doi}/activities")
    rg.raise_for_status()

    if clean:
        # remove the DOI so the test server does not accumulate entries
        _clean_doi(doi)


def _clean_doi(doi: str) -> int:
    """Attempt to delete a DOI and return the HTTP status code.

    Datacite only allows deleting DOIs in the *draft* state; deleting a
    findable DOI is rejected by the server. No exception is raised here —
    the status code is returned so callers can decide what it means
    (e.g. ``test_datacite_lifecycle`` treats 405 as "DOI already findable").
    """
    rq = requests.delete(
        f"https://api.test.datacite.org/dois/{doi}",
        auth=("DARTLIB.DANDI", os.environ["DATACITE_DEV_PASSWORD"]),
    )
    return rq.status_code

@pytest.mark.skip(
    reason="to not produce too many dois; not sure if we want to keep it as a test"
)
def test_datacite_lifecycle() -> None:
    """Test the lifecycle of a public dandiset and doi (from draft to published).

    Walks a DOI through draft -> updated draft -> findable (v1.0.0) ->
    findable (v2.0.0) against the datacite test server, keeping a single
    root DOI/URL pointing at the latest published version.
    """
    # checking which doi is available: deleting a findable doi is rejected
    # with 405, so any other status code means the identifier can be (re)used
    doi_available = False
    while not doi_available:
        dandi_id = f"000{random.randrange(500, 999)}"
        print(f"searching for available doi, trying dandi_id: {dandi_id}")
        doi_root = f"10.80507/dandi.{dandi_id}"
        if _clean_doi(doi_root) != 405:
            doi_available = True
            print(f"found available doi, dandi_id: {dandi_id}")

    dandi_id_prefix = f"DANDI:{dandi_id}"
    # the main/root url (the root doi was already set in the loop above)
    url_root = f"https://dandiarchive.org/dandiset/{dandi_id}"

    # creating draft dandiset with minimal metadata
    version = "draft"
    meta_dict = {
        "identifier": dandi_id_prefix,
        "id": f"{dandi_id_prefix}/{version}",
        "name": "Testing Dataset: lifecycle",
        "description": "testing lifecycle of a dataset and doi: draft",
        "version": version,
        "contributor": [
            {
                "name": "A_last, A_first",
                "email": "[email protected]",
                "roleName": [RoleType("dcite:ContactPerson")],
                "schemaKey": "Person",
            }
        ],
        "license": [LicenseType("spdx:CC-BY-4.0")],
        "citation": "A_last, A_first 2021",
        "manifestLocation": [
            f"https://api.dandiarchive.org/api/dandisets/{dandi_id}/versions/{version}/assets/"
        ],
        "assetsSummary": {
            "schemaKey": "AssetsSummary",
            "numberOfBytes": 10,
            "numberOfFiles": 1,
        },
    }
    # in addition to the minimal metadata, doi and url are needed to create a draft doi
    meta_dict["doi"] = doi_root
    meta_dict["url"] = url_root
    # creating draft dandiset
    dset = Dandiset(**meta_dict)

    # creating datacite object and posting the main doi entry (should be draft)
    datacite = to_datacite(dset)
    datacite_post(datacite, doi_root, clean=False)

    # updating the draft, but not enough to create a PublishedDandiset
    meta_dict["description"] = "testing lifecycle of a dataset and doi: new draft"
    # NOTE: the dandi workflow should first check whether a datacite object
    # that validates for publishing can be created, i.e.
    # to_datacite(meta_dict, validate=True, publish=True); if the metadata is
    # not sufficient, the draft doi should be updated instead (done here)
    datacite_new = to_datacite(meta_dict)
    datacite_update(datacite_new, doi_root)

    # creating v1.0.0
    version = "1.0.0"
    # adding contributors and updating description
    meta_dict["contributor"].append(
        {
            "name": "B_last, B_first",
            "email": "[email protected]",
            "roleName": [RoleType("dcite:DataCurator")],
            "schemaKey": "Person",
        }
    )
    meta_dict["description"] = "testing lifecycle of a dataset and doi: v1.0.0"
    # adding mandatory metadata for PublishedDandiset
    publish_meta = {
        "datePublished": "2020",
        "publishedBy": {
            "id": "urn:uuid:08fffc59-9f1b-44d6-8e02-6729d266d1b6",
            "name": "DANDI publish",
            "startDate": "2021-05-18T19:58:39.310338-04:00",
            "endDate": "2021-05-18T19:58:39.310361-04:00",
            "wasAssociatedWith": [
                {
                    "id": "urn:uuid:9267d2e1-4a37-463b-9b10-dad3c66d8eaa",
                    "identifier": "RRID:SCR_017571",
                    "name": "DANDI API",
                    "version": version,
                    "schemaKey": "Software",
                }
            ],
            "schemaKey": "PublishActivity",
        },
    }
    meta_dict.update(publish_meta)
    # updating the version, id, doi and url
    meta_dict["version"] = version
    meta_dict["id"] = f"{dandi_id_prefix}/{version}"
    meta_dict["doi"] = f"{doi_root}/{version}"
    meta_dict["url"] = f"https://dandiarchive.org/dandiset/{dandi_id}/{version}"
    # creating new published dandiset
    dset_v1 = PublishedDandiset(**meta_dict)
    # creating datacite object and posting (should be findable)
    datacite_v1 = to_datacite(dset_v1, publish=True, validate=True)
    datacite_post(datacite_v1, meta_dict["doi"], clean=False)

    # updating the main doi but keeping the root doi and url
    datacite = deepcopy(datacite_v1)
    datacite["data"]["attributes"]["doi"] = doi_root
    datacite["data"]["attributes"]["url"] = url_root
    # updating the doi (should change from draft to findable)
    datacite_update(datacite, doi_root)

    # creating v2.0.0
    version = "2.0.0"
    # updating description, version, id, doi and url
    meta_dict["description"] = "testing lifecycle of a dataset and doi: v2.0.0"
    meta_dict["version"] = version
    meta_dict["id"] = f"{dandi_id_prefix}/{version}"
    meta_dict["doi"] = f"{doi_root}/{version}"
    meta_dict["url"] = f"https://dandiarchive.org/dandiset/{dandi_id}/{version}"
    # creating new published dandiset
    dset_v2 = PublishedDandiset(**meta_dict)
    # creating datacite object and posting (should be findable)
    datacite_v2 = to_datacite(dset_v2, publish=True, validate=True)
    datacite_post(datacite_v2, meta_dict["doi"], clean=False)

    # updating the main doi to v2 but keeping the root doi and url
    datacite = deepcopy(datacite_v2)
    datacite["data"]["attributes"]["doi"] = doi_root
    datacite["data"]["attributes"]["url"] = url_root
    # updating the findable doi
    datacite_update(datacite, doi_root)



@pytest.fixture(scope="module")
Expand All @@ -59,7 +219,7 @@ def schema() -> Any:

@pytest.fixture(scope="function")
def metadata_basic() -> Dict[str, Any]:
dandi_id_noprefix = f"000{random.randrange(100, 999)}"
dandi_id_noprefix = f"000{random.randrange(100, 499)}"
dandi_id = f"DANDI:{dandi_id_noprefix}"
version = "0.0.0"
# meta data without doi, datePublished and publishedBy
Expand Down
2 changes: 1 addition & 1 deletion dandischema/tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def _basic_publishmeta(
"id": "urn:uuid:9267d2e1-4a37-463b-9b10-dad3c66d8eaa",
"identifier": "RRID:SCR_017571",
"name": "DANDI API",
"version": "0.1.0",
"version": version,
"schemaKey": "Software",
}
],
Expand Down

0 comments on commit 5512928

Please sign in to comment.