Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] updating life cycle of dandiset #275

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 19 additions & 5 deletions dandischema/datacite/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,14 @@
from jsonschema import Draft7Validator
import requests

from ..models import NAME_PATTERN, Organization, Person, PublishedDandiset, RoleType
from ..models import (
NAME_PATTERN,
Dandiset,
Organization,
Person,
PublishedDandiset,
RoleType,
)

DATACITE_CONTRTYPE = {
"ContactPerson",
Expand Down Expand Up @@ -71,8 +78,13 @@
publish: bool = False,
) -> dict:
"""Convert published Dandiset metadata to Datacite"""
if not isinstance(meta, PublishedDandiset):
meta = PublishedDandiset(**meta)

# checking the version, create Dandiset for draft version and PublishedDandiset otherwise
if isinstance(meta, dict):
if meta.get("version") == "draft":
meta = Dandiset(**meta)

Check warning on line 85 in dandischema/datacite/__init__.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/__init__.py#L85

Added line #L85 was not covered by tests
else:
meta = PublishedDandiset(**meta)

attributes: Dict[str, Any] = {}
if publish:
Expand All @@ -89,7 +101,8 @@
},
]

attributes["doi"] = meta.doi
if hasattr(meta, "doi"):
attributes["doi"] = meta.doi
if meta.version:
attributes["version"] = meta.version
attributes["titles"] = [{"title": meta.name}]
Expand All @@ -103,7 +116,8 @@
"publisherIdentifierScheme": "RRID",
"lang": "en",
}
attributes["publicationYear"] = str(meta.datePublished.year)
if hasattr(meta, "datePublished"):
attributes["publicationYear"] = str(meta.datePublished.year)
# not sure about it dandi-api had "resourceTypeGeneral": "NWB"
attributes["types"] = {
"resourceType": "Neural Data",
Expand Down
171 changes: 165 additions & 6 deletions dandischema/datacite/tests/test_datacite.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from copy import deepcopy
import json
import os
from pathlib import Path
Expand All @@ -9,6 +10,7 @@
import requests

from dandischema.models import (
Dandiset,
LicenseType,
PublishedDandiset,
RelationType,
Expand All @@ -21,7 +23,7 @@
from .. import _get_datacite_schema, to_datacite


def datacite_post(datacite: dict, doi: str) -> None:
def datacite_post(datacite: dict, doi: str, clean: bool = True) -> None:
"""Post the datacite object and check the status of the request"""

# removing doi in case it exists
Expand All @@ -35,21 +37,178 @@
auth=("DARTLIB.DANDI", os.environ["DATACITE_DEV_PASSWORD"]),
)
rp.raise_for_status()

print("\n in datacite_post, after posting", doi, rp.status_code)

Check warning on line 40 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L40

Added line #L40 was not covered by tests
# checking if i'm able to get the url
rg = requests.get(url=f"https://api.test.datacite.org/dois/{doi}/activities")
rg.raise_for_status()

# cleaning url
_clean_doi(doi)
if clean:

Check warning on line 45 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L45

Added line #L45 was not covered by tests
# cleaning url
_clean_doi(doi)

Check warning on line 47 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L47

Added line #L47 was not covered by tests


def datacite_update(datacite: dict, doi: str) -> None:
"""Update the datacite object and check the status of the request"""
rp = requests.put(

Check warning on line 52 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L52

Added line #L52 was not covered by tests
url=f"https://api.test.datacite.org/dois/{doi}",
json=datacite,
headers={"Content-Type": "application/vnd.api+json"},
auth=("DARTLIB.DANDI", os.environ["DATACITE_DEV_PASSWORD"]),
)
rp.raise_for_status()

Check warning on line 58 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L58

Added line #L58 was not covered by tests

# checking if i'm able to get the url
rg = requests.get(url=f"https://api.test.datacite.org/dois/{doi}/activities")
rg.raise_for_status()

Check warning on line 62 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L61-L62

Added lines #L61 - L62 were not covered by tests


def _clean_doi(doi: str) -> None:
"""Remove doi. Status code is ignored"""
requests.delete(
rq = requests.delete(

Check warning on line 67 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L67

Added line #L67 was not covered by tests
f"https://api.test.datacite.org/dois/{doi}",
auth=("DARTLIB.DANDI", os.environ["DATACITE_DEV_PASSWORD"]),
)
print("\n in _clean_doi", doi, rq.status_code)
return rq.status_code

Check warning on line 72 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L71-L72

Added lines #L71 - L72 were not covered by tests


@pytest.mark.skip(
reason="to not produced too many dois, not sure if we want to keep it as a test"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is because the issue with DOI deletion has not yet been addressed on the DataCite side, right?
If so, it would be better to mention that in the current skip reason.

Otherwise, the test should be diligent about removing any DOI it created during its lifetime.

)
def test_datacite_lifecycle() -> None:
"""testing the lifecycle of a public dandiset and doi (from draft to published)"""

# checking which doi is available
doi_available = False
while not doi_available:
dandi_id = f"000{random.randrange(500, 999)}"
print(f"searching for available doi, trying dandi_id: {dandi_id}")
doi_root = f"10.80507/dandi.{dandi_id}"
if _clean_doi(doi_root) != 405:
doi_available = True
print(f"found available doi, dandi_id: {dandi_id}")

Check warning on line 89 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L82-L89

Added lines #L82 - L89 were not covered by tests

dandi_id_prefix = f"DANDI:{dandi_id}"

Check warning on line 91 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L91

Added line #L91 was not covered by tests
# creating the main/root doi and url
doi_root = f"10.80507/dandi.{dandi_id}"
url_root = f"https://dandiarchive.org/dandiset/{dandi_id}"

Check warning on line 94 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L93-L94

Added lines #L93 - L94 were not covered by tests

# creating draft dandiset with minimal metadata
version = "draft"
meta_dict = {

Check warning on line 98 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L97-L98

Added lines #L97 - L98 were not covered by tests
"identifier": dandi_id_prefix,
"id": f"{dandi_id_prefix}/{version}",
"name": "Testing Dataset: lifecycle",
"description": "testing lifecycle of a dataset and doi: draft",
"version": version,
"contributor": [
{
"name": "A_last, A_first",
"email": "[email protected]",
"roleName": [RoleType("dcite:ContactPerson")],
"schemaKey": "Person",
}
],
"license": [LicenseType("spdx:CC-BY-4.0")],
"citation": "A_last, A_first 2021",
"manifestLocation": [
f"https://api.dandiarchive.org/api/dandisets/{dandi_id}/versions/{version}/assets/"
],
"assetsSummary": {
"schemaKey": "AssetsSummary",
"numberOfBytes": 10,
"numberOfFiles": 1,
},
Comment on lines +117 to +121
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hopefully it does not matter, but whenever a dandiset is created there is no assets summary yet -- it is computed asynchronously IIRC, hence:

Suggested change
"assetsSummary": {
"schemaKey": "AssetsSummary",
"numberOfBytes": 10,
"numberOfFiles": 1,
},

}
# in addition to minimal metadata, we need to add doi and url if we want to create draft doi
meta_dict["doi"] = doi_root
meta_dict["url"] = url_root

Check warning on line 125 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L124-L125

Added lines #L124 - L125 were not covered by tests
# creating draft dandiset
dset = Dandiset(**meta_dict)

Check warning on line 127 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L127

Added line #L127 was not covered by tests

# creating datacite object and posting the main doi entry (should be draft)
datacite = to_datacite(dset)
datacite_post(datacite, doi_root, clean=False)

Check warning on line 131 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L130-L131

Added lines #L130 - L131 were not covered by tests

# updating the draft, but not enough to create a PublishedDandiset
meta_dict["description"] = "testing lifecycle of a dataset and doi: new draft"

Check warning on line 134 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L134

Added line #L134 was not covered by tests
# the dandi workflow should check if we can create a datacite that can be validated and published
# try: datacite_new = to_datacite(meta_dict, validate=True, publish=True)
# if the metadata is not enough to create a valid datacite, we should update the draft doi
datacite_new = to_datacite(meta_dict)
datacite_update(datacite_new, doi_root)

Check warning on line 139 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L138-L139

Added lines #L138 - L139 were not covered by tests

# creating v1.0.0
version = "1.0.0"

Check warning on line 142 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L142

Added line #L142 was not covered by tests
# adding contributors and updating description
meta_dict["contributor"].append(

Check warning on line 144 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L144

Added line #L144 was not covered by tests
{
"name": "B_last, B_first",
"email": "[email protected]",
"roleName": [RoleType("dcite:DataCurator")],
"schemaKey": "Person",
}
)
meta_dict["description"] = "testing lifecycle of a dataset and doi: v1.0.0"

Check warning on line 152 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L152

Added line #L152 was not covered by tests
# adding mandatory metadata for PublishedDandiset
publish_meta = {

Check warning on line 154 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L154

Added line #L154 was not covered by tests
"datePublished": "2020",
"publishedBy": {
"id": "urn:uuid:08fffc59-9f1b-44d6-8e02-6729d266d1b6",
"name": "DANDI publish",
"startDate": "2021-05-18T19:58:39.310338-04:00",
"endDate": "2021-05-18T19:58:39.310361-04:00",
"wasAssociatedWith": [
{
"id": "urn:uuid:9267d2e1-4a37-463b-9b10-dad3c66d8eaa",
"identifier": "RRID:SCR_017571",
"name": "DANDI API",
"version": version,
"schemaKey": "Software",
}
],
"schemaKey": "PublishActivity",
},
}
meta_dict.update(publish_meta)

Check warning on line 173 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L173

Added line #L173 was not covered by tests
# updating the version, id etc.
meta_dict["version"] = version
meta_dict["id"] = f"{dandi_id_prefix}/{version}"
meta_dict["doi"] = f"{doi_root}/{version}"
meta_dict["url"] = f"https://dandiarchive.org/dandiset/{dandi_id}/{version}"

Check warning on line 178 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L175-L178

Added lines #L175 - L178 were not covered by tests
# creating new published dandiset
dset_v1 = PublishedDandiset(**meta_dict)

Check warning on line 180 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L180

Added line #L180 was not covered by tests
# creating datacite object and posting (should be findable)
datacite_v1 = to_datacite(dset_v1, publish=True, validate=True)
datacite_post(datacite_v1, meta_dict["doi"], clean=False)

Check warning on line 183 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L182-L183

Added lines #L182 - L183 were not covered by tests

# updating the main doi but keeping the root doi and url
datacite = deepcopy(datacite_v1)
datacite["data"]["attributes"]["doi"] = doi_root
datacite["data"]["attributes"]["url"] = url_root

Check warning on line 188 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L186-L188

Added lines #L186 - L188 were not covered by tests
# updating the doi (should change from draft to findable)
datacite_update(datacite, doi_root)

Check warning on line 190 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L190

Added line #L190 was not covered by tests

# creating v2.0.0
version = "2.0.0"

Check warning on line 193 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L193

Added line #L193 was not covered by tests
# updating description
meta_dict["description"] = "testing lifecycle of a dataset and doi: v2.0.0"
meta_dict["version"] = version
meta_dict["id"] = f"{dandi_id_prefix}/{version}"
meta_dict["doi"] = f"{doi_root}/{version}"
meta_dict["url"] = f"https://dandiarchive.org/dandiset/{dandi_id}/{version}"

Check warning on line 199 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L195-L199

Added lines #L195 - L199 were not covered by tests
# creating new published dandiset
dset_v2 = PublishedDandiset(**meta_dict)

Check warning on line 201 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L201

Added line #L201 was not covered by tests
# creating datacite object and posting (should be findable)
datacite_v2 = to_datacite(dset_v2, publish=True, validate=True)
datacite_post(datacite_v2, meta_dict["doi"], clean=False)

Check warning on line 204 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L203-L204

Added lines #L203 - L204 were not covered by tests

# updating the main doi to v2 but keeping the root doi and url
datacite = deepcopy(datacite_v2)
datacite["data"]["attributes"]["doi"] = doi_root
datacite["data"]["attributes"]["url"] = url_root

Check warning on line 209 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L207-L209

Added lines #L207 - L209 were not covered by tests
# updating the findable doi
datacite_update(datacite, doi_root)

Check warning on line 211 in dandischema/datacite/tests/test_datacite.py

View check run for this annotation

Codecov / codecov/patch

dandischema/datacite/tests/test_datacite.py#L211

Added line #L211 was not covered by tests


@pytest.fixture(scope="module")
Expand All @@ -59,7 +218,7 @@

@pytest.fixture(scope="function")
def metadata_basic() -> Dict[str, Any]:
dandi_id_noprefix = f"000{random.randrange(100, 999)}"
dandi_id_noprefix = f"000{random.randrange(100, 499)}"
dandi_id = f"DANDI:{dandi_id_noprefix}"
version = "0.0.0"
# meta data without doi, datePublished and publishedBy
Expand Down
2 changes: 1 addition & 1 deletion dandischema/tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def _basic_publishmeta(
"id": "urn:uuid:9267d2e1-4a37-463b-9b10-dad3c66d8eaa",
"identifier": "RRID:SCR_017571",
"name": "DANDI API",
"version": "0.1.0",
"version": version,
"schemaKey": "Software",
}
],
Expand Down
Loading