From dcc208c3ca13c326d7c7419ba603b0188077b9cc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 8 Oct 2024 00:24:00 +0000 Subject: [PATCH 1/7] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pre-commit/pre-commit-hooks: v4.6.0 → v5.0.0](https://github.com/pre-commit/pre-commit-hooks/compare/v4.6.0...v5.0.0) - [github.com/psf/black: 24.8.0 → 24.10.0](https://github.com/psf/black/compare/24.8.0...24.10.0) --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 36ab1b7..eca0871 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,14 +2,14 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 + rev: v5.0.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer - id: check-yaml - id: check-added-large-files - repo: https://github.com/psf/black - rev: 24.8.0 + rev: 24.10.0 hooks: - id: black - repo: https://github.com/PyCQA/isort From 5f7f353968f24fb02e4a87506c68264ffa717177 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Mon, 4 Nov 2024 11:55:49 -0500 Subject: [PATCH 2/7] Drop 3.8 support (remove typing_extensions from depends) --- .github/workflows/lint.yml | 2 +- .github/workflows/release.yml | 2 +- .github/workflows/test-dandi-cli.yml | 7 +++---- .github/workflows/test-nonetwork.yml | 2 +- .github/workflows/test-schema.yml | 2 +- .github/workflows/test.yml | 2 +- .github/workflows/typing.yml | 2 +- dandischema/models.py | 16 ++++++++++++---- dandischema/types.py | 3 +-- setup.cfg | 4 +--- 10 files changed, 23 insertions(+), 19 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index a017960..1e0fc22 100644 --- a/.github/workflows/lint.yml +++ 
b/.github/workflows/lint.yml @@ -16,7 +16,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.8' + python-version: '3.9' - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 6032e17..0a38260 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -58,7 +58,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: 3.8 + python-version: 3.9 - name: Install dandischema run: python -m pip install . diff --git a/.github/workflows/test-dandi-cli.yml b/.github/workflows/test-dandi-cli.yml index 98cfc82..4605cd9 100644 --- a/.github/workflows/test-dandi-cli.yml +++ b/.github/workflows/test-dandi-cli.yml @@ -24,11 +24,10 @@ jobs: - macos-latest python: # Use the only Python which is ATM also used by dandi-api - # - 3.7 - # - 3.8 # - 3.9 # - '3.10' - '3.11' + # - '3.12' version: - master - release @@ -36,11 +35,11 @@ jobs: - normal include: - os: ubuntu-latest - python: 3.8 + python: 3.9 mode: dandi-devel version: master - os: ubuntu-latest - python: 3.8 + python: 3.9 mode: dandi-devel version: release exclude: diff --git a/.github/workflows/test-nonetwork.yml b/.github/workflows/test-nonetwork.yml index 9c50916..9abc4b1 100644 --- a/.github/workflows/test-nonetwork.yml +++ b/.github/workflows/test-nonetwork.yml @@ -19,7 +19,7 @@ jobs: - ubuntu-latest - macos-12 python: - - 3.8 + - 3.9 - 3.9 - '3.10' - '3.11' diff --git a/.github/workflows/test-schema.yml b/.github/workflows/test-schema.yml index ff36f39..c4c1c68 100644 --- a/.github/workflows/test-schema.yml +++ b/.github/workflows/test-schema.yml @@ -19,7 +19,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.8' + python-version: '3.9' - name: Install dandischema run: python -m pip install . 
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index fcc744f..90493f2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -20,7 +20,7 @@ jobs: - ubuntu-latest - macos-12 python: - - 3.8 + - 3.9 - 3.9 - '3.10' - '3.11' diff --git a/.github/workflows/typing.yml b/.github/workflows/typing.yml index c01c56c..dd8a09e 100644 --- a/.github/workflows/typing.yml +++ b/.github/workflows/typing.yml @@ -16,7 +16,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.8' + python-version: '3.9' - name: Install dependencies run: | diff --git a/dandischema/models.py b/dandischema/models.py index 20af619..57aa762 100644 --- a/dandischema/models.py +++ b/dandischema/models.py @@ -4,7 +4,18 @@ from enum import Enum import os import re -from typing import Any, Dict, List, Literal, Optional, Sequence, Type, TypeVar, Union +from typing import ( + Annotated, + Any, + Dict, + List, + Literal, + Optional, + Sequence, + Type, + TypeVar, + Union, +) from warnings import warn from pydantic import ( @@ -23,9 +34,6 @@ ) from pydantic.json_schema import JsonSchemaValue from pydantic_core import CoreSchema -from typing_extensions import ( - Annotated, # TODO: import from `typing` when Python 3.8 support is dropped -) from zarr_checksum.checksum import InvalidZarrChecksum, ZarrDirectoryDigest from .consts import DANDI_SCHEMA_VERSION diff --git a/dandischema/types.py b/dandischema/types.py index 1f2eb48..555793e 100644 --- a/dandischema/types.py +++ b/dandischema/types.py @@ -1,12 +1,11 @@ # This file is for defining types that extend existing types through the use of # `typing.Annotated`. 
-from typing import Type +from typing import Annotated, Type from pydantic import ByteSize, GetCoreSchemaHandler, GetJsonSchemaHandler from pydantic.json_schema import JsonSchemaValue from pydantic_core import CoreSchema, core_schema -from typing_extensions import Annotated class _ByteSizeJsonSchemaAnnotation: diff --git a/setup.cfg b/setup.cfg index 9ccdf35..dcaab7b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -12,7 +12,6 @@ classifiers = License :: OSI Approved :: Apache Software License Operating System :: OS Independent Programming Language :: Python - Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 Programming Language :: Python :: 3.11 @@ -27,12 +26,11 @@ project_urls = Source Code = https://github.com/dandi/dandischema [options] -python_requires = >=3.8 +python_requires = >=3.9 install_requires = jsonschema[format] pydantic[email] ~= 2.4 requests - typing_extensions; python_version < "3.9" zarr_checksum zip_safe = False packages = find_namespace: From eaa32d2f285371984d3e44b32f46ce12b1659b78 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Mon, 4 Nov 2024 15:44:58 -0500 Subject: [PATCH 3/7] BF: removing duplicated 3.9 and jumping to macos-latest before we went for -12 since needed 3.8. 
Let's see if it would work for 3.9 --- .github/workflows/test-nonetwork.yml | 3 +-- .github/workflows/test.yml | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test-nonetwork.yml b/.github/workflows/test-nonetwork.yml index 9abc4b1..2f2302b 100644 --- a/.github/workflows/test-nonetwork.yml +++ b/.github/workflows/test-nonetwork.yml @@ -17,9 +17,8 @@ jobs: os: - windows-2019 - ubuntu-latest - - macos-12 + - macos-latest python: - - 3.9 - 3.9 - '3.10' - '3.11' diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 90493f2..cdf236a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -18,9 +18,8 @@ jobs: os: - windows-2019 - ubuntu-latest - - macos-12 + - macos-latest python: - - 3.9 - 3.9 - '3.10' - '3.11' From e6cb299a1807af3390a59383a6ccc9a66959bed0 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 30 Aug 2024 13:37:53 -0400 Subject: [PATCH 4/7] Add support for detection and addition of ome/ngff "standard" into assets summary --- dandischema/metadata.py | 13 +++++++++---- dandischema/models.py | 6 ++++++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/dandischema/metadata.py b/dandischema/metadata.py index 309707e..a2f041b 100644 --- a/dandischema/metadata.py +++ b/dandischema/metadata.py @@ -380,13 +380,18 @@ def _add_asset_to_stats(assetmeta: Dict[str, Any], stats: _stats_type) -> None: stats["tissuesample"].append(sample) stats["dataStandard"] = stats.get("dataStandard", []) + + def add_if_missing(standard: str) -> None: + if standard not in stats["dataStandard"]: + stats["dataStandard"].append(standard) + if "nwb" in assetmeta["encodingFormat"]: - if models.nwb_standard not in stats["dataStandard"]: - stats["dataStandard"].append(models.nwb_standard) + add_if_missing(models.nwb_standard) # TODO: RF assumption that any .json implies BIDS if set(Path(assetmeta["path"]).suffixes).intersection((".json", ".nii")): - if models.bids_standard not in
stats["dataStandard"]: - stats["dataStandard"].append(models.bids_standard) + add_if_missing(models.bids_standard) + if Path(assetmeta["path"]).suffixes == [".ome", ".zarr"]: + add_if_missing(models.ome_ngff_standard) # TODO?: move/bind such helpers as .from_metadata or alike within diff --git a/dandischema/models.py b/dandischema/models.py index 57aa762..4b6c79f 100644 --- a/dandischema/models.py +++ b/dandischema/models.py @@ -815,6 +815,12 @@ class StandardsType(BaseType): identifier="RRID:SCR_016124", ).model_dump(mode="json", exclude_none=True) +ome_ngff_standard = StandardsType( + name="OME/NGFF Standard", + # Seems no dedicated RRID, only for OME itself? + # identifier="RRID:SCR_???", +).model_dump(mode="json", exclude_none=True) + class ContactPoint(DandiBaseModel): email: Optional[EmailStr] = Field( From 3f27284a8a58d63f9df6d871247b864b53a889fd Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 6 Sep 2024 14:21:57 -0400 Subject: [PATCH 5/7] Use DOI for OME/NGFF We already seem to use the DOI: prefix in a few places, so choose it over a direct URL. --- dandischema/models.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dandischema/models.py b/dandischema/models.py index 4b6c79f..eaa9826 100644 --- a/dandischema/models.py +++ b/dandischema/models.py @@ -817,8 +817,7 @@ class StandardsType(BaseType): ome_ngff_standard = StandardsType( name="OME/NGFF Standard", - # Seems no dedicated RRID, only for OME itself?
- # identifier="RRID:SCR_???", + identifier="DOI:10.25504/FAIRsharing.9af712", ).model_dump(mode="json", exclude_none=True) From 0f312b963febc6a7103d602b6cdfa6b04b6b6ecc Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Mon, 4 Nov 2024 16:26:20 -0500 Subject: [PATCH 6/7] test: refactor test to test discovery/addition of OME/NGFF into summary --- dandischema/tests/test_metadata.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dandischema/tests/test_metadata.py b/dandischema/tests/test_metadata.py index 61ac2d7..669437d 100644 --- a/dandischema/tests/test_metadata.py +++ b/dandischema/tests/test_metadata.py @@ -542,7 +542,7 @@ def test_aggregation_bids() -> None: { "id": "dandiasset:6668d37f-e842-4b73-8c20-082a1dd0d31a", "path": "sub-MITU01/ses-20210703h01m05s04/microscopy/sub-MITU01_" - "run-1_sample-163_stain-YO_chunk-5_spim.h5", + "run-1_sample-163_stain-YO_chunk-5_spim.ome.zarr", "access": [ {"status": "dandi:OpenAccess", "schemaKey": "AccessRequirements"} ], @@ -588,7 +588,7 @@ def test_aggregation_bids() -> None: { "id": "dandiasset:84dd580f-8d4a-43f8-bda3-6fb53fb5d3a2", "path": "sub-MITU01/ses-20210703h16m32s10/microscopy/sub-MITU01_" - "ses-20210703h16m32s10_run-1_sample-162_stain-LEC_chunk-5_spim.h5", + "ses-20210703h16m32s10_run-1_sample-162_stain-LEC_chunk-5_spim.ome.zarr", "access": [ {"status": "dandi:OpenAccess", "schemaKey": "AccessRequirements"} ], @@ -634,3 +634,6 @@ def test_aggregation_bids() -> None: ] summary = aggregate_assets_summary(data) assert summary["numberOfSamples"] == 2 + assert any( + _.get("name", "").startswith("OME/NGFF") for _ in summary["dataStandard"] + ) From f82c1212c92783eb31a85fb9e9a27bbedd8c3c6e Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Mon, 4 Nov 2024 16:46:54 -0500 Subject: [PATCH 7/7] fix: fix type annotation and test that we do not add multiple --- dandischema/metadata.py | 2 +- dandischema/tests/test_metadata.py | 11 ++++++----- 2 files changed, 7 insertions(+), 6 
deletions(-) diff --git a/dandischema/metadata.py b/dandischema/metadata.py index a2f041b..10b591b 100644 --- a/dandischema/metadata.py +++ b/dandischema/metadata.py @@ -381,7 +381,7 @@ def _add_asset_to_stats(assetmeta: Dict[str, Any], stats: _stats_type) -> None: stats["dataStandard"] = stats.get("dataStandard", []) - def add_if_missing(standard: str) -> None: + def add_if_missing(standard: dict) -> None: if standard not in stats["dataStandard"]: stats["dataStandard"].append(standard) diff --git a/dandischema/tests/test_metadata.py b/dandischema/tests/test_metadata.py index 669437d..25399f1 100644 --- a/dandischema/tests/test_metadata.py +++ b/dandischema/tests/test_metadata.py @@ -565,7 +565,7 @@ def test_aggregation_bids() -> None: "contentSize": 38474544973, "dateModified": "2021-07-22T23:59:16.060551-04:00", "schemaVersion": "0.4.4", - "encodingFormat": "application/x-hdf5", + "encodingFormat": "application/x-zarr", "wasGeneratedBy": [ { "id": "urn:uuid:aef77d59-7a7f-4320-9d4b-9b03f3e25e54", @@ -611,7 +611,7 @@ def test_aggregation_bids() -> None: "contentSize": 61774316916, "dateModified": "2021-10-01T18:28:16.038990-04:00", "schemaVersion": "0.6.0", - "encodingFormat": "application/x-hdf5", + "encodingFormat": "application/x-zarr", "wasGeneratedBy": [ { "id": "urn:uuid:8f69a248-0e6a-4fa1-8369-ae1cc63d59d8", @@ -634,6 +634,7 @@ def test_aggregation_bids() -> None: ] summary = aggregate_assets_summary(data) assert summary["numberOfSamples"] == 2 - assert any( - _.get("name", "").startswith("OME/NGFF") for _ in summary["dataStandard"] - ) + assert ( + sum(_.get("name", "").startswith("OME/NGFF") for _ in summary["dataStandard"]) + == 1 + ) # only a single entry so we do not duplicate them