From 9ee7b29cc4931adb46acb2920d8ca518d7334fbd Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 7 Jan 2025 16:03:04 +0100 Subject: [PATCH 01/99] rename validator --- fractal_server/app/schemas/_validators.py | 25 +++++++++++++++++- fractal_server/app/schemas/v2/task.py | 18 ++++++------- fractal_server/app/schemas/v2/task_group.py | 4 +-- fractal_server/app/schemas/v2/workflowtask.py | 26 +++++++++---------- fractal_server/images/models.py | 14 +++++----- 5 files changed, 55 insertions(+), 32 deletions(-) diff --git a/fractal_server/app/schemas/_validators.py b/fractal_server/app/schemas/_validators.py index bdce6d3507..2e37195c2b 100644 --- a/fractal_server/app/schemas/_validators.py +++ b/fractal_server/app/schemas/_validators.py @@ -1,6 +1,7 @@ import os from typing import Any from typing import Optional +from typing import Union def valstr(attribute: str, accept_none: bool = False): @@ -27,7 +28,7 @@ def val(string: Optional[str]) -> Optional[str]: return val -def valdictkeys(attribute: str): +def valdict_keys(attribute: str): def val(d: Optional[dict[str, Any]]) -> Optional[dict[str, Any]]: """ Apply valstr to every key of the dictionary, and fail if there are @@ -48,6 +49,28 @@ def val(d: Optional[dict[str, Any]]) -> Optional[dict[str, Any]]: return val +def valdict_scalarvalues(attribute: str): + """ + Check that every value of a `dict[str, list[Any]]` is a list of scalar + values (i.e. one of int, float, str, bool or None). + """ + + def val( + d: dict[str, list[Any]] + ) -> dict[str, list[Union[int, float, str, bool, None]]]: + for key, values in d.items(): + for value in values: + if not isinstance(value, (int, float, str, bool, type(None))): + raise ValueError( + f"{attribute}[{key}] values must be scalars " + "(int, float, str, bool, or None). " + f"Given {value} ({type(value)})" + ) + return d + + return val + + def valint(attribute: str, min_val: int = 1): """ Check that an integer attribute (e.g.
if it is meant to be the ID of a diff --git a/fractal_server/app/schemas/v2/task.py b/fractal_server/app/schemas/v2/task.py index 166aec66e4..0ae5377061 100644 --- a/fractal_server/app/schemas/v2/task.py +++ b/fractal_server/app/schemas/v2/task.py @@ -10,7 +10,7 @@ from pydantic import validator from fractal_server.app.schemas._validators import val_unique_list -from fractal_server.app.schemas._validators import valdictkeys +from fractal_server.app.schemas._validators import valdict_keys from fractal_server.app.schemas._validators import valstr from fractal_server.string_tools import validate_cmd @@ -66,25 +66,25 @@ def validate_commands(cls, values): _version = validator("version", allow_reuse=True)(valstr("version")) _meta_non_parallel = validator("meta_non_parallel", allow_reuse=True)( - valdictkeys("meta_non_parallel") + valdict_keys("meta_non_parallel") ) _meta_parallel = validator("meta_parallel", allow_reuse=True)( - valdictkeys("meta_parallel") + valdict_keys("meta_parallel") ) _args_schema_non_parallel = validator( "args_schema_non_parallel", allow_reuse=True - )(valdictkeys("args_schema_non_parallel")) + )(valdict_keys("args_schema_non_parallel")) _args_schema_parallel = validator( "args_schema_parallel", allow_reuse=True - )(valdictkeys("args_schema_parallel")) + )(valdict_keys("args_schema_parallel")) _args_schema_version = validator("args_schema_version", allow_reuse=True)( valstr("args_schema_version") ) _input_types = validator("input_types", allow_reuse=True)( - valdictkeys("input_types") + valdict_keys("input_types") ) _output_types = validator("output_types", allow_reuse=True)( - valdictkeys("output_types") + valdict_keys("output_types") ) _category = validator("category", allow_reuse=True)( @@ -158,10 +158,10 @@ def val_is_dict(cls, v): "command_non_parallel", allow_reuse=True )(valstr("command_non_parallel")) _input_types = validator("input_types", allow_reuse=True)( - valdictkeys("input_types") + valdict_keys("input_types") ) _output_types = validator("output_types", allow_reuse=True)( - valdictkeys("output_types") + valdict_keys("output_types") ) _category = validator("category", allow_reuse=True)( diff --git a/fractal_server/app/schemas/v2/task_group.py b/fractal_server/app/schemas/v2/task_group.py index 254f6e9d17..c744050227 100644 --- a/fractal_server/app/schemas/v2/task_group.py +++ b/fractal_server/app/schemas/v2/task_group.py @@ -8,7 +8,7 @@ from pydantic import validator from .._validators import val_absolute_path -from .._validators import valdictkeys +from .._validators import valdict_keys from .._validators import valstr from .task import TaskReadV2 @@ -57,7 +57,7 @@ class TaskGroupCreateV2(BaseModel, extra=Extra.forbid): ) _pinned_package_versions = validator( "pinned_package_versions", allow_reuse=True - )(valdictkeys("pinned_package_versions")) + )(valdict_keys("pinned_package_versions")) _pip_extras = validator("pip_extras", allow_reuse=True)( valstr("pip_extras") ) diff --git a/fractal_server/app/schemas/v2/workflowtask.py b/fractal_server/app/schemas/v2/workflowtask.py index 46a6e51d2a..5f121e2805 100644 --- a/fractal_server/app/schemas/v2/workflowtask.py +++ b/fractal_server/app/schemas/v2/workflowtask.py @@ -8,7 +8,7 @@ from pydantic import Field from pydantic import validator -from .._validators import valdictkeys +from .._validators import valdict_keys from .task import TaskExportV2 from .task import TaskImportV2 from .task import TaskImportV2Legacy @@ -47,17 +47,17 @@ class WorkflowTaskCreateV2(BaseModel, extra=Extra.forbid): # Validators 
_meta_non_parallel = validator("meta_non_parallel", allow_reuse=True)( - valdictkeys("meta_non_parallel") + valdict_keys("meta_non_parallel") ) _meta_parallel = validator("meta_parallel", allow_reuse=True)( - valdictkeys("meta_parallel") + valdict_keys("meta_parallel") ) @validator("args_non_parallel") def validate_args_non_parallel(cls, value): if value is None: return - valdictkeys("args_non_parallel")(value) + valdict_keys("args_non_parallel")(value) args_keys = set(value.keys()) intersect_keys = RESERVED_ARGUMENTS.intersection(args_keys) if intersect_keys: @@ -71,7 +71,7 @@ def validate_args_non_parallel(cls, value): def validate_args_parallel(cls, value): if value is None: return - valdictkeys("args_parallel")(value) + valdict_keys("args_parallel")(value) args_keys = set(value.keys()) intersect_keys = RESERVED_ARGUMENTS.intersection(args_keys) if intersect_keys: @@ -122,17 +122,17 @@ class WorkflowTaskUpdateV2(BaseModel, extra=Extra.forbid): # Validators _meta_non_parallel = validator("meta_non_parallel", allow_reuse=True)( - valdictkeys("meta_non_parallel") + valdict_keys("meta_non_parallel") ) _meta_parallel = validator("meta_parallel", allow_reuse=True)( - valdictkeys("meta_parallel") + valdict_keys("meta_parallel") ) @validator("args_non_parallel") def validate_args_non_parallel(cls, value): if value is None: return - valdictkeys("args_non_parallel")(value) + valdict_keys("args_non_parallel")(value) args_keys = set(value.keys()) intersect_keys = RESERVED_ARGUMENTS.intersection(args_keys) if intersect_keys: @@ -146,7 +146,7 @@ def validate_args_non_parallel(cls, value): def validate_args_parallel(cls, value): if value is None: return - valdictkeys("args_parallel")(value) + valdict_keys("args_parallel")(value) args_keys = set(value.keys()) intersect_keys = RESERVED_ARGUMENTS.intersection(args_keys) if intersect_keys: @@ -169,16 +169,16 @@ class WorkflowTaskImportV2(BaseModel, extra=Extra.forbid): task: Union[TaskImportV2, TaskImportV2Legacy] _meta_non_parallel = validator("meta_non_parallel", allow_reuse=True)( - valdictkeys("meta_non_parallel") + valdict_keys("meta_non_parallel") ) _meta_parallel = validator("meta_parallel", allow_reuse=True)( - valdictkeys("meta_parallel") + valdict_keys("meta_parallel") ) _args_non_parallel = validator("args_non_parallel", allow_reuse=True)( - valdictkeys("args_non_parallel") + valdict_keys("args_non_parallel") ) _args_parallel = validator("args_parallel", allow_reuse=True)( - valdictkeys("args_parallel") + valdict_keys("args_parallel") ) diff --git a/fractal_server/images/models.py b/fractal_server/images/models.py index 32e289d42d..3641a97095 100644 --- a/fractal_server/images/models.py +++ b/fractal_server/images/models.py @@ -7,7 +7,7 @@ from pydantic import Field from pydantic import validator -from fractal_server.app.schemas._validators import valdictkeys +from fractal_server.app.schemas._validators import valdict_keys from fractal_server.urls import normalize_url @@ -30,9 +30,9 @@ class SingleImageBase(BaseModel): # Validators _attributes = validator("attributes", allow_reuse=True)( - valdictkeys("attributes") + valdict_keys("attributes") ) - _types = validator("types", allow_reuse=True)(valdictkeys("types")) + _types = validator("types", allow_reuse=True)(valdict_keys("types")) @validator("zarr_url") def normalize_zarr_url(cls, v: str) -> str: @@ -96,7 +96,7 @@ def validate_attributes( ) -> dict[str, Union[int, float, str, bool]]: if v is not None: # validate keys - valdictkeys("attributes")(v) + valdict_keys("attributes")(v) # validate 
values for key, value in v.items(): if not isinstance(value, (int, float, str, bool)): @@ -107,7 +107,7 @@ def validate_attributes( ) return v - _types = validator("types", allow_reuse=True)(valdictkeys("types")) + _types = validator("types", allow_reuse=True)(valdict_keys("types")) class Filters(BaseModel, extra=Extra.forbid): @@ -116,9 +116,9 @@ class Filters(BaseModel, extra=Extra.forbid): # Validators _attributes = validator("attributes", allow_reuse=True)( - valdictkeys("attributes") + valdict_keys("attributes") ) - _types = validator("types", allow_reuse=True)(valdictkeys("types")) + _types = validator("types", allow_reuse=True)(valdict_keys("types")) @validator("attributes") def validate_attributes(
From c64b3edcddbdd58d4134313cec30dbb6f6f78a20 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 7 Jan 2025 16:17:58 +0100 Subject: [PATCH 02/99] add accept_none=True to valdict_scalarvalues --- fractal_server/app/schemas/_validators.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fractal_server/app/schemas/_validators.py b/fractal_server/app/schemas/_validators.py index 2e37195c2b..bb63afd8cc 100644 --- a/fractal_server/app/schemas/_validators.py +++ b/fractal_server/app/schemas/_validators.py @@ -49,7 +49,7 @@ def val(d: Optional[dict[str, Any]]) -> Optional[dict[str, Any]]: return val -def valdict_scalarvalues(attribute: str): +def valdict_scalarvalues(attribute: str, accept_none: bool = True): """ Check that every value of a `dict[str, list[Any]]` is a list of scalar values (i.e. one of int, float, str, bool or None). @@ -58,9 +58,13 @@ def valdict_scalarvalues(attribute: str): def val( d: dict[str, list[Any]] ) -> dict[str, list[Union[int, float, str, bool, None]]]: + if accept_none: + accepted = (int, float, str, bool, type(None)) + else: + accepted = (int, float, str, bool) for key, values in d.items(): for value in values: - if not isinstance(value, (int, float, str, bool, type(None))): + if not isinstance(value, accepted): raise ValueError( f"{attribute}[{key}] values must be scalars " "(int, float, str, bool, or None). "
From a3b1f912cd6d722df43dfdb2fdb81271d40849c9 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 7 Jan 2025 16:22:55 +0100 Subject: [PATCH 03/99] change dataset model and add migration --- fractal_server/app/models/v2/dataset.py | 6 +++ ...079_split_dataset_filters_and_keep_old_.py | 45 +++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 fractal_server/migrations/versions/f22d4a53c079_split_dataset_filters_and_keep_old_.py diff --git a/fractal_server/app/models/v2/dataset.py b/fractal_server/app/models/v2/dataset.py index 91ef47006c..6bf628becc 100644 --- a/fractal_server/app/models/v2/dataset.py +++ b/fractal_server/app/models/v2/dataset.py @@ -48,6 +48,12 @@ class Config: server_default='{"attributes": {}, "types": {}}', ) ) + type_filters: dict[str, bool] = Field( + sa_column=Column(JSON, nullable=False, server_default="{}") + ) + attribute_filters: dict[str, list[Any]] = Field( + sa_column=Column(JSON, nullable=False, server_default="{}") + ) @property def image_zarr_urls(self) -> list[str]: diff --git a/fractal_server/migrations/versions/f22d4a53c079_split_dataset_filters_and_keep_old_.py b/fractal_server/migrations/versions/f22d4a53c079_split_dataset_filters_and_keep_old_.py new file mode 100644 index 0000000000..3a88cad37a --- /dev/null +++ b/fractal_server/migrations/versions/f22d4a53c079_split_dataset_filters_and_keep_old_.py @@ -0,0 +1,45 @@ +"""split dataset filters and keep old column + +Revision ID: f22d4a53c079 +Revises: 316140ff7ee1 +Create Date: 2025-01-07 16:21:51.803044 + +""" +import sqlalchemy as sa +from alembic import op + + +# revision identifiers, used by Alembic. +revision = "f22d4a53c079" +down_revision = "316140ff7ee1" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table("datasetv2", schema=None) as batch_op: + batch_op.add_column( + sa.Column( + "type_filters", sa.JSON(), server_default="{}", nullable=False + ) + ) + batch_op.add_column( + sa.Column( + "attribute_filters", + sa.JSON(), + server_default="{}", + nullable=False, + ) + ) + + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table("datasetv2", schema=None) as batch_op: + batch_op.drop_column("attribute_filters") + batch_op.drop_column("type_filters") + + # ### end Alembic commands ###
From a6a0ce18bc71de234418f0e0fc9c2f7b39fd1e99 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 7 Jan 2025 16:24:00 +0100 Subject: [PATCH 04/99] rename variable --- fractal_server/app/schemas/_validators.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fractal_server/app/schemas/_validators.py b/fractal_server/app/schemas/_validators.py index bb63afd8cc..86fe522ba4 100644 --- a/fractal_server/app/schemas/_validators.py +++ b/fractal_server/app/schemas/_validators.py @@ -59,12 +59,12 @@ def valdict_scalarvalues(attribute: str, accept_none: bool = True): d: dict[str, list[Any]] ) -> dict[str, list[Union[int, float, str, bool, None]]]: if accept_none: - accepted = (int, float, str, bool, type(None)) + accepted_types = (int, float, str, bool, type(None)) else: - accepted = (int, float, str, bool) + accepted_types = (int, float, str, bool) for key, values in d.items(): for value in values: - if not isinstance(value, accepted): + if not isinstance(value, accepted_types): raise ValueError( f"{attribute}[{key}] values must be scalars " "(int, float, str, bool, or None). "
From 6977433e1a8e6686a5b97a3a766ba9503960386e Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 7 Jan 2025 16:31:07 +0100 Subject: [PATCH 05/99] modify validator --- fractal_server/app/schemas/_validators.py | 29 ++++++++++++----------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/fractal_server/app/schemas/_validators.py b/fractal_server/app/schemas/_validators.py index 86fe522ba4..8d486a2a92 100644 --- a/fractal_server/app/schemas/_validators.py +++ b/fractal_server/app/schemas/_validators.py @@ -49,27 +49,28 @@ def val(d: Optional[dict[str, Any]]) -> Optional[dict[str, Any]]: return val -def valdict_scalarvalues(attribute: str, accept_none: bool = True): +def valdict_scalarvalues(attribute: str, accept_type_none: bool = True): """ Check that every value of a `dict[str, list[Any]]` is a list of scalar values (i.e. one of int, float, str, bool or None). """ def val( - d: dict[str, list[Any]] + d: Optional[dict[str, list[Any]]] ) -> dict[str, list[Union[int, float, str, bool, None]]]: - if accept_none: - accepted_types = (int, float, str, bool, type(None)) - else: - accepted_types = (int, float, str, bool) - for key, values in d.items(): - for value in values: - if not isinstance(value, accepted_types): - raise ValueError( - f"{attribute}[{key}] values must be scalars " - "(int, float, str, bool, or None). " - f"Given {value} ({type(value)})" - ) + if d is not None: + if accept_type_none: + accepted_types = (int, float, str, bool, type(None)) + else: + accepted_types = (int, float, str, bool) + for key, values in d.items(): + for value in values: + if not isinstance(value, accepted_types): + raise ValueError( + f"{attribute}[{key}] values must be scalars " + "(int, float, str, bool, or None). " + f"Given {value} ({type(value)})" + ) return d return val
From bee422a8dcb173381d0a20668d9c3d46c573d1ea Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 7 Jan 2025 16:35:20 +0100 Subject: [PATCH 06/99] update dataset schemas --- fractal_server/app/schemas/v2/dataset.py | 61 ++++++++++++++++++++---- 1 file changed, 51 insertions(+), 10 deletions(-) diff --git a/fractal_server/app/schemas/v2/dataset.py b/fractal_server/app/schemas/v2/dataset.py index e680106e30..0d10ff3ad2 100644 --- a/fractal_server/app/schemas/v2/dataset.py +++ b/fractal_server/app/schemas/v2/dataset.py @@ -1,4 +1,5 @@ from datetime import datetime +from typing import Any from typing import Optional from pydantic import BaseModel @@ -6,11 +7,12 @@ from pydantic import Field from pydantic import validator +from .._validators import valdict_keys +from .._validators import valdict_scalarvalues from .._validators import valstr from .dumps import WorkflowTaskDumpV2 from .project import ProjectReadV2 from .workflowtask import WorkflowTaskStatusTypeV2 -from fractal_server.images import Filters from fractal_server.images import SingleImage from fractal_server.urls import normalize_url @@ -34,17 +36,29 @@ class DatasetCreateV2(BaseModel, extra=Extra.forbid): zarr_dir: Optional[str] = None - filters: Filters = Field(default_factory=Filters) + type_filters: dict[str, bool] = Field(default_factory=dict) + attribute_filters: dict[str, list[Any]] = Field(default_factory=dict) # Validators + + _name = validator("name", allow_reuse=True)(valstr("name")) + + _type_filters = validator("type_filters", allow_reuse=True)( + valdict_keys("type_filters") + ) + _attribute_filters_1 = validator("attribute_filters", allow_reuse=True)( + valdict_keys("attribute_filters") + ) + _attribute_filters_2 =
validator("attribute_filters", allow_reuse=True)( + valdict_scalarvalues("attribute_filters") + ) + @validator("zarr_dir") def normalize_zarr_dir(cls, v: Optional[str]) -> Optional[str]: if v is not None: return normalize_url(v) return v - _name = validator("name", allow_reuse=True)(valstr("name")) - class DatasetReadV2(BaseModel): @@ -59,24 +73,37 @@ class DatasetReadV2(BaseModel): timestamp_created: datetime zarr_dir: str - filters: Filters = Field(default_factory=Filters) + type_filters: dict[str, bool] = Field(default_factory=dict) + attribute_filters: dict[str, list[Any]] = Field(default_factory=dict) class DatasetUpdateV2(BaseModel, extra=Extra.forbid): name: Optional[str] zarr_dir: Optional[str] - filters: Optional[Filters] + type_filters: Optional[dict[str, bool]] + attribute_filters: Optional[dict[str, list[Any]]] # Validators + + _name = validator("name", allow_reuse=True)(valstr("name")) + + _type_filters = validator("type_filters", allow_reuse=True)( + valdict_keys("type_filters") + ) + _attribute_filters_1 = validator("attribute_filters", allow_reuse=True)( + valdict_keys("attribute_filters") + ) + _attribute_filters_2 = validator("attribute_filters", allow_reuse=True)( + valdict_scalarvalues("attribute_filters") + ) + @validator("zarr_dir") def normalize_zarr_dir(cls, v: Optional[str]) -> Optional[str]: if v is not None: return normalize_url(v) return v - _name = validator("name", allow_reuse=True)(valstr("name")) - class DatasetImportV2(BaseModel, extra=Extra.forbid): """ @@ -92,9 +119,22 @@ class DatasetImportV2(BaseModel, extra=Extra.forbid): name: str zarr_dir: str images: list[SingleImage] = Field(default_factory=list) - filters: Filters = Field(default_factory=Filters) + + type_filters: dict[str, bool] = Field(default_factory=dict) + attribute_filters: dict[str, list[Any]] = Field(default_factory=dict) # Validators + + _type_filters = validator("type_filters", allow_reuse=True)( + valdict_keys("type_filters") + ) + _attribute_filters_1 = validator("attribute_filters", allow_reuse=True)( + valdict_keys("attribute_filters") + ) + _attribute_filters_2 = validator("attribute_filters", allow_reuse=True)( + valdict_scalarvalues("attribute_filters") + ) + @validator("zarr_dir") def normalize_zarr_dir(cls, v: str) -> str: return normalize_url(v) @@ -114,4 +154,5 @@ class DatasetExportV2(BaseModel): name: str zarr_dir: str images: list[SingleImage] - filters: Filters + type_filters: dict[str, bool] + attribute_filters: dict[str, list[Any]] From 42c6bc7077d3e9e4422e942d30893ec9aa4c8127 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 7 Jan 2025 16:37:22 +0100 Subject: [PATCH 07/99] fix type hint --- fractal_server/app/schemas/_validators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fractal_server/app/schemas/_validators.py b/fractal_server/app/schemas/_validators.py index 8d486a2a92..3d2b1d00d3 100644 --- a/fractal_server/app/schemas/_validators.py +++ b/fractal_server/app/schemas/_validators.py @@ -57,7 +57,7 @@ def valdict_scalarvalues(attribute: str, accept_type_none: bool = True): def val( d: Optional[dict[str, list[Any]]] - ) -> dict[str, list[Union[int, float, str, bool, None]]]: + ) -> Optional[dict[str, list[Union[int, float, str, bool, None]]]]: if d is not None: if accept_type_none: accepted_types = (int, float, str, bool, type(None)) From 22949ad1bc03c1863c477e56cfececefa5744076 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 7 Jan 2025 16:40:42 +0100 Subject: [PATCH 08/99] remaining dataset schemas --- 
benchmarks/runner/mocks.py | 12 ++---------- fractal_server/app/schemas/v2/dumps.py | 4 +++- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/benchmarks/runner/mocks.py b/benchmarks/runner/mocks.py index 20e16c34a9..7dd1ee181d 100644 --- a/benchmarks/runner/mocks.py +++ b/benchmarks/runner/mocks.py @@ -1,5 +1,4 @@ from typing import Any -from typing import Literal from typing import Optional from pydantic import BaseModel @@ -13,21 +12,14 @@ class DatasetV2Mock(BaseModel): name: str zarr_dir: str images: list[dict[str, Any]] = Field(default_factory=list) - filters: dict[Literal["types", "attributes"], dict[str, Any]] = Field( - default_factory=dict - ) + type_filters: dict[str, bool] = Field(default_factory=dict) + attribute_filters: dict[str, list[Any]] = Field(default_factory=dict) history: list = Field(default_factory=list) @property def image_zarr_urls(self) -> list[str]: return [image["zarr_urls"] for image in self.images] - @validator("filters", always=True) - def _default_filters(cls, value): - if value == {}: - return {"types": {}, "attributes": {}} - return value - class TaskV2Mock(BaseModel): id: int diff --git a/fractal_server/app/schemas/v2/dumps.py b/fractal_server/app/schemas/v2/dumps.py index d72feec30e..398efb5d59 100644 --- a/fractal_server/app/schemas/v2/dumps.py +++ b/fractal_server/app/schemas/v2/dumps.py @@ -8,6 +8,7 @@ 1. In the "*_dump" attributes of Job models; 2. In the `_DatasetHistoryItem.workflowtask` model, to trim its size. """ +from typing import Any from typing import Optional from pydantic import BaseModel @@ -71,4 +72,5 @@ class DatasetDumpV2(BaseModel, extra=Extra.forbid): timestamp_created: str zarr_dir: str - filters: Filters + type_filters: dict[str, bool] + attribute_filters: dict[str, list[Any]] From 1bd9a8fab63ba20e30c342b6b6434fd15fe9aef3 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 7 Jan 2025 17:01:54 +0100 Subject: [PATCH 09/99] split filters in workflow task model --- fractal_server/app/models/v2/workflowtask.py | 3 +++ ...df76_split_filters_and_keep_old_columns.py} | 18 ++++++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) rename fractal_server/migrations/versions/{f22d4a53c079_split_dataset_filters_and_keep_old_.py => b6e34fcadf76_split_filters_and_keep_old_columns.py} (68%) diff --git a/fractal_server/app/models/v2/workflowtask.py b/fractal_server/app/models/v2/workflowtask.py index 32f64215a7..c36be4fab2 100644 --- a/fractal_server/app/models/v2/workflowtask.py +++ b/fractal_server/app/models/v2/workflowtask.py @@ -34,6 +34,9 @@ class Config: server_default='{"attributes": {}, "types": {}}', ) ) + type_filters: dict[str, bool] = Field( + sa_column=Column(JSON, nullable=False, server_default="{}") + ) # Task task_type: str diff --git a/fractal_server/migrations/versions/f22d4a53c079_split_dataset_filters_and_keep_old_.py b/fractal_server/migrations/versions/b6e34fcadf76_split_filters_and_keep_old_columns.py similarity index 68% rename from fractal_server/migrations/versions/f22d4a53c079_split_dataset_filters_and_keep_old_.py rename to fractal_server/migrations/versions/b6e34fcadf76_split_filters_and_keep_old_columns.py index 3a88cad37a..ef3623ffb5 100644 --- a/fractal_server/migrations/versions/f22d4a53c079_split_dataset_filters_and_keep_old_.py +++ b/fractal_server/migrations/versions/b6e34fcadf76_split_filters_and_keep_old_columns.py @@ -1,8 +1,8 @@ -"""split dataset filters and keep old column +"""split filters and keep old columns -Revision ID: f22d4a53c079 +Revision ID: b6e34fcadf76 Revises: 
316140ff7ee1 -Create Date: 2025-01-07 16:21:51.803044 +Create Date: 2025-01-07 17:00:11.918316 """ import sqlalchemy as sa @@ -10,7 +10,7 @@ # revision identifiers, used by Alembic. -revision = "f22d4a53c079" +revision = "b6e34fcadf76" down_revision = "316140ff7ee1" branch_labels = None depends_on = None @@ -33,11 +33,21 @@ def upgrade() -> None: ) ) + with op.batch_alter_table("workflowtaskv2", schema=None) as batch_op: + batch_op.add_column( + sa.Column( + "type_filters", sa.JSON(), server_default="{}", nullable=False + ) + ) + # ### end Alembic commands ### def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table("workflowtaskv2", schema=None) as batch_op: + batch_op.drop_column("type_filters") + with op.batch_alter_table("datasetv2", schema=None) as batch_op: batch_op.drop_column("attribute_filters") batch_op.drop_column("type_filters") From 50713fb698d262bf1c7a093b625d18fea6709a18 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 7 Jan 2025 17:03:06 +0100 Subject: [PATCH 10/99] split filters in workflowtask schemas --- fractal_server/app/schemas/v2/dumps.py | 4 +--- fractal_server/app/schemas/v2/workflowtask.py | 20 +++++++++++++------ 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/fractal_server/app/schemas/v2/dumps.py b/fractal_server/app/schemas/v2/dumps.py index 398efb5d59..2e2094a277 100644 --- a/fractal_server/app/schemas/v2/dumps.py +++ b/fractal_server/app/schemas/v2/dumps.py @@ -14,8 +14,6 @@ from pydantic import BaseModel from pydantic import Extra -from fractal_server.images import Filters - class ProjectDumpV2(BaseModel, extra=Extra.forbid): @@ -52,7 +50,7 @@ class WorkflowTaskDumpV2(BaseModel): workflow_id: int order: Optional[int] - input_filters: Filters + type_filters: dict[str, bool] task_id: Optional[int] task: Optional[TaskDumpV2] diff --git a/fractal_server/app/schemas/v2/workflowtask.py b/fractal_server/app/schemas/v2/workflowtask.py index 5f121e2805..81537e55c0 100644 --- a/fractal_server/app/schemas/v2/workflowtask.py +++ b/fractal_server/app/schemas/v2/workflowtask.py @@ -13,7 +13,6 @@ from .task import TaskImportV2 from .task import TaskImportV2Legacy from .task import TaskReadV2 -from fractal_server.images import Filters RESERVED_ARGUMENTS = {"zarr_dir", "zarr_url", "zarr_urls", "init_args"} @@ -43,7 +42,7 @@ class WorkflowTaskCreateV2(BaseModel, extra=Extra.forbid): meta_parallel: Optional[dict[str, Any]] args_non_parallel: Optional[dict[str, Any]] args_parallel: Optional[dict[str, Any]] - input_filters: Filters = Field(default_factory=Filters) + type_filters: dict[str, bool] = Field(default_factory=dict) # Validators _meta_non_parallel = validator("meta_non_parallel", allow_reuse=True)( @@ -52,6 +51,9 @@ class WorkflowTaskCreateV2(BaseModel, extra=Extra.forbid): _meta_parallel = validator("meta_parallel", allow_reuse=True)( valdict_keys("meta_parallel") ) + _type_filters = validator("type_filters", allow_reuse=True)( + valdict_keys("type_filters") + ) @validator("args_non_parallel") def validate_args_non_parallel(cls, value): @@ -101,7 +103,7 @@ class WorkflowTaskReadV2(BaseModel): args_non_parallel: Optional[dict[str, Any]] args_parallel: Optional[dict[str, Any]] - input_filters: Filters + type_filters: dict[str, bool] task_type: str task_id: int @@ -118,7 +120,7 @@ class WorkflowTaskUpdateV2(BaseModel, extra=Extra.forbid): meta_parallel: Optional[dict[str, Any]] args_non_parallel: Optional[dict[str, Any]] args_parallel: Optional[dict[str, Any]] - input_filters: 
Optional[Filters] + type_filters: Optional[dict[str, bool]] # Validators _meta_non_parallel = validator("meta_non_parallel", allow_reuse=True)( @@ -127,6 +129,9 @@ class WorkflowTaskUpdateV2(BaseModel, extra=Extra.forbid): _meta_parallel = validator("meta_parallel", allow_reuse=True)( valdict_keys("meta_parallel") ) + _type_filters = validator("type_filters", allow_reuse=True)( + valdict_keys("type_filters") + ) @validator("args_non_parallel") def validate_args_non_parallel(cls, value): @@ -164,7 +169,7 @@ class WorkflowTaskImportV2(BaseModel, extra=Extra.forbid): args_non_parallel: Optional[dict[str, Any]] = None args_parallel: Optional[dict[str, Any]] = None - input_filters: Optional[Filters] = None + type_filters: Optional[dict[str, bool]] = None task: Union[TaskImportV2, TaskImportV2Legacy] @@ -180,6 +185,9 @@ class WorkflowTaskImportV2(BaseModel, extra=Extra.forbid): _args_parallel = validator("args_parallel", allow_reuse=True)( valdict_keys("args_parallel") ) + _type_filters = validator("type_filters", allow_reuse=True)( + valdict_keys("type_filters") + ) class WorkflowTaskExportV2(BaseModel): @@ -188,6 +196,6 @@ class WorkflowTaskExportV2(BaseModel): meta_parallel: Optional[dict[str, Any]] = None args_non_parallel: Optional[dict[str, Any]] = None args_parallel: Optional[dict[str, Any]] = None - input_filters: Filters = Field(default_factory=Filters) + type_filters: dict[str, bool] = Field(default_factory=dict) task: TaskExportV2 From 10dd9414866b2a7c2014e48418345811d7304c15 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 7 Jan 2025 17:15:11 +0100 Subject: [PATCH 11/99] remove default factory from dataset read --- fractal_server/app/schemas/v2/dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fractal_server/app/schemas/v2/dataset.py b/fractal_server/app/schemas/v2/dataset.py index 0d10ff3ad2..2e2859a372 100644 --- a/fractal_server/app/schemas/v2/dataset.py +++ b/fractal_server/app/schemas/v2/dataset.py @@ -73,8 +73,8 @@ class DatasetReadV2(BaseModel): timestamp_created: datetime zarr_dir: str - type_filters: dict[str, bool] = Field(default_factory=dict) - attribute_filters: dict[str, list[Any]] = Field(default_factory=dict) + type_filters: dict[str, bool] + attribute_filters: dict[str, list[Any]] class DatasetUpdateV2(BaseModel, extra=Extra.forbid): From 5016b3ff177256b4f1d454e4b22c1f62eeff3b19 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 7 Jan 2025 17:17:48 +0100 Subject: [PATCH 12/99] attribute_filters in jobv2 --- fractal_server/app/models/v2/job.py | 4 ++++ ...e62_split_filters_and_keep_old_columns.py} | 19 ++++++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) rename fractal_server/migrations/versions/{b6e34fcadf76_split_filters_and_keep_old_columns.py => f5aed9bbae62_split_filters_and_keep_old_columns.py} (74%) diff --git a/fractal_server/app/models/v2/job.py b/fractal_server/app/models/v2/job.py index 2b5789a53b..b8dcbc0804 100644 --- a/fractal_server/app/models/v2/job.py +++ b/fractal_server/app/models/v2/job.py @@ -49,3 +49,7 @@ class Config: ) status: str = JobStatusTypeV2.SUBMITTED log: Optional[str] = None + + attribute_filters: dict[str, list[Any]] = Field( + sa_column=Column(JSON, nullable=False, server_default="{}") + ) diff --git a/fractal_server/migrations/versions/b6e34fcadf76_split_filters_and_keep_old_columns.py b/fractal_server/migrations/versions/f5aed9bbae62_split_filters_and_keep_old_columns.py similarity index 74% rename from 
fractal_server/migrations/versions/b6e34fcadf76_split_filters_and_keep_old_columns.py rename to fractal_server/migrations/versions/f5aed9bbae62_split_filters_and_keep_old_columns.py index ef3623ffb5..ee635f3854 100644 --- a/fractal_server/migrations/versions/b6e34fcadf76_split_filters_and_keep_old_columns.py +++ b/fractal_server/migrations/versions/f5aed9bbae62_split_filters_and_keep_old_columns.py @@ -1,8 +1,8 @@ """split filters and keep old columns -Revision ID: b6e34fcadf76 +Revision ID: f5aed9bbae62 Revises: 316140ff7ee1 -Create Date: 2025-01-07 17:00:11.918316 +Create Date: 2025-01-07 17:16:45.454431 """ import sqlalchemy as sa @@ -10,7 +10,7 @@ # revision identifiers, used by Alembic. -revision = "b6e34fcadf76" +revision = "f5aed9bbae62" down_revision = "316140ff7ee1" branch_labels = None depends_on = None @@ -33,6 +33,16 @@ def upgrade() -> None: ) ) + with op.batch_alter_table("jobv2", schema=None) as batch_op: + batch_op.add_column( + sa.Column( + "attribute_filters", + sa.JSON(), + server_default="{}", + nullable=False, + ) + ) + with op.batch_alter_table("workflowtaskv2", schema=None) as batch_op: batch_op.add_column( sa.Column( @@ -48,6 +58,9 @@ def downgrade() -> None: with op.batch_alter_table("workflowtaskv2", schema=None) as batch_op: batch_op.drop_column("type_filters") + with op.batch_alter_table("jobv2", schema=None) as batch_op: + batch_op.drop_column("attribute_filters") + with op.batch_alter_table("datasetv2", schema=None) as batch_op: batch_op.drop_column("attribute_filters") batch_op.drop_column("type_filters") From 32bdef082cadd9aa4b27aa57a44d8fcbe3d5179f Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 7 Jan 2025 17:18:16 +0100 Subject: [PATCH 13/99] attribute_filters in jobv2 schemas --- fractal_server/app/schemas/v2/job.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fractal_server/app/schemas/v2/job.py b/fractal_server/app/schemas/v2/job.py index e09232ea44..324b509c1f 100644 --- a/fractal_server/app/schemas/v2/job.py +++ b/fractal_server/app/schemas/v2/job.py @@ -1,12 +1,16 @@ from datetime import datetime from enum import Enum +from typing import Any from typing import Optional from pydantic import BaseModel from pydantic import Extra +from pydantic import Field from pydantic import validator from pydantic.types import StrictStr +from .._validators import valdict_keys +from .._validators import valdict_scalarvalues from .._validators import valstr from .dumps import DatasetDumpV2 from .dumps import ProjectDumpV2 @@ -41,10 +45,18 @@ class JobCreateV2(BaseModel, extra=Extra.forbid): slurm_account: Optional[StrictStr] = None worker_init: Optional[str] + attribute_filters: dict[str, list[Any]] = Field(default_factory=dict) + # Validators _worker_init = validator("worker_init", allow_reuse=True)( valstr("worker_init") ) + _attribute_filters_1 = validator("attribute_filters", allow_reuse=True)( + valdict_keys("attribute_filters") + ) + _attribute_filters_2 = validator("attribute_filters", allow_reuse=True)( + valdict_scalarvalues("attribute_filters") + ) @validator("first_task_index", always=True) def first_task_index_non_negative(cls, v, values): @@ -99,6 +111,7 @@ class JobReadV2(BaseModel): first_task_index: Optional[int] last_task_index: Optional[int] worker_init: Optional[str] + attribute_filters: dict[str, list[Any]] class JobUpdateV2(BaseModel, extra=Extra.forbid): From 3d89275830ca67c3157665dc182ccdaf1d579d8e Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 7 Jan 2025 17:29:30 +0100 Subject: [PATCH 14/99] fix 
test_schemas_dataset_v2 --- tests/v2/01_schemas/test_schemas_dataset.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/tests/v2/01_schemas/test_schemas_dataset.py b/tests/v2/01_schemas/test_schemas_dataset.py index 883ca75844..969ee15419 100644 --- a/tests/v2/01_schemas/test_schemas_dataset.py +++ b/tests/v2/01_schemas/test_schemas_dataset.py @@ -16,23 +16,28 @@ async def test_schemas_dataset_v2(): # Create with pytest.raises(ValidationError): - # Non-scalar attribute + # Not a list DatasetCreateV2( name="name", zarr_dir="/zarr", - filters={"attributes": {"x": [1, 0]}}, + attribute_filters={"x": 1}, ) with pytest.raises(ValidationError): - # Non-boolean types + # Non-scalar attribute DatasetCreateV2( - name="name", zarr_dir="/zarr", filters={"types": {"a": "b"}} + name="name", + zarr_dir="/zarr", + attribute_filters={"x": [{1, 0}]}, ) + with pytest.raises(ValidationError): + # Non-boolean types + DatasetCreateV2(name="name", zarr_dir="/zarr", type_filters={"a": "b"}) # Test zarr_dir=None is valid DatasetCreateV2(name="name", zarr_dir=None) dataset_create = DatasetCreateV2( name="name", - filters={"attributes": {"x": 10}}, + attribute_filters={"x": [10, 11]}, zarr_dir="/tmp/", ) assert dataset_create.zarr_dir == normalize_url(dataset_create.zarr_dir) @@ -42,7 +47,7 @@ async def test_schemas_dataset_v2(): dataset_import = DatasetImportV2( name="name", - filters={"attributes": {"x": 10}}, + attribute_filters={"x": [10]}, zarr_dir="/tmp/", images=[{"zarr_url": "/tmp/image/"}], ) @@ -62,9 +67,9 @@ async def test_schemas_dataset_v2(): # Update # validation accepts `zarr_dir` and `filters` as None, but not `name` - DatasetUpdateV2(zarr_dir=None, filters=None) + DatasetUpdateV2(zarr_dir=None, attribute_filters=None) with pytest.raises(ValidationError): - DatasetUpdateV2(name=None, zarr_dir=None, filters=None) + DatasetUpdateV2(name=None, zarr_dir=None, attribute_filters=None) dataset_update = DatasetUpdateV2(name="new name", zarr_dir="/zarr/") assert not dataset_update.zarr_dir.endswith("/") From 1d33e67a25a2ee108f4ecd8b0482d12dab19b318 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 7 Jan 2025 17:30:46 +0100 Subject: [PATCH 15/99] fix test_workflow_task_dump --- tests/v2/01_schemas/test_unit_schemas_v2.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/v2/01_schemas/test_unit_schemas_v2.py b/tests/v2/01_schemas/test_unit_schemas_v2.py index cc84d89372..66d7c5316e 100644 --- a/tests/v2/01_schemas/test_unit_schemas_v2.py +++ b/tests/v2/01_schemas/test_unit_schemas_v2.py @@ -11,7 +11,6 @@ from fractal_server.app.schemas.v2 import WorkflowCreateV2 from fractal_server.app.schemas.v2 import WorkflowTaskCreateV2 from fractal_server.app.schemas.v2 import WorkflowTaskDumpV2 -from fractal_server.images import Filters def test_extra_on_create_models(): @@ -103,7 +102,7 @@ def test_workflow_task_dump(): WorkflowTaskDumpV2( id=1, workflow_id=1, - input_filters=Filters(), + type_filters={}, task_id=1, task=TaskDumpV2( id=1, From d3060025e0ae2d162272912d8e2a73dd7cd9910f Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 7 Jan 2025 17:38:09 +0100 Subject: [PATCH 16/99] remove old columns --- fractal_server/app/models/v2/dataset.py | 8 --- fractal_server/app/models/v2/workflowtask.py | 10 ---- .../c16108de9f77_remove_old_filter_columns.py | 58 +++++++++++++++++++ 3 files changed, 58 insertions(+), 18 deletions(-) create mode 100644 fractal_server/migrations/versions/c16108de9f77_remove_old_filter_columns.py diff --git 
a/fractal_server/app/models/v2/dataset.py b/fractal_server/app/models/v2/dataset.py index 6bf628becc..5fb63e356a 100644 --- a/fractal_server/app/models/v2/dataset.py +++ b/fractal_server/app/models/v2/dataset.py @@ -1,6 +1,5 @@ from datetime import datetime from typing import Any -from typing import Literal from typing import Optional from sqlalchemy import Column @@ -41,13 +40,6 @@ class Config: sa_column=Column(JSON, server_default="[]", nullable=False) ) - filters: dict[Literal["attributes", "types"], dict[str, Any]] = Field( - sa_column=Column( - JSON, - nullable=False, - server_default='{"attributes": {}, "types": {}}', - ) - ) type_filters: dict[str, bool] = Field( sa_column=Column(JSON, nullable=False, server_default="{}") ) diff --git a/fractal_server/app/models/v2/workflowtask.py b/fractal_server/app/models/v2/workflowtask.py index c36be4fab2..90bf7e0f8d 100644 --- a/fractal_server/app/models/v2/workflowtask.py +++ b/fractal_server/app/models/v2/workflowtask.py @@ -1,5 +1,4 @@ from typing import Any -from typing import Literal from typing import Optional from sqlalchemy import Column @@ -25,15 +24,6 @@ class Config: args_parallel: Optional[dict[str, Any]] = Field(sa_column=Column(JSON)) args_non_parallel: Optional[dict[str, Any]] = Field(sa_column=Column(JSON)) - input_filters: dict[ - Literal["attributes", "types"], dict[str, Any] - ] = Field( - sa_column=Column( - JSON, - nullable=False, - server_default='{"attributes": {}, "types": {}}', - ) - ) type_filters: dict[str, bool] = Field( sa_column=Column(JSON, nullable=False, server_default="{}") ) diff --git a/fractal_server/migrations/versions/c16108de9f77_remove_old_filter_columns.py b/fractal_server/migrations/versions/c16108de9f77_remove_old_filter_columns.py new file mode 100644 index 0000000000..e79a358345 --- /dev/null +++ b/fractal_server/migrations/versions/c16108de9f77_remove_old_filter_columns.py @@ -0,0 +1,58 @@ +"""remove old filter columns + +Revision ID: c16108de9f77 +Revises: f5aed9bbae62 +Create Date: 2025-01-07 17:36:56.520295 + +""" +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "c16108de9f77" +down_revision = "f5aed9bbae62" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table("datasetv2", schema=None) as batch_op: + batch_op.drop_column("filters") + + with op.batch_alter_table("workflowtaskv2", schema=None) as batch_op: + batch_op.drop_column("input_filters") + + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + with op.batch_alter_table("workflowtaskv2", schema=None) as batch_op: + batch_op.add_column( + sa.Column( + "input_filters", + postgresql.JSON(astext_type=sa.Text()), + server_default=sa.text( + '\'{"attributes": {}, "types": {}}\'::json' + ), + autoincrement=False, + nullable=False, + ) + ) + + with op.batch_alter_table("datasetv2", schema=None) as batch_op: + batch_op.add_column( + sa.Column( + "filters", + postgresql.JSON(astext_type=sa.Text()), + server_default=sa.text( + '\'{"attributes": {}, "types": {}}\'::json' + ), + autoincrement=False, + nullable=False, + ) + ) + + # ### end Alembic commands ### From d0faae0c2deef11f93e3ad9a6f68f5a9bc792c9f Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Wed, 8 Jan 2025 12:08:54 +0100 Subject: [PATCH 17/99] new filters validators --- benchmarks/runner/mocks.py | 7 +-- fractal_server/app/schemas/_validators.py | 41 +++++++++++++++++- fractal_server/app/schemas/v2/dataset.py | 43 ++++++++----------- fractal_server/app/schemas/v2/job.py | 12 ++---- fractal_server/app/schemas/v2/workflowtask.py | 13 +++--- 5 files changed, 69 insertions(+), 47 deletions(-) diff --git a/benchmarks/runner/mocks.py b/benchmarks/runner/mocks.py index 7dd1ee181d..26c1ce8013 100644 --- a/benchmarks/runner/mocks.py +++ b/benchmarks/runner/mocks.py @@ -69,13 +69,8 @@ class WorkflowTaskV2Mock(BaseModel): meta_parallel: Optional[dict[str, Any]] = Field() meta_non_parallel: Optional[dict[str, Any]] = Field() task: TaskV2Mock = None - input_filters: dict[str, Any] = Field(default_factory=dict) + type_filters: dict[str, bool] = Field(default_factory=dict) order: int id: int workflow_id: int = 0 task_id: int - - @validator("input_filters", always=True) - def _default_filters(cls, value): - if value == {}: - return {"types": {}, "attributes": {}} diff --git a/fractal_server/app/schemas/_validators.py b/fractal_server/app/schemas/_validators.py index 3d2b1d00d3..0eab8be344 100644 --- a/fractal_server/app/schemas/_validators.py +++ b/fractal_server/app/schemas/_validators.py @@ -39,7 +39,7 @@ def val(d: Optional[dict[str, Any]]) -> Optional[dict[str, Any]]: new_keys = [valstr(f"{attribute}[{key}]")(key) for key in old_keys] if len(new_keys) != len(set(new_keys)): raise ValueError( - f"Dictionary contains multiple identical keys: {d}." + f"Dictionary contains multiple identical keys: '{d}'." ) for old_key, new_key in zip(old_keys, new_keys): if new_key != old_key: @@ -49,6 +49,19 @@ def val(d: Optional[dict[str, Any]]) -> Optional[dict[str, Any]]: return val +def valdict_keys_str(): + def val(d: Optional[dict[Any, Any]]) -> Optional[dict[str, Any]]: + + if d is not None: + if any(not isinstance(key, str) for key in d.keys()): + raise ValueError( + f"Dictionary contains non-string keys: '{d}'." 
+ ) + return d + + return val + + def valdict_scalarvalues(attribute: str, accept_type_none: bool = True): """ Check that every value of a `dict[str, list[Any]]` is a list of scalar @@ -129,3 +142,29 @@ def val(must_be_unique: Optional[list]) -> Optional[list]: return must_be_unique return val + + +def validate_type_filters(): + def val(type_filters: dict) -> dict[str, Any]: + type_filters = valdict_keys_str()(type_filters) + type_filters = valdict_keys("type_filters")(type_filters) + return type_filters + + return val + + +def validate_attribute_filters(accept_type_none: bool = True): + def val(attribute_filters: dict) -> dict[str, Any]: + attribute_filters = valdict_keys_str()(attribute_filters) + attribute_filters = valdict_keys("attribute_filters")( + attribute_filters + ) + for value in attribute_filters.values(): + if not isinstance(value, list): + raise ValueError(f"Values must be lists. Given {type(value)}") + attribute_filters = valdict_scalarvalues( + "attribute_filters", accept_type_none=accept_type_none + )(attribute_filters) + return attribute_filters + + return val diff --git a/fractal_server/app/schemas/v2/dataset.py b/fractal_server/app/schemas/v2/dataset.py index 2e2859a372..b4422b5f9f 100644 --- a/fractal_server/app/schemas/v2/dataset.py +++ b/fractal_server/app/schemas/v2/dataset.py @@ -7,8 +7,8 @@ from pydantic import Field from pydantic import validator -from .._validators import valdict_keys -from .._validators import valdict_scalarvalues +from .._validators import validate_attribute_filters +from .._validators import validate_type_filters from .._validators import valstr from .dumps import WorkflowTaskDumpV2 from .project import ProjectReadV2 @@ -43,15 +43,12 @@ class DatasetCreateV2(BaseModel, extra=Extra.forbid): _name = validator("name", allow_reuse=True)(valstr("name")) - _type_filters = validator("type_filters", allow_reuse=True)( - valdict_keys("type_filters") - ) - _attribute_filters_1 = validator("attribute_filters", allow_reuse=True)( - valdict_keys("attribute_filters") - ) -
_attribute_filters_2 = validator("attribute_filters", allow_reuse=True)( - valdict_scalarvalues("attribute_filters") + _type_filters = validator("type_filters", pre=True, allow_reuse=True)( + validate_type_filters() ) + _attribute_filters = validator( + "attribute_filters", pre=True, allow_reuse=True + )(validate_attribute_filters()) @validator("zarr_dir") def normalize_zarr_dir(cls, v: str) -> str: diff --git a/fractal_server/app/schemas/v2/job.py b/fractal_server/app/schemas/v2/job.py index 324b509c1f..740d9f2f47 100644 --- a/fractal_server/app/schemas/v2/job.py +++ b/fractal_server/app/schemas/v2/job.py @@ -9,8 +9,7 @@ from pydantic import validator from pydantic.types import StrictStr -from .._validators import valdict_keys -from .._validators import valdict_scalarvalues +from .._validators import validate_attribute_filters from .._validators import valstr from .dumps import DatasetDumpV2 from .dumps import ProjectDumpV2 @@ -51,12 +50,9 @@ class JobCreateV2(BaseModel, extra=Extra.forbid): _worker_init = validator("worker_init", allow_reuse=True)( valstr("worker_init") ) - _attribute_filters_1 = validator("attribute_filters", allow_reuse=True)( - valdict_keys("attribute_filters") - ) - _attribute_filters_2 = validator("attribute_filters", allow_reuse=True)( - valdict_scalarvalues("attribute_filters") - ) + _attribute_filters = validator( + "attribute_filters", pre=True, allow_reuse=True + )(validate_attribute_filters()) @validator("first_task_index", always=True) def first_task_index_non_negative(cls, v, values): diff --git a/fractal_server/app/schemas/v2/workflowtask.py b/fractal_server/app/schemas/v2/workflowtask.py index 81537e55c0..94c009c370 100644 --- a/fractal_server/app/schemas/v2/workflowtask.py +++ b/fractal_server/app/schemas/v2/workflowtask.py @@ -9,6 +9,7 @@ from pydantic import validator from .._validators import valdict_keys +from .._validators import validate_type_filters from .task import TaskExportV2 from .task import TaskImportV2 from .task import TaskImportV2Legacy @@ -51,8 +52,8 @@ class WorkflowTaskCreateV2(BaseModel, extra=Extra.forbid): _meta_parallel = validator("meta_parallel", allow_reuse=True)( valdict_keys("meta_parallel") ) - _type_filters = validator("type_filters", allow_reuse=True)( - valdict_keys("type_filters") + _type_filters = validator("type_filters", pre=True, allow_reuse=True)( + validate_type_filters() ) @validator("args_non_parallel") @@ -129,8 +130,8 @@ class WorkflowTaskUpdateV2(BaseModel, extra=Extra.forbid): _meta_parallel = validator("meta_parallel", allow_reuse=True)( valdict_keys("meta_parallel") ) - _type_filters = validator("type_filters", allow_reuse=True)( - valdict_keys("type_filters") + _type_filters = validator("type_filters", pre=True, allow_reuse=True)( + validate_type_filters() ) @validator("args_non_parallel") @@ -185,8 +186,8 @@ class WorkflowTaskImportV2(BaseModel, extra=Extra.forbid): _args_parallel = validator("args_parallel", allow_reuse=True)( valdict_keys("args_parallel") ) - _type_filters = validator("type_filters", allow_reuse=True)( - valdict_keys("type_filters") + _type_filters = validator("type_filters", pre=True, allow_reuse=True)( + validate_type_filters() ) From 2b77e1b6b969b5b32a914d7580bd88ebde10bfb7 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Wed, 8 Jan 2025 12:14:41 +0100 Subject: [PATCH 18/99] fix test api dataset --- tests/v2/03_api/test_api_dataset.py | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/tests/v2/03_api/test_api_dataset.py 
b/tests/v2/03_api/test_api_dataset.py index 881723aac7..dcca09317a 100644 --- a/tests/v2/03_api/test_api_dataset.py +++ b/tests/v2/03_api/test_api_dataset.py @@ -58,7 +58,7 @@ async def test_new_dataset_v2(client, MockCurrentUser): f"api/v2/project/{p2_id}/dataset/", json=dict( name="dataset", - filters={"attributes": {"x": 10}}, + attribute_filters={"x": [10]}, zarr_dir="/tmp", ), ) @@ -442,10 +442,8 @@ async def test_dataset_export( assert res_dataset["name"] == "My Dataset" assert res_dataset["zarr_dir"] == "/zarr_dir" assert res_dataset["images"] == IMAGES - assert res_dataset["filters"] == dict( - attributes={}, - types={}, - ) + assert res_dataset["attribute_filters"] == dict() + assert res_dataset["type_filters"] == dict() async def test_dataset_import( @@ -460,10 +458,8 @@ async def test_dataset_import( name="Dataset", zarr_dir="/somewhere/invalid/", images=IMAGES, - filters=dict( - attributes={}, - types={}, - ), + attribute_filters={}, + type_filters={}, ) res = await client.post( f"{PREFIX}/project/{project.id}/dataset/import/", json=dataset @@ -476,10 +472,8 @@ async def test_dataset_import( name="Dataset", zarr_dir=ZARR_DIR, images=IMAGES, - filters=dict( - attributes={}, - types={}, - ), + attribute_filters=dict(), + type_filters=dict(), ) res = await client.post( f"{PREFIX}/project/{project.id}/dataset/import/", json=dataset @@ -489,7 +483,5 @@ async def test_dataset_import( debug(res_dataset) assert res_dataset["name"] == "Dataset" assert res_dataset["zarr_dir"] == ZARR_DIR - assert res_dataset["filters"] == dict( - attributes={}, - types={}, - ) + assert res_dataset["attribute_filters"] == dict() + assert res_dataset["type_filters"] == dict() From 6d2b5c722f53751e5ee9cfaebc2b58d0cc53422b Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Wed, 8 Jan 2025 12:31:46 +0100 Subject: [PATCH 19/99] remove input_filters --- .../app/routes/api/v2/_aux_functions.py | 13 +++---------- .../app/routes/api/v2/workflowtask.py | 6 +++--- tests/v2/03_api/test_api_workflow_task.py | 17 +++++------------ tests/v2/03_api/test_unit_issue_1783.py | 4 ++-- tests/v2/09_backends/test_slurm_config.py | 2 +- 5 files changed, 14 insertions(+), 28 deletions(-) diff --git a/fractal_server/app/routes/api/v2/_aux_functions.py b/fractal_server/app/routes/api/v2/_aux_functions.py index 63762d9cdd..358ac1d26e 100644 --- a/fractal_server/app/routes/api/v2/_aux_functions.py +++ b/fractal_server/app/routes/api/v2/_aux_functions.py @@ -21,7 +21,6 @@ from ....models.v2 import WorkflowTaskV2 from ....models.v2 import WorkflowV2 from ....schemas.v2 import JobStatusTypeV2 -from fractal_server.images import Filters async def _get_project_check_owner( @@ -336,7 +335,7 @@ async def _workflow_insert_task( meta_non_parallel: Optional[dict[str, Any]] = None, args_non_parallel: Optional[dict[str, Any]] = None, args_parallel: Optional[dict[str, Any]] = None, - input_filters: Optional[Filters] = None, + type_filters: Optional[dict[str, bool]] = None, db: AsyncSession, ) -> WorkflowTaskV2: """ @@ -350,7 +349,7 @@ async def _workflow_insert_task( meta_non_parallel: args_non_parallel: args_parallel: - input_filters: + type_filters: db: """ db_workflow = await db.get(WorkflowV2, workflow_id) @@ -376,12 +375,6 @@ async def _workflow_insert_task( if final_meta_non_parallel == {}: final_meta_non_parallel = None - # Prepare input_filters attribute - if input_filters is None: - input_filters_kwarg = {} - else: - input_filters_kwarg = dict(input_filters=input_filters) - # Create DB entry wf_task = WorkflowTaskV2( task_type=task_type, 
@@ -390,7 +383,7 @@ async def _workflow_insert_task( args_parallel=args_parallel, meta_parallel=final_meta_parallel, meta_non_parallel=final_meta_non_parallel, - **input_filters_kwarg, + type_filters=(type_filters or dict()), ) db_workflow.task_list.append(wf_task) flag_modified(db_workflow, "task_list") diff --git a/fractal_server/app/routes/api/v2/workflowtask.py b/fractal_server/app/routes/api/v2/workflowtask.py index d43bcf0a25..ee4ac3caab 100644 --- a/fractal_server/app/routes/api/v2/workflowtask.py +++ b/fractal_server/app/routes/api/v2/workflowtask.py @@ -109,7 +109,7 @@ async def replace_workflowtask( task_type=task.type, task=task, # old-task values - input_filters=old_workflow_task.input_filters, + type_filters=old_workflow_task.type_filters, # possibly new values args_non_parallel=_args_non_parallel, args_parallel=_args_parallel, @@ -183,7 +183,7 @@ async def create_workflowtask( meta_parallel=new_task.meta_parallel, args_non_parallel=new_task.args_non_parallel, args_parallel=new_task.args_parallel, - input_filters=new_task.input_filters, + type_filters=new_task.type_filters, db=db, ) @@ -274,7 +274,7 @@ async def update_workflowtask( if not actual_args: actual_args = None setattr(db_wf_task, key, actual_args) - elif key in ["meta_parallel", "meta_non_parallel", "input_filters"]: + elif key in ["meta_parallel", "meta_non_parallel", "type_filters"]: setattr(db_wf_task, key, value) else: raise HTTPException( diff --git a/tests/v2/03_api/test_api_workflow_task.py b/tests/v2/03_api/test_api_workflow_task.py index 8148eba797..1f14291c51 100644 --- a/tests/v2/03_api/test_api_workflow_task.py +++ b/tests/v2/03_api/test_api_workflow_task.py @@ -8,7 +8,7 @@ from fractal_server.app.models import UserGroup from fractal_server.app.models.v2 import WorkflowTaskV2 from fractal_server.app.models.v2 import WorkflowV2 -from fractal_server.images.models import Filters + PREFIX = "api/v2" @@ -296,10 +296,7 @@ async def test_patch_workflow_task( args_parallel={"c": 333, "d": 444}, meta_non_parallel={"non": "parallel"}, meta_parallel={"executor": "cpu-low"}, - input_filters={ - "attributes": {"a": "b", "c": "d"}, - "types": {"e": True, "f": False, "g": True}, - }, + type_filters={"e": True, "f": False, "g": True}, ) res = await client.patch( f"{PREFIX}/project/{project.id}/workflow/{workflow['id']}/" @@ -323,9 +320,7 @@ async def test_patch_workflow_task( assert ( patched_workflow_task["meta_parallel"] == payload["meta_parallel"] ) - assert ( - patched_workflow_task["input_filters"] == payload["input_filters"] - ) + assert patched_workflow_task["type_filters"] == payload["type_filters"] assert res.status_code == 200 payload_up = dict( @@ -377,15 +372,13 @@ async def test_patch_workflow_task( f"wftask/{workflow['task_list'][0]['id']}/", json=dict( args_non_parallel={}, - input_filters=Filters().dict(), + type_filters=dict(), ), ) patched_workflow_task = res.json() debug(patched_workflow_task["args_non_parallel"]) assert patched_workflow_task["args_non_parallel"] is None - assert patched_workflow_task["input_filters"] == dict( - attributes={}, types={} - ) + assert patched_workflow_task["type_filters"] == dict() assert res.status_code == 200 # Test 422 diff --git a/tests/v2/03_api/test_unit_issue_1783.py b/tests/v2/03_api/test_unit_issue_1783.py index 0b544cfd98..c15eb35e07 100644 --- a/tests/v2/03_api/test_unit_issue_1783.py +++ b/tests/v2/03_api/test_unit_issue_1783.py @@ -14,7 +14,7 @@ def test_issue_1783(): id=1, workflow_id=1, order=0, - input_filters=dict(attributes=dict(), types=dict()), + 
type_filters=dict(), task_id=1, task=dict( id=1, @@ -36,7 +36,7 @@ def test_issue_1783(): id=1, workflow_id=1, order=0, - input_filters=dict(attributes=dict(), types=dict()), + type_filters=dict(), task_legacy_id=1, task_legacy=dict( id=1, diff --git a/tests/v2/09_backends/test_slurm_config.py b/tests/v2/09_backends/test_slurm_config.py index 601977c032..7ab8f279f4 100644 --- a/tests/v2/09_backends/test_slurm_config.py +++ b/tests/v2/09_backends/test_slurm_config.py @@ -53,7 +53,7 @@ class WorkflowTaskV2Mock(BaseModel, extra=Extra.forbid): meta_parallel: Optional[dict[str, Any]] = Field() meta_non_parallel: Optional[dict[str, Any]] = Field() task: TaskV2Mock - input_filters: dict[str, Any] = Field(default_factory=dict) + type_filters: dict[str, bool] = Field(default_factory=dict) order: int = 0 id: int = 1 workflow_id: int = 0 From f2a4af87ab2f78a86f46f23ec2bd1637cb8c381f Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Wed, 8 Jan 2025 12:35:54 +0100 Subject: [PATCH 20/99] remove Filters schema --- fractal_server/images/models.py | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/fractal_server/images/models.py b/fractal_server/images/models.py index 3641a97095..45eb84953d 100644 --- a/fractal_server/images/models.py +++ b/fractal_server/images/models.py @@ -3,7 +3,6 @@ from typing import Union from pydantic import BaseModel -from pydantic import Extra from pydantic import Field from pydantic import validator @@ -108,27 +107,3 @@ def validate_attributes( return v _types = validator("types", allow_reuse=True)(valdict_keys("types")) - - -class Filters(BaseModel, extra=Extra.forbid): - attributes: dict[str, Any] = Field(default_factory=dict) - types: dict[str, bool] = Field(default_factory=dict) - - # Validators - _attributes = validator("attributes", allow_reuse=True)( - valdict_keys("attributes") - ) - _types = validator("types", allow_reuse=True)(valdict_keys("types")) - - @validator("attributes") - def validate_attributes( - cls, v: dict[str, Any] - ) -> dict[str, Union[int, float, str, bool, None]]: - for key, value in v.items(): - if not isinstance(value, (int, float, str, bool, type(None))): - raise ValueError( - f"Filters.attributes[{key}] must be a scalar " - "(int, float, str, bool, or None). 
" - f"Given {value} ({type(value)})" - ) - return v From 6527b8ac72913e84263df4e86234998a1b3a1a7c Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Wed, 8 Jan 2025 13:02:55 +0100 Subject: [PATCH 21/99] remove Filters --- fractal_server/app/routes/api/v2/images.py | 13 +- fractal_server/images/__init__.py | 1 - fractal_server/images/tools.py | 45 ++-- tests/v2/03_api/test_api_dataset_images.py | 13 +- .../test_benchmark_helper_functions.py | 9 +- tests/v2/05_images/test_filters.py | 24 +- tests/v2/05_images/test_image_models.py | 24 -- tests/v2/05_images/test_unit_image_tools.py | 211 ++++++------------ 8 files changed, 120 insertions(+), 220 deletions(-) diff --git a/fractal_server/app/routes/api/v2/images.py b/fractal_server/app/routes/api/v2/images.py index 01ccd5e187..7de43e51ba 100644 --- a/fractal_server/app/routes/api/v2/images.py +++ b/fractal_server/app/routes/api/v2/images.py @@ -15,7 +15,6 @@ from fractal_server.app.db import get_async_db from fractal_server.app.models import UserOAuth from fractal_server.app.routes.auth import current_active_user -from fractal_server.images import Filters from fractal_server.images import SingleImage from fractal_server.images import SingleImageUpdate from fractal_server.images.tools import find_image_by_zarr_url @@ -38,7 +37,8 @@ class ImagePage(BaseModel): class ImageQuery(BaseModel): zarr_url: Optional[str] - filters: Filters = Field(default_factory=Filters) + type_filters: dict[str, bool] = Field(default_factory=dict) + attribute_filters: dict[str, list[Any]] = Field(default_factory=dict) @router.post( @@ -124,7 +124,11 @@ async def query_dataset_images( images = [ image for image in images - if match_filter(image, Filters(**dataset.filters)) + if match_filter( + image, + type_filters=dataset.type_filters, + attribute_filters=dataset.attribute_filters, + ) ] attributes = {} @@ -160,7 +164,8 @@ async def query_dataset_images( for image in images if match_filter( image, - Filters(**query.filters.dict()), + type_filters=query.type_filters, + attribute_filters=query.attribute_filters, ) ] diff --git a/fractal_server/images/__init__.py b/fractal_server/images/__init__.py index 1d2e8dc55d..960ac2105a 100644 --- a/fractal_server/images/__init__.py +++ b/fractal_server/images/__init__.py @@ -1,4 +1,3 @@ -from .models import Filters # noqa: F401 from .models import SingleImage # noqa: F401 from .models import SingleImageTaskOutput # noqa: F401 from .models import SingleImageUpdate # noqa: F401 diff --git a/fractal_server/images/tools.py b/fractal_server/images/tools.py index 6452d08dd9..b9ee4f2f46 100644 --- a/fractal_server/images/tools.py +++ b/fractal_server/images/tools.py @@ -4,9 +4,6 @@ from typing import Optional from typing import Union -from fractal_server.images import Filters - - ImageSearch = dict[Literal["image", "index"], Union[int, dict[str, Any]]] @@ -33,33 +30,41 @@ def find_image_by_zarr_url( return dict(image=copy(images[ind]), index=ind) -def match_filter(image: dict[str, Any], filters: Filters) -> bool: +def match_filter( + image: dict[str, Any], + type_filters: Optional[dict[str, bool]] = None, + attribute_filters: Optional[dict[str, list[Any]]] = None, +) -> bool: """ Find whether an image matches a filter set. Arguments: image: A single image. - filters: A set of filters. + type_filters: + attribute_filters: Returns: Whether the image matches the filter set. 
""" - # Verify match with types (using a False default) - for key, value in filters.types.items(): - if image["types"].get(key, False) != value: - return False - # Verify match with attributes (only for non-None filters) - for key, value in filters.attributes.items(): - if value is None: - continue - if image["attributes"].get(key) != value: - return False + if type_filters is not None: + # Verify match with types (using a False default) + for key, value in type_filters.items(): + if image["types"].get(key, False) != value: + return False + if attribute_filters is not None: + # Verify match with attributes (only for not-None filters) + for key, values in attribute_filters.items(): + if None in values: + continue + if not image["attributes"].get(key) in values: + return False return True def filter_image_list( images: list[dict[str, Any]], - filters: Filters, + type_filters: Optional[dict[str, bool]] = None, + attribute_filters: Optional[dict[str, list[Any]]] = None, ) -> list[dict[str, Any]]: """ Compute a sublist with images that match a filter set. @@ -73,12 +78,16 @@ def filter_image_list( """ # When no filter is provided, return all images - if filters.attributes == {} and filters.types == {}: + if type_filters is None and attribute_filters is None: return images filtered_images = [ copy(this_image) for this_image in images - if match_filter(this_image, filters=filters) + if match_filter( + this_image, + type_filters=type_filters, + attribute_filters=attribute_filters, + ) ] return filtered_images diff --git a/tests/v2/03_api/test_api_dataset_images.py b/tests/v2/03_api/test_api_dataset_images.py index 534c39ddff..88a69f54bb 100644 --- a/tests/v2/03_api/test_api_dataset_images.py +++ b/tests/v2/03_api/test_api_dataset_images.py @@ -1,4 +1,3 @@ -from fractal_server.images import Filters from fractal_server.images import SingleImage from fractal_server.images.tools import find_image_by_zarr_url from fractal_server.images.tools import match_filter @@ -136,14 +135,14 @@ async def test_query_images( # use `query.attributes` res = await client.post( f"{PREFIX}/project/{project.id}/dataset/{dataset.id}/images/query/", - json=dict(filters=dict(types=dict(flag=False))), + json=dict(type_filters=dict(flag=False)), ) assert res.status_code == 200 assert res.json()["total_count"] == len( [ image for image in images - if match_filter(image, Filters(types={"flag": False})) + if match_filter(image, type_filters={"flag": False}) ] ) assert res.json()["current_page"] == 1 @@ -154,14 +153,14 @@ async def test_query_images( res = await client.post( f"{PREFIX}/project/{project.id}/dataset/{dataset.id}/images/query/" "?page_size=1000", - json=dict(filters=dict(types={"flag": True})), + json=dict(type_filters={"flag": True}), ) assert res.status_code == 200 assert res.json()["total_count"] == len( [ image for image in images - if match_filter(image, filters=Filters(types={"flag": 1})) + if match_filter(image, type_filters={"flag": 1}) ] ) assert res.json()["page_size"] == 1000 @@ -171,7 +170,7 @@ async def test_query_images( res = await client.post( f"{PREFIX}/project/{project.id}/dataset/{dataset.id}/images/query/" "?page_size=42", - json=dict(filters=dict(types={"foo": True})), + json=dict(type_filters={"foo": True}), ) assert res.status_code == 200 assert res.json()["total_count"] == 0 @@ -180,7 +179,7 @@ async def test_query_images( assert res.json()["images"] == [] res = await client.post( f"{PREFIX}/project/{project.id}/dataset/{dataset.id}/images/query/", - json=dict(filters=dict(types={"foo": 
False})), + json=dict(type_filters={"foo": False}), ) assert res.status_code == 200 assert res.json()["total_count"] == N diff --git a/tests/v2/05_images/test_benchmark_helper_functions.py b/tests/v2/05_images/test_benchmark_helper_functions.py index 9d49b6269d..6a296a2f40 100644 --- a/tests/v2/05_images/test_benchmark_helper_functions.py +++ b/tests/v2/05_images/test_benchmark_helper_functions.py @@ -6,7 +6,6 @@ from fractal_server.app.runner.v2.deduplicate_list import deduplicate_list from fractal_server.app.runner.v2.task_interface import InitArgsModel -from fractal_server.images import Filters from fractal_server.images import SingleImage from fractal_server.images.tools import filter_image_list @@ -46,10 +45,8 @@ def test_filter_image_list_with_filters( ): new_list = filter_image_list( images=images, - filters=Filters( - attributes=dict(a1=0, a2="a2", a3=None), - types=dict(t1=True, t2=False), - ), + attribute_filters=dict(a1=0, a2="a2", a3=None), + type_filters=dict(t1=True, t2=False), ) debug(len(images), len(new_list)) assert len(new_list) == len(images) // 4 @@ -65,7 +62,7 @@ def test_filter_image_list_few_filters( ): new_list = filter_image_list( images=images, - filters=Filters(attributes=dict(a1=0)), + attribute_filters=dict(a1=0), ) debug(len(images), len(new_list)) assert len(new_list) == len(images) // 2 diff --git a/tests/v2/05_images/test_filters.py b/tests/v2/05_images/test_filters.py index 8ac82a6c53..a4376d9c9f 100644 --- a/tests/v2/05_images/test_filters.py +++ b/tests/v2/05_images/test_filters.py @@ -2,7 +2,6 @@ from devtools import debug from pydantic import ValidationError -from fractal_server.images import Filters from fractal_server.images import SingleImage from fractal_server.images.tools import filter_image_list @@ -96,26 +95,6 @@ def test_singleimage_attributes_validation(): ) -def test_filters_attributes_validation(): - invalid = [ - ["l", "i", "s", "t"], - {"d": "i", "c": "t"}, - {"s", "e", "t"}, - ("t", "u", "p", "l", "e"), - bool, # type - lambda x: x, # function - ] - - for item in invalid: - with pytest.raises(ValidationError) as e: - Filters(attributes={"key": item}) - debug(e) - - valid = ["string", -7, 3.14, True, None] - for item in valid: - assert Filters(attributes={"key": item}).attributes["key"] == item - - @pytest.mark.parametrize( "attribute_filters,type_filters,expected_number", [ @@ -165,7 +144,8 @@ def test_filter_image_list_SingleImage( ): filtered_list = filter_image_list( images=IMAGES, - filters=Filters(attributes=attribute_filters, types=type_filters), + attribute_filters=attribute_filters, + type_filters=type_filters, ) debug(attribute_filters) diff --git a/tests/v2/05_images/test_image_models.py b/tests/v2/05_images/test_image_models.py index 89960e99ac..918d30d313 100644 --- a/tests/v2/05_images/test_image_models.py +++ b/tests/v2/05_images/test_image_models.py @@ -1,7 +1,6 @@ import pytest from pydantic import ValidationError -from fractal_server.images import Filters from fractal_server.images import SingleImage from fractal_server.images import SingleImageTaskOutput from fractal_server.images import SingleImageUpdate @@ -87,29 +86,6 @@ def test_single_image_task_output(): SingleImage(**base.dict()) -def test_filters(): - - Filters() - - valid_attributes = dict(a="string", b=3, c=0.33, d=True, e=None) - assert Filters(attributes=valid_attributes).attributes == valid_attributes - - invalid_attributes = [ - dict(a=["l", "i", "s", "t"]), - dict(a={"d": "i", "c": "t"}), - ] - for attr in invalid_attributes: - with 
pytest.raises(ValidationError): - Filters(attributes=attr) - - valid_types = dict(a=True, b=False) - assert Filters(types=valid_types).types == valid_types - - invalid_types = dict(a="not a bool") - with pytest.raises(ValidationError): - Filters(types=invalid_types) - - def test_single_image_update(): with pytest.raises(ValidationError): diff --git a/tests/v2/05_images/test_unit_image_tools.py b/tests/v2/05_images/test_unit_image_tools.py index 87520782dc..5ab399074e 100644 --- a/tests/v2/05_images/test_unit_image_tools.py +++ b/tests/v2/05_images/test_unit_image_tools.py @@ -1,4 +1,3 @@ -from fractal_server.images import Filters from fractal_server.images import SingleImage from fractal_server.images.tools import filter_image_list from fractal_server.images.tools import find_image_by_zarr_url @@ -49,184 +48,120 @@ def test_match_filter(): ).dict() # Empty - assert match_filter(image, Filters()) is True + assert match_filter(image) is True # Attributes - f = Filters(attributes=dict(foo="bar")) # not existing attribute - assert match_filter(image, f) is False + # not existing attribute + assert match_filter(image, attribute_filters=dict(foo="bar")) is False - f = Filters(attributes=dict(name="a")) - assert match_filter(image, f) is True + assert match_filter(image, attribute_filters=dict(name="a")) is True - f = Filters(attributes=dict(num=0)) - assert match_filter(image, f) is True + assert match_filter(image, attribute_filters=dict(num=0)) is True - f = Filters( - attributes=dict( - name="a", - num=0, + assert ( + match_filter( + image, + attribute_filters=dict( + name="a", + num=0, + ), ) + is True ) - assert match_filter(image, f) is True - f = Filters( - attributes=dict( - name="a", - num=0, - foo="bar", # not existing attribute - ) + # not existing attribute + assert ( + match_filter(image, attribute_filters=dict(name="a", num=0, foo="bar")) + is False ) - assert match_filter(image, f) is False - f = Filters( - attributes=dict( - name="a", - num="0", # int as string - ) + # int as string + assert ( + match_filter(image, attribute_filters=dict(name="a", num="0")) is False ) - assert match_filter(image, f) is False # Types - f = Filters(types=dict(a=True)) - assert match_filter(image, f) is True - f = Filters(types=dict(b=False)) - assert match_filter(image, f) is True - f = Filters( - types=dict( - a=True, - b=False, - ) + assert match_filter(image, type_filters=dict(a=True)) is True + assert match_filter(image, type_filters=dict(b=False)) is True + assert match_filter(image, type_filters=dict(a=True, b=False)) is True + assert match_filter(image, type_filters=dict(a=False)) is False + assert match_filter(image, type_filters=dict(a=True, b=True)) is False + # not existing 'True' types are checked + assert match_filter(image, type_filters=dict(c=True)) is False + # not existing 'False' types are ignored + assert match_filter(image, type_filters=dict(c=False)) is True + assert ( + match_filter(image, type_filters=dict(a=True, b=False, c=True)) + is False ) - assert match_filter(image, f) is True - f = Filters( - types=dict( - a=False, - ) + assert ( + match_filter(image, type_filters=dict(a=True, b=False, c=False)) + is True ) - assert match_filter(image, f) is False - f = Filters( - types=dict( - a=True, - b=True, + + # Both + + assert ( + match_filter( + image, attribute_filters=dict(name="a"), type_filters=dict(a=True) ) + is True ) - assert match_filter(image, f) is False - f = Filters( - types=dict( - c=True, # not existing 'True' types are checked + assert ( + match_filter( + 
image, attribute_filters=dict(name="a"), type_filters=dict(a=False) ) + is False ) - assert match_filter(image, f) is False - f = Filters( - types=dict( - c=False, # not existing 'False' types are ignored + assert ( + match_filter( + image, attribute_filters=dict(name="b"), type_filters=dict(a=True) ) + is False ) - assert match_filter(image, f) is True - f = Filters( - types=dict( - a=True, - b=False, - c=True, + assert ( + match_filter( + image, + attribute_filters=dict(name="a"), + type_filters=dict(x=False, y=False, z=False), ) + is True ) - assert match_filter(image, f) is False - f = Filters( - types=dict( - a=True, - b=False, - c=False, + assert ( + match_filter( + image, + attribute_filters=dict(name="a"), + type_filters=dict(x=False, y=False, z=True), ) + is False ) - assert match_filter(image, f) is True - - # Both - - f = Filters( - attributes=dict( - name="a", - ), - types=dict(a=True), - ) - assert match_filter(image, f) is True - f = Filters( - attributes=dict( - name="a", - ), - types=dict(a=False), - ) - assert match_filter(image, f) is False - f = Filters( - attributes=dict( - name="b", - ), - types=dict(a=True), - ) - assert match_filter(image, f) is False - f = Filters( - attributes=dict( - name="a", - ), - types=dict( - x=False, - y=False, - z=False, - ), - ) - assert match_filter(image, f) is True - f = Filters( - attributes=dict( - name="a", - ), - types=dict( - x=False, - y=False, - z=True, - ), - ) - assert match_filter(image, f) is False def test_filter_image_list(): # Empty - res = filter_image_list(images, Filters()) + res = filter_image_list(images) assert res == images # Attributes - f = Filters(attributes=dict(name="a")) - res = filter_image_list(images, f) + res = filter_image_list(images, attribute_filters=dict(name="a")) k = (N // 2) if not N % 2 else (N + 1) // 2 assert len(res) == k - f = Filters(attributes=dict(name="b")) - res = filter_image_list(images, f) + res = filter_image_list(images, attribute_filters=dict(name="b")) assert len(res) == N - k - f = Filters(attributes=dict(num=0)) - res = filter_image_list(images, f) + res = filter_image_list(images, attribute_filters=dict(num=0)) assert len(res) == len([i for i in range(N) if i % 3 == 0]) - f = Filters(attributes=dict(num=1)) - res = filter_image_list(images, f) + res = filter_image_list(images, attribute_filters=dict(num=1)) assert len(res) == len([i for i in range(N) if i % 3 == 1]) - f = Filters(attributes=dict(num=2)) - res = filter_image_list(images, f) + res = filter_image_list(images, attribute_filters=dict(num=2)) assert len(res) == len([i for i in range(N) if i % 3 == 2]) - f = Filters(attributes=dict(name="foo")) - res = filter_image_list(images, f) + res = filter_image_list(images, attribute_filters=dict(name="foo")) assert len(res) == 0 - f = Filters(attributes=dict(num=3)) - res = filter_image_list(images, f) + res = filter_image_list(images, attribute_filters=dict(num=3)) assert len(res) == 0 - f = Filters(attributes=dict(name="a", num=3)) - res = filter_image_list(images, f) + res = filter_image_list(images, attribute_filters=dict(name="a", num=3)) assert len(res) == 0 - f = Filters(attributes=dict(name="foo", num=0)) - res = filter_image_list(images, f) + res = filter_image_list(images, attribute_filters=dict(name="foo", num=0)) assert len(res) == 0 - f = Filters( - types=dict( - a=True, - b=True, - ) - ) - res = filter_image_list(images, f) + res = filter_image_list(images, type_filters=dict(a=True, b=True)) assert len(res) == N // 2 - N // 3 + 1 From 
643be065e82b0880840e980d784f3c3414b689cf Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Wed, 8 Jan 2025 14:20:41 +0100 Subject: [PATCH 22/99] remove Filters from runner --- fractal_server/app/runner/filenames.py | 3 +- fractal_server/app/runner/v2/runner.py | 39 +++++++------------ .../app/runner/v2/task_interface.py | 4 +- 3 files changed, 17 insertions(+), 29 deletions(-) diff --git a/fractal_server/app/runner/filenames.py b/fractal_server/app/runner/filenames.py index eaa8a123c0..db80a5efa0 100644 --- a/fractal_server/app/runner/filenames.py +++ b/fractal_server/app/runner/filenames.py @@ -1,5 +1,6 @@ HISTORY_FILENAME = "history.json" -FILTERS_FILENAME = "filters.json" +TYPE_FILTERS_FILENAME = "type_filters.json" +ATTRIBUTE_FILTERS_FILENAME = "attribute_filters.json" IMAGES_FILENAME = "images.json" METADATA_FILENAME = "metadata.json" SHUTDOWN_FILENAME = "shutdown" diff --git a/fractal_server/app/runner/v2/runner.py b/fractal_server/app/runner/v2/runner.py index 5a87ac8a58..47f7b50881 100644 --- a/fractal_server/app/runner/v2/runner.py +++ b/fractal_server/app/runner/v2/runner.py @@ -7,15 +7,14 @@ from typing import Callable from typing import Optional -from ....images import Filters from ....images import SingleImage from ....images.tools import filter_image_list from ....images.tools import find_image_by_zarr_url from ....images.tools import match_filter from ..exceptions import JobExecutionError -from ..filenames import FILTERS_FILENAME from ..filenames import HISTORY_FILENAME from ..filenames import IMAGES_FILENAME +from ..filenames import TYPE_FILTERS_FILENAME from .runner_functions import no_op_submit_setup_call from .runner_functions import run_v2_task_compound from .runner_functions import run_v2_task_non_parallel @@ -47,7 +46,7 @@ def execute_tasks_v2( # Initialize local dataset attributes zarr_dir = dataset.zarr_dir tmp_images = deepcopy(dataset.images) - tmp_filters = deepcopy(dataset.filters) + tmp_type_filters = deepcopy(dataset.type_filters) tmp_history = [] for wftask in wf_task_list: @@ -58,19 +57,14 @@ def execute_tasks_v2( # PRE TASK EXECUTION # Get filtered images - pre_filters = dict( - types=copy(tmp_filters["types"]), - attributes=copy(tmp_filters["attributes"]), - ) - pre_filters["types"].update(wftask.input_filters["types"]) - pre_filters["attributes"].update(wftask.input_filters["attributes"]) + pre_type_filters = copy(tmp_type_filters) + pre_type_filters.update(wftask.type_filters) filtered_images = filter_image_list( - images=tmp_images, - filters=Filters(**pre_filters), + images=tmp_images, type_filters=pre_type_filters ) # Verify that filtered images comply with task input_types for image in filtered_images: - if not match_filter(image, Filters(types=task.input_types)): + if not match_filter(image, type_filters=task.input_types): raise JobExecutionError( "Invalid filtered image list\n" f"Task input types: {task.input_types=}\n" @@ -249,19 +243,12 @@ def execute_tasks_v2( else: tmp_images.pop(img_search["index"]) - # Update filters.attributes: - # current + (task_output: not really, in current examples..) 
- if current_task_output.filters is not None: - tmp_filters["attributes"].update( - current_task_output.filters.attributes - ) - # Find manifest ouptut types types_from_manifest = task.output_types # Find task-output types - if current_task_output.filters is not None: - types_from_task = current_task_output.filters.types + if current_task_output.type_filters is not None: + types_from_task = current_task_output.type_filters else: types_from_task = {} @@ -279,8 +266,8 @@ def execute_tasks_v2( ) # Update filters.types - tmp_filters["types"].update(types_from_manifest) - tmp_filters["types"].update(types_from_task) + tmp_type_filters.update(types_from_manifest) + tmp_type_filters.update(types_from_task) # Update history (based on _DatasetHistoryItemV2) history_item = _DatasetHistoryItemV2( @@ -299,8 +286,8 @@ def execute_tasks_v2( # information with open(workflow_dir_local / HISTORY_FILENAME, "w") as f: json.dump(tmp_history, f, indent=2) - with open(workflow_dir_local / FILTERS_FILENAME, "w") as f: - json.dump(tmp_filters, f, indent=2) + with open(workflow_dir_local / TYPE_FILTERS_FILENAME, "w") as f: + json.dump(tmp_type_filters, f, indent=2) with open(workflow_dir_local / IMAGES_FILENAME, "w") as f: json.dump(tmp_images, f, indent=2) @@ -311,7 +298,7 @@ def execute_tasks_v2( # represent the new attributes (to replace the original ones) result = dict( history=tmp_history, - filters=tmp_filters, + type_filters=tmp_type_filters, images=tmp_images, ) return result diff --git a/fractal_server/app/runner/v2/task_interface.py b/fractal_server/app/runner/v2/task_interface.py index ab1a92aa90..594da627bd 100644 --- a/fractal_server/app/runner/v2/task_interface.py +++ b/fractal_server/app/runner/v2/task_interface.py @@ -6,7 +6,6 @@ from pydantic import validator from ....images import SingleImageTaskOutput -from fractal_server.images import Filters from fractal_server.urls import normalize_url @@ -16,7 +15,8 @@ class TaskOutput(BaseModel, extra=Extra.forbid): default_factory=list ) image_list_removals: list[str] = Field(default_factory=list) - filters: Filters = Field(default_factory=Filters) + type_filters: dict[str, bool] = Field(default_factory=dict) + attribute_filters: dict[str, list[Any]] = Field(default_factory=dict) def check_zarr_urls_are_unique(self) -> None: zarr_urls = [img.zarr_url for img in self.image_list_updates] From ffb9670b3d5ecb6bea3d082346a7754c64f3bf26 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Wed, 8 Jan 2025 15:27:03 +0100 Subject: [PATCH 23/99] reintroduce old columns --- fractal_server/app/models/v2/dataset.py | 4 ++ fractal_server/app/models/v2/workflowtask.py | 4 ++ .../c16108de9f77_remove_old_filter_columns.py | 58 ------------------- 3 files changed, 8 insertions(+), 58 deletions(-) delete mode 100644 fractal_server/migrations/versions/c16108de9f77_remove_old_filter_columns.py diff --git a/fractal_server/app/models/v2/dataset.py b/fractal_server/app/models/v2/dataset.py index 5fb63e356a..85c1f7cf22 100644 --- a/fractal_server/app/models/v2/dataset.py +++ b/fractal_server/app/models/v2/dataset.py @@ -1,5 +1,6 @@ from datetime import datetime from typing import Any +from typing import Literal from typing import Optional from sqlalchemy import Column @@ -40,6 +41,9 @@ class Config: sa_column=Column(JSON, server_default="[]", nullable=False) ) + filters: Optional[ + dict[Literal["attributes", "types"], dict[str, Any]] + ] = Field(sa_column=Column(JSON, nullable=True, server_default=None)) type_filters: dict[str, bool] = Field( sa_column=Column(JSON, nullable=False, 
server_default="{}") ) diff --git a/fractal_server/app/models/v2/workflowtask.py b/fractal_server/app/models/v2/workflowtask.py index 90bf7e0f8d..8a3ce414b3 100644 --- a/fractal_server/app/models/v2/workflowtask.py +++ b/fractal_server/app/models/v2/workflowtask.py @@ -1,4 +1,5 @@ from typing import Any +from typing import Literal from typing import Optional from sqlalchemy import Column @@ -24,6 +25,9 @@ class Config: args_parallel: Optional[dict[str, Any]] = Field(sa_column=Column(JSON)) args_non_parallel: Optional[dict[str, Any]] = Field(sa_column=Column(JSON)) + input_filters: Optional[ + dict[Literal["attributes", "types"], dict[str, Any]] + ] = Field(sa_column=Column(JSON, nullable=True, server_default=None)) type_filters: dict[str, bool] = Field( sa_column=Column(JSON, nullable=False, server_default="{}") ) diff --git a/fractal_server/migrations/versions/c16108de9f77_remove_old_filter_columns.py b/fractal_server/migrations/versions/c16108de9f77_remove_old_filter_columns.py deleted file mode 100644 index e79a358345..0000000000 --- a/fractal_server/migrations/versions/c16108de9f77_remove_old_filter_columns.py +++ /dev/null @@ -1,58 +0,0 @@ -"""remove old filter columns - -Revision ID: c16108de9f77 -Revises: f5aed9bbae62 -Create Date: 2025-01-07 17:36:56.520295 - -""" -import sqlalchemy as sa -from alembic import op -from sqlalchemy.dialects import postgresql - -# revision identifiers, used by Alembic. -revision = "c16108de9f77" -down_revision = "f5aed9bbae62" -branch_labels = None -depends_on = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table("datasetv2", schema=None) as batch_op: - batch_op.drop_column("filters") - - with op.batch_alter_table("workflowtaskv2", schema=None) as batch_op: - batch_op.drop_column("input_filters") - - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! 
### - with op.batch_alter_table("workflowtaskv2", schema=None) as batch_op: - batch_op.add_column( - sa.Column( - "input_filters", - postgresql.JSON(astext_type=sa.Text()), - server_default=sa.text( - '\'{"attributes": {}, "types": {}}\'::json' - ), - autoincrement=False, - nullable=False, - ) - ) - - with op.batch_alter_table("datasetv2", schema=None) as batch_op: - batch_op.add_column( - sa.Column( - "filters", - postgresql.JSON(astext_type=sa.Text()), - server_default=sa.text( - '\'{"attributes": {}, "types": {}}\'::json' - ), - autoincrement=False, - nullable=False, - ) - ) - - # ### end Alembic commands ### From d7f8bf91244b5ec0a231effff39e8a08f0e96ecf Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Wed, 8 Jan 2025 15:29:41 +0100 Subject: [PATCH 24/99] validators to ImageQuery --- fractal_server/app/routes/api/v2/images.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fractal_server/app/routes/api/v2/images.py b/fractal_server/app/routes/api/v2/images.py index 7de43e51ba..42353f1dcc 100644 --- a/fractal_server/app/routes/api/v2/images.py +++ b/fractal_server/app/routes/api/v2/images.py @@ -8,6 +8,7 @@ from fastapi import status from pydantic import BaseModel from pydantic import Field +from pydantic import validator from sqlalchemy.orm.attributes import flag_modified from ._aux_functions import _get_dataset_check_owner @@ -15,6 +16,8 @@ from fractal_server.app.db import get_async_db from fractal_server.app.models import UserOAuth from fractal_server.app.routes.auth import current_active_user +from fractal_server.app.schemas._validators import validate_attribute_filters +from fractal_server.app.schemas._validators import validate_type_filters from fractal_server.images import SingleImage from fractal_server.images import SingleImageUpdate from fractal_server.images.tools import find_image_by_zarr_url @@ -40,6 +43,13 @@ class ImageQuery(BaseModel): type_filters: dict[str, bool] = Field(default_factory=dict) attribute_filters: dict[str, list[Any]] = Field(default_factory=dict) + _type_filters = validator("type_filters", pre=True, allow_reuse=True)( + validate_type_filters() + ) + _attribute_filters = validator( + "attribute_filters", pre=True, allow_reuse=True + )(validate_attribute_filters()) + @router.post( "/project/{project_id}/dataset/{dataset_id}/images/", From d37b8c6af9ce2a88a2d1463316b971d6e6071737 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Wed, 8 Jan 2025 15:47:48 +0100 Subject: [PATCH 25/99] fix migration --- ...c1a_split_filters_and_keep_old_columns.py} | 39 +++++++++++++++++-- 1 file changed, 35 insertions(+), 4 deletions(-) rename fractal_server/migrations/versions/{f5aed9bbae62_split_filters_and_keep_old_columns.py => ec7329221c1a_split_filters_and_keep_old_columns.py} (60%) diff --git a/fractal_server/migrations/versions/f5aed9bbae62_split_filters_and_keep_old_columns.py b/fractal_server/migrations/versions/ec7329221c1a_split_filters_and_keep_old_columns.py similarity index 60% rename from fractal_server/migrations/versions/f5aed9bbae62_split_filters_and_keep_old_columns.py rename to fractal_server/migrations/versions/ec7329221c1a_split_filters_and_keep_old_columns.py index ee635f3854..214c482682 100644 --- a/fractal_server/migrations/versions/f5aed9bbae62_split_filters_and_keep_old_columns.py +++ b/fractal_server/migrations/versions/ec7329221c1a_split_filters_and_keep_old_columns.py @@ -1,16 +1,15 @@ """split filters and keep old columns -Revision ID: f5aed9bbae62 +Revision ID: ec7329221c1a Revises: 316140ff7ee1 -Create Date: 2025-01-07 
17:16:45.454431 +Create Date: 2025-01-08 15:46:26.940083 """ import sqlalchemy as sa from alembic import op - # revision identifiers, used by Alembic. -revision = "f5aed9bbae62" +revision = "ec7329221c1a" down_revision = "316140ff7ee1" branch_labels = None depends_on = None @@ -32,6 +31,14 @@ def upgrade() -> None: nullable=False, ) ) + batch_op.alter_column( + "filters", + existing_type=sa.JSON(astext_type=sa.Text()), + nullable=True, + existing_server_default=sa.text( + '\'{"attributes": {}, "types": {}}\'::json' + ), + ) with op.batch_alter_table("jobv2", schema=None) as batch_op: batch_op.add_column( @@ -49,6 +56,14 @@ def upgrade() -> None: "type_filters", sa.JSON(), server_default="{}", nullable=False ) ) + batch_op.alter_column( + "input_filters", + existing_type=sa.JSON(astext_type=sa.Text()), + nullable=True, + existing_server_default=sa.text( + '\'{"attributes": {}, "types": {}}\'::json' + ), + ) # ### end Alembic commands ### @@ -56,12 +71,28 @@ def upgrade() -> None: def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### with op.batch_alter_table("workflowtaskv2", schema=None) as batch_op: + batch_op.alter_column( + "input_filters", + existing_type=sa.JSON(astext_type=sa.Text()), + nullable=False, + existing_server_default=sa.text( + '\'{"attributes": {}, "types": {}}\'::json' + ), + ) batch_op.drop_column("type_filters") with op.batch_alter_table("jobv2", schema=None) as batch_op: batch_op.drop_column("attribute_filters") with op.batch_alter_table("datasetv2", schema=None) as batch_op: + batch_op.alter_column( + "filters", + existing_type=sa.JSON(astext_type=sa.Text()), + nullable=False, + existing_server_default=sa.text( + '\'{"attributes": {}, "types": {}}\'::json' + ), + ) batch_op.drop_column("attribute_filters") batch_op.drop_column("type_filters") From 40afc04f4dde671c874a18c27d5881d32fd9d6ec Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Wed, 8 Jan 2025 16:45:49 +0100 Subject: [PATCH 26/99] revamp validators --- fractal_server/app/routes/api/v2/images.py | 15 +++-- fractal_server/app/schemas/_validators.py | 56 +++++++++---------- fractal_server/app/schemas/v2/dataset.py | 49 +++++++++------- fractal_server/app/schemas/v2/job.py | 11 +++- fractal_server/app/schemas/v2/workflowtask.py | 32 ++++++++--- 5 files changed, 96 insertions(+), 67 deletions(-) diff --git a/fractal_server/app/routes/api/v2/images.py b/fractal_server/app/routes/api/v2/images.py index 42353f1dcc..cea374c305 100644 --- a/fractal_server/app/routes/api/v2/images.py +++ b/fractal_server/app/routes/api/v2/images.py @@ -8,6 +8,7 @@ from fastapi import status from pydantic import BaseModel from pydantic import Field +from pydantic import root_validator from pydantic import validator from sqlalchemy.orm.attributes import flag_modified @@ -16,6 +17,7 @@ from fractal_server.app.db import get_async_db from fractal_server.app.models import UserOAuth from fractal_server.app.routes.auth import current_active_user +from fractal_server.app.schemas._validators import root_validate_dict_keys from fractal_server.app.schemas._validators import validate_attribute_filters from fractal_server.app.schemas._validators import validate_type_filters from fractal_server.images import SingleImage @@ -43,12 +45,15 @@ class ImageQuery(BaseModel): type_filters: dict[str, bool] = Field(default_factory=dict) attribute_filters: dict[str, list[Any]] = Field(default_factory=dict) - _type_filters = validator("type_filters", pre=True, allow_reuse=True)( - validate_type_filters() + _dict_keys = 
root_validator(pre=True, allow_reuse=True)( + root_validate_dict_keys + ) + _type_filters = validator("type_filters", allow_reuse=True)( + validate_type_filters + ) + _attribute_filters = validator("attribute_filters", allow_reuse=True)( + validate_attribute_filters ) - _attribute_filters = validator( - "attribute_filters", pre=True, allow_reuse=True - )(validate_attribute_filters()) @router.post( diff --git a/fractal_server/app/schemas/_validators.py b/fractal_server/app/schemas/_validators.py index 0eab8be344..7d42eb7d7a 100644 --- a/fractal_server/app/schemas/_validators.py +++ b/fractal_server/app/schemas/_validators.py @@ -49,19 +49,6 @@ def val(d: Optional[dict[str, Any]]) -> Optional[dict[str, Any]]: return val -def valdict_keys_str(): - def val(d: Optional[dict[Any, Any]]) -> Optional[dict[str, Any]]: - - if d is not None: - if any(not isinstance(key, str) for key in d.keys()): - raise ValueError( - f"Dictionary contains non-string keys: '{d}'." - ) - return d - - return val - - def valdict_scalarvalues(attribute: str, accept_type_none: bool = True): """ Check that every value of a `dict[str, list[Any]]` is a list of scalar @@ -144,27 +131,34 @@ def val(must_be_unique: Optional[list]) -> Optional[list]: return val -def validate_type_filters(): - def val(type_filters: dict) -> dict[str, Any]: - type_filters = valdict_keys_str()(type_filters) +def validate_type_filters( + type_filters: Optional[dict], +) -> Optional[dict[str, bool]]: + if type_filters is not None: type_filters = valdict_keys("type_filters")(type_filters) - return type_filters - - return val + return type_filters -def validate_attribute_filters(accept_type_none: bool = True): - def val(attribute_filters: dict) -> dict[str, Any]: - attribute_filters = valdict_keys_str()(attribute_filters) +def validate_attribute_filters( + attribute_filters: Optional[dict[str, list[Any]]] +) -> Optional[dict[str, list[Any]]]: + if attribute_filters is not None: attribute_filters = valdict_keys("attribute_filters")( attribute_filters ) - for value in attribute_filters.values(): - if not isinstance(value, list): - raise ValueError(f"Values must be a list. Given {type(value)}") - attribute_filters = valdict_scalarvalues( - "attribute_filters", accept_type_none=accept_type_none - )(attribute_filters) - return attribute_filters - - return val + for key, values in attribute_filters.items(): + for value in values: + if not isinstance(value, (int, float, str, bool, type(None))): + raise ValueError( + f"{attribute_filters}[{key}] values must be a scalars " + "(int, float, str, bool, or None). 
" + f"Given {value} ({type(value)})" + ) + return attribute_filters + + +def root_validate_dict_keys(cls, object: dict) -> dict: + for dictionary in (v for v in object.values() if isinstance(v, dict)): + if not all(isinstance(key, str) for key in dictionary.keys()): + raise ValueError("Dictionary keys must be strings.") + return object diff --git a/fractal_server/app/schemas/v2/dataset.py b/fractal_server/app/schemas/v2/dataset.py index b4422b5f9f..604e7f2702 100644 --- a/fractal_server/app/schemas/v2/dataset.py +++ b/fractal_server/app/schemas/v2/dataset.py @@ -5,8 +5,10 @@ from pydantic import BaseModel from pydantic import Extra from pydantic import Field +from pydantic import root_validator from pydantic import validator +from .._validators import root_validate_dict_keys from .._validators import validate_attribute_filters from .._validators import validate_type_filters from .._validators import valstr @@ -41,14 +43,17 @@ class DatasetCreateV2(BaseModel, extra=Extra.forbid): # Validators - _name = validator("name", allow_reuse=True)(valstr("name")) - - _type_filters = validator("type_filters", pre=True, allow_reuse=True)( - validate_type_filters() + _dict_keys = root_validator(pre=True, allow_reuse=True)( + root_validate_dict_keys + ) + _type_filters = validator("type_filters", allow_reuse=True)( + validate_type_filters + ) + _attribute_filters = validator("attribute_filters", allow_reuse=True)( + validate_attribute_filters ) - _attribute_filters = validator( - "attribute_filters", pre=True, allow_reuse=True - )(validate_attribute_filters()) + + _name = validator("name", allow_reuse=True)(valstr("name")) @validator("zarr_dir") def normalize_zarr_dir(cls, v: Optional[str]) -> Optional[str]: @@ -83,14 +88,17 @@ class DatasetUpdateV2(BaseModel, extra=Extra.forbid): # Validators - _name = validator("name", allow_reuse=True)(valstr("name")) - - _type_filters = validator("type_filters", pre=True, allow_reuse=True)( - validate_type_filters() + _dict_keys = root_validator(pre=True, allow_reuse=True)( + root_validate_dict_keys + ) + _type_filters = validator("type_filters", allow_reuse=True)( + validate_type_filters + ) + _attribute_filters = validator("attribute_filters", allow_reuse=True)( + validate_attribute_filters ) - _attribute_filters = validator( - "attribute_filters", pre=True, allow_reuse=True - )(validate_attribute_filters()) + + _name = validator("name", allow_reuse=True)(valstr("name")) @validator("zarr_dir") def normalize_zarr_dir(cls, v: Optional[str]) -> Optional[str]: @@ -119,12 +127,15 @@ class DatasetImportV2(BaseModel, extra=Extra.forbid): # Validators - _type_filters = validator("type_filters", pre=True, allow_reuse=True)( - validate_type_filters() + _dict_keys = root_validator(pre=True, allow_reuse=True)( + root_validate_dict_keys + ) + _type_filters = validator("type_filters", allow_reuse=True)( + validate_type_filters + ) + _attribute_filters = validator("attribute_filters", allow_reuse=True)( + validate_attribute_filters ) - _attribute_filters = validator( - "attribute_filters", pre=True, allow_reuse=True - )(validate_attribute_filters()) @validator("zarr_dir") def normalize_zarr_dir(cls, v: str) -> str: diff --git a/fractal_server/app/schemas/v2/job.py b/fractal_server/app/schemas/v2/job.py index 740d9f2f47..ef7d8780f8 100644 --- a/fractal_server/app/schemas/v2/job.py +++ b/fractal_server/app/schemas/v2/job.py @@ -6,9 +6,11 @@ from pydantic import BaseModel from pydantic import Extra from pydantic import Field +from pydantic import root_validator from pydantic import 
validator from pydantic.types import StrictStr +from .._validators import root_validate_dict_keys from .._validators import validate_attribute_filters from .._validators import valstr from .dumps import DatasetDumpV2 @@ -50,9 +52,12 @@ class JobCreateV2(BaseModel, extra=Extra.forbid): _worker_init = validator("worker_init", allow_reuse=True)( valstr("worker_init") ) - _attribute_filters = validator( - "attribute_filters", pre=True, allow_reuse=True - )(validate_attribute_filters()) + _dict_keys = root_validator(pre=True, allow_reuse=True)( + root_validate_dict_keys + ) + _attribute_filters = validator("attribute_filters", allow_reuse=True)( + validate_attribute_filters + ) @validator("first_task_index", always=True) def first_task_index_non_negative(cls, v, values): diff --git a/fractal_server/app/schemas/v2/workflowtask.py b/fractal_server/app/schemas/v2/workflowtask.py index 94c009c370..9405e5b74d 100644 --- a/fractal_server/app/schemas/v2/workflowtask.py +++ b/fractal_server/app/schemas/v2/workflowtask.py @@ -6,8 +6,10 @@ from pydantic import BaseModel from pydantic import Extra from pydantic import Field +from pydantic import root_validator from pydantic import validator +from .._validators import root_validate_dict_keys from .._validators import valdict_keys from .._validators import validate_type_filters from .task import TaskExportV2 @@ -46,15 +48,19 @@ class WorkflowTaskCreateV2(BaseModel, extra=Extra.forbid): type_filters: dict[str, bool] = Field(default_factory=dict) # Validators + _dict_keys = root_validator(pre=True, allow_reuse=True)( + root_validate_dict_keys + ) + _type_filters = validator("type_filters", allow_reuse=True)( + validate_type_filters + ) + _meta_non_parallel = validator("meta_non_parallel", allow_reuse=True)( valdict_keys("meta_non_parallel") ) _meta_parallel = validator("meta_parallel", allow_reuse=True)( valdict_keys("meta_parallel") ) - _type_filters = validator("type_filters", pre=True, allow_reuse=True)( - validate_type_filters() - ) @validator("args_non_parallel") def validate_args_non_parallel(cls, value): @@ -124,15 +130,19 @@ class WorkflowTaskUpdateV2(BaseModel, extra=Extra.forbid): type_filters: Optional[dict[str, bool]] # Validators + _dict_keys = root_validator(pre=True, allow_reuse=True)( + root_validate_dict_keys + ) + _type_filters = validator("type_filters", allow_reuse=True)( + validate_type_filters + ) + _meta_non_parallel = validator("meta_non_parallel", allow_reuse=True)( valdict_keys("meta_non_parallel") ) _meta_parallel = validator("meta_parallel", allow_reuse=True)( valdict_keys("meta_parallel") ) - _type_filters = validator("type_filters", pre=True, allow_reuse=True)( - validate_type_filters() - ) @validator("args_non_parallel") def validate_args_non_parallel(cls, value): @@ -174,6 +184,13 @@ class WorkflowTaskImportV2(BaseModel, extra=Extra.forbid): task: Union[TaskImportV2, TaskImportV2Legacy] + _dict_keys = root_validator(pre=True, allow_reuse=True)( + root_validate_dict_keys + ) + _type_filters = validator("type_filters", allow_reuse=True)( + validate_type_filters + ) + _meta_non_parallel = validator("meta_non_parallel", allow_reuse=True)( valdict_keys("meta_non_parallel") ) @@ -186,9 +203,6 @@ class WorkflowTaskImportV2(BaseModel, extra=Extra.forbid): _args_parallel = validator("args_parallel", allow_reuse=True)( valdict_keys("args_parallel") ) - _type_filters = validator("type_filters", pre=True, allow_reuse=True)( - validate_type_filters() - ) class WorkflowTaskExportV2(BaseModel): From 845ea2857775de72b9f28ef6a80e23f41a6d69bc 
Mon Sep 17 00:00:00 2001
From: Yuri Chiucconi
Date: Wed, 8 Jan 2025 16:59:08 +0100
Subject: [PATCH 27/99] homogeneous types in list

---
 fractal_server/app/schemas/_validators.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/fractal_server/app/schemas/_validators.py b/fractal_server/app/schemas/_validators.py
index 7d42eb7d7a..931235681f 100644
--- a/fractal_server/app/schemas/_validators.py
+++ b/fractal_server/app/schemas/_validators.py
@@ -147,12 +147,15 @@ def validate_attribute_filters(
             attribute_filters
         )
         for key, values in attribute_filters.items():
-            for value in values:
-                if not isinstance(value, (int, float, str, bool, type(None))):
+            if values:
+                _type = type(values[0])
+                if (_type not in (int, float, str, bool, type(None))) or (
+                    not all(isinstance(value, _type) for value in values)
+                ):
                     raise ValueError(
-                        f"{attribute_filters}[{key}] values must be a scalars "
+                        f"List '{attribute_filters}[{key}] = {values}' "
+                        "does not contain homogeneous scalar elements "
                         "(int, float, str, bool, or None). "
-                        f"Given {value} ({type(value)})"
                     )
     return attribute_filters
 

From 4144d77e8d18921d7bfcfd0c0f99547756f6750e Mon Sep 17 00:00:00 2001
From: Yuri Chiucconi
Date: Wed, 8 Jan 2025 16:59:29 +0100
Subject: [PATCH 28/99] remove check from match filter

---
 fractal_server/images/tools.py | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/fractal_server/images/tools.py b/fractal_server/images/tools.py
index b9ee4f2f46..9f78a1f554 100644
--- a/fractal_server/images/tools.py
+++ b/fractal_server/images/tools.py
@@ -32,8 +32,8 @@ def find_image_by_zarr_url(
 
 def match_filter(
     image: dict[str, Any],
-    type_filters: Optional[dict[str, bool]] = None,
-    attribute_filters: Optional[dict[str, list[Any]]] = None,
+    type_filters: dict[str, bool],
+    attribute_filters: dict[str, list[Any]],
 ) -> bool:
     """
     Find whether an image matches a filter set.
 
     Arguments:
         image: A single image.
         type_filters:
         attribute_filters:
 
     Returns:
         Whether the image matches the filter set.
     """
-    if type_filters is not None:
-        # Verify match with types (using a False default)
-        for key, value in type_filters.items():
-            if image["types"].get(key, False) != value:
-                return False
-    if attribute_filters is not None:
-        # Verify match with attributes (only for not-None filters)
-        for key, values in attribute_filters.items():
-            if None in values:
-                continue
-            if not image["attributes"].get(key) in values:
-                return False
+
+    # Verify match with types (using a False default)
+    for key, value in type_filters.items():
+        if image["types"].get(key, False) != value:
+            return False
+
+    # Verify match with attributes
+    for key, values in attribute_filters.items():
+        if image["attributes"].get(key) not in values:
+            return False
+
     return True
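Note: with PATCH 27 and PATCH 28 combined, attribute filters map each key to a
homogeneous list of accepted scalar values, and `match_filter` now requires
both filter dictionaries instead of optional ones. A minimal sketch of the
resulting semantics (the example image below is an illustrative assumption,
not taken from the repository's tests):

    from fractal_server.images.tools import match_filter

    image = dict(
        zarr_url="/zarr_dir/my_image",
        attributes=dict(well="A01", channel=1),
        types=dict(is_3D=True),
    )

    # Each attribute filter lists the values it accepts
    assert match_filter(
        image,
        type_filters={"is_3D": True},
        attribute_filters={"well": ["A01", "A02"]},
    )

    # Types missing from image["types"] default to False
    assert not match_filter(
        image,
        type_filters={"registered": True},
        attribute_filters={},
    )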
""" - if type_filters is not None: - # Verify match with types (using a False default) - for key, value in type_filters.items(): - if image["types"].get(key, False) != value: - return False - if attribute_filters is not None: - # Verify match with attributes (only for not-None filters) - for key, values in attribute_filters.items(): - if None in values: - continue - if not image["attributes"].get(key) in values: - return False + + # Verify match with types (using a False default) + for key, value in type_filters.items(): + if image["types"].get(key, False) != value: + return False + + # Verify match with attributes (only for not-None filters) + for key, values in attribute_filters.items(): + if not image["attributes"].get(key) in values: + return False + return True From 63c03287ead5658e393f8f2cc51b430ae39d013b Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Wed, 8 Jan 2025 17:10:07 +0100 Subject: [PATCH 29/99] remove valdict_scalarvalues --- fractal_server/app/schemas/_validators.py | 28 ----------------------- 1 file changed, 28 deletions(-) diff --git a/fractal_server/app/schemas/_validators.py b/fractal_server/app/schemas/_validators.py index 931235681f..7b347c184a 100644 --- a/fractal_server/app/schemas/_validators.py +++ b/fractal_server/app/schemas/_validators.py @@ -1,7 +1,6 @@ import os from typing import Any from typing import Optional -from typing import Union def valstr(attribute: str, accept_none: bool = False): @@ -49,33 +48,6 @@ def val(d: Optional[dict[str, Any]]) -> Optional[dict[str, Any]]: return val -def valdict_scalarvalues(attribute: str, accept_type_none: bool = True): - """ - Check that every value of a `dict[str, list[Any]]` is a list of scalar - values (i.e. one of int, float, str, bool or None). - """ - - def val( - d: Optional[dict[str, list[Any]]] - ) -> Optional[dict[str, list[Union[int, float, str, bool, None]]]]: - if d is not None: - if accept_type_none: - accepted_types = (int, float, str, bool, type(None)) - else: - accepted_types = (int, float, str, bool) - for key, values in d.items(): - for value in values: - if not isinstance(value, accepted_types): - raise ValueError( - f"{attribute}[{key}] values must be a scalars " - "(int, float, str, bool, or None). " - f"Given {value} ({type(value)})" - ) - return d - - return val - - def valint(attribute: str, min_val: int = 1): """ Check that an integer attribute (e.g. 
if it is meant to be the ID of a From d7f9ba9cc122f772353cc4464dc8df85865f24dd Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Thu, 9 Jan 2025 10:13:30 +0100 Subject: [PATCH 30/99] task output validator --- .../app/runner/v2/task_interface.py | 47 ++++++++++++++++++- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/fractal_server/app/runner/v2/task_interface.py b/fractal_server/app/runner/v2/task_interface.py index 594da627bd..182f1d4ad0 100644 --- a/fractal_server/app/runner/v2/task_interface.py +++ b/fractal_server/app/runner/v2/task_interface.py @@ -1,22 +1,50 @@ from typing import Any +from typing import Optional +from typing import Union from pydantic import BaseModel from pydantic import Extra from pydantic import Field +from pydantic import root_validator from pydantic import validator from ....images import SingleImageTaskOutput +from fractal_server.app.schemas._validators import valdict_keys from fractal_server.urls import normalize_url +class LegacyFilters(BaseModel, extra=Extra.forbid): + attributes: dict[str, Any] = Field(default_factory=dict) + types: dict[str, bool] = Field(default_factory=dict) + # Validators + _attributes = validator("attributes", allow_reuse=True)( + valdict_keys("attributes") + ) + _types = validator("types", allow_reuse=True)(valdict_keys("types")) + + @validator("attributes") + def validate_attributes( + cls, v: dict[str, Any] + ) -> dict[str, Union[int, float, str, bool, None]]: + for key, value in v.items(): + if not isinstance(value, (int, float, str, bool, type(None))): + raise ValueError( + f"Filters.attributes[{key}] must be a scalar " + "(int, float, str, bool, or None). " + f"Given {value} ({type(value)})" + ) + return v + + class TaskOutput(BaseModel, extra=Extra.forbid): image_list_updates: list[SingleImageTaskOutput] = Field( default_factory=list ) image_list_removals: list[str] = Field(default_factory=list) - type_filters: dict[str, bool] = Field(default_factory=dict) - attribute_filters: dict[str, list[Any]] = Field(default_factory=dict) + + filters: Optional[LegacyFilters] = None + type_filters: Optional[dict[str, bool]] = None def check_zarr_urls_are_unique(self) -> None: zarr_urls = [img.zarr_url for img in self.image_list_updates] @@ -37,6 +65,21 @@ def check_zarr_urls_are_unique(self) -> None: msg = f"{msg}\n{duplicate}" raise ValueError(msg) + @root_validator() + def validate_filters(cls, values): + if values["filters"] is not None: + if values["type_filters"] is not None: + raise ValueError( + "Cannot set both (legacy) 'filters' and 'type_filters'." + ) + elif values["filters"].attributes: + raise ValueError( + "Legacy 'filters' cannot contain 'attributes'." 
+ ) + else: + values["type_filters"] = values["filters"].types + return values + @validator("image_list_removals") def normalize_paths(cls, v: list[str]) -> list[str]: return [normalize_url(zarr_url) for zarr_url in v] From b3ec8447bdf2914f287bdcb9e722ab5c4f20b3bd Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Thu, 9 Jan 2025 10:54:39 +0100 Subject: [PATCH 31/99] Update task_interface.py --- .../app/runner/v2/task_interface.py | 40 +++++++------------ 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/fractal_server/app/runner/v2/task_interface.py b/fractal_server/app/runner/v2/task_interface.py index 182f1d4ad0..dbe882d68e 100644 --- a/fractal_server/app/runner/v2/task_interface.py +++ b/fractal_server/app/runner/v2/task_interface.py @@ -1,6 +1,5 @@ from typing import Any from typing import Optional -from typing import Union from pydantic import BaseModel from pydantic import Extra @@ -14,27 +13,17 @@ class LegacyFilters(BaseModel, extra=Extra.forbid): - attributes: dict[str, Any] = Field(default_factory=dict) + """ + For fractal-server<2.11, task output could include both + `filters["attributes"]` and `filters["types"]`. In the new version + there is a single field, named `type_filters`. + The current schema is only used to convert old type filters into the + new form, but it will reject any attribute filters. + """ + types: dict[str, bool] = Field(default_factory=dict) - # Validators - _attributes = validator("attributes", allow_reuse=True)( - valdict_keys("attributes") - ) _types = validator("types", allow_reuse=True)(valdict_keys("types")) - @validator("attributes") - def validate_attributes( - cls, v: dict[str, Any] - ) -> dict[str, Union[int, float, str, bool, None]]: - for key, value in v.items(): - if not isinstance(value, (int, float, str, bool, type(None))): - raise ValueError( - f"Filters.attributes[{key}] must be a scalar " - "(int, float, str, bool, or None). " - f"Given {value} ({type(value)})" - ) - return v - class TaskOutput(BaseModel, extra=Extra.forbid): @@ -44,7 +33,9 @@ class TaskOutput(BaseModel, extra=Extra.forbid): image_list_removals: list[str] = Field(default_factory=list) filters: Optional[LegacyFilters] = None - type_filters: Optional[dict[str, bool]] = None + type_filters: dict[str, bool] = Field(default_factory=dict) + + # FIXME: add the valdict_keys validator def check_zarr_urls_are_unique(self) -> None: zarr_urls = [img.zarr_url for img in self.image_list_updates] @@ -68,16 +59,15 @@ def check_zarr_urls_are_unique(self) -> None: @root_validator() def validate_filters(cls, values): if values["filters"] is not None: - if values["type_filters"] is not None: + if values["type_filters"] != {}: raise ValueError( "Cannot set both (legacy) 'filters' and 'type_filters'." ) - elif values["filters"].attributes: - raise ValueError( - "Legacy 'filters' cannot contain 'attributes'." 
- ) else: + # Convert legacy filters.types into new type_filters values["type_filters"] = values["filters"].types + values["filters"] = None + return values @validator("image_list_removals") From 27a6c1398d6508cefe4d598cb652608f70900e22 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Thu, 9 Jan 2025 11:09:41 +0100 Subject: [PATCH 32/99] Update runner with new filter logic --- fractal_server/app/runner/v2/__init__.py | 1 + .../app/runner/v2/_local/__init__.py | 6 +++ .../runner/v2/_local_experimental/__init__.py | 2 + .../app/runner/v2/_slurm_ssh/__init__.py | 2 +- .../app/runner/v2/_slurm_sudo/__init__.py | 3 ++ fractal_server/app/runner/v2/runner.py | 41 +++++++++++-------- fractal_server/app/schemas/_validators.py | 15 ++++--- 7 files changed, 46 insertions(+), 24 deletions(-) diff --git a/fractal_server/app/runner/v2/__init__.py b/fractal_server/app/runner/v2/__init__.py index dec83d8d5b..20270866bb 100644 --- a/fractal_server/app/runner/v2/__init__.py +++ b/fractal_server/app/runner/v2/__init__.py @@ -331,6 +331,7 @@ async def submit_workflow( worker_init=worker_init, first_task_index=job.first_task_index, last_task_index=job.last_task_index, + job_attribute_filters=job.attribute_filters, **backend_specific_kwargs, ) diff --git a/fractal_server/app/runner/v2/_local/__init__.py b/fractal_server/app/runner/v2/_local/__init__.py index ac068368ac..e0312832f9 100644 --- a/fractal_server/app/runner/v2/_local/__init__.py +++ b/fractal_server/app/runner/v2/_local/__init__.py @@ -20,6 +20,7 @@ Incidentally, it also represents the reference implementation for a backend. """ from pathlib import Path +from typing import Any from typing import Optional from ....models.v2 import DatasetV2 @@ -39,6 +40,7 @@ def _process_workflow( workflow_dir_local: Path, first_task_index: int, last_task_index: int, + job_attribute_filters: dict[str, list[Any]], ) -> dict: """ Internal processing routine @@ -57,6 +59,7 @@ def _process_workflow( workflow_dir_remote=workflow_dir_local, logger_name=logger_name, submit_setup_call=_local_submit_setup, + job_attribute_filters=job_attribute_filters, ) return new_dataset_attributes @@ -70,6 +73,7 @@ async def process_workflow( first_task_index: Optional[int] = None, last_task_index: Optional[int] = None, logger_name: str, + job_attribute_filters: dict[str, list[Any]], # Slurm-specific user_cache_dir: Optional[str] = None, slurm_user: Optional[str] = None, @@ -121,6 +125,7 @@ async def process_workflow( to the backend executor. This argument is present for compatibility with the standard backend interface, but is ignored in the `local` backend. 
+ FIXME: FIXME Raises: TaskExecutionError: wrapper for errors raised during tasks' execution @@ -155,5 +160,6 @@ async def process_workflow( workflow_dir_local=workflow_dir_local, first_task_index=first_task_index, last_task_index=last_task_index, + job_attribute_filters=job_attribute_filters, ) return new_dataset_attributes diff --git a/fractal_server/app/runner/v2/_local_experimental/__init__.py b/fractal_server/app/runner/v2/_local_experimental/__init__.py index cb87a01a80..f71709cfcf 100644 --- a/fractal_server/app/runner/v2/_local_experimental/__init__.py +++ b/fractal_server/app/runner/v2/_local_experimental/__init__.py @@ -12,6 +12,8 @@ from ._submit_setup import _local_submit_setup from .executor import FractalProcessPoolExecutor +# FIXME: add job.attribute_filters + def _process_workflow( *, diff --git a/fractal_server/app/runner/v2/_slurm_ssh/__init__.py b/fractal_server/app/runner/v2/_slurm_ssh/__init__.py index 3816a779f0..6e23417769 100644 --- a/fractal_server/app/runner/v2/_slurm_ssh/__init__.py +++ b/fractal_server/app/runner/v2/_slurm_ssh/__init__.py @@ -32,7 +32,7 @@ from ._submit_setup import _slurm_submit_setup from fractal_server.logger import set_logger - +# FIXME: add job.attribute_filters logger = set_logger(__name__) diff --git a/fractal_server/app/runner/v2/_slurm_sudo/__init__.py b/fractal_server/app/runner/v2/_slurm_sudo/__init__.py index b2ee5f1d71..49ee96945d 100644 --- a/fractal_server/app/runner/v2/_slurm_sudo/__init__.py +++ b/fractal_server/app/runner/v2/_slurm_sudo/__init__.py @@ -30,6 +30,9 @@ from ._submit_setup import _slurm_submit_setup +# FIXME: add job.attribute_filters + + def _process_workflow( *, workflow: WorkflowV2, diff --git a/fractal_server/app/runner/v2/runner.py b/fractal_server/app/runner/v2/runner.py index 47f7b50881..8f277a0c11 100644 --- a/fractal_server/app/runner/v2/runner.py +++ b/fractal_server/app/runner/v2/runner.py @@ -4,6 +4,7 @@ from copy import copy from copy import deepcopy from pathlib import Path +from typing import Any from typing import Callable from typing import Optional @@ -27,6 +28,7 @@ def execute_tasks_v2( + *, wf_task_list: list[WorkflowTaskV2], dataset: DatasetV2, executor: ThreadPoolExecutor, @@ -34,6 +36,7 @@ def execute_tasks_v2( workflow_dir_remote: Optional[Path] = None, logger_name: Optional[str] = None, submit_setup_call: Callable = no_op_submit_setup_call, + job_attribute_filters: dict[str, list[Any]], ) -> DatasetV2: logger = logging.getLogger(logger_name) @@ -47,6 +50,7 @@ def execute_tasks_v2( zarr_dir = dataset.zarr_dir tmp_images = deepcopy(dataset.images) tmp_type_filters = deepcopy(dataset.type_filters) + tmp_attribute_filters = deepcopy(dataset.attribute_filters) tmp_history = [] for wftask in wf_task_list: @@ -57,10 +61,14 @@ def execute_tasks_v2( # PRE TASK EXECUTION # Get filtered images + pre_attribute_filters = deepcopy(tmp_attribute_filters) + pre_attribute_filters.update(job_attribute_filters) pre_type_filters = copy(tmp_type_filters) pre_type_filters.update(wftask.type_filters) filtered_images = filter_image_list( - images=tmp_images, type_filters=pre_type_filters + images=tmp_images, + type_filters=pre_type_filters, + attribute_filters=pre_attribute_filters, ) # Verify that filtered images comply with task input_types for image in filtered_images: @@ -72,7 +80,7 @@ def execute_tasks_v2( f'Image types: {image["types"]}\n' ) - # TASK EXECUTION (V2) + # TASK EXECUTION if task.type == "non_parallel": current_task_output = run_v2_task_non_parallel( images=filtered_images, @@ -243,31 +251,30 @@ 
def execute_tasks_v2( else: tmp_images.pop(img_search["index"]) - # Find manifest ouptut types - types_from_manifest = task.output_types + # Update type_filters - # Find task-output types - if current_task_output.type_filters is not None: - types_from_task = current_task_output.type_filters - else: - types_from_task = {} + # Assign the type filters based on different sources + # (task manifest and post-execution task output) + type_filters_from_task_manifest = task.output_types + type_filters_from_task_output = current_task_output.type_filters # Check that key sets are disjoint - set_types_from_manifest = set(types_from_manifest.keys()) - set_types_from_task = set(types_from_task.keys()) - if not set_types_from_manifest.isdisjoint(set_types_from_task): - overlap = set_types_from_manifest.intersection(set_types_from_task) + keys_from_manifest = set(type_filters_from_task_manifest.keys()) + keys_from_task_output = set(type_filters_from_task_output.keys()) + if not keys_from_manifest.isdisjoint(keys_from_task_output): + overlap = keys_from_manifest.intersection(keys_from_task_output) raise JobExecutionError( "Some type filters are being set twice, " f"for task '{task_name}'.\n" - f"Types from task output: {types_from_task}\n" - f"Types from task maniest: {types_from_manifest}\n" + f"Types from task output: {type_filters_from_task_output}\n" + "Types from task manifest: " + f"{type_filters_from_task_manifest}\n" f"Overlapping keys: {overlap}" ) # Update filters.types - tmp_type_filters.update(types_from_manifest) - tmp_type_filters.update(types_from_task) + tmp_type_filters.update(type_filters_from_task_manifest) + tmp_type_filters.update(type_filters_from_task_output) # Update history (based on _DatasetHistoryItemV2) history_item = _DatasetHistoryItemV2( diff --git a/fractal_server/app/schemas/_validators.py b/fractal_server/app/schemas/_validators.py index 7b347c184a..76c7cb73d5 100644 --- a/fractal_server/app/schemas/_validators.py +++ b/fractal_server/app/schemas/_validators.py @@ -121,13 +121,16 @@ def validate_attribute_filters( for key, values in attribute_filters.items(): if values: _type = type(values[0]) - if (_type not in (int, float, str, bool, type(None))) or ( - not all(isinstance(value, _type) for value in values) - ): + if not all(isinstance(value, _type) for value in values): raise ValueError( - f"List '{attribute_filters}[{key}] = {values}' " - "does not contain homogeneus valid elements " - "(int, float, str, bool, or None). " + f"attribute_filters[{key}] has values with " + f"non-homogeneous types: {values}." + ) + if _type not in (int, float, str, bool, type(None)): + # FIXME: Review whether None is accepted + raise ValueError( + f"attribute_filters[{key}] has values with " + f"invalid types: {values}." 
) return attribute_filters From 0527cd71ff14bdeec88675302dd99ecfc369201c Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Thu, 9 Jan 2025 11:29:28 +0100 Subject: [PATCH 33/99] add validator to task output type filters --- fractal_server/app/runner/v2/task_interface.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fractal_server/app/runner/v2/task_interface.py b/fractal_server/app/runner/v2/task_interface.py index dbe882d68e..dffec3c6de 100644 --- a/fractal_server/app/runner/v2/task_interface.py +++ b/fractal_server/app/runner/v2/task_interface.py @@ -8,7 +8,8 @@ from pydantic import validator from ....images import SingleImageTaskOutput -from fractal_server.app.schemas._validators import valdict_keys +from fractal_server.app.schemas._validators import root_validate_dict_keys +from fractal_server.app.schemas._validators import validate_type_filters from fractal_server.urls import normalize_url @@ -22,7 +23,7 @@ class LegacyFilters(BaseModel, extra=Extra.forbid): """ types: dict[str, bool] = Field(default_factory=dict) - _types = validator("types", allow_reuse=True)(valdict_keys("types")) + _types = validator("types", allow_reuse=True)(validate_type_filters) class TaskOutput(BaseModel, extra=Extra.forbid): @@ -35,7 +36,12 @@ class TaskOutput(BaseModel, extra=Extra.forbid): filters: Optional[LegacyFilters] = None type_filters: dict[str, bool] = Field(default_factory=dict) - # FIXME: add the valdict_keys validator + _dict_keys = root_validator(pre=True, allow_reuse=True)( + root_validate_dict_keys + ) + _type_filters = validator("type_filters", allow_reuse=True)( + validate_type_filters + ) def check_zarr_urls_are_unique(self) -> None: zarr_urls = [img.zarr_url for img in self.image_list_updates] From efd4080b12a93fd3b770e855c9531f78c7df219b Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Thu, 9 Jan 2025 12:42:47 +0100 Subject: [PATCH 34/99] Introduce `AttributeFiltersType` and accept `None` as a value for attribute filters --- fractal_server/images/models.py | 2 ++ fractal_server/images/tools.py | 8 ++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/fractal_server/images/models.py b/fractal_server/images/models.py index 45eb84953d..611699e321 100644 --- a/fractal_server/images/models.py +++ b/fractal_server/images/models.py @@ -9,6 +9,8 @@ from fractal_server.app.schemas._validators import valdict_keys from fractal_server.urls import normalize_url +AttributeFiltersType = dict[str, Optional[list[Any]]] + class SingleImageBase(BaseModel): """ diff --git a/fractal_server/images/tools.py b/fractal_server/images/tools.py index 9f78a1f554..a9cf6d6b5c 100644 --- a/fractal_server/images/tools.py +++ b/fractal_server/images/tools.py @@ -4,6 +4,8 @@ from typing import Optional from typing import Union +from fractal_server.images.models import AttributeFiltersType + ImageSearch = dict[Literal["image", "index"], Union[int, dict[str, Any]]] @@ -33,7 +35,7 @@ def find_image_by_zarr_url( def match_filter( image: dict[str, Any], type_filters: dict[str, bool], - attribute_filters: dict[str, list[Any]], + attribute_filters: AttributeFiltersType, ) -> bool: """ Find whether an image matches a filter set. 
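In brief, the rule this patch introduces: for every key in `attribute_filters`, an image matches only if its attribute value is one of the listed values, and `None` disables filtering for that key (the hunk below implements this). A minimal standalone sketch of the assumed semantics, using a hypothetical helper name rather than the verbatim library code:

    from typing import Any, Optional

    AttributeFiltersType = dict[str, Optional[list[Any]]]

    def matches_attribute_filters(
        attributes: dict[str, Any],
        attribute_filters: AttributeFiltersType,
    ) -> bool:
        # A key with values=None is skipped: the filter is disabled.
        # Otherwise the image attribute must be one of the listed values
        # (a missing attribute behaves like a non-matching value).
        for key, values in attribute_filters.items():
            if values is None:
                continue
            if attributes.get(key) not in values:
                return False
        return True

    assert matches_attribute_filters({"well": "A01"}, {"well": ["A01", "A02"]})
    assert matches_attribute_filters({"well": "A01"}, {"well": None})
    assert not matches_attribute_filters({"well": "A03"}, {"well": ["A01"]})

Using `None` (rather than removing the key) lets callers disable an inherited filter with a plain `dict.update`, which is what `test_attribute_filters_set_to_none` exercises below.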
@@ -54,7 +56,9 @@ def match_filter( # Verify match with attributes (only for not-None filters) for key, values in attribute_filters.items(): - if not image["attributes"].get(key) in values: + if values is None: + continue + if image["attributes"].get(key) not in values: return False return True From 7a4ceb9ead387b96f7e673a5d2c3271009c341d3 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Thu, 9 Jan 2025 12:44:25 +0100 Subject: [PATCH 35/99] Extend usage of `AttributeFiltersType` --- fractal_server/app/models/v2/dataset.py | 3 ++- fractal_server/app/models/v2/job.py | 3 ++- fractal_server/app/routes/api/v2/images.py | 3 ++- fractal_server/app/runner/filenames.py | 3 +++ fractal_server/app/runner/v2/_local/__init__.py | 6 +++--- fractal_server/app/runner/v2/runner.py | 4 ++-- 6 files changed, 14 insertions(+), 8 deletions(-) diff --git a/fractal_server/app/models/v2/dataset.py b/fractal_server/app/models/v2/dataset.py index 85c1f7cf22..e21ce80679 100644 --- a/fractal_server/app/models/v2/dataset.py +++ b/fractal_server/app/models/v2/dataset.py @@ -11,6 +11,7 @@ from sqlmodel import SQLModel from ....utils import get_timestamp +from fractal_server.images.models import AttributeFiltersType class DatasetV2(SQLModel, table=True): @@ -47,7 +48,7 @@ class Config: type_filters: dict[str, bool] = Field( sa_column=Column(JSON, nullable=False, server_default="{}") ) - attribute_filters: dict[str, list[Any]] = Field( + attribute_filters: AttributeFiltersType = Field( sa_column=Column(JSON, nullable=False, server_default="{}") ) diff --git a/fractal_server/app/models/v2/job.py b/fractal_server/app/models/v2/job.py index b8dcbc0804..26efd12b07 100644 --- a/fractal_server/app/models/v2/job.py +++ b/fractal_server/app/models/v2/job.py @@ -10,6 +10,7 @@ from ....utils import get_timestamp from ...schemas.v2 import JobStatusTypeV2 +from fractal_server.images.models import AttributeFiltersType class JobV2(SQLModel, table=True): @@ -50,6 +51,6 @@ class Config: status: str = JobStatusTypeV2.SUBMITTED log: Optional[str] = None - attribute_filters: dict[str, list[Any]] = Field( + attribute_filters: AttributeFiltersType = Field( sa_column=Column(JSON, nullable=False, server_default="{}") ) diff --git a/fractal_server/app/routes/api/v2/images.py b/fractal_server/app/routes/api/v2/images.py index cea374c305..df778a1eb5 100644 --- a/fractal_server/app/routes/api/v2/images.py +++ b/fractal_server/app/routes/api/v2/images.py @@ -22,6 +22,7 @@ from fractal_server.app.schemas._validators import validate_type_filters from fractal_server.images import SingleImage from fractal_server.images import SingleImageUpdate +from fractal_server.images.models import AttributeFiltersType from fractal_server.images.tools import find_image_by_zarr_url from fractal_server.images.tools import match_filter @@ -43,7 +44,7 @@ class ImagePage(BaseModel): class ImageQuery(BaseModel): zarr_url: Optional[str] type_filters: dict[str, bool] = Field(default_factory=dict) - attribute_filters: dict[str, list[Any]] = Field(default_factory=dict) + attribute_filters: AttributeFiltersType = Field(default_factory=dict) _dict_keys = root_validator(pre=True, allow_reuse=True)( root_validate_dict_keys diff --git a/fractal_server/app/runner/filenames.py b/fractal_server/app/runner/filenames.py index db80a5efa0..242e4f1174 100644 --- a/fractal_server/app/runner/filenames.py +++ b/fractal_server/app/runner/filenames.py @@ -5,3 +5,6 @@ METADATA_FILENAME = "metadata.json" SHUTDOWN_FILENAME = "shutdown" 
WORKFLOW_LOG_FILENAME = "workflow.log" + + +FILTERS_FILENAME = "fake" diff --git a/fractal_server/app/runner/v2/_local/__init__.py b/fractal_server/app/runner/v2/_local/__init__.py index e0312832f9..0b680b4176 100644 --- a/fractal_server/app/runner/v2/_local/__init__.py +++ b/fractal_server/app/runner/v2/_local/__init__.py @@ -20,7 +20,6 @@ Incidentally, it also represents the reference implementation for a backend. """ from pathlib import Path -from typing import Any from typing import Optional from ....models.v2 import DatasetV2 @@ -30,6 +29,7 @@ from ..runner import execute_tasks_v2 from ._submit_setup import _local_submit_setup from .executor import FractalThreadPoolExecutor +from fractal_server.images.models import AttributeFiltersType def _process_workflow( @@ -40,7 +40,7 @@ def _process_workflow( workflow_dir_local: Path, first_task_index: int, last_task_index: int, - job_attribute_filters: dict[str, list[Any]], + job_attribute_filters: AttributeFiltersType, ) -> dict: """ Internal processing routine @@ -73,7 +73,7 @@ async def process_workflow( first_task_index: Optional[int] = None, last_task_index: Optional[int] = None, logger_name: str, - job_attribute_filters: dict[str, list[Any]], + job_attribute_filters: AttributeFiltersType, # Slurm-specific user_cache_dir: Optional[str] = None, slurm_user: Optional[str] = None, diff --git a/fractal_server/app/runner/v2/runner.py b/fractal_server/app/runner/v2/runner.py index 8f277a0c11..71340cec9d 100644 --- a/fractal_server/app/runner/v2/runner.py +++ b/fractal_server/app/runner/v2/runner.py @@ -4,7 +4,6 @@ from copy import copy from copy import deepcopy from pathlib import Path -from typing import Any from typing import Callable from typing import Optional @@ -25,6 +24,7 @@ from fractal_server.app.models.v2 import WorkflowTaskV2 from fractal_server.app.schemas.v2.dataset import _DatasetHistoryItemV2 from fractal_server.app.schemas.v2.workflowtask import WorkflowTaskStatusTypeV2 +from fractal_server.images.models import AttributeFiltersType def execute_tasks_v2( @@ -36,7 +36,7 @@ def execute_tasks_v2( workflow_dir_remote: Optional[Path] = None, logger_name: Optional[str] = None, submit_setup_call: Callable = no_op_submit_setup_call, - job_attribute_filters: dict[str, list[Any]], + job_attribute_filters: AttributeFiltersType, ) -> DatasetV2: logger = logging.getLogger(logger_name) From 51e1259d4d847248fed5814830ea1288050be192 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Thu, 9 Jan 2025 12:44:56 +0100 Subject: [PATCH 36/99] Extend usage of `AttributeFiltersType` --- fractal_server/app/schemas/v2/dataset.py | 9 +++++---- fractal_server/app/schemas/v2/dumps.py | 5 +++-- fractal_server/app/schemas/v2/job.py | 6 +++--- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/fractal_server/app/schemas/v2/dataset.py b/fractal_server/app/schemas/v2/dataset.py index 604e7f2702..93ab478155 100644 --- a/fractal_server/app/schemas/v2/dataset.py +++ b/fractal_server/app/schemas/v2/dataset.py @@ -16,6 +16,7 @@ from .project import ProjectReadV2 from .workflowtask import WorkflowTaskStatusTypeV2 from fractal_server.images import SingleImage +from fractal_server.images.models import AttributeFiltersType from fractal_server.urls import normalize_url @@ -39,7 +40,7 @@ class DatasetCreateV2(BaseModel, extra=Extra.forbid): zarr_dir: Optional[str] = None type_filters: dict[str, bool] = Field(default_factory=dict) - attribute_filters: dict[str, list[Any]] = Field(default_factory=dict) + attribute_filters: 
AttributeFiltersType = Field(default_factory=dict) # Validators @@ -76,7 +77,7 @@ class DatasetReadV2(BaseModel): zarr_dir: str type_filters: dict[str, bool] - attribute_filters: dict[str, list[Any]] + attribute_filters: AttributeFiltersType class DatasetUpdateV2(BaseModel, extra=Extra.forbid): @@ -123,7 +124,7 @@ class DatasetImportV2(BaseModel, extra=Extra.forbid): images: list[SingleImage] = Field(default_factory=list) type_filters: dict[str, bool] = Field(default_factory=dict) - attribute_filters: dict[str, list[Any]] = Field(default_factory=dict) + attribute_filters: AttributeFiltersType = Field(default_factory=dict) # Validators @@ -157,4 +158,4 @@ class DatasetExportV2(BaseModel): zarr_dir: str images: list[SingleImage] type_filters: dict[str, bool] - attribute_filters: dict[str, list[Any]] + attribute_filters: AttributeFiltersType diff --git a/fractal_server/app/schemas/v2/dumps.py b/fractal_server/app/schemas/v2/dumps.py index 2e2094a277..357b2ebf7f 100644 --- a/fractal_server/app/schemas/v2/dumps.py +++ b/fractal_server/app/schemas/v2/dumps.py @@ -8,12 +8,13 @@ 1. In the "*_dump" attributes of Job models; 2. In the `_DatasetHistoryItem.workflowtask` model, to trim its size. """ -from typing import Any from typing import Optional from pydantic import BaseModel from pydantic import Extra +from fractal_server.images.models import AttributeFiltersType + class ProjectDumpV2(BaseModel, extra=Extra.forbid): @@ -71,4 +72,4 @@ class DatasetDumpV2(BaseModel, extra=Extra.forbid): zarr_dir: str type_filters: dict[str, bool] - attribute_filters: dict[str, list[Any]] + attribute_filters: AttributeFiltersType diff --git a/fractal_server/app/schemas/v2/job.py b/fractal_server/app/schemas/v2/job.py index ef7d8780f8..0c9fb9006b 100644 --- a/fractal_server/app/schemas/v2/job.py +++ b/fractal_server/app/schemas/v2/job.py @@ -1,6 +1,5 @@ from datetime import datetime from enum import Enum -from typing import Any from typing import Optional from pydantic import BaseModel @@ -16,6 +15,7 @@ from .dumps import DatasetDumpV2 from .dumps import ProjectDumpV2 from .dumps import WorkflowDumpV2 +from fractal_server.images.models import AttributeFiltersType class JobStatusTypeV2(str, Enum): @@ -46,7 +46,7 @@ class JobCreateV2(BaseModel, extra=Extra.forbid): slurm_account: Optional[StrictStr] = None worker_init: Optional[str] - attribute_filters: dict[str, list[Any]] = Field(default_factory=dict) + attribute_filters: AttributeFiltersType = Field(default_factory=dict) # Validators _worker_init = validator("worker_init", allow_reuse=True)( @@ -112,7 +112,7 @@ class JobReadV2(BaseModel): first_task_index: Optional[int] last_task_index: Optional[int] worker_init: Optional[str] - attribute_filters: dict[str, list[Any]] + attribute_filters: AttributeFiltersType class JobUpdateV2(BaseModel, extra=Extra.forbid): From 459638cfee35a1e0405e1f54b85099ebeee9013f Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Thu, 9 Jan 2025 12:50:07 +0100 Subject: [PATCH 37/99] Adapt test_filter_image_list_with_filters --- tests/v2/05_images/test_benchmark_helper_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/v2/05_images/test_benchmark_helper_functions.py b/tests/v2/05_images/test_benchmark_helper_functions.py index 6a296a2f40..a1fded05be 100644 --- a/tests/v2/05_images/test_benchmark_helper_functions.py +++ b/tests/v2/05_images/test_benchmark_helper_functions.py @@ -45,7 +45,7 @@ def test_filter_image_list_with_filters( ): new_list = filter_image_list( 
images=images, - attribute_filters=dict(a1=0, a2="a2", a3=None), + attribute_filters=dict(a1=[0], a2=["a2"], a3=None), type_filters=dict(t1=True, t2=False), ) debug(len(images), len(new_list)) From c37280427ab6f6420ce85ac226bf80f643837be6 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Thu, 9 Jan 2025 12:51:36 +0100 Subject: [PATCH 38/99] Add FIXME --- fractal_server/app/runner/filenames.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fractal_server/app/runner/filenames.py b/fractal_server/app/runner/filenames.py index 242e4f1174..bedc9b1d85 100644 --- a/fractal_server/app/runner/filenames.py +++ b/fractal_server/app/runner/filenames.py @@ -7,4 +7,4 @@ WORKFLOW_LOG_FILENAME = "workflow.log" -FILTERS_FILENAME = "fake" +FILTERS_FILENAME = "FIXME" From 4c322600fa756568fa5290d4a69676d6a52847b0 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Thu, 9 Jan 2025 12:51:57 +0100 Subject: [PATCH 39/99] Update `validate_attribute_filters` (accepting `values=None`) --- fractal_server/app/schemas/_validators.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/fractal_server/app/schemas/_validators.py b/fractal_server/app/schemas/_validators.py index 76c7cb73d5..32b51b3601 100644 --- a/fractal_server/app/schemas/_validators.py +++ b/fractal_server/app/schemas/_validators.py @@ -112,22 +112,31 @@ def validate_type_filters( def validate_attribute_filters( - attribute_filters: Optional[dict[str, list[Any]]] -) -> Optional[dict[str, list[Any]]]: + attribute_filters: Optional[dict[str, Optional[list[Any]]]], +) -> Optional[dict[str, Optional[list[Any]]]]: if attribute_filters is not None: attribute_filters = valdict_keys("attribute_filters")( attribute_filters ) for key, values in attribute_filters.items(): - if values: + if values is None: + # values=None corresponds to not applying any filter for + # attribute `key` + pass + elif values == []: + # WARNING: in this case, no image can match with the current + # filter. In the future we may deprecate this possibility. + pass + else: + # values is a non-empty list, and its items must all be of the + # same scalar non-None type _type = type(values[0]) if not all(isinstance(value, _type) for value in values): raise ValueError( f"attribute_filters[{key}] has values with " f"non-homogeneous types: {values}." ) - if _type not in (int, float, str, bool, type(None)): - # FIXME: Review whether None is accepted + if _type not in (int, float, str, bool): raise ValueError( f"attribute_filters[{key}] has values with " f"invalid types: {values}." From eb34d54f0e0d43321f2c7dd0948cb998f35b275b Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Thu, 9 Jan 2025 12:52:08 +0100 Subject: [PATCH 40/99] Add `test_attribute_filters_set_to_none` --- tests/v2/05_images/test_unit_image_tools.py | 28 +++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/v2/05_images/test_unit_image_tools.py b/tests/v2/05_images/test_unit_image_tools.py index 5ab399074e..f1751fc1d9 100644 --- a/tests/v2/05_images/test_unit_image_tools.py +++ b/tests/v2/05_images/test_unit_image_tools.py @@ -139,6 +139,34 @@ def test_match_filter(): ) +def test_attribute_filters_set_to_none(): + """ + This test shows how to disable a previously-set attribute filter by just + using `dict.update` (that is, without e.g. `dict.pop`). 
+ """ + # Image not matching filter + image = dict( + types={}, + attributes={"key2": "invalid-value2"}, + ) + attribute_filters = {"key2": ["value2"]} + assert not match_filter( + image=image, + type_filters={}, + attribute_filters=attribute_filters, + ) + + # Unset filter by replacing ["value2"] with None + attribute_filters.update({"key2": None}) + + # Image matches filter + assert match_filter( + image=image, + type_filters={}, + attribute_filters=attribute_filters, + ) + + def test_filter_image_list(): # Empty res = filter_image_list(images) From bb3fa2f595f8b9487574589cf384ae666c675d32 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Thu, 9 Jan 2025 13:07:16 +0100 Subject: [PATCH 41/99] Add job_attribute_filters to all runner backends --- .../app/runner/v2/_local_experimental/__init__.py | 7 +++++-- fractal_server/app/runner/v2/_slurm_ssh/__init__.py | 10 +++++++--- fractal_server/app/runner/v2/_slurm_sudo/__init__.py | 8 +++++--- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/fractal_server/app/runner/v2/_local_experimental/__init__.py b/fractal_server/app/runner/v2/_local_experimental/__init__.py index f71709cfcf..43eaee85c5 100644 --- a/fractal_server/app/runner/v2/_local_experimental/__init__.py +++ b/fractal_server/app/runner/v2/_local_experimental/__init__.py @@ -11,8 +11,7 @@ from ..runner import execute_tasks_v2 from ._submit_setup import _local_submit_setup from .executor import FractalProcessPoolExecutor - -# FIXME: add job.attribute_filters +from fractal_server.images.models import AttributeFiltersType def _process_workflow( @@ -23,6 +22,7 @@ def _process_workflow( workflow_dir_local: Path, first_task_index: int, last_task_index: int, + job_attribute_filters: AttributeFiltersType, ) -> dict: """ Internal processing routine @@ -47,6 +47,7 @@ def _process_workflow( workflow_dir_remote=workflow_dir_local, logger_name=logger_name, submit_setup_call=_local_submit_setup, + job_attribute_filters=job_attribute_filters, ) except BrokenProcessPool as e: raise JobExecutionError( @@ -68,6 +69,7 @@ async def process_workflow( first_task_index: Optional[int] = None, last_task_index: Optional[int] = None, logger_name: str, + job_attribute_filters: AttributeFiltersType, # Slurm-specific user_cache_dir: Optional[str] = None, slurm_user: Optional[str] = None, @@ -153,5 +155,6 @@ async def process_workflow( workflow_dir_local=workflow_dir_local, first_task_index=first_task_index, last_task_index=last_task_index, + job_attribute_filters=job_attribute_filters, ) return new_dataset_attributes diff --git a/fractal_server/app/runner/v2/_slurm_ssh/__init__.py b/fractal_server/app/runner/v2/_slurm_ssh/__init__.py index 6e23417769..c26508c42b 100644 --- a/fractal_server/app/runner/v2/_slurm_ssh/__init__.py +++ b/fractal_server/app/runner/v2/_slurm_ssh/__init__.py @@ -30,9 +30,9 @@ from ...set_start_and_last_task_index import set_start_and_last_task_index from ..runner import execute_tasks_v2 from ._submit_setup import _slurm_submit_setup +from fractal_server.images.models import AttributeFiltersType from fractal_server.logger import set_logger -# FIXME: add job.attribute_filters logger = set_logger(__name__) @@ -47,6 +47,7 @@ def _process_workflow( last_task_index: int, fractal_ssh: FractalSSH, worker_init: Optional[Union[str, list[str]]] = None, + job_attribute_filters: AttributeFiltersType, ) -> dict[str, Any]: """ Internal processing routine for the SLURM backend @@ -90,6 +91,7 @@ def _process_workflow( 
workflow_dir_remote=workflow_dir_remote, logger_name=logger_name, submit_setup_call=_slurm_submit_setup, + job_attribute_filters=job_attribute_filters, ) return new_dataset_attributes @@ -103,12 +105,13 @@ async def process_workflow( first_task_index: Optional[int] = None, last_task_index: Optional[int] = None, logger_name: str, - # Not used + job_attribute_filters: AttributeFiltersType, fractal_ssh: FractalSSH, + worker_init: Optional[str] = None, + # Not used user_cache_dir: Optional[str] = None, slurm_user: Optional[str] = None, slurm_account: Optional[str] = None, - worker_init: Optional[str] = None, ) -> dict: """ Process workflow (SLURM backend public interface) @@ -132,5 +135,6 @@ async def process_workflow( last_task_index=last_task_index, worker_init=worker_init, fractal_ssh=fractal_ssh, + job_attribute_filters=job_attribute_filters, ) return new_dataset_attributes diff --git a/fractal_server/app/runner/v2/_slurm_sudo/__init__.py b/fractal_server/app/runner/v2/_slurm_sudo/__init__.py index 49ee96945d..8b11a4c2aa 100644 --- a/fractal_server/app/runner/v2/_slurm_sudo/__init__.py +++ b/fractal_server/app/runner/v2/_slurm_sudo/__init__.py @@ -28,9 +28,7 @@ from ...set_start_and_last_task_index import set_start_and_last_task_index from ..runner import execute_tasks_v2 from ._submit_setup import _slurm_submit_setup - - -# FIXME: add job.attribute_filters +from fractal_server.images.models import AttributeFiltersType def _process_workflow( @@ -46,6 +44,7 @@ def _process_workflow( slurm_account: Optional[str] = None, user_cache_dir: str, worker_init: Optional[Union[str, list[str]]] = None, + job_attribute_filters: AttributeFiltersType, ) -> dict[str, Any]: """ Internal processing routine for the SLURM backend @@ -86,6 +85,7 @@ def _process_workflow( workflow_dir_remote=workflow_dir_remote, logger_name=logger_name, submit_setup_call=_slurm_submit_setup, + job_attribute_filters=job_attribute_filters, ) return new_dataset_attributes @@ -99,6 +99,7 @@ async def process_workflow( first_task_index: Optional[int] = None, last_task_index: Optional[int] = None, logger_name: str, + job_attribute_filters: AttributeFiltersType, # Slurm-specific user_cache_dir: Optional[str] = None, slurm_user: Optional[str] = None, @@ -129,5 +130,6 @@ async def process_workflow( slurm_user=slurm_user, slurm_account=slurm_account, worker_init=worker_init, + job_attribute_filters=job_attribute_filters, ) return new_dataset_attributes From 7c94711636648ef6baed9d3207fbce1ac4ab1515 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Thu, 9 Jan 2025 13:09:54 +0100 Subject: [PATCH 42/99] Fix version migration script --- .../ec7329221c1a_split_filters_and_keep_old_columns.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fractal_server/migrations/versions/ec7329221c1a_split_filters_and_keep_old_columns.py b/fractal_server/migrations/versions/ec7329221c1a_split_filters_and_keep_old_columns.py index 214c482682..1557cb224a 100644 --- a/fractal_server/migrations/versions/ec7329221c1a_split_filters_and_keep_old_columns.py +++ b/fractal_server/migrations/versions/ec7329221c1a_split_filters_and_keep_old_columns.py @@ -33,11 +33,9 @@ def upgrade() -> None: ) batch_op.alter_column( "filters", - existing_type=sa.JSON(astext_type=sa.Text()), + existing_type=sa.JSON(), nullable=True, - existing_server_default=sa.text( - '\'{"attributes": {}, "types": {}}\'::json' - ), + existing_server_default='{"attributes": {}, "types": {}}', ) with op.batch_alter_table("jobv2", 
schema=None) as batch_op: From dac96c9e86f820898edfd7952f834ed56121821b Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Thu, 9 Jan 2025 13:11:39 +0100 Subject: [PATCH 43/99] Fix version migration script --- ...21c1a_split_filters_and_keep_old_columns.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/fractal_server/migrations/versions/ec7329221c1a_split_filters_and_keep_old_columns.py b/fractal_server/migrations/versions/ec7329221c1a_split_filters_and_keep_old_columns.py index 1557cb224a..339c2a0630 100644 --- a/fractal_server/migrations/versions/ec7329221c1a_split_filters_and_keep_old_columns.py +++ b/fractal_server/migrations/versions/ec7329221c1a_split_filters_and_keep_old_columns.py @@ -56,11 +56,9 @@ def upgrade() -> None: ) batch_op.alter_column( "input_filters", - existing_type=sa.JSON(astext_type=sa.Text()), + existing_type=sa.JSON(), nullable=True, - existing_server_default=sa.text( - '\'{"attributes": {}, "types": {}}\'::json' - ), + existing_server_default='{"attributes": {}, "types": {}}', ) # ### end Alembic commands ### @@ -71,11 +69,9 @@ def downgrade() -> None: with op.batch_alter_table("workflowtaskv2", schema=None) as batch_op: batch_op.alter_column( "input_filters", - existing_type=sa.JSON(astext_type=sa.Text()), + existing_type=sa.JSON(), nullable=False, - existing_server_default=sa.text( - '\'{"attributes": {}, "types": {}}\'::json' - ), + existing_server_default='{"attributes": {}, "types": {}}', ) batch_op.drop_column("type_filters") @@ -85,11 +81,9 @@ def downgrade() -> None: with op.batch_alter_table("datasetv2", schema=None) as batch_op: batch_op.alter_column( "filters", - existing_type=sa.JSON(astext_type=sa.Text()), + existing_type=sa.JSON(), nullable=False, - existing_server_default=sa.text( - '\'{"attributes": {}, "types": {}}\'::json' - ), + existing_server_default='{"attributes": {}, "types": {}}', ) batch_op.drop_column("attribute_filters") batch_op.drop_column("type_filters") From e49efea02686bc00ceee09443729512f99d7b9c5 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Thu, 9 Jan 2025 16:14:59 +0100 Subject: [PATCH 44/99] test image models --- fractal_server/images/models.py | 4 +- tests/v2/05_images/test_image_models.py | 286 +++++++++++++++++------- 2 files changed, 205 insertions(+), 85 deletions(-) diff --git a/fractal_server/images/models.py b/fractal_server/images/models.py index 611699e321..1c1863f1bb 100644 --- a/fractal_server/images/models.py +++ b/fractal_server/images/models.py @@ -84,8 +84,8 @@ def validate_attributes( class SingleImageUpdate(BaseModel): zarr_url: str - attributes: Optional[dict[str, Any]] - types: Optional[dict[str, bool]] + attributes: Optional[dict[str, Any]] = None + types: Optional[dict[str, bool]] = None @validator("zarr_url") def normalize_zarr_url(cls, v: str) -> str: diff --git a/tests/v2/05_images/test_image_models.py b/tests/v2/05_images/test_image_models.py index 918d30d313..7660ada410 100644 --- a/tests/v2/05_images/test_image_models.py +++ b/tests/v2/05_images/test_image_models.py @@ -1,109 +1,229 @@ -import pytest +from typing import TypeVar + from pydantic import ValidationError -from fractal_server.images import SingleImage -from fractal_server.images import SingleImageTaskOutput -from fractal_server.images import SingleImageUpdate +from fractal_server.images.models import SingleImage from fractal_server.images.models import SingleImageBase +from fractal_server.images.models import SingleImageTaskOutput +from 
fractal_server.images.models import SingleImageUpdate +T = TypeVar("T") -def test_single_image(): - with pytest.raises(ValidationError): - SingleImage() +def image_ok(model: T = SingleImageBase, **kwargs) -> T: + return model(**kwargs) - assert SingleImage(zarr_url="/somewhere").zarr_url == "/somewhere" - assert SingleImage(zarr_url="/somewhere", origin="/foo").origin == "/foo" - assert SingleImage(zarr_url="/somewhere", origin=None).origin is None +def image_fail(model: T = SingleImageBase, **kwargs) -> str: + try: + model(**kwargs) + raise AssertionError(f"{model=}, {kwargs=}") + except ValidationError as e: + return str(e) - valid_attributes = dict(a="string", b=3, c=0.33, d=True) - assert ( - SingleImage( - zarr_url="/somewhere", attributes=valid_attributes - ).attributes - == valid_attributes - ) - invalid_attributes = [ - dict(a=None), - dict(a=["l", "i", "s", "t"]), - dict(a={"d": "i", "c": "t"}), - ] - for attr in invalid_attributes: - with pytest.raises(ValidationError): - SingleImage(zarr_url="/somewhere", attributes=attr) - - valid_types = dict(a=True, b=False) - assert ( - SingleImage(zarr_url="/somewhere", types=valid_types).types - == valid_types - ) - invalid_types = dict(a="not a bool") - with pytest.raises(ValidationError): - SingleImage(zarr_url="/somewhere", types=invalid_types) +def test_SingleImageBase(): - -def test_url_normalization(): + image_fail() # zarr_url - assert SingleImage(zarr_url="/valid/url").zarr_url == "/valid/url" - assert SingleImage(zarr_url="/remove/slash/").zarr_url == "/remove/slash" + image = image_ok(zarr_url="/x") + assert image.dict() == { + "zarr_url": "/x", + "origin": None, + "attributes": {}, + "types": {}, + } + image_fail(zarr_url="x") # see 'test_url_normalization' + image_fail(zarr_url=None) - with pytest.raises(ValidationError) as e: - SingleImage(zarr_url="s3/foo") - assert "S3 handling" in e._excinfo[1].errors()[0]["msg"] + # origin + image = image_ok(zarr_url="/x", origin="/y") + assert image.origin == "/y" + image = image_ok(zarr_url="/x", origin=None) + assert image.origin is None + image_fail(zarr_url="/x", origin="y") + image_fail(origin="/y") + + # attributes + valid_attributes = { + "int": 1, + "float": 1.2, + "string": "abc", + "bool": True, + "null": None, + "list": ["l", "i", "s", "t"], + "dict": {"d": "i", "c": "t"}, + "function": lambda x: x, + "type": int, + } # Any + image = image_ok(zarr_url="/x", attributes=valid_attributes) + assert image.attributes == valid_attributes + invalid_attributes = { + "repeated": 1, + " repeated ": 2, + } + image_fail(zarr_url="/x", attributes=invalid_attributes) + + # types + valid_types = {"y": True, "n": False} # only booleans + image = image_ok(zarr_url="/x", types=valid_types) + assert image.types == valid_types + image_fail(zarr_url="/x", types={"a": "not a bool"}) + image_fail(zarr_url="/x", types={"a": True, " a": True}) + image_ok(zarr_url="/x", types={1: True}) - with pytest.raises(ValidationError) as e: - SingleImage(zarr_url="https://foo.bar") - assert "URLs must begin" in e._excinfo[1].errors()[0]["msg"] - # origin - assert SingleImage(zarr_url="/valid/url", origin=None).origin is None - assert ( - SingleImage(zarr_url="/valid/url", origin="/valid/origin").origin - == "/valid/origin" +def test_url_normalization(): + + image = image_ok(zarr_url="/valid/url") + assert image.zarr_url == "/valid/url" + image = image_ok(zarr_url="/remove/slash/") + assert image.zarr_url == "/remove/slash" + + e = image_fail(zarr_url="s3/foo") + assert "S3 handling" in e + e = 
image_fail(zarr_url="https://foo.bar") + assert "URLs must begin" in e + + image_ok(zarr_url="/x", origin=None) + image_ok(zarr_url="/x", origin="/y") + image = image_ok(zarr_url="/x", origin="/y///") + assert image.origin == "/y" + + e = image_fail(zarr_url="/x", origin="s3/foo") + assert "S3 handling" in e + e = image_fail(zarr_url="/x", origin="https://foo.bar") + assert "URLs must begin" in e + + +def test_SingleImageTaskOutput(): + + image_ok( + model=SingleImageTaskOutput, + zarr_url="/x", + attributes={ + "int": 1, + "float": 1.2, + "string": "abc", + "bool": True, + "null": None, + }, ) - assert ( - SingleImage(zarr_url="/valid/url", origin="/remove/slash//").origin - == "/remove/slash" + image_fail( + model=SingleImageTaskOutput, + zarr_url="/x", + attributes={"list": ["l", "i", "s", "t"]}, + ) + image_fail( + model=SingleImageTaskOutput, + zarr_url="/x", + attributes={"dict": {"d": "i", "c": "t"}}, + ) + image_fail( + model=SingleImageTaskOutput, + zarr_url="/x", + attributes={"function": lambda x: x}, + ) + image_fail( + model=SingleImageTaskOutput, + zarr_url="/x", + attributes={"type": int}, + ) + image_fail( + model=SingleImageTaskOutput, + zarr_url="/x", + attributes={"repeated": 1, " repeated": 2}, ) - with pytest.raises(ValidationError) as e: - SingleImage(zarr_url="/valid/url", origin="s3/foo") - assert "S3 handling" in e._excinfo[1].errors()[0]["msg"] - with pytest.raises(ValidationError) as e: - SingleImage(zarr_url="/valid/url", origin="http://foo.bar") - assert "URLs must begin" in e._excinfo[1].errors()[0]["msg"] - - -def test_single_image_task_output(): - base = SingleImageBase(zarr_url="/zarr/url", attributes={"x": None}) - # SingleImageTaskOutput accepts 'None' as value - SingleImageTaskOutput(**base.dict()) - # SingleImage does not accept 'None' as value - with pytest.raises(ValidationError): - SingleImage(**base.dict()) +def test_SingleImage(): -def test_single_image_update(): + image_ok( + model=SingleImage, + zarr_url="/x", + attributes={ + "int": 1, + "float": 1.2, + "string": "abc", + "bool": True, + }, + ) + image_fail( + model=SingleImage, + zarr_url="/x", + attributes={"null": None}, + ) + image_fail( + model=SingleImage, + zarr_url="/x", + attributes={"list": ["l", "i", "s", "t"]}, + ) + image_fail( + model=SingleImage, + zarr_url="/x", + attributes={"dict": {"d": "i", "c": "t"}}, + ) + image_fail( + model=SingleImage, + zarr_url="/x", + attributes={"function": lambda x: x}, + ) + image_fail( + model=SingleImage, + zarr_url="/x", + attributes={"type": int}, + ) + image_fail( + model=SingleImage, + zarr_url="/x", + attributes={"repeated": 1, " repeated": 2}, + ) - with pytest.raises(ValidationError): - SingleImageUpdate() - SingleImageUpdate(zarr_url="/something") - # override SingleImageBase validation - args = dict(zarr_url="/something", attributes=None) - with pytest.raises(ValidationError): - SingleImageBase(**args) - SingleImageUpdate(**args) +def test_SingleImageUpdate(): - args = dict(zarr_url="/something", types=None) - with pytest.raises(ValidationError): - SingleImageBase(**args) - SingleImageUpdate(**args) + image_fail(model=SingleImageUpdate) - with pytest.raises(ValidationError): - SingleImageUpdate( - zarr_url="/something", attributes={"invalid": ["l", "i", "s", "t"]} + # zarr_url + image = image_ok(model=SingleImageUpdate, zarr_url="/x") + assert image.dict() == { + "zarr_url": "/x", + "attributes": None, + "types": None, + } + image_fail(model=SingleImageUpdate, zarr_url="x") + image_fail(model=SingleImageUpdate, zarr_url=None) + + # 
attributes + valid_attributes = { + "int": 1, + "float": 1.2, + "string": "abc", + "bool": True, + } + image = image_ok( + model=SingleImageUpdate, zarr_url="/x", attributes=valid_attributes + ) + assert image.attributes == valid_attributes + for invalid_attributes in [ + {"null": None}, + {"list": ["l", "i", "s", "t"]}, + {"dict": {"d": "i", "c": "t"}}, + {"function": lambda x: x}, + {"type": int}, + {"repeated": 1, " repeated ": 2}, + ]: + image_fail( + model=SingleImageUpdate, + zarr_url="/x", + attributes=invalid_attributes, ) + + # types + valid_types = {"y": True, "n": False} # only booleans + image = image_ok(model=SingleImageUpdate, zarr_url="/x", types=valid_types) + assert image.types == valid_types + image_fail(zarr_url="/x", types={"a": "not a bool"}) + image_fail(zarr_url="/x", types={"a": True, " a": True}) + image_ok(zarr_url="/x", types={1: True}) From 984182f5b8da6de58512cedb682af8a30bf8cbd9 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Thu, 9 Jan 2025 18:17:42 +0100 Subject: [PATCH 45/99] test unit image tools --- fractal_server/images/tools.py | 4 +- tests/v2/05_images/test_unit_image_tools.py | 299 +++++++++----------- 2 files changed, 132 insertions(+), 171 deletions(-) diff --git a/fractal_server/images/tools.py b/fractal_server/images/tools.py index a9cf6d6b5c..6f4b1fe86c 100644 --- a/fractal_server/images/tools.py +++ b/fractal_server/images/tools.py @@ -89,8 +89,8 @@ def filter_image_list( for this_image in images if match_filter( this_image, - type_filters=type_filters, - attribute_filters=attribute_filters, + type_filters=type_filters or {}, + attribute_filters=attribute_filters or {}, ) ] return filtered_images diff --git a/tests/v2/05_images/test_unit_image_tools.py b/tests/v2/05_images/test_unit_image_tools.py index f1751fc1d9..160f442c13 100644 --- a/tests/v2/05_images/test_unit_image_tools.py +++ b/tests/v2/05_images/test_unit_image_tools.py @@ -1,195 +1,156 @@ -from fractal_server.images import SingleImage from fractal_server.images.tools import filter_image_list from fractal_server.images.tools import find_image_by_zarr_url from fractal_server.images.tools import match_filter -N = 100 -images = [ - SingleImage( - zarr_url=f"/a/b/c{i}.zarr", - attributes=dict( - name=("a" if i % 2 == 0 else "b"), - num=i % 3, - ), - types=dict( - a=(i <= N // 2), - b=(i >= N // 3), - ), - ).dict() - for i in range(N) -] - def test_find_image_by_zarr_url(): + images = [{"zarr_url": "/x"}, {"zarr_url": "/y"}, {"zarr_url": "/z"}] + res = find_image_by_zarr_url(zarr_url="/x", images=images) + assert res == { + "index": 0, + "image": {"zarr_url": "/x"}, + } + res = find_image_by_zarr_url(zarr_url="/y", images=images) + assert res == { + "index": 1, + "image": {"zarr_url": "/y"}, + } + res = find_image_by_zarr_url(zarr_url="/z", images=images) + assert res == { + "index": 2, + "image": {"zarr_url": "/z"}, + } - for i in range(N): - image_search = find_image_by_zarr_url( - zarr_url=f"/a/b/c{i}.zarr", images=images - ) - assert image_search["image"]["zarr_url"] == f"/a/b/c{i}.zarr" - assert image_search["index"] == i - image_search = find_image_by_zarr_url(zarr_url="/xxx", images=images) - assert image_search is None +def test_match_filter(): + # empty filters (always match) + assert match_filter(image=..., type_filters={}, attribute_filters={}) -def test_match_filter(): + image = {"types": {"a": True, "b": False}, "attributes": {"a": 1, "b": 2}} - image = SingleImage( - zarr_url="/a/b/c0.zarr", - attributes=dict( - name="a", - num=0, - ), - types=dict( - a=True, - b=False, - 
), - ).dict() - - # Empty - assert match_filter(image) is True - - # Attributes - - # not existing attribute - assert match_filter(image, attribute_filters=dict(foo="bar")) is False - - assert match_filter(image, attribute_filters=dict(name="a")) is True - - assert match_filter(image, attribute_filters=dict(num=0)) is True - - assert ( - match_filter( - image, - attribute_filters=dict( - name="a", - num=0, - ), - ) - is True - ) - - # not existing attribute - assert ( - match_filter(image, attribute_filters=dict(name="a", num=0, foo="bar")) - is False - ) - - # int as string - assert ( - match_filter(image, attribute_filters=dict(name="a", num="0")) is False - ) - - # Types - - assert match_filter(image, type_filters=dict(a=True)) is True - assert match_filter(image, type_filters=dict(b=False)) is True - assert match_filter(image, type_filters=dict(a=True, b=False)) is True - assert match_filter(image, type_filters=dict(a=False)) is False - assert match_filter(image, type_filters=dict(a=True, b=True)) is False - # not existing 'True' types are checked - assert match_filter(image, type_filters=dict(c=True)) is False - # not existing 'False' types are ignored - assert match_filter(image, type_filters=dict(c=False)) is True - assert ( - match_filter(image, type_filters=dict(a=True, b=False, c=True)) - is False - ) - assert ( - match_filter(image, type_filters=dict(a=True, b=False, c=False)) - is True - ) - - # Both - - assert ( - match_filter( - image, attribute_filters=dict(name="a"), type_filters=dict(a=True) - ) - is True - ) - assert ( - match_filter( - image, attribute_filters=dict(name="a"), type_filters=dict(a=False) - ) - is False - ) - assert ( - match_filter( - image, attribute_filters=dict(name="b"), type_filters=dict(a=True) - ) - is False - ) - assert ( - match_filter( - image, - attribute_filters=dict(name="a"), - type_filters=dict(x=False, y=False, z=False), - ) - is True - ) - assert ( - match_filter( - image, - attribute_filters=dict(name="a"), - type_filters=dict(x=False, y=False, z=True), - ) - is False - ) - - -def test_attribute_filters_set_to_none(): - """ - This test shows how to disable a previously-set attribute filter by just - using `dict.update` (that is, without e.g. `dict.pop`). 
- """ - # Image not matching filter - image = dict( - types={}, - attributes={"key2": "invalid-value2"}, - ) - attribute_filters = {"key2": ["value2"]} + # type filters + # a + assert match_filter( + image=image, type_filters={"a": True}, attribute_filters={} + ) + assert not match_filter( + image=image, type_filters={"a": False}, attribute_filters={} + ) + # b assert not match_filter( + image=image, type_filters={"b": True}, attribute_filters={} + ) + assert match_filter( + image=image, type_filters={"b": False}, attribute_filters={} + ) + # c + assert not match_filter( + image=image, type_filters={"c": True}, attribute_filters={} + ) + assert match_filter( + image=image, type_filters={"c": False}, attribute_filters={} + ) + # a b c + assert match_filter( image=image, - type_filters={}, - attribute_filters=attribute_filters, + type_filters={"a": True, "b": False, "c": False}, + attribute_filters={}, + ) + assert not match_filter( + image=image, + type_filters={"a": False, "b": False, "c": False}, + attribute_filters={}, + ) + assert not match_filter( + image=image, + type_filters={"a": True, "b": True, "c": False}, + attribute_filters={}, + ) + assert not match_filter( + image=image, + type_filters={"a": False, "b": True, "c": False}, + attribute_filters={}, ) - # Unset filter by replacing ["value2"] with None - attribute_filters.update({"key2": None}) - - # Image matches filter + # attribute filters + assert match_filter( + image=image, type_filters={}, attribute_filters={"a": [1]} + ) + assert match_filter( + image=image, type_filters={}, attribute_filters={"a": [1], "b": [1, 2]} + ) + assert not match_filter( + image=image, type_filters={}, attribute_filters={"a": [0], "b": [1, 2]} + ) assert match_filter( image=image, type_filters={}, - attribute_filters=attribute_filters, + attribute_filters={"a": None, "b": [1, 2]}, + ) + + # both + assert match_filter( + image=image, type_filters={"a": True}, attribute_filters={"a": [1]} + ) + assert not match_filter( + image=image, type_filters={"a": False}, attribute_filters={"a": [1]} + ) + assert not match_filter( + image=image, type_filters={"a": True}, attribute_filters={"a": [0]} ) def test_filter_image_list(): - # Empty + + images = [ + {"types": {"a": True}, "attributes": {"a": 1, "b": 2}}, + {"types": {"a": True}, "attributes": {"a": 2, "b": 2}}, + {"types": {"a": False}, "attributes": {"a": 1, "b": 1}}, + {"types": {}, "attributes": {"a": 1, "b": 1}}, + {"types": {}, "attributes": {}}, + ] + + # empty res = filter_image_list(images) assert res == images - # Attributes - res = filter_image_list(images, attribute_filters=dict(name="a")) - k = (N // 2) if not N % 2 else (N + 1) // 2 - assert len(res) == k - res = filter_image_list(images, attribute_filters=dict(name="b")) - assert len(res) == N - k - res = filter_image_list(images, attribute_filters=dict(num=0)) - assert len(res) == len([i for i in range(N) if i % 3 == 0]) - res = filter_image_list(images, attribute_filters=dict(num=1)) - assert len(res) == len([i for i in range(N) if i % 3 == 1]) - res = filter_image_list(images, attribute_filters=dict(num=2)) - assert len(res) == len([i for i in range(N) if i % 3 == 2]) - res = filter_image_list(images, attribute_filters=dict(name="foo")) - assert len(res) == 0 - res = filter_image_list(images, attribute_filters=dict(num=3)) - assert len(res) == 0 - res = filter_image_list(images, attribute_filters=dict(name="a", num=3)) + res = filter_image_list(images, type_filters={}) + assert res == images + res = filter_image_list(images, 
attribute_filters={}) + assert res == images + res = filter_image_list(images, type_filters={}, attribute_filters={}) + assert res == images + + # type filters + res = filter_image_list(images, type_filters={"a": True}) + assert len(res) == 2 + res = filter_image_list(images, type_filters={"a": False}) + assert len(res) == 3 # complementary of 2 + res = filter_image_list(images, type_filters={"b": True}) assert len(res) == 0 - res = filter_image_list(images, attribute_filters=dict(name="foo", num=0)) + res = filter_image_list(images, type_filters={"b": False}) + assert len(res) == 5 + + # attribute filters + res = filter_image_list(images, attribute_filters={"a": [1]}) + assert len(res) == 3 + res = filter_image_list(images, attribute_filters={"a": [1, 2]}) + assert len(res) == 4 + res = filter_image_list(images, attribute_filters={"a": None, "b": None}) + assert len(res) == 5 + + # both + res = filter_image_list( + images, type_filters={"a": True}, attribute_filters={"a": [1]} + ) + assert len(res) == 1 + res = filter_image_list( + images, type_filters={"a": True}, attribute_filters={"a": [1, 2]} + ) + assert len(res) == 2 + res = filter_image_list( + images, + type_filters={"a": True}, + attribute_filters={"a": [1, 2], "b": [-1]}, + ) assert len(res) == 0 - res = filter_image_list(images, type_filters=dict(a=True, b=True)) - assert len(res) == N // 2 - N // 3 + 1 From c4a8d9f0e774b1bac8620d02c51f2c5e3a099562 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Thu, 9 Jan 2025 18:20:31 +0100 Subject: [PATCH 46/99] fix test benchmark --- tests/v2/05_images/test_benchmark_helper_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/v2/05_images/test_benchmark_helper_functions.py b/tests/v2/05_images/test_benchmark_helper_functions.py index a1fded05be..60aba1b152 100644 --- a/tests/v2/05_images/test_benchmark_helper_functions.py +++ b/tests/v2/05_images/test_benchmark_helper_functions.py @@ -62,7 +62,7 @@ def test_filter_image_list_few_filters( ): new_list = filter_image_list( images=images, - attribute_filters=dict(a1=0), + attribute_filters=dict(a1=[0]), ) debug(len(images), len(new_list)) assert len(new_list) == len(images) // 2 From d422fe221b4fe336c28b49ab0236d55306ce5ecb Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Mon, 13 Jan 2025 12:02:30 +0100 Subject: [PATCH 47/99] fix query image endpoint --- fractal_server/app/routes/api/v2/images.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fractal_server/app/routes/api/v2/images.py b/fractal_server/app/routes/api/v2/images.py index df778a1eb5..3e13b5b694 100644 --- a/fractal_server/app/routes/api/v2/images.py +++ b/fractal_server/app/routes/api/v2/images.py @@ -174,7 +174,7 @@ async def query_dataset_images( else: images = [image] - if query.filters.attributes or query.filters.types: + if query.attribute_filters or query.type_filters: images = [ image for image in images From f8e91c858e05cd61351d05d87f125e5c123faafd Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Mon, 13 Jan 2025 12:14:22 +0100 Subject: [PATCH 48/99] fix test_api_dataset_images --- tests/v2/03_api/test_api_dataset_images.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/v2/03_api/test_api_dataset_images.py b/tests/v2/03_api/test_api_dataset_images.py index 88a69f54bb..de9dc2dc94 100644 --- a/tests/v2/03_api/test_api_dataset_images.py +++ b/tests/v2/03_api/test_api_dataset_images.py @@ -142,7 +142,9 @@ async def test_query_images( [ image for image in images - if match_filter(image, 
type_filters={"flag": False}) + if match_filter( + image, type_filters={"flag": False}, attribute_filters={} + ) ] ) assert res.json()["current_page"] == 1 @@ -160,7 +162,9 @@ async def test_query_images( [ image for image in images - if match_filter(image, type_filters={"flag": 1}) + if match_filter( + image, type_filters={"flag": 1}, attribute_filters={} + ) ] ) assert res.json()["page_size"] == 1000 From faefc525a27dd1ebbcb618875efae4ced4656c27 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Mon, 13 Jan 2025 12:53:18 +0100 Subject: [PATCH 49/99] fix dataset dump --- fractal_server/app/routes/api/v2/submit.py | 4 +++- fractal_server/app/schemas/v2/dataset.py | 6 ++++-- fractal_server/images/tools.py | 3 ++- tests/fixtures_server_v2.py | 2 +- tests/v2/03_api/test_api_job.py | 4 +++- 5 files changed, 13 insertions(+), 6 deletions(-) diff --git a/fractal_server/app/routes/api/v2/submit.py b/fractal_server/app/routes/api/v2/submit.py index fc34f2929b..6029791721 100644 --- a/fractal_server/app/routes/api/v2/submit.py +++ b/fractal_server/app/routes/api/v2/submit.py @@ -159,7 +159,9 @@ async def apply_workflow( dataset_id=dataset_id, workflow_id=workflow_id, user_email=user.email, - dataset_dump=json.loads(dataset.json(exclude={"images", "history"})), + dataset_dump=json.loads( + dataset.json(exclude={"images", "history", "filters"}) + ), workflow_dump=json.loads(workflow.json(exclude={"task_list"})), project_dump=json.loads(project.json(exclude={"user_list"})), **job_create.dict(), diff --git a/fractal_server/app/schemas/v2/dataset.py b/fractal_server/app/schemas/v2/dataset.py index 93ab478155..75a6ed6d61 100644 --- a/fractal_server/app/schemas/v2/dataset.py +++ b/fractal_server/app/schemas/v2/dataset.py @@ -116,7 +116,8 @@ class DatasetImportV2(BaseModel, extra=Extra.forbid): name: zarr_dir: images: - filters: + type_filters: + attribute_filters: """ name: str @@ -151,7 +152,8 @@ class DatasetExportV2(BaseModel): name: zarr_dir: images: - filters: + type_filters: + attribute_filters: """ name: str diff --git a/fractal_server/images/tools.py b/fractal_server/images/tools.py index 6f4b1fe86c..874746b225 100644 --- a/fractal_server/images/tools.py +++ b/fractal_server/images/tools.py @@ -74,7 +74,8 @@ def filter_image_list( Arguments: images: A list of images. - filters: A set of filters. + type_filters: + attribute_filters: Returns: List of the `images` elements which match the filter set. 
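As a quick orientation for the signature documented above, a minimal `filter_image_list` usage sketch (illustrative image dicts and values, modeled on the tests elsewhere in this series; both filter kinds must match for an image to be kept):

    from fractal_server.images.tools import filter_image_list

    images = [
        {"zarr_url": "/a", "types": {"3D": True}, "attributes": {"well": "A01"}},
        {"zarr_url": "/b", "types": {"3D": False}, "attributes": {"well": "B03"}},
    ]
    selected = filter_image_list(
        images,
        type_filters={"3D": True},  # keep only images typed as 3D
        attribute_filters={"well": ["A01", "A02"]},  # restrict to these wells
    )
    assert [img["zarr_url"] for img in selected] == ["/a"]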
diff --git a/tests/fixtures_server_v2.py b/tests/fixtures_server_v2.py index d297531e90..eaf2cbc6cf 100644 --- a/tests/fixtures_server_v2.py +++ b/tests/fixtures_server_v2.py @@ -147,7 +147,7 @@ async def __job_factory( dataset_id=dataset_id, workflow_id=workflow_id, dataset_dump=json.loads( - dataset.json(exclude={"history", "images"}) + dataset.json(exclude={"history", "images", "filters"}) ), workflow_dump=json.loads(workflow.json(exclude={"task_list"})), project_dump=json.loads(project.json(exclude={"user_list"})), diff --git a/tests/v2/03_api/test_api_job.py b/tests/v2/03_api/test_api_job.py index 946f2c3e10..6d41720c2b 100644 --- a/tests/v2/03_api/test_api_job.py +++ b/tests/v2/03_api/test_api_job.py @@ -353,7 +353,9 @@ async def test_project_apply_workflow_subset( **json.loads(workflow.json(exclude={"task_list"})) ).dict() expected_dataset_dump = DatasetDumpV2( - **json.loads(dataset1.json(exclude={"history", "images"})) + **json.loads( + dataset1.json(exclude={"history", "images", "filters"}) + ) ).dict() assert res.json()["project_dump"] == expected_project_dump assert res.json()["workflow_dump"] == expected_workflow_dump From aaf6a25ad391e1613e80ee36ceb3fdb4272032d8 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Mon, 13 Jan 2025 16:22:26 +0100 Subject: [PATCH 50/99] exclude none from DatasetUpdate --- fractal_server/app/routes/api/v2/dataset.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fractal_server/app/routes/api/v2/dataset.py b/fractal_server/app/routes/api/v2/dataset.py index d47d69bd90..238dc254aa 100644 --- a/fractal_server/app/routes/api/v2/dataset.py +++ b/fractal_server/app/routes/api/v2/dataset.py @@ -172,7 +172,9 @@ async def update_dataset( ), ) - for key, value in dataset_update.dict(exclude_unset=True).items(): + for key, value in dataset_update.dict( + exclude_unset=True, exclude_none=True + ).items(): setattr(db_dataset, key, value) await db.commit() From 604049d1b1eec6af1394cd945f4eb74a43167cba Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Mon, 13 Jan 2025 16:30:23 +0100 Subject: [PATCH 51/99] test update dataset filters --- tests/v2/03_api/test_api_dataset.py | 46 ++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/tests/v2/03_api/test_api_dataset.py b/tests/v2/03_api/test_api_dataset.py index dcca09317a..34a773c6d0 100644 --- a/tests/v2/03_api/test_api_dataset.py +++ b/tests/v2/03_api/test_api_dataset.py @@ -374,7 +374,11 @@ async def test_patch_dataset( ): async with MockCurrentUser() as user: project = await project_factory_v2(user) - dataset = await dataset_factory_v2(project_id=project.id) + dataset = await dataset_factory_v2( + project_id=project.id, + attribute_filters={"a": [1, 2], "b": [3]}, + type_filters={"c": True, "d": False}, + ) project_id = project.id dataset_id = dataset.id @@ -419,6 +423,46 @@ async def test_patch_dataset( ) assert res.status_code == 422 + # Patch `attribute_filters` + res = await client.get( + f"{PREFIX}/project/{project_id}/dataset/{dataset_id}/" + ) + assert res.json()["attribute_filters"] == {"a": [1, 2], "b": [3]} + assert res.json()["type_filters"] == {"c": True, "d": False} + res = await client.patch( + f"{PREFIX}/project/{project_id}/dataset/{dataset_id}/", + json=dict(attribute_filters={"c": 3}), + ) + assert res.status_code == 422 + res = await client.patch( + f"{PREFIX}/project/{project_id}/dataset/{dataset_id}/", + json=dict(attribute_filters={"c": [3]}), + ) + assert res.status_code == 200 + assert res.json()["attribute_filters"] == {"c": 
[3]} + assert res.json()["type_filters"] == {"c": True, "d": False} + res = await client.patch( + f"{PREFIX}/project/{project_id}/dataset/{dataset_id}/", + json=dict(type_filters={"x": 42}), + ) + assert res.status_code == 422 + res = await client.patch( + f"{PREFIX}/project/{project_id}/dataset/{dataset_id}/", + json=dict(type_filters={"x": True}), + ) + assert res.status_code == 200 + assert res.json()["name"] == "something-new" + assert res.json()["zarr_dir"] == "/new_zarr_dir" + assert res.json()["attribute_filters"] == {"c": [3]} + assert res.json()["type_filters"] == {"x": True} + res = await client.patch( + f"{PREFIX}/project/{project_id}/dataset/{dataset_id}/", + json=dict(attribute_filters={}, type_filters=None), + ) + assert res.status_code == 200 + assert res.json()["attribute_filters"] == {} + assert res.json()["type_filters"] == {"x": True} + async def test_dataset_export( app, client, MockCurrentUser, project_factory_v2, dataset_factory_v2 From 14ef8eb58c335be7f7b9d20e98bf3d486e020cb9 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 14 Jan 2025 09:48:59 +0100 Subject: [PATCH 52/99] fix test_filters --- tests/v2/05_images/test_filters.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/v2/05_images/test_filters.py b/tests/v2/05_images/test_filters.py index a4376d9c9f..c650f99a2b 100644 --- a/tests/v2/05_images/test_filters.py +++ b/tests/v2/05_images/test_filters.py @@ -101,12 +101,12 @@ def test_singleimage_attributes_validation(): # No filter ({}, {}, 6), # Key is not part of attribute keys - ({"missing_key": "whatever"}, {}, 0), + ({"missing_key": ["whatever"]}, {}, 0), # Key is not part of type keys (default is False) ({}, {"missing_key": True}, 0), ({}, {"missing_key": False}, 6), # Key is part of attribute keys, but value is missing - ({"plate": "missing_plate.zarr"}, {}, 0), + ({"plate": ["missing_plate.zarr"]}, {}, 0), # Meaning of None for attributes: skip a given filter ({"plate": None}, {}, 6), # Single type filter @@ -117,24 +117,24 @@ def test_singleimage_attributes_validation(): ({}, {"3D": True, "illumination_correction": True}, 2), # Both attribute and type filters ( - {"plate": "plate.zarr"}, + {"plate": ["plate.zarr"]}, {"3D": True, "illumination_correction": True}, 2, ), # Both attribute and type filters ( - {"plate": "plate_2d.zarr"}, + {"plate": ["plate_2d.zarr"]}, {"3D": True, "illumination_correction": True}, 0, ), # Both attribute and type filters ( - {"plate": "plate.zarr", "well": "A01"}, + {"plate": ["plate.zarr"], "well": ["A01"]}, {"3D": True, "illumination_correction": True}, 1, ), # Single attribute filter - ({"well": "A01"}, {}, 3), + ({"well": ["A01"]}, {}, 3), ], ) def test_filter_image_list_SingleImage( From 49635bc083f6107281bba4958c2d59a946030a09 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 14 Jan 2025 12:05:16 +0100 Subject: [PATCH 53/99] fail with None filters --- fractal_server/app/routes/api/v2/dataset.py | 4 +- fractal_server/app/schemas/_validators.py | 62 +++++++++++---------- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/fractal_server/app/routes/api/v2/dataset.py b/fractal_server/app/routes/api/v2/dataset.py index 238dc254aa..d47d69bd90 100644 --- a/fractal_server/app/routes/api/v2/dataset.py +++ b/fractal_server/app/routes/api/v2/dataset.py @@ -172,9 +172,7 @@ async def update_dataset( ), ) - for key, value in dataset_update.dict( - exclude_unset=True, exclude_none=True - ).items(): + for key, value in dataset_update.dict(exclude_unset=True).items(): 
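
# --- A sketch, not part of the patch: why plain `exclude_unset=True` is
# enough once the schema validators reject an explicit None (see the
# _validators.py hunk below in this same commit). With Pydantic v1, a
# validator does run on an explicitly passed None, so a client-sent
# `"attribute_filters": null` now fails validation instead of being silently
# dropped by `exclude_none=True`. Names here are illustrative only.
from typing import Optional

from pydantic import BaseModel
from pydantic import validator


class _UpdateSketch(BaseModel):
    name: Optional[str]
    attribute_filters: Optional[dict]

    @validator("attribute_filters")
    def _reject_none(cls, v):
        if v is None:
            raise ValueError("'attribute_filters' cannot be 'None'.")
        return v


# Unset fields are omitted from the dump, so `setattr` only sees sent keys:
assert _UpdateSketch(name="x").dict(exclude_unset=True) == {"name": "x"}
# --- end sketch
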
setattr(db_dataset, key, value) await db.commit() diff --git a/fractal_server/app/schemas/_validators.py b/fractal_server/app/schemas/_validators.py index 32b51b3601..acd3707843 100644 --- a/fractal_server/app/schemas/_validators.py +++ b/fractal_server/app/schemas/_validators.py @@ -106,41 +106,43 @@ def val(must_be_unique: Optional[list]) -> Optional[list]: def validate_type_filters( type_filters: Optional[dict], ) -> Optional[dict[str, bool]]: - if type_filters is not None: - type_filters = valdict_keys("type_filters")(type_filters) + if type_filters is None: + raise ValueError("'type_filters' cannot be 'None'.") + + type_filters = valdict_keys("type_filters")(type_filters) return type_filters def validate_attribute_filters( attribute_filters: Optional[dict[str, Optional[list[Any]]]], -) -> Optional[dict[str, Optional[list[Any]]]]: - if attribute_filters is not None: - attribute_filters = valdict_keys("attribute_filters")( - attribute_filters - ) - for key, values in attribute_filters.items(): - if values is None: - # values=None corresponds to not applying any filter for - # attribute `key` - pass - elif values == []: - # WARNING: in this case, no image can match with the current - # filter. In the future we may deprecate this possibility. - pass - else: - # values is a non-empty list, and its items must all be of the - # same scalar non-None type - _type = type(values[0]) - if not all(isinstance(value, _type) for value in values): - raise ValueError( - f"attribute_filters[{key}] has values with " - f"non-homogeneous types: {values}." - ) - if _type not in (int, float, str, bool): - raise ValueError( - f"attribute_filters[{key}] has values with " - f"invalid types: {values}." - ) +) -> dict[str, Optional[list[Any]]]: + if attribute_filters is None: + raise ValueError("'attribute_filters' cannot be 'None'.") + + attribute_filters = valdict_keys("attribute_filters")(attribute_filters) + for key, values in attribute_filters.items(): + if values is None: + # values=None corresponds to not applying any filter for + # attribute `key` + pass + elif values == []: + # WARNING: in this case, no image can match with the current + # filter. In the future we may deprecate this possibility. + pass + else: + # values is a non-empty list, and its items must all be of the + # same scalar non-None type + _type = type(values[0]) + if not all(isinstance(value, _type) for value in values): + raise ValueError( + f"attribute_filters[{key}] has values with " + f"non-homogeneous types: {values}." + ) + if _type not in (int, float, str, bool): + raise ValueError( + f"attribute_filters[{key}] has values with " + f"invalid types: {values}." 
+ ) return attribute_filters From fe8a0a32dcb2a3ea858b297ac365c89304e43e09 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 14 Jan 2025 12:07:28 +0100 Subject: [PATCH 54/99] run ci for pr to dev-2.11 --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c089675614..fdb09910d1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,7 +4,7 @@ on: push: branches: ["main"] pull_request: - branches: ["main"] + branches: ["main", "dev-2.11"] jobs: From e52cd6ffbc1cd63a265ef1f745945957766ae536 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 14 Jan 2025 12:14:34 +0100 Subject: [PATCH 55/99] fix test_patch_dataset --- tests/v2/03_api/test_api_dataset.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/v2/03_api/test_api_dataset.py b/tests/v2/03_api/test_api_dataset.py index 34a773c6d0..99fef4d9e4 100644 --- a/tests/v2/03_api/test_api_dataset.py +++ b/tests/v2/03_api/test_api_dataset.py @@ -459,9 +459,25 @@ async def test_patch_dataset( f"{PREFIX}/project/{project_id}/dataset/{dataset_id}/", json=dict(attribute_filters={}, type_filters=None), ) + assert res.status_code == 422 + res = await client.patch( + f"{PREFIX}/project/{project_id}/dataset/{dataset_id}/", + json=dict(attribute_filters={}), + ) assert res.status_code == 200 + assert res.json()["name"] == "something-new" + assert res.json()["zarr_dir"] == "/new_zarr_dir" assert res.json()["attribute_filters"] == {} assert res.json()["type_filters"] == {"x": True} + res = await client.patch( + f"{PREFIX}/project/{project_id}/dataset/{dataset_id}/", + json=dict(type_filters={}), + ) + assert res.status_code == 200 + assert res.json()["name"] == "something-new" + assert res.json()["zarr_dir"] == "/new_zarr_dir" + assert res.json()["attribute_filters"] == {} + assert res.json()["type_filters"] == {} async def test_dataset_export( From 31e249eac07d11f37d2f832690b477cd9b612bad Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 14 Jan 2025 12:16:27 +0100 Subject: [PATCH 56/99] fix test_schemas_dataset_v2 --- tests/v2/01_schemas/test_schemas_dataset.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/v2/01_schemas/test_schemas_dataset.py b/tests/v2/01_schemas/test_schemas_dataset.py index 969ee15419..df0a3b20e8 100644 --- a/tests/v2/01_schemas/test_schemas_dataset.py +++ b/tests/v2/01_schemas/test_schemas_dataset.py @@ -66,10 +66,14 @@ async def test_schemas_dataset_v2(): # Update - # validation accepts `zarr_dir` and `filters` as None, but not `name` - DatasetUpdateV2(zarr_dir=None, attribute_filters=None) + # validation accepts `zarr_dir` as None, but not `name` and `filters` + DatasetUpdateV2(zarr_dir=None) with pytest.raises(ValidationError): - DatasetUpdateV2(name=None, zarr_dir=None, attribute_filters=None) + DatasetUpdateV2(name=None) + with pytest.raises(ValidationError): + DatasetUpdateV2(attribute_filters=None) + with pytest.raises(ValidationError): + DatasetUpdateV2(type_filters=None) dataset_update = DatasetUpdateV2(name="new name", zarr_dir="/zarr/") assert not dataset_update.zarr_dir.endswith("/") From 9b3b563e63ec53f0af3d10b462033561e5dd15c0 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Tue, 14 Jan 2025 12:37:15 +0100 Subject: [PATCH 57/99] Fix use of match_filters in runner - cc @ychiucco --- fractal_server/app/runner/v2/runner.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff 
--git a/fractal_server/app/runner/v2/runner.py b/fractal_server/app/runner/v2/runner.py index a27ad94b6a..a6e6d28644 100644 --- a/fractal_server/app/runner/v2/runner.py +++ b/fractal_server/app/runner/v2/runner.py @@ -71,7 +71,11 @@ def execute_tasks_v2( ) # Verify that filtered images comply with task input_types for image in filtered_images: - if not match_filter(image, type_filters=task.input_types): + if not match_filter( + image, + type_filters=task.input_types, + attribute_filters={}, + ): raise JobExecutionError( "Invalid filtered image list\n" f"Task input types: {task.input_types=}\n" From d43f99ae0d4e99fd834d50c3c90706c0f7a6e31e Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Tue, 14 Jan 2025 12:44:08 +0100 Subject: [PATCH 58/99] Update tests 04 --- tests/v2/04_runner/aux_get_dataset_attrs.py | 2 +- tests/v2/04_runner/test_dummy_examples.py | 1 + tests/v2/04_runner/test_fractal_examples.py | 25 ++++++++++--------- .../test_no_images_parallelization.py | 1 + ...lization.py::test_parallelize_on_no_images | 0 tests/v2/04_runner/v2_mock_models.py | 7 +----- 6 files changed, 17 insertions(+), 19 deletions(-) create mode 100644 tests/v2/04_runner/test_no_images_parallelization.py::test_parallelize_on_no_images diff --git a/tests/v2/04_runner/aux_get_dataset_attrs.py b/tests/v2/04_runner/aux_get_dataset_attrs.py index 6fed8b0185..93ee375505 100644 --- a/tests/v2/04_runner/aux_get_dataset_attrs.py +++ b/tests/v2/04_runner/aux_get_dataset_attrs.py @@ -7,6 +7,6 @@ async def _get_dataset_attrs(db, dataset_id) -> dict[str, Any]: await db.close() db_dataset = await db.get(DatasetV2, dataset_id) dataset_attrs = db_dataset.model_dump( - include={"filters", "history", "images"} + include={"type_filters", "attribute_filters", "history", "images"} ) return dataset_attrs diff --git a/tests/v2/04_runner/test_dummy_examples.py b/tests/v2/04_runner/test_dummy_examples.py index 4c535e5ae0..0731a0901c 100644 --- a/tests/v2/04_runner/test_dummy_examples.py +++ b/tests/v2/04_runner/test_dummy_examples.py @@ -29,6 +29,7 @@ def execute_tasks_v2(wf_task_list, workflow_dir_local, **kwargs) -> None: raw_execute_tasks_v2( wf_task_list=wf_task_list, workflow_dir_local=workflow_dir_local, + job_attribute_filters={}, **kwargs, ) diff --git a/tests/v2/04_runner/test_fractal_examples.py b/tests/v2/04_runner/test_fractal_examples.py index 1882a87902..0312616b21 100644 --- a/tests/v2/04_runner/test_fractal_examples.py +++ b/tests/v2/04_runner/test_fractal_examples.py @@ -32,6 +32,7 @@ def execute_tasks_v2(wf_task_list, workflow_dir_local, **kwargs): raw_execute_tasks_v2( wf_task_list=wf_task_list, workflow_dir_local=workflow_dir_local, + job_attribute_filters={}, **kwargs, ) @@ -106,8 +107,8 @@ async def test_fractal_demos_01( assert _task_names_from_history(dataset_attrs["history"]) == [ "create_ome_zarr_compound" ] - assert dataset_attrs["filters"]["attributes"] == {} - assert dataset_attrs["filters"]["types"] == {} + assert dataset_attrs["attribute_filters"] == {} + assert dataset_attrs["type_filters"] == {} _assert_image_data_exist(dataset_attrs["images"]) assert len(dataset_attrs["images"]) == 2 @@ -134,8 +135,8 @@ async def test_fractal_demos_01( "create_ome_zarr_compound", "illumination_correction", ] - assert dataset_attrs["filters"]["attributes"] == {} - assert dataset_attrs["filters"]["types"] == { + assert dataset_attrs["attribute_filters"] == {} + assert dataset_attrs["type_filters"] == { "illumination_correction": True, } assert set(img["zarr_url"] for img 
in dataset_attrs["images"]) == { @@ -187,8 +188,8 @@ async def test_fractal_demos_01( "MIP_compound", ] - assert dataset_attrs["filters"]["attributes"] == {} - assert dataset_attrs["filters"]["types"] == { + assert dataset_attrs["attribute_filters"] == {} + assert dataset_attrs["type_filters"] == { "illumination_correction": True, "3D": False, } @@ -310,8 +311,8 @@ async def test_fractal_demos_01_no_overwrite( "create_ome_zarr_compound", "illumination_correction", ] - assert dataset_attrs["filters"]["attributes"] == {} - assert dataset_attrs["filters"]["types"] == { + assert dataset_attrs["attribute_filters"] == {} + assert dataset_attrs["type_filters"] == { "illumination_correction": True, } assert [img["zarr_url"] for img in dataset_attrs["images"]] == [ @@ -390,8 +391,8 @@ async def test_fractal_demos_01_no_overwrite( "illumination_correction", "MIP_compound", ] - assert dataset_attrs["filters"]["attributes"] == {} - assert dataset_attrs["filters"]["types"] == { + assert dataset_attrs["attribute_filters"] == {} + assert dataset_attrs["type_filters"] == { "3D": False, "illumination_correction": True, } @@ -429,8 +430,8 @@ async def test_fractal_demos_01_no_overwrite( }, } - assert dataset_attrs["filters"]["attributes"] == {} - assert dataset_attrs["filters"]["types"] == { + assert dataset_attrs["attribute_filters"] == {} + assert dataset_attrs["type_filters"] == { "3D": False, "illumination_correction": True, } diff --git a/tests/v2/04_runner/test_no_images_parallelization.py b/tests/v2/04_runner/test_no_images_parallelization.py index 3ae0653d98..cbe6ce6043 100644 --- a/tests/v2/04_runner/test_no_images_parallelization.py +++ b/tests/v2/04_runner/test_no_images_parallelization.py @@ -23,6 +23,7 @@ def execute_tasks_v2(wf_task_list, workflow_dir_local, **kwargs): raw_execute_tasks_v2( wf_task_list=wf_task_list, workflow_dir_local=workflow_dir_local, + job_attribute_filters={}, **kwargs, ) diff --git a/tests/v2/04_runner/test_no_images_parallelization.py::test_parallelize_on_no_images b/tests/v2/04_runner/test_no_images_parallelization.py::test_parallelize_on_no_images new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/v2/04_runner/v2_mock_models.py b/tests/v2/04_runner/v2_mock_models.py index 4321f2f9ec..a842f33186 100644 --- a/tests/v2/04_runner/v2_mock_models.py +++ b/tests/v2/04_runner/v2_mock_models.py @@ -55,13 +55,8 @@ class WorkflowTaskV2Mock(BaseModel): meta_parallel: Optional[dict[str, Any]] = Field() meta_non_parallel: Optional[dict[str, Any]] = Field() task: TaskV2Mock - input_filters: dict[str, Any] = Field(default_factory=dict) + type_filters: dict[str, bool] = Field(default_factory=dict) order: int id: int workflow_id: int = 0 task_id: int - - @validator("input_filters", always=True) - def _default_filters(cls, value): - if value == {}: - return {"types": {}, "attributes": {}} From 3b3e369b819ebe416b108cbcf4e3abb53636e551 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 14 Jan 2025 12:45:51 +0100 Subject: [PATCH 59/99] migration script --- fractal_server/data_migrations/2_11_0.py | 71 ++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 fractal_server/data_migrations/2_11_0.py diff --git a/fractal_server/data_migrations/2_11_0.py b/fractal_server/data_migrations/2_11_0.py new file mode 100644 index 0000000000..b0b63501ea --- /dev/null +++ b/fractal_server/data_migrations/2_11_0.py @@ -0,0 +1,71 @@ +import logging + +from sqlalchemy.orm.attributes import flag_modified +from sqlmodel import select + +from fractal_server.app.db 
import get_sync_db +from fractal_server.app.models import DatasetV2 +from fractal_server.app.models import JobV2 +from fractal_server.app.models import WorkflowTaskV2 + +logger = logging.getLogger("fix_db") + + +def fix_db(): + + logger.warning("START execution of fix_db function") + + with next(get_sync_db()) as db: + + # DatasetV2.filters + # DatasetV2.history[].workflowtask.input_filters + stm = select(DatasetV2).order_by(DatasetV2.id) + datasets = db.execute(stm).scalars().all() + for ds in datasets: + ds.attribute_filters = ds.filters["attributes"] + ds.type_filters = ds.filters["types"] + ds.filters = None + for i, h in enumerate(ds.history): + ds.history[i]["workflowtask"]["type_filters"] = h[ + "workflowtask" + ]["input_filters"]["types"] + if h["workflowtask"]["input_filters"]["attributes"]: + logger.warning( + f"Deleting DatasetV2[{ds.id}].history.input_filters" + ".attributes = " + f"{h['workflowtask']['input_filters']['attributes']}" + ) + flag_modified(ds, "history") + db.add(ds) + logger.warning(f"Fixed filters in DatasetV2[{ds.id}]") + + # WorkflowTaskV2.input_filters + stm = select(WorkflowTaskV2).order_by(WorkflowTaskV2.id) + wftasks = db.execute(stm).scalars().all() + for wft in wftasks: + wft.type_filters = wft.input_filters["types"] + logger.warning( + f"Deleting WorkflowTaskV2[{wft.id}].input_filters.attributes =" + f" {wft['input_filters']['attributes']}" + ) + wft.input_filters = None + flag_modified(wft, "input_filters") + db.add(wft) + logger.warning(f"Fixed filters in WorkflowTaskV2[{wft.id}]") + + # JOBS V2 + stm = select(JobV2).order_by(JobV2.id) + jobs = db.execute(stm).scalars().all() + for job in jobs: + job.dataset_dump["type_filters"] = job.dataset_dump["filters"][ + "types" + ] + job.dataset_dump["attribute_filters"] = job.dataset_dump[ + "filters" + ]["attributes"] + job.dataset_dump["filters"] = None + flag_modified(job, "dataset_dump") + logger.warning(f"Fixed filters in JobV2[{job.id}].datasetdump") + + db.commit() + logger.warning("Changes committed.") From 2d2bff29a25675351a638b0cd16111744dc9ffb3 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 14 Jan 2025 12:47:48 +0100 Subject: [PATCH 60/99] do not subscript wft --- fractal_server/data_migrations/2_11_0.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fractal_server/data_migrations/2_11_0.py b/fractal_server/data_migrations/2_11_0.py index b0b63501ea..ed62da0f0a 100644 --- a/fractal_server/data_migrations/2_11_0.py +++ b/fractal_server/data_migrations/2_11_0.py @@ -46,7 +46,7 @@ def fix_db(): wft.type_filters = wft.input_filters["types"] logger.warning( f"Deleting WorkflowTaskV2[{wft.id}].input_filters.attributes =" - f" {wft['input_filters']['attributes']}" + f" {wft.input_filters['attributes']}" ) wft.input_filters = None flag_modified(wft, "input_filters") From 0aab40877a1cf200b0f530be8e1ec97df8cc7fde Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Tue, 14 Jan 2025 12:47:56 +0100 Subject: [PATCH 61/99] Fix tests 08 --- tests/v2/08_full_workflow/common_functions.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tests/v2/08_full_workflow/common_functions.py b/tests/v2/08_full_workflow/common_functions.py index 5cfc2932cb..aa159e73b7 100644 --- a/tests/v2/08_full_workflow/common_functions.py +++ b/tests/v2/08_full_workflow/common_functions.py @@ -128,7 +128,7 @@ async def full_workflow( assert res.status_code == 200 dataset = res.json() assert len(dataset["history"]) == 2 - assert 
dataset["filters"]["types"] == {"3D": False} + assert dataset["type_filters"] == {"3D": False} res = await client.post( f"{PREFIX}/project/{project_id}/dataset/{dataset_id}/" "images/query/", @@ -281,13 +281,10 @@ async def full_workflow_TaskExecutionError( ) assert res.status_code == 200 dataset = res.json() - EXPECTED_FILTERS = { - "attributes": {}, - "types": { - "3D": False, - }, - } - assert dataset["filters"] == EXPECTED_FILTERS + EXPECTED_TYPE_FILTERS = {"3D": False} + EXPECTED_ATTRIBUTE_FILTERS = {} + assert dataset["type_filters"] == EXPECTED_TYPE_FILTERS + assert dataset["attribute_filters"] == EXPECTED_ATTRIBUTE_FILTERS assert len(dataset["history"]) == 3 assert [item["status"] for item in dataset["history"]] == [ "done", From 074864580bccd6ab3ad33ce3639266f458848d63 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Tue, 14 Jan 2025 12:48:52 +0100 Subject: [PATCH 62/99] FIXME --- fractal_server/app/runner/v2/merge_outputs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fractal_server/app/runner/v2/merge_outputs.py b/fractal_server/app/runner/v2/merge_outputs.py index bf84c94b8b..d27bfc4dc4 100644 --- a/fractal_server/app/runner/v2/merge_outputs.py +++ b/fractal_server/app/runner/v2/merge_outputs.py @@ -27,7 +27,7 @@ def merge_outputs(task_outputs: list[TaskOutput]) -> TaskOutput: additional_args = {} if last_new_filters is not None: - additional_args["filters"] = last_new_filters + additional_args["filters"] = last_new_filters # FIXME final_output = TaskOutput( image_list_updates=final_image_list_updates, From 6c06e92f79d76313d26c43ca2db54dfd3f06afbe Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 14 Jan 2025 12:51:32 +0100 Subject: [PATCH 63/99] do not use extra field in dump --- fractal_server/data_migrations/2_11_0.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fractal_server/data_migrations/2_11_0.py b/fractal_server/data_migrations/2_11_0.py index ed62da0f0a..9d2c1df200 100644 --- a/fractal_server/data_migrations/2_11_0.py +++ b/fractal_server/data_migrations/2_11_0.py @@ -63,7 +63,6 @@ def fix_db(): job.dataset_dump["attribute_filters"] = job.dataset_dump[ "filters" ]["attributes"] - job.dataset_dump["filters"] = None flag_modified(job, "dataset_dump") logger.warning(f"Fixed filters in JobV2[{job.id}].datasetdump") From 71f242050945960c27d35063b5de35de9a4a89cb Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Tue, 14 Jan 2025 12:53:22 +0100 Subject: [PATCH 64/99] Fix test 09 --- tests/v2/09_backends/test_local_experimental.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/v2/09_backends/test_local_experimental.py b/tests/v2/09_backends/test_local_experimental.py index d4f259d6da..957f7fae42 100644 --- a/tests/v2/09_backends/test_local_experimental.py +++ b/tests/v2/09_backends/test_local_experimental.py @@ -59,6 +59,7 @@ async def test_unit_process_workflow(): logger_name=None, workflow_dir_local="/foo", workflow_dir_remote="/bar", + job_attribute_filters={}, ) @@ -218,6 +219,7 @@ async def test_indirect_shutdown_during_process_workflow( workflow_dir_local=tmp_path, first_task_index=0, last_task_index=0, + job_attribute_filters={}, ) tmp_stdout.close() tmp_stderr.close() From 7fc266b8e19b09c665a373b572fddff80d949202 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 14 Jan 2025 12:54:29 +0100 Subject: [PATCH 65/99] pop datasetdump.filters --- fractal_server/data_migrations/2_11_0.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/fractal_server/data_migrations/2_11_0.py b/fractal_server/data_migrations/2_11_0.py index 9d2c1df200..f87187a576 100644 --- a/fractal_server/data_migrations/2_11_0.py +++ b/fractal_server/data_migrations/2_11_0.py @@ -63,6 +63,7 @@ def fix_db(): job.dataset_dump["attribute_filters"] = job.dataset_dump[ "filters" ]["attributes"] + job.dataset_dump.pop("filters") flag_modified(job, "dataset_dump") logger.warning(f"Fixed filters in JobV2[{job.id}].datasetdump") From 43c88cb972a15f65c938b35f0ef47d43c9181415 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Tue, 14 Jan 2025 14:25:34 +0100 Subject: [PATCH 66/99] Review use of filters in `merge_outputs` --- fractal_server/app/runner/v2/merge_outputs.py | 34 +++++++++++-------- .../app/runner/v2/task_interface.py | 2 +- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/fractal_server/app/runner/v2/merge_outputs.py b/fractal_server/app/runner/v2/merge_outputs.py index d27bfc4dc4..6128eceed3 100644 --- a/fractal_server/app/runner/v2/merge_outputs.py +++ b/fractal_server/app/runner/v2/merge_outputs.py @@ -8,7 +8,7 @@ def merge_outputs(task_outputs: list[TaskOutput]) -> TaskOutput: final_image_list_updates = [] final_image_list_removals = [] - last_new_filters = None + last_new_type_filters = None for ind, task_output in enumerate(task_outputs): @@ -16,23 +16,27 @@ def merge_outputs(task_outputs: list[TaskOutput]) -> TaskOutput: final_image_list_removals.extend(task_output.image_list_removals) # Check that all filters are the same - current_new_filters = task_output.filters + current_new_type_filters = task_output.type_filters if ind == 0: - last_new_filters = copy(current_new_filters) - if current_new_filters != last_new_filters: - raise ValueError(f"{current_new_filters=} but {last_new_filters=}") - last_new_filters = copy(current_new_filters) + last_new_type_filters = copy(current_new_type_filters) + if current_new_type_filters != last_new_type_filters: + raise ValueError( + f"{current_new_type_filters=} but {last_new_type_filters=}" + ) + last_new_type_filters = copy(current_new_type_filters) final_image_list_updates = deduplicate_list(final_image_list_updates) - additional_args = {} - if last_new_filters is not None: - additional_args["filters"] = last_new_filters # FIXME - - final_output = TaskOutput( - image_list_updates=final_image_list_updates, - image_list_removals=final_image_list_removals, - **additional_args, - ) + if last_new_type_filters is None: + final_output = TaskOutput( + image_list_updates=final_image_list_updates, + image_list_removals=final_image_list_removals, + ) + else: + final_output = TaskOutput( + image_list_updates=final_image_list_updates, + image_list_removals=final_image_list_removals, + type_filters=last_new_type_filters, + ) return final_output diff --git a/fractal_server/app/runner/v2/task_interface.py b/fractal_server/app/runner/v2/task_interface.py index dffec3c6de..0a5f3993df 100644 --- a/fractal_server/app/runner/v2/task_interface.py +++ b/fractal_server/app/runner/v2/task_interface.py @@ -63,7 +63,7 @@ def check_zarr_urls_are_unique(self) -> None: raise ValueError(msg) @root_validator() - def validate_filters(cls, values): + def update_legacy_filters(cls, values): if values["filters"] is not None: if values["type_filters"] != {}: raise ValueError( From acf96957fefabd48affcbd4c78c6f2bfd3750e36 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Tue, 14 Jan 2025 14:33:42 +0100 Subject: [PATCH 67/99] Remove 
obsolete file --- ...st_no_images_parallelization.py::test_parallelize_on_no_images | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 tests/v2/04_runner/test_no_images_parallelization.py::test_parallelize_on_no_images diff --git a/tests/v2/04_runner/test_no_images_parallelization.py::test_parallelize_on_no_images b/tests/v2/04_runner/test_no_images_parallelization.py::test_parallelize_on_no_images deleted file mode 100644 index e69de29bb2..0000000000 From 097fbacb9c00a1f1b5f29ce47f796bf65ef16bd0 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 14 Jan 2025 14:52:34 +0100 Subject: [PATCH 68/99] server default to None --- ...13a_split_filters_and_keep_old_columns.py} | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) rename fractal_server/migrations/versions/{ec7329221c1a_split_filters_and_keep_old_columns.py => db09233ad13a_split_filters_and_keep_old_columns.py} (75%) diff --git a/fractal_server/migrations/versions/ec7329221c1a_split_filters_and_keep_old_columns.py b/fractal_server/migrations/versions/db09233ad13a_split_filters_and_keep_old_columns.py similarity index 75% rename from fractal_server/migrations/versions/ec7329221c1a_split_filters_and_keep_old_columns.py rename to fractal_server/migrations/versions/db09233ad13a_split_filters_and_keep_old_columns.py index 339c2a0630..930664eef5 100644 --- a/fractal_server/migrations/versions/ec7329221c1a_split_filters_and_keep_old_columns.py +++ b/fractal_server/migrations/versions/db09233ad13a_split_filters_and_keep_old_columns.py @@ -1,15 +1,16 @@ """split filters and keep old columns -Revision ID: ec7329221c1a +Revision ID: db09233ad13a Revises: 316140ff7ee1 -Create Date: 2025-01-08 15:46:26.940083 +Create Date: 2025-01-14 14:50:46.007222 """ import sqlalchemy as sa from alembic import op +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. 
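
# --- A sketch, not part of this migration: the alter_column semantics that
# this commit and the next two rely on (assuming Alembic's documented API).
# Leaving server_default unspecified means "no change", passing None drops
# any existing default, and the string "null" renders as SQL DEFAULT 'null',
# i.e. the JSON null literal, which reads back as Python None.
import sqlalchemy as sa
from alembic import op


def _sketch() -> None:
    with op.batch_alter_table("datasetv2", schema=None) as batch_op:
        batch_op.alter_column(
            "filters",
            existing_type=sa.JSON(),
            nullable=True,
            server_default="null",  # DEFAULT 'null' (JSON null), not NULL
        )
# --- end sketch
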
-revision = "ec7329221c1a" +revision = "db09233ad13a" down_revision = "316140ff7ee1" branch_labels = None depends_on = None @@ -33,9 +34,9 @@ def upgrade() -> None: ) batch_op.alter_column( "filters", - existing_type=sa.JSON(), + existing_type=postgresql.JSON(astext_type=sa.Text()), nullable=True, - existing_server_default='{"attributes": {}, "types": {}}', + server_default=None, ) with op.batch_alter_table("jobv2", schema=None) as batch_op: @@ -56,9 +57,9 @@ def upgrade() -> None: ) batch_op.alter_column( "input_filters", - existing_type=sa.JSON(), + existing_type=postgresql.JSON(astext_type=sa.Text()), nullable=True, - existing_server_default='{"attributes": {}, "types": {}}', + server_default=None, ) # ### end Alembic commands ### @@ -69,9 +70,11 @@ def downgrade() -> None: with op.batch_alter_table("workflowtaskv2", schema=None) as batch_op: batch_op.alter_column( "input_filters", - existing_type=sa.JSON(), + existing_type=postgresql.JSON(astext_type=sa.Text()), nullable=False, - existing_server_default='{"attributes": {}, "types": {}}', + existing_server_default=sa.text( + '\'{"attributes": {}, "types": {}}\'::json' + ), ) batch_op.drop_column("type_filters") @@ -81,9 +84,11 @@ def downgrade() -> None: with op.batch_alter_table("datasetv2", schema=None) as batch_op: batch_op.alter_column( "filters", - existing_type=sa.JSON(), + existing_type=postgresql.JSON(astext_type=sa.Text()), nullable=False, - existing_server_default='{"attributes": {}, "types": {}}', + existing_server_default=sa.text( + '\'{"attributes": {}, "types": {}}\'::json' + ), ) batch_op.drop_column("attribute_filters") batch_op.drop_column("type_filters") From 6dc89f4b3df21825f6f5799c4ae35674007b5520 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 14 Jan 2025 15:17:09 +0100 Subject: [PATCH 69/99] default_server="null" --- .../db09233ad13a_split_filters_and_keep_old_columns.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fractal_server/migrations/versions/db09233ad13a_split_filters_and_keep_old_columns.py b/fractal_server/migrations/versions/db09233ad13a_split_filters_and_keep_old_columns.py index 930664eef5..5cc9848fe4 100644 --- a/fractal_server/migrations/versions/db09233ad13a_split_filters_and_keep_old_columns.py +++ b/fractal_server/migrations/versions/db09233ad13a_split_filters_and_keep_old_columns.py @@ -36,7 +36,7 @@ def upgrade() -> None: "filters", existing_type=postgresql.JSON(astext_type=sa.Text()), nullable=True, - server_default=None, + server_default="null", ) with op.batch_alter_table("jobv2", schema=None) as batch_op: @@ -59,7 +59,7 @@ def upgrade() -> None: "input_filters", existing_type=postgresql.JSON(astext_type=sa.Text()), nullable=True, - server_default=None, + server_default="null", ) # ### end Alembic commands ### From 3bae4098c1ea9a6d6a9885b48402eaf495602d2e Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 14 Jan 2025 15:23:01 +0100 Subject: [PATCH 70/99] default_server="null" in models --- fractal_server/app/models/v2/dataset.py | 2 +- fractal_server/app/models/v2/workflowtask.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fractal_server/app/models/v2/dataset.py b/fractal_server/app/models/v2/dataset.py index e21ce80679..7f4cbfe2ae 100644 --- a/fractal_server/app/models/v2/dataset.py +++ b/fractal_server/app/models/v2/dataset.py @@ -44,7 +44,7 @@ class Config: filters: Optional[ dict[Literal["attributes", "types"], dict[str, Any]] - ] = Field(sa_column=Column(JSON, nullable=True, server_default=None)) + ] = 
Field(sa_column=Column(JSON, nullable=True, server_default="null")) type_filters: dict[str, bool] = Field( sa_column=Column(JSON, nullable=False, server_default="{}") ) diff --git a/fractal_server/app/models/v2/workflowtask.py b/fractal_server/app/models/v2/workflowtask.py index 8a3ce414b3..30d29ae356 100644 --- a/fractal_server/app/models/v2/workflowtask.py +++ b/fractal_server/app/models/v2/workflowtask.py @@ -27,7 +27,7 @@ class Config: input_filters: Optional[ dict[Literal["attributes", "types"], dict[str, Any]] - ] = Field(sa_column=Column(JSON, nullable=True, server_default=None)) + ] = Field(sa_column=Column(JSON, nullable=True, server_default="null")) type_filters: dict[str, bool] = Field( sa_column=Column(JSON, nullable=False, server_default="{}") ) From eda21b8e340260d7315ed5f2d9812569ddb58194 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 14 Jan 2025 15:57:54 +0100 Subject: [PATCH 71/99] move definitions outside list comprehension --- fractal_server/images/tools.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fractal_server/images/tools.py b/fractal_server/images/tools.py index 874746b225..8fd279b190 100644 --- a/fractal_server/images/tools.py +++ b/fractal_server/images/tools.py @@ -84,14 +84,18 @@ def filter_image_list( # When no filter is provided, return all images if type_filters is None and attribute_filters is None: return images + elif type_filters is None: + type_filters = {} + elif attribute_filters is None: + attribute_filters = {} filtered_images = [ copy(this_image) for this_image in images if match_filter( this_image, - type_filters=type_filters or {}, - attribute_filters=attribute_filters or {}, + type_filters=type_filters, + attribute_filters=attribute_filters, ) ] return filtered_images From 09e097bf574ea72348031c0a6abc5b6ec4842301 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 14 Jan 2025 15:58:32 +0100 Subject: [PATCH 72/99] match_filter accept only kwargs --- fractal_server/images/tools.py | 1 + 1 file changed, 1 insertion(+) diff --git a/fractal_server/images/tools.py b/fractal_server/images/tools.py index 8fd279b190..f88aea567b 100644 --- a/fractal_server/images/tools.py +++ b/fractal_server/images/tools.py @@ -33,6 +33,7 @@ def find_image_by_zarr_url( def match_filter( + *, image: dict[str, Any], type_filters: dict[str, bool], attribute_filters: AttributeFiltersType, From efc692587b67b3cc41018990d2d19ccebc105527 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 14 Jan 2025 16:00:33 +0100 Subject: [PATCH 73/99] set level info --- fractal_server/data_migrations/2_11_0.py | 1 + 1 file changed, 1 insertion(+) diff --git a/fractal_server/data_migrations/2_11_0.py b/fractal_server/data_migrations/2_11_0.py index f87187a576..56ed1d6296 100644 --- a/fractal_server/data_migrations/2_11_0.py +++ b/fractal_server/data_migrations/2_11_0.py @@ -9,6 +9,7 @@ from fractal_server.app.models import WorkflowTaskV2 logger = logging.getLogger("fix_db") +logger.setLevel(logging.INFO) def fix_db(): From c2b206998eedd780385dce18ce1db7bfbc2c63fd Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 14 Jan 2025 16:02:17 +0100 Subject: [PATCH 74/99] use info logging --- fractal_server/data_migrations/2_11_0.py | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/fractal_server/data_migrations/2_11_0.py b/fractal_server/data_migrations/2_11_0.py index 56ed1d6296..47ed78f939 100644 --- a/fractal_server/data_migrations/2_11_0.py +++ b/fractal_server/data_migrations/2_11_0.py @@ -14,7 +14,7 
@@ def fix_db(): - logger.warning("START execution of fix_db function") + logger.info("START execution of fix_db function") with next(get_sync_db()) as db: @@ -31,28 +31,22 @@ def fix_db(): "workflowtask" ]["input_filters"]["types"] if h["workflowtask"]["input_filters"]["attributes"]: - logger.warning( - f"Deleting DatasetV2[{ds.id}].history.input_filters" - ".attributes = " - f"{h['workflowtask']['input_filters']['attributes']}" - ) + logger.warning("TMP") flag_modified(ds, "history") db.add(ds) - logger.warning(f"Fixed filters in DatasetV2[{ds.id}]") + logger.info(f"Fixed filters in DatasetV2[{ds.id}]") # WorkflowTaskV2.input_filters stm = select(WorkflowTaskV2).order_by(WorkflowTaskV2.id) wftasks = db.execute(stm).scalars().all() for wft in wftasks: wft.type_filters = wft.input_filters["types"] - logger.warning( - f"Deleting WorkflowTaskV2[{wft.id}].input_filters.attributes =" - f" {wft.input_filters['attributes']}" - ) + if wft.input_filters["attributes"]: + logger.warning("TBD") wft.input_filters = None flag_modified(wft, "input_filters") db.add(wft) - logger.warning(f"Fixed filters in WorkflowTaskV2[{wft.id}]") + logger.info(f"Fixed filters in WorkflowTaskV2[{wft.id}]") # JOBS V2 stm = select(JobV2).order_by(JobV2.id) @@ -66,7 +60,7 @@ def fix_db(): ]["attributes"] job.dataset_dump.pop("filters") flag_modified(job, "dataset_dump") - logger.warning(f"Fixed filters in JobV2[{job.id}].datasetdump") + logger.info(f"Fixed filters in JobV2[{job.id}].datasetdump") db.commit() - logger.warning("Changes committed.") + logger.info("Changes committed.") From d95590e2d983b634f5a12637a361b74324b57f57 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Tue, 14 Jan 2025 16:09:48 +0100 Subject: [PATCH 75/99] Improve match_filter --- fractal_server/images/tools.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/fractal_server/images/tools.py b/fractal_server/images/tools.py index f88aea567b..aaa0cc6bc8 100644 --- a/fractal_server/images/tools.py +++ b/fractal_server/images/tools.py @@ -68,7 +68,7 @@ def match_filter( def filter_image_list( images: list[dict[str, Any]], type_filters: Optional[dict[str, bool]] = None, - attribute_filters: Optional[dict[str, list[Any]]] = None, + attribute_filters: Optional[AttributeFiltersType] = None, ) -> list[dict[str, Any]]: """ Compute a sublist with images that match a filter set. 
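
# --- A self-contained sketch, not part of the diff: the keyword-only "*"
# marker added to match_filter in this series, plus the filter semantics the
# tests above exercise (a missing type defaults to False; an attribute must
# hit one of the listed values; None skips that attribute). The function is
# a simplified stand-in for fractal_server.images.tools.match_filter.
from typing import Any
from typing import Optional


def _match_filter_sketch(
    *,
    image: dict[str, Any],
    type_filters: dict[str, bool],
    attribute_filters: dict[str, Optional[list[Any]]],
) -> bool:
    # Every type filter must match, with False as the implicit default
    if any(
        image["types"].get(key, False) != value
        for key, value in type_filters.items()
    ):
        return False
    # Every attribute filter must match one listed value; None disables it
    return all(
        values is None or image["attributes"].get(key) in values
        for key, values in attribute_filters.items()
    )


_image = {"types": {"3D": False}, "attributes": {"well": "A01"}}
assert _match_filter_sketch(
    image=_image,
    type_filters={"3D": False},
    attribute_filters={"well": ["A01", "B03"]},
)
# A positional call like _match_filter_sketch(_image, {}, {}) raises
# TypeError, which is exactly what the later call-site commits enforce.
# --- end sketch
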
@@ -85,18 +85,16 @@ def filter_image_list( # When no filter is provided, return all images if type_filters is None and attribute_filters is None: return images - elif type_filters is None: - type_filters = {} - elif attribute_filters is None: - attribute_filters = {} + actual_type_filters = type_filters or {} + actual_attribute_filters = attribute_filters or {} filtered_images = [ copy(this_image) for this_image in images if match_filter( - this_image, - type_filters=type_filters, - attribute_filters=attribute_filters, + image=this_image, + type_filters=actual_type_filters, + attribute_filters=actual_attribute_filters, ) ] return filtered_images From 3e22cadb227da02bff5adcec87672585675cd76b Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 14 Jan 2025 16:20:10 +0100 Subject: [PATCH 76/99] docstring and warning message --- fractal_server/app/schemas/v2/dumps.py | 7 ++----- fractal_server/data_migrations/2_11_0.py | 7 ++++--- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/fractal_server/app/schemas/v2/dumps.py b/fractal_server/app/schemas/v2/dumps.py index 357b2ebf7f..d2f61c20fe 100644 --- a/fractal_server/app/schemas/v2/dumps.py +++ b/fractal_server/app/schemas/v2/dumps.py @@ -39,12 +39,9 @@ class TaskDumpV2(BaseModel): class WorkflowTaskDumpV2(BaseModel): """ - Before v2.5.0, WorkflowTaskV2 could have `task_id=task=None` and - non-`None` `task_legacy_id` and `task_legacy`. Since these objects - may still exist in the database after version updates, we are setting - `task_id` and `task` to `Optional` to avoid response-validation errors + We do not include 'extra=Extra.forbid' because legacy data may include + 'input_filters' field and we want to avoid response-validation errors for the endpoints that GET datasets. - Ref issue #1783. 
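
# --- A sketch, not part of the diff: what dropping extra=Extra.forbid buys.
# With Pydantic v1's default config, unknown fields are ignored, so a legacy
# row that still carries 'input_filters' validates instead of raising
# (class trimmed to a single field for illustration):
#
#     from pydantic import BaseModel
#
#     class _DumpSketch(BaseModel):  # no extra=Extra.forbid
#         id: int
#
#     _DumpSketch(id=1, input_filters={"attributes": {}, "types": {}})  # ok
# --- end sketch
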
""" id: int diff --git a/fractal_server/data_migrations/2_11_0.py b/fractal_server/data_migrations/2_11_0.py index 47ed78f939..6033b103c5 100644 --- a/fractal_server/data_migrations/2_11_0.py +++ b/fractal_server/data_migrations/2_11_0.py @@ -30,8 +30,6 @@ def fix_db(): ds.history[i]["workflowtask"]["type_filters"] = h[ "workflowtask" ]["input_filters"]["types"] - if h["workflowtask"]["input_filters"]["attributes"]: - logger.warning("TMP") flag_modified(ds, "history") db.add(ds) logger.info(f"Fixed filters in DatasetV2[{ds.id}]") @@ -42,7 +40,10 @@ def fix_db(): for wft in wftasks: wft.type_filters = wft.input_filters["types"] if wft.input_filters["attributes"]: - logger.warning("TBD") + logger.warning( + f"Removing WorkflowTaskV2[{wft.id}].input_filters" + f"['attributes'] = {wft.input_filters['attributes']}" + ) wft.input_filters = None flag_modified(wft, "input_filters") db.add(wft) From 955d665c143e76ef1b04abad27e73f81195f2335 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 14 Jan 2025 16:28:05 +0100 Subject: [PATCH 77/99] use kwargs in match_filter --- fractal_server/app/routes/api/v2/images.py | 4 ++-- fractal_server/app/runner/v2/runner.py | 2 +- tests/v2/03_api/test_api_dataset_images.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fractal_server/app/routes/api/v2/images.py b/fractal_server/app/routes/api/v2/images.py index 3e13b5b694..b72efdf93c 100644 --- a/fractal_server/app/routes/api/v2/images.py +++ b/fractal_server/app/routes/api/v2/images.py @@ -141,7 +141,7 @@ async def query_dataset_images( image for image in images if match_filter( - image, + image=image, type_filters=dataset.type_filters, attribute_filters=dataset.attribute_filters, ) @@ -179,7 +179,7 @@ async def query_dataset_images( image for image in images if match_filter( - image, + image=image, type_filters=query.type_filters, attribute_filters=query.attribute_filters, ) diff --git a/fractal_server/app/runner/v2/runner.py b/fractal_server/app/runner/v2/runner.py index a6e6d28644..f688dd9e17 100644 --- a/fractal_server/app/runner/v2/runner.py +++ b/fractal_server/app/runner/v2/runner.py @@ -72,7 +72,7 @@ def execute_tasks_v2( # Verify that filtered images comply with task input_types for image in filtered_images: if not match_filter( - image, + image=image, type_filters=task.input_types, attribute_filters={}, ): diff --git a/tests/v2/03_api/test_api_dataset_images.py b/tests/v2/03_api/test_api_dataset_images.py index de9dc2dc94..c23a302abd 100644 --- a/tests/v2/03_api/test_api_dataset_images.py +++ b/tests/v2/03_api/test_api_dataset_images.py @@ -143,7 +143,7 @@ async def test_query_images( image for image in images if match_filter( - image, type_filters={"flag": False}, attribute_filters={} + image=image, type_filters={"flag": False}, attribute_filters={} ) ] ) @@ -163,7 +163,7 @@ async def test_query_images( image for image in images if match_filter( - image, type_filters={"flag": 1}, attribute_filters={} + image=image, type_filters={"flag": 1}, attribute_filters={} ) ] ) From b5741067b3d244a2b56221bd7729228258b1fe48 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 14 Jan 2025 16:34:36 +0100 Subject: [PATCH 78/99] changes from comments --- fractal_server/app/schemas/_validators.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fractal_server/app/schemas/_validators.py b/fractal_server/app/schemas/_validators.py index acd3707843..f9af97e4bb 100644 --- a/fractal_server/app/schemas/_validators.py +++ 
b/fractal_server/app/schemas/_validators.py @@ -2,6 +2,8 @@ from typing import Any from typing import Optional +from fractal_server.images.models import AttributeFiltersType + def valstr(attribute: str, accept_none: bool = False): """ @@ -104,8 +106,8 @@ def val(must_be_unique: Optional[list]) -> Optional[list]: def validate_type_filters( - type_filters: Optional[dict], -) -> Optional[dict[str, bool]]: + type_filters: Optional[dict[str, bool]] +) -> dict[str, bool]: if type_filters is None: raise ValueError("'type_filters' cannot be 'None'.") @@ -114,8 +116,8 @@ def validate_type_filters( def validate_attribute_filters( - attribute_filters: Optional[dict[str, Optional[list[Any]]]], -) -> dict[str, Optional[list[Any]]]: + attribute_filters: Optional[AttributeFiltersType], +) -> AttributeFiltersType: if attribute_filters is None: raise ValueError("'attribute_filters' cannot be 'None'.") @@ -147,6 +149,10 @@ def validate_attribute_filters( def root_validate_dict_keys(cls, object: dict) -> dict: + """ + For each dictionary in `object.values()`, + checks that that dictionary has only keys of type str. + """ for dictionary in (v for v in object.values() if isinstance(v, dict)): if not all(isinstance(key, str) for key in dictionary.keys()): raise ValueError("Dictionary keys must be strings.") From 582452366970de82e35f341f7c07d0fb7f3fb492 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 14 Jan 2025 16:43:28 +0100 Subject: [PATCH 79/99] create module _filter_validators.py --- fractal_server/app/routes/api/v2/images.py | 6 ++- .../app/runner/v2/task_interface.py | 2 +- .../app/schemas/_filter_validators.py | 47 +++++++++++++++++++ fractal_server/app/schemas/_validators.py | 45 ------------------ fractal_server/app/schemas/v2/dataset.py | 4 +- fractal_server/app/schemas/v2/job.py | 2 +- fractal_server/app/schemas/v2/workflowtask.py | 2 +- 7 files changed, 56 insertions(+), 52 deletions(-) create mode 100644 fractal_server/app/schemas/_filter_validators.py diff --git a/fractal_server/app/routes/api/v2/images.py b/fractal_server/app/routes/api/v2/images.py index b72efdf93c..d41e925768 100644 --- a/fractal_server/app/routes/api/v2/images.py +++ b/fractal_server/app/routes/api/v2/images.py @@ -17,9 +17,11 @@ from fractal_server.app.db import get_async_db from fractal_server.app.models import UserOAuth from fractal_server.app.routes.auth import current_active_user +from fractal_server.app.schemas._filter_validators import ( + validate_attribute_filters, +) +from fractal_server.app.schemas._filter_validators import validate_type_filters from fractal_server.app.schemas._validators import root_validate_dict_keys -from fractal_server.app.schemas._validators import validate_attribute_filters -from fractal_server.app.schemas._validators import validate_type_filters from fractal_server.images import SingleImage from fractal_server.images import SingleImageUpdate from fractal_server.images.models import AttributeFiltersType diff --git a/fractal_server/app/runner/v2/task_interface.py b/fractal_server/app/runner/v2/task_interface.py index 0a5f3993df..f00523a72d 100644 --- a/fractal_server/app/runner/v2/task_interface.py +++ b/fractal_server/app/runner/v2/task_interface.py @@ -8,8 +8,8 @@ from pydantic import validator from ....images import SingleImageTaskOutput +from fractal_server.app.schemas._filter_validators import validate_type_filters from fractal_server.app.schemas._validators import root_validate_dict_keys -from fractal_server.app.schemas._validators import validate_type_filters from 
fractal_server.urls import normalize_url diff --git a/fractal_server/app/schemas/_filter_validators.py b/fractal_server/app/schemas/_filter_validators.py new file mode 100644 index 0000000000..c862b9c049 --- /dev/null +++ b/fractal_server/app/schemas/_filter_validators.py @@ -0,0 +1,47 @@ +from typing import Optional + +from ._validators import valdict_keys +from fractal_server.images.models import AttributeFiltersType + + +def validate_type_filters( + type_filters: Optional[dict[str, bool]] +) -> dict[str, bool]: + if type_filters is None: + raise ValueError("'type_filters' cannot be 'None'.") + + type_filters = valdict_keys("type_filters")(type_filters) + return type_filters + + +def validate_attribute_filters( + attribute_filters: Optional[AttributeFiltersType], +) -> AttributeFiltersType: + if attribute_filters is None: + raise ValueError("'attribute_filters' cannot be 'None'.") + + attribute_filters = valdict_keys("attribute_filters")(attribute_filters) + for key, values in attribute_filters.items(): + if values is None: + # values=None corresponds to not applying any filter for + # attribute `key` + pass + elif values == []: + # WARNING: in this case, no image can match with the current + # filter. In the future we may deprecate this possibility. + pass + else: + # values is a non-empty list, and its items must all be of the + # same scalar non-None type + _type = type(values[0]) + if not all(isinstance(value, _type) for value in values): + raise ValueError( + f"attribute_filters[{key}] has values with " + f"non-homogeneous types: {values}." + ) + if _type not in (int, float, str, bool): + raise ValueError( + f"attribute_filters[{key}] has values with " + f"invalid types: {values}." + ) + return attribute_filters diff --git a/fractal_server/app/schemas/_validators.py b/fractal_server/app/schemas/_validators.py index f9af97e4bb..b2c9d6e65e 100644 --- a/fractal_server/app/schemas/_validators.py +++ b/fractal_server/app/schemas/_validators.py @@ -2,8 +2,6 @@ from typing import Any from typing import Optional -from fractal_server.images.models import AttributeFiltersType - def valstr(attribute: str, accept_none: bool = False): """ @@ -105,49 +103,6 @@ def val(must_be_unique: Optional[list]) -> Optional[list]: return val -def validate_type_filters( - type_filters: Optional[dict[str, bool]] -) -> dict[str, bool]: - if type_filters is None: - raise ValueError("'type_filters' cannot be 'None'.") - - type_filters = valdict_keys("type_filters")(type_filters) - return type_filters - - -def validate_attribute_filters( - attribute_filters: Optional[AttributeFiltersType], -) -> AttributeFiltersType: - if attribute_filters is None: - raise ValueError("'attribute_filters' cannot be 'None'.") - - attribute_filters = valdict_keys("attribute_filters")(attribute_filters) - for key, values in attribute_filters.items(): - if values is None: - # values=None corresponds to not applying any filter for - # attribute `key` - pass - elif values == []: - # WARNING: in this case, no image can match with the current - # filter. In the future we may deprecate this possibility. - pass - else: - # values is a non-empty list, and its items must all be of the - # same scalar non-None type - _type = type(values[0]) - if not all(isinstance(value, _type) for value in values): - raise ValueError( - f"attribute_filters[{key}] has values with " - f"non-homogeneous types: {values}." - ) - if _type not in (int, float, str, bool): - raise ValueError( - f"attribute_filters[{key}] has values with " - f"invalid types: {values}." 
- ) - return attribute_filters - - def root_validate_dict_keys(cls, object: dict) -> dict: """ For each dictionary in `object.values()`, diff --git a/fractal_server/app/schemas/v2/dataset.py b/fractal_server/app/schemas/v2/dataset.py index 75a6ed6d61..6b3eb3cb95 100644 --- a/fractal_server/app/schemas/v2/dataset.py +++ b/fractal_server/app/schemas/v2/dataset.py @@ -8,9 +8,9 @@ from pydantic import root_validator from pydantic import validator +from .._filter_validators import validate_attribute_filters +from .._filter_validators import validate_type_filters from .._validators import root_validate_dict_keys -from .._validators import validate_attribute_filters -from .._validators import validate_type_filters from .._validators import valstr from .dumps import WorkflowTaskDumpV2 from .project import ProjectReadV2 diff --git a/fractal_server/app/schemas/v2/job.py b/fractal_server/app/schemas/v2/job.py index 0c9fb9006b..6625c6318f 100644 --- a/fractal_server/app/schemas/v2/job.py +++ b/fractal_server/app/schemas/v2/job.py @@ -9,8 +9,8 @@ from pydantic import validator from pydantic.types import StrictStr +from .._filter_validators import validate_attribute_filters from .._validators import root_validate_dict_keys -from .._validators import validate_attribute_filters from .._validators import valstr from .dumps import DatasetDumpV2 from .dumps import ProjectDumpV2 diff --git a/fractal_server/app/schemas/v2/workflowtask.py b/fractal_server/app/schemas/v2/workflowtask.py index 9405e5b74d..d8c2f37d15 100644 --- a/fractal_server/app/schemas/v2/workflowtask.py +++ b/fractal_server/app/schemas/v2/workflowtask.py @@ -9,9 +9,9 @@ from pydantic import root_validator from pydantic import validator +from .._filter_validators import validate_type_filters from .._validators import root_validate_dict_keys from .._validators import valdict_keys -from .._validators import validate_type_filters from .task import TaskExportV2 from .task import TaskImportV2 from .task import TaskImportV2Legacy From fac902fe28915916b4124b2369550a176a96cf5c Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Tue, 14 Jan 2025 16:53:10 +0100 Subject: [PATCH 80/99] comment and docstring [skip ci] --- fractal_server/app/routes/api/v2/submit.py | 2 ++ fractal_server/app/runner/v2/_local/__init__.py | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/fractal_server/app/routes/api/v2/submit.py b/fractal_server/app/routes/api/v2/submit.py index 6029791721..8a85eb9373 100644 --- a/fractal_server/app/routes/api/v2/submit.py +++ b/fractal_server/app/routes/api/v2/submit.py @@ -159,6 +159,8 @@ async def apply_workflow( dataset_id=dataset_id, workflow_id=workflow_id, user_email=user.email, + # The 'filters' field is not supported any more but still exists as a + # database column, therefore we manually exclude it from dumps dataset_dump=json.loads( dataset.json(exclude={"images", "history", "filters"}) ), diff --git a/fractal_server/app/runner/v2/_local/__init__.py b/fractal_server/app/runner/v2/_local/__init__.py index 501366b46c..5230b870e2 100644 --- a/fractal_server/app/runner/v2/_local/__init__.py +++ b/fractal_server/app/runner/v2/_local/__init__.py @@ -121,7 +121,6 @@ async def process_workflow( to the backend executor. This argument is present for compatibility with the standard backend interface, but is ignored in the `local` backend. 
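
# --- A sketch, not part of this docstring: how the updated backend tests in
# this series now invoke the local process_workflow, with the job-scoped
# attribute filters threaded through (earlier arguments elided, exactly as
# in the test diff above; the placeholder paths come from that test):
#
#     await process_workflow(
#         ...,
#         logger_name=None,
#         workflow_dir_local="/foo",
#         workflow_dir_remote="/bar",
#         job_attribute_filters={},  # new argument in this series
#     )
# --- end sketch
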
-    FIXME: FIXME
 
     Raises:
         TaskExecutionError: wrapper for errors raised during tasks' execution

From f2068470c4b1285c677476d7d5c2b92366fbce94 Mon Sep 17 00:00:00 2001
From: Yuri Chiucconi
Date: Tue, 14 Jan 2025 16:53:39 +0100
Subject: [PATCH 81/99] comment [skip ci]

---
 fractal_server/app/routes/api/v2/submit.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fractal_server/app/routes/api/v2/submit.py b/fractal_server/app/routes/api/v2/submit.py
index 8a85eb9373..2c6284d277 100644
--- a/fractal_server/app/routes/api/v2/submit.py
+++ b/fractal_server/app/routes/api/v2/submit.py
@@ -160,7 +160,7 @@ async def apply_workflow(
         workflow_id=workflow_id,
         user_email=user.email,
         # The 'filters' field is not supported any more but still exists as a
-        # database column, therefore we manually exclude it from dumps
+        # database column, therefore we manually exclude it from dumps.
         dataset_dump=json.loads(
             dataset.json(exclude={"images", "history", "filters"})
         ),

From 2e748131ae4bb6d500e732b4518f5631097acf0b Mon Sep 17 00:00:00 2001
From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com>
Date: Wed, 15 Jan 2025 09:46:05 +0100
Subject: [PATCH 82/99] Add some validation tests

---
 tests/v2/01_schemas/test_schemas_dataset.py | 46 +++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/tests/v2/01_schemas/test_schemas_dataset.py b/tests/v2/01_schemas/test_schemas_dataset.py
index df0a3b20e8..22e9db0423 100644
--- a/tests/v2/01_schemas/test_schemas_dataset.py
+++ b/tests/v2/01_schemas/test_schemas_dataset.py
@@ -1,4 +1,5 @@
 import pytest
+from devtools import debug
 from pydantic import ValidationError
 
 from fractal_server.app.models.v2 import DatasetV2
@@ -10,6 +11,51 @@
 from fractal_server.urls import normalize_url
 
 
+VALID_ATTRIBUTE_FILTERS = (
+    {},
+    {"key1": []},
+    {"key1": ["A"]},
+    {"key1": ["A", "B"]},
+    {"key1": [1, 2]},
+    {"key1": [True, False]},
+    {"key1": [1.5, -1.2]},
+    {"key1": None},
+    {"key1": [1, 2], "key2": ["A", "B"]},
+)
+
+INVALID_ATTRIBUTE_FILTERS = (
+    {True: ["value"]},  # non-string key
+    {1: ["value"]},  # non-string key
+    {"key1": [1], " key1": [1]},  # non-unique normalized keys
+    {"key1": [None]},  # None value
+    # {"key1": [1, True]},  # non-homogeneous types - FIXME unsupported
+    {"key1": [1, 1.0]},  # non-homogeneous types - FIXME unsupported
+    {"key1": [1, "a"]},  # non-homogeneous types
+    {"key1": [[1, 2], [1, 2]]},  # non-scalar type
+)
+
+
+@pytest.mark.parametrize("attribute_filters", VALID_ATTRIBUTE_FILTERS)
+def test_valid_attribute_filters(attribute_filters: dict):
+    DatasetCreateV2(
+        name="x",
+        zarr_dir="/x",
+        attribute_filters=attribute_filters,
+    )
+
+
+@pytest.mark.parametrize("attribute_filters", INVALID_ATTRIBUTE_FILTERS)
+def test_invalid_attribute_filters(attribute_filters: dict):
+    debug(attribute_filters)
+    with pytest.raises(ValueError) as e:
+        DatasetCreateV2(
+            name="x",
+            zarr_dir="/x",
+            attribute_filters=attribute_filters,
+        )
+    debug(e.value)
+
+
 async def test_schemas_dataset_v2():
     project = ProjectV2(id=1, name="project")
 

From 27d03b9783467b3240bb50a2950172229733ba31 Mon Sep 17 00:00:00 2001
From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com>
Date: Wed, 15 Jan 2025 09:47:53 +0100
Subject: [PATCH 83/99] Extend some validation tests

---
 tests/v2/01_schemas/test_schemas_dataset.py | 21 ++++-----------------
 1 file changed, 4 insertions(+), 17 deletions(-)

diff --git a/tests/v2/01_schemas/test_schemas_dataset.py b/tests/v2/01_schemas/test_schemas_dataset.py
index 22e9db0423..08a1d7c7c5 100644
--- a/tests/v2/01_schemas/test_schemas_dataset.py
+++ b/tests/v2/01_schemas/test_schemas_dataset.py
@@ -26,12 +26,15 @@
 INVALID_ATTRIBUTE_FILTERS = (
     {True: ["value"]},  # non-string key
     {1: ["value"]},  # non-string key
+    {"key1": 1},  # not a list
+    {"key1": True},  # not a list
+    {"key1": "something"},  # not a list
     {"key1": [1], " key1": [1]},  # non-unique normalized keys
     {"key1": [None]},  # None value
-    # {"key1": [1, True]},  # non-homogeneous types - FIXME unsupported
     {"key1": [1, 1.0]},  # non-homogeneous types - FIXME unsupported
     {"key1": [1, "a"]},  # non-homogeneous types
     {"key1": [[1, 2], [1, 2]]},  # non-scalar type
+    # {"key1": [1, True]},  # non-homogeneous types - FIXME unsupported
 )
 
 
@@ -60,21 +63,6 @@
 async def test_schemas_dataset_v2():
     project = ProjectV2(id=1, name="project")
 
-    # Create
-    with pytest.raises(ValidationError):
-        # Not a list
-        DatasetCreateV2(
-            name="name",
-            zarr_dir="/zarr",
-            attribute_filters={"x": 1},
-        )
-    with pytest.raises(ValidationError):
-        # Non-scalar attribute
-        DatasetCreateV2(
-            name="name",
-            zarr_dir="/zarr",
-            attribute_filters={"x": [{1, 0}]},
-        )
     with pytest.raises(ValidationError):
         # Non-boolean types
         DatasetCreateV2(name="name", zarr_dir="/zarr", type_filters={"a": "b"})
@@ -83,7 +71,6 @@
 
     dataset_create = DatasetCreateV2(
         name="name",
-        attribute_filters={"x": [10, 11]},
         zarr_dir="/tmp/",
     )
     assert dataset_create.zarr_dir == normalize_url(dataset_create.zarr_dir)

From 337be2c500038652280a623824f90f253421b142 Mon Sep 17 00:00:00 2001
From: Yuri Chiucconi
Date: Wed, 15 Jan 2025 10:19:29 +0100
Subject: [PATCH 84/99] remove outdated comment [skip ci]

---
 tests/v2/01_schemas/test_schemas_dataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/v2/01_schemas/test_schemas_dataset.py b/tests/v2/01_schemas/test_schemas_dataset.py
index 08a1d7c7c5..ae0b647eb2 100644
--- a/tests/v2/01_schemas/test_schemas_dataset.py
+++ b/tests/v2/01_schemas/test_schemas_dataset.py
@@ -31,7 +31,7 @@
     {"key1": "something"},  # not a list
     {"key1": [1], " key1": [1]},  # non-unique normalized keys
     {"key1": [None]},  # None value
-    {"key1": [1, 1.0]},  # non-homogeneous types - FIXME unsupported
+    {"key1": [1, 1.0]},  # non-homogeneous types
     {"key1": [1, "a"]},  # non-homogeneous types
     {"key1": [[1, 2], [1, 2]]},  # non-scalar type
     # {"key1": [1, True]},  # non-homogeneous types - FIXME unsupported

From 4cda1f13e9832abd7dc7acd4101df374876f3754 Mon Sep 17 00:00:00 2001
From: Yuri Chiucconi
Date: Wed, 15 Jan 2025 10:50:18 +0100
Subject: [PATCH 85/99] unit test merge_outputs

---
 .../04_runner/test_unit_aux_functions_v2.py | 32 +++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/tests/v2/04_runner/test_unit_aux_functions_v2.py b/tests/v2/04_runner/test_unit_aux_functions_v2.py
index f3407fe560..18ec8e118b 100644
--- a/tests/v2/04_runner/test_unit_aux_functions_v2.py
+++ b/tests/v2/04_runner/test_unit_aux_functions_v2.py
@@ -4,6 +4,7 @@
 
 from fractal_server.app.runner.exceptions import JobExecutionError
 from fractal_server.app.runner.v2.deduplicate_list import deduplicate_list
+from fractal_server.app.runner.v2.merge_outputs import merge_outputs
 from fractal_server.app.runner.v2.runner_functions import (
     _cast_and_validate_InitTaskOutput,
 )
@@ -84,3 +85,34 @@ def test_cast_and_validate_functions():
         )
     with pytest.raises(JobExecutionError):
         _cast_and_validate_InitTaskOutput(dict(invalid=True))
+
+
+def test_merge_outputs():
+
+    # 1
+    task_outputs = [
+        TaskOutput(type_filters={"a": True}),
TaskOutput(type_filters={"a": True}), + ] + merged = merge_outputs(task_outputs) + assert merged.type_filters == {"a": True} + + # 2 + task_outputs = [ + TaskOutput(type_filters={"a": True}), + TaskOutput(type_filters={"b": True}), + ] + with pytest.raises(ValueError): + merge_outputs(task_outputs) + + # 3 + task_outputs = [ + TaskOutput(type_filters={"a": True}), + TaskOutput(type_filters={"a": False}), + ] + with pytest.raises(ValueError): + merge_outputs(task_outputs) + + # 4 + merged = merge_outputs([]) + assert merged == TaskOutput() From c18194c3e97c75997cff8a9a5c19fa0625546a52 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Wed, 15 Jan 2025 10:50:50 +0100 Subject: [PATCH 86/99] refactor merge_outputs --- fractal_server/app/runner/v2/merge_outputs.py | 20 +++++++------------ 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/fractal_server/app/runner/v2/merge_outputs.py b/fractal_server/app/runner/v2/merge_outputs.py index 6128eceed3..428848ff89 100644 --- a/fractal_server/app/runner/v2/merge_outputs.py +++ b/fractal_server/app/runner/v2/merge_outputs.py @@ -1,5 +1,3 @@ -from copy import copy - from fractal_server.app.runner.v2.deduplicate_list import deduplicate_list from fractal_server.app.runner.v2.task_interface import TaskOutput @@ -8,35 +6,31 @@ def merge_outputs(task_outputs: list[TaskOutput]) -> TaskOutput: final_image_list_updates = [] final_image_list_removals = [] - last_new_type_filters = None - for ind, task_output in enumerate(task_outputs): + for task_output in task_outputs: final_image_list_updates.extend(task_output.image_list_updates) final_image_list_removals.extend(task_output.image_list_removals) - # Check that all filters are the same - current_new_type_filters = task_output.type_filters - if ind == 0: - last_new_type_filters = copy(current_new_type_filters) - if current_new_type_filters != last_new_type_filters: + # Check that all type_filters are the same + if task_output.type_filters != task_outputs[0].type_filters: raise ValueError( - f"{current_new_type_filters=} but {last_new_type_filters=}" + f"{task_output.type_filters=} " + f"but {task_outputs[0].type_filters=}" ) - last_new_type_filters = copy(current_new_type_filters) final_image_list_updates = deduplicate_list(final_image_list_updates) - if last_new_type_filters is None: + if task_outputs: final_output = TaskOutput( image_list_updates=final_image_list_updates, image_list_removals=final_image_list_removals, + type_filters=task_outputs[0].type_filters, ) else: final_output = TaskOutput( image_list_updates=final_image_list_updates, image_list_removals=final_image_list_removals, - type_filters=last_new_type_filters, ) return final_output From f1d067f5d1cfcdb39e6d46feddcedf56f3b6ebe0 Mon Sep 17 00:00:00 2001 From: Yuri Chiucconi Date: Wed, 15 Jan 2025 11:03:47 +0100 Subject: [PATCH 87/99] improve unit test --- .../04_runner/test_unit_aux_functions_v2.py | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/v2/04_runner/test_unit_aux_functions_v2.py b/tests/v2/04_runner/test_unit_aux_functions_v2.py index 18ec8e118b..495edab7be 100644 --- a/tests/v2/04_runner/test_unit_aux_functions_v2.py +++ b/tests/v2/04_runner/test_unit_aux_functions_v2.py @@ -13,6 +13,7 @@ ) from fractal_server.app.runner.v2.task_interface import InitArgsModel from fractal_server.app.runner.v2.task_interface import TaskOutput +from fractal_server.images import SingleImageTaskOutput def test_deduplicate_list_of_dicts(): @@ -116,3 +117,28 @@ def test_merge_outputs(): # 4 merged = merge_outputs([]) 
     assert merged == TaskOutput()
+
+    # 5
+    task_outputs = [
+        TaskOutput(
+            image_list_updates=[
+                SingleImageTaskOutput(zarr_url="/a"),
+                SingleImageTaskOutput(zarr_url="/b"),
+            ],
+            image_list_removals=["/x", "/y", "/z"],
+        ),
+        TaskOutput(
+            image_list_updates=[
+                SingleImageTaskOutput(zarr_url="/c"),
+                SingleImageTaskOutput(zarr_url="/a"),
+            ],
+            image_list_removals=["/x", "/y", "/z"],
+        ),
+    ]
+    merged = merge_outputs(task_outputs)
+    assert merged.image_list_updates == [
+        SingleImageTaskOutput(zarr_url="/a"),
+        SingleImageTaskOutput(zarr_url="/b"),
+        SingleImageTaskOutput(zarr_url="/c"),
+    ]
+    assert merged.image_list_removals == ["/x", "/y", "/z", "/x", "/y", "/z"]

From 6a67061ee40bd68f9bcb40beb9974e416f3c6f19 Mon Sep 17 00:00:00 2001
From: Yuri Chiucconi
Date: Wed, 15 Jan 2025 11:22:13 +0100
Subject: [PATCH 88/99] test update_legacy_filters

---
 .../04_runner/test_unit_aux_functions_v2.py | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/tests/v2/04_runner/test_unit_aux_functions_v2.py b/tests/v2/04_runner/test_unit_aux_functions_v2.py
index 495edab7be..60f0347155 100644
--- a/tests/v2/04_runner/test_unit_aux_functions_v2.py
+++ b/tests/v2/04_runner/test_unit_aux_functions_v2.py
@@ -142,3 +142,24 @@ def test_merge_outputs():
         SingleImageTaskOutput(zarr_url="/c"),
     ]
     assert merged.image_list_removals == ["/x", "/y", "/z", "/x", "/y", "/z"]
+
+
+def test_update_legacy_filters():
+
+    legacy_filters = {"types": {"a": True}}
+
+    # 1
+    output = TaskOutput(filters=legacy_filters)
+    assert output.filters is None
+    assert output.type_filters == legacy_filters["types"]
+
+    # 2
+    output = TaskOutput(type_filters=legacy_filters["types"])
+    assert output.filters is None
+    assert output.type_filters == legacy_filters["types"]
+
+    # 3
+    with pytest.raises(ValidationError):
+        TaskOutput(
+            filters=legacy_filters, type_filters=legacy_filters["types"]
+        )

From ca2aba1799dd2073e5e83832f60caa0da7addc82 Mon Sep 17 00:00:00 2001
From: Yuri Chiucconi
Date: Wed, 15 Jan 2025 14:37:33 +0100
Subject: [PATCH 89/99] simplify check

---
 fractal_server/app/runner/v2/merge_outputs.py | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/fractal_server/app/runner/v2/merge_outputs.py b/fractal_server/app/runner/v2/merge_outputs.py
index 428848ff89..9c89c2d02e 100644
--- a/fractal_server/app/runner/v2/merge_outputs.py
+++ b/fractal_server/app/runner/v2/merge_outputs.py
@@ -4,6 +4,9 @@
 
 def merge_outputs(task_outputs: list[TaskOutput]) -> TaskOutput:
 
+    if len(task_outputs) == 0:
+        return TaskOutput()
+
     final_image_list_updates = []
     final_image_list_removals = []
 
@@ -21,16 +24,10 @@ def merge_outputs(task_outputs: list[TaskOutput]) -> TaskOutput:
 
     final_image_list_updates = deduplicate_list(final_image_list_updates)
 
-    if task_outputs:
-        final_output = TaskOutput(
-            image_list_updates=final_image_list_updates,
-            image_list_removals=final_image_list_removals,
-            type_filters=task_outputs[0].type_filters,
-        )
-    else:
-        final_output = TaskOutput(
-            image_list_updates=final_image_list_updates,
-            image_list_removals=final_image_list_removals,
-        )
+    final_output = TaskOutput(
+        image_list_updates=final_image_list_updates,
+        image_list_removals=final_image_list_removals,
+        type_filters=task_outputs[0].type_filters,
+    )
 
     return final_output

From ef05293211cec14bff73636fd0cd1596da9090be Mon Sep 17 00:00:00 2001
From: Yuri Chiucconi
Date: Wed, 15 Jan 2025 14:41:40 +0100
Subject: [PATCH 90/99] deduplicate image_list_removals

---
 fractal_server/app/runner/v2/merge_outputs.py    | 1 +
 tests/v2/04_runner/test_unit_aux_functions_v2.py | 9 +++++----
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/fractal_server/app/runner/v2/merge_outputs.py b/fractal_server/app/runner/v2/merge_outputs.py
index 9c89c2d02e..f0b0dad35e 100644
--- a/fractal_server/app/runner/v2/merge_outputs.py
+++ b/fractal_server/app/runner/v2/merge_outputs.py
@@ -23,6 +23,7 @@ def merge_outputs(task_outputs: list[TaskOutput]) -> TaskOutput:
             )
 
     final_image_list_updates = deduplicate_list(final_image_list_updates)
+    final_image_list_removals = list(set(final_image_list_removals))
 
     final_output = TaskOutput(
         image_list_updates=final_image_list_updates,
diff --git a/tests/v2/04_runner/test_unit_aux_functions_v2.py b/tests/v2/04_runner/test_unit_aux_functions_v2.py
index 60f0347155..43ef767774 100644
--- a/tests/v2/04_runner/test_unit_aux_functions_v2.py
+++ b/tests/v2/04_runner/test_unit_aux_functions_v2.py
@@ -132,16 +132,17 @@ def test_merge_outputs():
                 SingleImageTaskOutput(zarr_url="/c"),
                 SingleImageTaskOutput(zarr_url="/a"),
             ],
-            image_list_removals=["/x", "/y", "/z"],
+            image_list_removals=["/x", "/w", "/z"],
         ),
     ]
     merged = merge_outputs(task_outputs)
-    assert merged.image_list_updates == [
+    for output in (
         SingleImageTaskOutput(zarr_url="/a"),
         SingleImageTaskOutput(zarr_url="/b"),
         SingleImageTaskOutput(zarr_url="/c"),
-    ]
-    assert merged.image_list_removals == ["/x", "/y", "/z", "/x", "/y", "/z"]
+    ):
+        assert output in merged.image_list_updates
+    assert set(merged.image_list_removals) == set(["/x", "/y", "/z", "/w"])

From e77d32e722c5e46489405ce803479cd2ddee2535 Mon Sep 17 00:00:00 2001
From: Yuri Chiucconi
Date: Wed, 15 Jan 2025 14:43:35 +0100
Subject: [PATCH 91/99] assert correct order

---
 tests/v2/04_runner/test_unit_aux_functions_v2.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/v2/04_runner/test_unit_aux_functions_v2.py b/tests/v2/04_runner/test_unit_aux_functions_v2.py
index 43ef767774..6dce08031f 100644
--- a/tests/v2/04_runner/test_unit_aux_functions_v2.py
+++ b/tests/v2/04_runner/test_unit_aux_functions_v2.py
@@ -136,12 +136,11 @@ def test_merge_outputs():
         ),
     ]
     merged = merge_outputs(task_outputs)
-    for output in (
+    assert merged.image_list_updates == [
         SingleImageTaskOutput(zarr_url="/a"),
         SingleImageTaskOutput(zarr_url="/b"),
         SingleImageTaskOutput(zarr_url="/c"),
-    ):
-        assert output in merged.image_list_updates
+    ]
     assert set(merged.image_list_removals) == set(["/x", "/y", "/z", "/w"])

From d23efb0534cbccb0bd2b2f5a4b17824faa78c663 Mon Sep 17 00:00:00 2001
From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com>
Date: Wed, 15 Jan 2025 14:44:03 +0100
Subject: [PATCH 92/99] Comment [skip ci]

---
 fractal_server/app/runner/v2/merge_outputs.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fractal_server/app/runner/v2/merge_outputs.py b/fractal_server/app/runner/v2/merge_outputs.py
index f0b0dad35e..c1a6cbe2cf 100644
--- a/fractal_server/app/runner/v2/merge_outputs.py
+++ b/fractal_server/app/runner/v2/merge_outputs.py
@@ -22,6 +22,7 @@ def merge_outputs(task_outputs: list[TaskOutput]) -> TaskOutput:
                 f"but {task_outputs[0].type_filters=}"
             )
 
+    # Note: the ordering of `image_list_removals` is not guaranteed
     final_image_list_updates = deduplicate_list(final_image_list_updates)
     final_image_list_removals = list(set(final_image_list_removals))
 

From 80801f910e34e86bed33141a29c92403dda63ad0 Mon Sep 17 00:00:00 2001
From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com>
Date: Wed, 15 Jan 2025 14:45:44 +0100
Subject: [PATCH 93/99] Add case to unit test

---
 tests/v2/04_runner/test_unit_aux_functions_v2.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/v2/04_runner/test_unit_aux_functions_v2.py b/tests/v2/04_runner/test_unit_aux_functions_v2.py
index 6dce08031f..4cf676b006 100644
--- a/tests/v2/04_runner/test_unit_aux_functions_v2.py
+++ b/tests/v2/04_runner/test_unit_aux_functions_v2.py
@@ -163,3 +163,8 @@ def test_update_legacy_filters():
         TaskOutput(
             filters=legacy_filters, type_filters=legacy_filters["types"]
         )
+
+    # 4
+    output = TaskOutput()
+    assert output.filters is None
+    assert output.type_filters == {}

From 87f4fc6571a2f14024a9f65bdc8816aff3713cd8 Mon Sep 17 00:00:00 2001
From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com>
Date: Wed, 15 Jan 2025 14:49:56 +0100
Subject: [PATCH 94/99] Only use `job_attribute_filters` in runner - ref #2155

---
 fractal_server/app/runner/v2/runner.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/fractal_server/app/runner/v2/runner.py b/fractal_server/app/runner/v2/runner.py
index f688dd9e17..25fe1f80aa 100644
--- a/fractal_server/app/runner/v2/runner.py
+++ b/fractal_server/app/runner/v2/runner.py
@@ -50,7 +50,6 @@ def execute_tasks_v2(
     zarr_dir = dataset.zarr_dir
     tmp_images = deepcopy(dataset.images)
     tmp_type_filters = deepcopy(dataset.type_filters)
-    tmp_attribute_filters = deepcopy(dataset.attribute_filters)
 
     for wftask in wf_task_list:
         task = wftask.task
@@ -60,14 +59,12 @@ def execute_tasks_v2(
         # PRE TASK EXECUTION
 
         # Get filtered images
-        pre_attribute_filters = deepcopy(tmp_attribute_filters)
-        pre_attribute_filters.update(job_attribute_filters)
         pre_type_filters = copy(tmp_type_filters)
         pre_type_filters.update(wftask.type_filters)
         filtered_images = filter_image_list(
             images=tmp_images,
             type_filters=pre_type_filters,
-            attribute_filters=pre_attribute_filters,
+            attribute_filters=job_attribute_filters,
         )
         # Verify that filtered images comply with task input_types
         for image in filtered_images:
@@ -297,11 +294,9 @@ def execute_tasks_v2(
         with next(get_sync_db()) as db:
             db_dataset = db.get(DatasetV2, dataset.id)
             db_dataset.history[-1]["status"] = WorkflowTaskStatusTypeV2.DONE
-            db_dataset.attribute_filters = tmp_attribute_filters
             db_dataset.type_filters = tmp_type_filters
             db_dataset.images = tmp_images
             for attribute_name in [
-                "attribute_filters",
                 "type_filters",
                 "history",
                 "images",

From 837ed30ae05ffd04fcacc76483ff9226224678bd Mon Sep 17 00:00:00 2001
From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com>
Date: Wed, 15 Jan 2025 14:56:29 +0100
Subject: [PATCH 95/99] Extend unit test

---
 tests/v2/03_api/test_api_dataset.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/v2/03_api/test_api_dataset.py b/tests/v2/03_api/test_api_dataset.py
index 99fef4d9e4..8ddcc68581 100644
--- a/tests/v2/03_api/test_api_dataset.py
+++ b/tests/v2/03_api/test_api_dataset.py
@@ -504,6 +504,7 @@ async def test_dataset_export(
     assert res_dataset["images"] == IMAGES
     assert res_dataset["attribute_filters"] == dict()
     assert res_dataset["type_filters"] == dict()
+    assert "filters" not in res_dataset.keys()
 
 
 async def test_dataset_import(

From 898c561170f79656472a480f2ded278003db1ca7 Mon Sep 17 00:00:00 2001
From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com>
Date: Wed, 15 Jan 2025 15:06:12 +0100
Subject: [PATCH 96/99] Rename schema

---
 fractal_server/images/models.py         | 6 +++---
 tests/v2/05_images/test_image_models.py | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/fractal_server/images/models.py b/fractal_server/images/models.py
index 1c1863f1bb..4536d4f495 100644
--- a/fractal_server/images/models.py
+++ b/fractal_server/images/models.py
@@ -12,7 +12,7 @@
 AttributeFiltersType = dict[str, Optional[list[Any]]]
 
 
-class SingleImageBase(BaseModel):
+class _SingleImageBase(BaseModel):
     """
     Base for SingleImage and SingleImageTaskOutput.
 
@@ -45,7 +45,7 @@ def normalize_orig(cls, v: Optional[str]) -> Optional[str]:
             return normalize_url(v)
 
 
-class SingleImageTaskOutput(SingleImageBase):
+class SingleImageTaskOutput(_SingleImageBase):
     """
     `SingleImageBase`, with scalar `attributes` values (`None` included).
     """
@@ -64,7 +64,7 @@ def validate_attributes(
         return v
 
 
-class SingleImage(SingleImageBase):
+class SingleImage(_SingleImageBase):
     """
     `SingleImageBase`, with scalar `attributes` values (`None` excluded).
     """
diff --git a/tests/v2/05_images/test_image_models.py b/tests/v2/05_images/test_image_models.py
index 7660ada410..40e42189cd 100644
--- a/tests/v2/05_images/test_image_models.py
+++ b/tests/v2/05_images/test_image_models.py
@@ -2,19 +2,19 @@
 
 from pydantic import ValidationError
 
+from fractal_server.images.models import _SingleImageBase
 from fractal_server.images.models import SingleImage
-from fractal_server.images.models import SingleImageBase
 from fractal_server.images.models import SingleImageTaskOutput
 from fractal_server.images.models import SingleImageUpdate
 
 T = TypeVar("T")
 
 
-def image_ok(model: T = SingleImageBase, **kwargs) -> T:
+def image_ok(model: T = _SingleImageBase, **kwargs) -> T:
     return model(**kwargs)
 
 
-def image_fail(model: T = SingleImageBase, **kwargs) -> str:
+def image_fail(model: T = _SingleImageBase, **kwargs) -> str:
     try:
         model(**kwargs)
         raise AssertionError(f"{model=}, {kwargs=}")

From 02d5526fd09f915227e2a6eb80faf522389ca014 Mon Sep 17 00:00:00 2001
From: Yuri Chiucconi
Date: Wed, 15 Jan 2025 15:32:03 +0100
Subject: [PATCH 97/99] remove default from image_ok and image_fail

---
 tests/v2/05_images/test_image_models.py | 72 +++++++++++++++----------
 1 file changed, 44 insertions(+), 28 deletions(-)

diff --git a/tests/v2/05_images/test_image_models.py b/tests/v2/05_images/test_image_models.py
index 40e42189cd..e52d338754 100644
--- a/tests/v2/05_images/test_image_models.py
+++ b/tests/v2/05_images/test_image_models.py
@@ -10,11 +10,11 @@
 T = TypeVar("T")
 
 
-def image_ok(model: T = _SingleImageBase, **kwargs) -> T:
+def image_ok(model: T, **kwargs) -> T:
     return model(**kwargs)
 
 
-def image_fail(model: T = _SingleImageBase, **kwargs) -> str:
+def image_fail(model: T, **kwargs) -> str:
     try:
         model(**kwargs)
         raise AssertionError(f"{model=}, {kwargs=}")
@@ -24,26 +24,28 @@ def image_fail(model: T = _SingleImageBase, **kwargs) -> str:
 
 def test_SingleImageBase():
 
-    image_fail()
+    image_fail(model=_SingleImageBase)
 
     # zarr_url
-    image = image_ok(zarr_url="/x")
+    image = image_ok(model=_SingleImageBase, zarr_url="/x")
     assert image.dict() == {
         "zarr_url": "/x",
         "origin": None,
         "attributes": {},
         "types": {},
     }
-    image_fail(zarr_url="x")  # see 'test_url_normalization'
-    image_fail(zarr_url=None)
+    image_fail(
+        model=_SingleImageBase, zarr_url="x"
+    )  # see 'test_url_normalization'
+    image_fail(model=_SingleImageBase, zarr_url=None)
 
     # origin
-    image = image_ok(zarr_url="/x", origin="/y")
+    image = image_ok(model=_SingleImageBase, zarr_url="/x", origin="/y")
     assert image.origin == "/y"
-    image = image_ok(zarr_url="/x", origin=None)
+    image = image_ok(model=_SingleImageBase, zarr_url="/x", origin=None)
     assert image.origin is None
-    image_fail(zarr_url="/x", origin="y")
-    image_fail(origin="/y")
+    image_fail(model=_SingleImageBase, zarr_url="/x", origin="y")
+    image_fail(model=_SingleImageBase, origin="/y")
 
     # attributes
     valid_attributes = {
         "int": 1,
@@ -57,43 +59,53 @@ def test_SingleImageBase():
         "function": lambda x: x,
         "type": int,
     }  # Any
-    image = image_ok(zarr_url="/x", attributes=valid_attributes)
+    image = image_ok(
+        model=_SingleImageBase, zarr_url="/x", attributes=valid_attributes
+    )
     assert image.attributes == valid_attributes
     invalid_attributes = {
         "repeated": 1,
         " repeated ": 2,
     }
-    image_fail(zarr_url="/x", attributes=invalid_attributes)
+    image_fail(
+        model=_SingleImageBase, zarr_url="/x", attributes=invalid_attributes
+    )
 
     # types
     valid_types = {"y": True, "n": False}  # only booleans
-    image = image_ok(zarr_url="/x", types=valid_types)
+    image = image_ok(model=_SingleImageBase, zarr_url="/x", types=valid_types)
     assert image.types == valid_types
-    image_fail(zarr_url="/x", types={"a": "not a bool"})
-    image_fail(zarr_url="/x", types={"a": True, " a": True})
-    image_ok(zarr_url="/x", types={1: True})
+    image_fail(
+        model=_SingleImageBase, zarr_url="/x", types={"a": "not a bool"}
+    )
+    image_fail(
+        model=_SingleImageBase, zarr_url="/x", types={"a": True, " a": True}
+    )
+    image_ok(model=_SingleImageBase, zarr_url="/x", types={1: True})
 
 
 def test_url_normalization():
-    image = image_ok(zarr_url="/valid/url")
+    image = image_ok(model=_SingleImageBase, zarr_url="/valid/url")
     assert image.zarr_url == "/valid/url"
-    image = image_ok(zarr_url="/remove/slash/")
+    image = image_ok(model=_SingleImageBase, zarr_url="/remove/slash/")
     assert image.zarr_url == "/remove/slash"
-    e = image_fail(zarr_url="s3/foo")
+    e = image_fail(model=_SingleImageBase, zarr_url="s3/foo")
     assert "S3 handling" in e
-    e = image_fail(zarr_url="https://foo.bar")
+    e = image_fail(model=_SingleImageBase, zarr_url="https://foo.bar")
     assert "URLs must begin" in e
-    image_ok(zarr_url="/x", origin=None)
-    image_ok(zarr_url="/x", origin="/y")
-    image = image_ok(zarr_url="/x", origin="/y///")
+    image_ok(model=_SingleImageBase, zarr_url="/x", origin=None)
+    image_ok(model=_SingleImageBase, zarr_url="/x", origin="/y")
+    image = image_ok(model=_SingleImageBase, zarr_url="/x", origin="/y///")
     assert image.origin == "/y"
-    e = image_fail(zarr_url="/x", origin="s3/foo")
+    e = image_fail(model=_SingleImageBase, zarr_url="/x", origin="s3/foo")
     assert "S3 handling" in e
-    e = image_fail(zarr_url="/x", origin="https://foo.bar")
+    e = image_fail(
+        model=_SingleImageBase, zarr_url="/x", origin="https://foo.bar"
+    )
     assert "URLs must begin" in e
 
@@ -224,6 +236,10 @@ def test_SingleImageUpdate():
     valid_types = {"y": True, "n": False}  # only booleans
     image = image_ok(model=SingleImageUpdate, zarr_url="/x", types=valid_types)
     assert image.types == valid_types
-    image_fail(zarr_url="/x", types={"a": "not a bool"})
-    image_fail(zarr_url="/x", types={"a": True, " a": True})
-    image_ok(zarr_url="/x", types={1: True})
+    image_fail(
+        model=SingleImageUpdate, zarr_url="/x", types={"a": "not a bool"}
+    )
+    image_fail(
+        model=SingleImageUpdate, zarr_url="/x", types={"a": True, " a": True}
+    )
+    image_ok(model=SingleImageUpdate, zarr_url="/x", types={1: True})

From 45d573b836abaebe9376b30cbc06f3f78126116b Mon Sep 17 00:00:00 2001
From: Yuri Chiucconi
Date: Wed, 15 Jan 2025 15:33:53 +0100
Subject: [PATCH 98/99] look for non-existing image

---
 tests/v2/05_images/test_unit_image_tools.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/v2/05_images/test_unit_image_tools.py b/tests/v2/05_images/test_unit_image_tools.py
index 160f442c13..86a0131530 100644
--- a/tests/v2/05_images/test_unit_image_tools.py
+++ b/tests/v2/05_images/test_unit_image_tools.py
@@ -20,6 +20,8 @@ def test_find_image_by_zarr_url():
         "index": 2,
         "image": {"zarr_url": "/z"},
     }
+    res = find_image_by_zarr_url(zarr_url="/k", images=images)
+    assert res is None
 
 
 def test_match_filter():

From 0df4d4a61fb8b7ed19a69301b8542b0579844b90 Mon Sep 17 00:00:00 2001
From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com>
Date: Wed, 15 Jan 2025 15:42:07 +0100
Subject: [PATCH 99/99] CHANGELOG [skip ci]

---
 CHANGELOG.md | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index dffa68557a..b74deda546 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,11 +1,25 @@
 **Note**: Numbers like (\#1234) point to closed Pull Requests on the fractal-server repository.
 
-# 2.11.0 (unreleased)
+> WARNING: Notes for 2.11.0 prereleases are currently separated, and they should be merged at a later stage.
+
+# 2.11.0a1
+
+> Note: This release requires running a `fractalctl update-db-data`
+
+(changes that affect API, database lifecycle, runner, ...)
+
+* Split filters into attribute and types (\#2168).
+* Support multiple options for attribute filters (\#2168).
+* Deprecate support for attribute filters in workflowtask (\#2168).
+* Introduce support for attribute filters in jobs (\#2168).
+* Data migration script (\#2168).
+
+# 2.11.0a0
 
-* Runner
-  * Integrate database write access in runner component (\#2169).
 * API:
   * Update and simplify `/api/v2/project/{project_id}/status/` (\#2169).
+* Runner
+  * Integrate database write access in runner component (\#2169).
 
 # 2.10.5
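
As a quick illustration of the merge semantics pinned down by patches 85-92 above (after patch 90, `image_list_removals` is deduplicated with no guaranteed ordering, and all parallel outputs must agree on `type_filters`), here is a minimal runnable sketch. It uses only imports that appear in the test diffs; the `"3D"` type key is an arbitrary illustrative choice.

    from fractal_server.app.runner.v2.merge_outputs import merge_outputs
    from fractal_server.app.runner.v2.task_interface import TaskOutput

    # Two parallel units report overlapping removals and identical type filters.
    merged = merge_outputs(
        [
            TaskOutput(image_list_removals=["/a"], type_filters={"3D": False}),
            TaskOutput(image_list_removals=["/a", "/b"], type_filters={"3D": False}),
        ]
    )
    assert set(merged.image_list_removals) == {"/a", "/b"}  # deduplicated
    assert merged.type_filters == {"3D": False}

Per the tests above, mismatched `type_filters` raise a `ValueError`, and an empty input list yields a plain `TaskOutput()`.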
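The 2.11.0a1 changelog entries summarize the #2168 split of the legacy `filters` into `type_filters` (`dict[str, bool]`) and `attribute_filters` (`AttributeFiltersType = dict[str, Optional[list[Any]]]`, see the models.py diff in patch 96), with attribute filters now supplied per job rather than stored on the dataset during execution (patch 94). The sketch below illustrates one plausible reading of the matching rules for these shapes; it is only an illustration, since `match_filter`/`filter_image_list` in `fractal_server.images.tools` remain the canonical implementation, and the handling of `None` values and missing types here is an assumption.

    from typing import Any, Optional

    AttributeFiltersType = dict[str, Optional[list[Any]]]

    def image_matches(
        image: dict,
        type_filters: dict[str, bool],
        attribute_filters: AttributeFiltersType,
    ) -> bool:
        # Types: assume a type missing from image["types"] counts as False.
        for key, required in type_filters.items():
            if image["types"].get(key, False) != required:
                return False
        # Attributes: assume a None value disables filtering on that key;
        # otherwise the image attribute must be one of the allowed values.
        for key, allowed in attribute_filters.items():
            if allowed is None:
                continue
            if image["attributes"].get(key) not in allowed:
                return False
        return True

    # Hypothetical image record with the SingleImage shape tested above.
    image = {
        "zarr_url": "/x",
        "attributes": {"well": "A01", "round": 1},
        "types": {"3D": True},
    }
    assert image_matches(image, {"3D": True}, {"well": ["A01", "A02"], "round": None})
    assert not image_matches(image, {"3D": False}, {})

Allowing a list of values per attribute key is what the changelog calls "multiple options for attribute filters": an image passes if its attribute equals any listed value.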