From 30d82f7b0de21278f62639a755fdd2f619d06297 Mon Sep 17 00:00:00 2001 From: mfranzon <43796979+mfranzon@users.noreply.github.com> Date: Tue, 7 Jan 2025 15:50:59 +0100 Subject: [PATCH 01/49] update runner introducing db connections and removing local files for history, filters, images objects --- fractal_server/app/routes/api/v2/status.py | 21 +-- fractal_server/app/runner/v2/__init__.py | 80 +++++------ .../app/runner/v2/handle_failed_job.py | 135 +++++------------- fractal_server/app/runner/v2/runner.py | 27 ++-- tests/v2/03_api/test_api_status.py | 35 +---- 5 files changed, 107 insertions(+), 191 deletions(-) diff --git a/fractal_server/app/routes/api/v2/status.py b/fractal_server/app/routes/api/v2/status.py index e31769dbf2..8b40e8134f 100644 --- a/fractal_server/app/routes/api/v2/status.py +++ b/fractal_server/app/routes/api/v2/status.py @@ -1,5 +1,3 @@ -import json -from pathlib import Path from typing import Optional from fastapi import APIRouter @@ -10,6 +8,7 @@ from .....logger import set_logger from ....db import AsyncSession from ....db import get_async_db +from ....models.v2 import DatasetV2 from ....models.v2 import JobV2 from ....schemas.v2.dataset import WorkflowTaskStatusTypeV2 from ....schemas.v2.status import StatusReadV2 @@ -18,7 +17,6 @@ from ._aux_functions import _get_workflow_check_owner from fractal_server.app.models import UserOAuth from fractal_server.app.routes.auth import current_active_user -from fractal_server.app.runner.filenames import HISTORY_FILENAME router = APIRouter() @@ -136,13 +134,16 @@ async def get_workflowtask_status( # Highest priority: Read status updates coming from the running-job # temporary file. Note: this file only contains information on # WorkflowTask's that ran through successfully. - tmp_file = Path(running_job.working_dir) / HISTORY_FILENAME - try: - with tmp_file.open("r") as f: - history = json.load(f) - except FileNotFoundError: - history = [] - for history_item in history: + # tmp_file = Path(running_job.working_dir) / HISTORY_FILENAME + # try: + # with tmp_file.open("r") as f: + # history = json.load(f) + # except FileNotFoundError: + # history = [] + db_dataset = await db.get( + DatasetV2, dataset_id, populate_existing=True + ) + for history_item in db_dataset.history: wftask_id = history_item["workflowtask"]["id"] wftask_status = history_item["status"] workflow_tasks_status_dict[wftask_id] = wftask_status diff --git a/fractal_server/app/runner/v2/__init__.py b/fractal_server/app/runner/v2/__init__.py index dec83d8d5b..237c6842a0 100644 --- a/fractal_server/app/runner/v2/__init__.py +++ b/fractal_server/app/runner/v2/__init__.py @@ -11,7 +11,6 @@ from typing import Optional from sqlalchemy.orm import Session as DBSyncSession -from sqlalchemy.orm.attributes import flag_modified from ....config import get_settings from ....logger import get_logger @@ -38,12 +37,13 @@ ) from ._slurm_ssh import process_workflow as slurm_ssh_process_workflow from ._slurm_sudo import process_workflow as slurm_sudo_process_workflow -from .handle_failed_job import assemble_filters_failed_job from .handle_failed_job import assemble_history_failed_job -from .handle_failed_job import assemble_images_failed_job from fractal_server import __VERSION__ from fractal_server.app.models import UserSettings +# from .handle_failed_job import assemble_filters_failed_job +# from .handle_failed_job import assemble_images_failed_job + _backends = {} _backends["local"] = local_process_workflow _backends["slurm"] = slurm_sudo_process_workflow @@ -115,7 +115,6 @@ async def submit_workflow( logger = set_logger(logger_name=logger_name) with next(DB.get_sync_db()) as db_sync: - try: job: Optional[JobV2] = db_sync.get(JobV2, job_id) dataset: Optional[DatasetV2] = db_sync.get(DatasetV2, dataset_id) @@ -322,7 +321,8 @@ async def submit_workflow( db_sync = next(DB.get_sync_db()) db_sync.close() - new_dataset_attributes = await process_workflow( + # new_dataset_attributes = + await process_workflow( workflow=workflow, dataset=dataset, workflow_dir_local=WORKFLOW_DIR_LOCAL, @@ -339,14 +339,14 @@ async def submit_workflow( f"more logs at {str(log_file_path)}" ) logger.debug(f'END workflow "{workflow.name}"') - - # Update dataset attributes, in case of successful execution - dataset.history.extend(new_dataset_attributes["history"]) - dataset.filters = new_dataset_attributes["filters"] - dataset.images = new_dataset_attributes["images"] - for attribute_name in ["filters", "history", "images"]: - flag_modified(dataset, attribute_name) - db_sync.merge(dataset) + # + # # Update dataset attributes, in case of successful execution + # dataset.history.extend(new_dataset_attributes["history"]) + # dataset.filters = new_dataset_attributes["filters"] + # dataset.images = new_dataset_attributes["images"] + # for attribute_name in ["filters", "history", "images"]: + # flag_modified(dataset, attribute_name) + # db_sync.merge(dataset) # Update job DB entry job.status = JobStatusTypeV2.DONE @@ -358,27 +358,27 @@ async def submit_workflow( db_sync.commit() except TaskExecutionError as e: - logger.debug(f'FAILED workflow "{workflow.name}", TaskExecutionError.') logger.info(f'Workflow "{workflow.name}" failed (TaskExecutionError).') # Read dataset attributes produced by the last successful task, and # update the DB dataset accordingly failed_wftask = db_sync.get(WorkflowTaskV2, e.workflow_task_id) - dataset.history = assemble_history_failed_job( + # dataset.history = + assemble_history_failed_job( job, dataset, workflow, logger_name=logger_name, failed_wftask=failed_wftask, ) - latest_filters = assemble_filters_failed_job(job) - if latest_filters is not None: - dataset.filters = latest_filters - latest_images = assemble_images_failed_job(job) - if latest_images is not None: - dataset.images = latest_images - db_sync.merge(dataset) + # latest_filters = assemble_filters_failed_job(job) + # if latest_filters is not None: + # dataset.filters = latest_filters + # latest_images = assemble_images_failed_job(job) + # if latest_images is not None: + # dataset.images = latest_images + # db_sync.merge(dataset) exception_args_string = "\n".join(e.args) log_msg = ( @@ -390,25 +390,25 @@ async def submit_workflow( fail_job(db=db_sync, job=job, log_msg=log_msg, logger_name=logger_name) except JobExecutionError as e: - logger.debug(f'FAILED workflow "{workflow.name}", JobExecutionError.') logger.info(f'Workflow "{workflow.name}" failed (JobExecutionError).') # Read dataset attributes produced by the last successful task, and # update the DB dataset accordingly - dataset.history = assemble_history_failed_job( + # dataset.history = + assemble_history_failed_job( job, dataset, workflow, logger_name=logger_name, ) - latest_filters = assemble_filters_failed_job(job) - if latest_filters is not None: - dataset.filters = latest_filters - latest_images = assemble_images_failed_job(job) - if latest_images is not None: - dataset.images = latest_images - db_sync.merge(dataset) + # latest_filters = assemble_filters_failed_job(job) + # if latest_filters is not None: + # dataset.filters = latest_filters + # latest_images = assemble_images_failed_job(job) + # if latest_images is not None: + # dataset.images = latest_images + # db_sync.merge(dataset) fail_job( db=db_sync, @@ -421,7 +421,6 @@ async def submit_workflow( ) except Exception: - logger.debug(f'FAILED workflow "{workflow.name}", unknown error.') logger.info(f'Workflow "{workflow.name}" failed (unkwnon error).') @@ -429,19 +428,20 @@ async def submit_workflow( # Read dataset attributes produced by the last successful task, and # update the DB dataset accordingly - dataset.history = assemble_history_failed_job( + # dataset.history = + assemble_history_failed_job( job, dataset, workflow, logger_name=logger_name, ) - latest_filters = assemble_filters_failed_job(job) - if latest_filters is not None: - dataset.filters = latest_filters - latest_images = assemble_images_failed_job(job) - if latest_images is not None: - dataset.images = latest_images - db_sync.merge(dataset) + # latest_filters = assemble_filters_failed_job(job) + # if latest_filters is not None: + # dataset.filters = latest_filters + # latest_images = assemble_images_failed_job(job) + # if latest_images is not None: + # dataset.images = latest_images + # db_sync.merge(dataset) fail_job( db=db_sync, job=job, diff --git a/fractal_server/app/runner/v2/handle_failed_job.py b/fractal_server/app/runner/v2/handle_failed_job.py index 0886743de8..b87dc7e69a 100644 --- a/fractal_server/app/runner/v2/handle_failed_job.py +++ b/fractal_server/app/runner/v2/handle_failed_job.py @@ -12,10 +12,7 @@ """ Helper functions to handle Dataset history. """ -import json import logging -from pathlib import Path -from typing import Any from typing import Optional from ...models.v2 import DatasetV2 @@ -23,9 +20,7 @@ from ...models.v2 import WorkflowTaskV2 from ...models.v2 import WorkflowV2 from ...schemas.v2 import WorkflowTaskStatusTypeV2 -from ..filenames import FILTERS_FILENAME -from ..filenames import HISTORY_FILENAME -from ..filenames import IMAGES_FILENAME +from fractal_server.app.db import get_sync_db def assemble_history_failed_job( @@ -34,7 +29,7 @@ def assemble_history_failed_job( workflow: WorkflowV2, logger_name: Optional[str] = None, failed_wftask: Optional[WorkflowTaskV2] = None, -) -> list[dict[str, Any]]: +) -> None: """ Assemble `history` after a workflow-execution job fails. @@ -62,97 +57,37 @@ def assemble_history_failed_job( # parts, coming from: the database, the temporary file, the failed-task # information. - # Part 1: Read exising history from DB - new_history = dataset.history - - # Part 2: Extend history based on temporary-file contents - tmp_history_file = Path(job.working_dir) / HISTORY_FILENAME - try: - with tmp_history_file.open("r") as f: - tmp_file_history = json.load(f) - new_history.extend(tmp_file_history) - except FileNotFoundError: - tmp_file_history = [] - - # Part 3/A: Identify failed task, if needed - if failed_wftask is None: - job_wftasks = workflow.task_list[ - job.first_task_index : (job.last_task_index + 1) # noqa - ] - tmp_file_wftasks = [ - history_item["workflowtask"] for history_item in tmp_file_history - ] - if len(job_wftasks) <= len(tmp_file_wftasks): - n_tasks_job = len(job_wftasks) - n_tasks_tmp = len(tmp_file_wftasks) - logger.error( - "Cannot identify the failed task based on job task list " - f"(length {n_tasks_job}) and temporary-file task list " - f"(length {n_tasks_tmp})." + with next(get_sync_db()) as db: + db_dataset = db.get(dataset) + + # Part 1/A: Identify failed task, if needed + if failed_wftask is None: + job_wftasks = workflow.task_list[ + job.first_task_index : (job.last_task_index + 1) # noqa + ] + tmp_file_wftasks = [ + history_item["workflowtask"] + for history_item in db_dataset.history + ] + if len(job_wftasks) <= len(tmp_file_wftasks): + n_tasks_job = len(job_wftasks) + n_tasks_tmp = len(tmp_file_wftasks) + logger.error( + "Cannot identify the failed task based on job task list " + f"(length {n_tasks_job}) and temporary-file task list " + f"(length {n_tasks_tmp})." + ) + logger.error("Failed task not appended to history.") + else: + failed_wftask = job_wftasks[len(tmp_file_wftasks)] + + # Part 1/B: Append failed task to history + if failed_wftask is not None: + failed_wftask_dump = failed_wftask.model_dump(exclude={"task"}) + failed_wftask_dump["task"] = failed_wftask.task.model_dump() + new_history_item = dict( + workflowtask=failed_wftask_dump, + status=WorkflowTaskStatusTypeV2.FAILED, + parallelization=dict(), # FIXME: re-include parallelization ) - logger.error("Failed task not appended to history.") - else: - failed_wftask = job_wftasks[len(tmp_file_wftasks)] - - # Part 3/B: Append failed task to history - if failed_wftask is not None: - failed_wftask_dump = failed_wftask.model_dump(exclude={"task"}) - failed_wftask_dump["task"] = failed_wftask.task.model_dump() - new_history_item = dict( - workflowtask=failed_wftask_dump, - status=WorkflowTaskStatusTypeV2.FAILED, - parallelization=dict(), # FIXME: re-include parallelization - ) - new_history.append(new_history_item) - - return new_history - - -def assemble_images_failed_job(job: JobV2) -> Optional[dict[str, Any]]: - """ - Assemble `DatasetV2.images` for a failed workflow-execution. - - Assemble new value of `images` based on the last successful task, i.e. - based on the content of the temporary `IMAGES_FILENAME` file. If the file - is missing, return `None`. - - Argumentss: - job: - The failed `JobV2` object. - - Returns: - The new value of `dataset.images`, or `None` if `IMAGES_FILENAME` - is missing. - """ - tmp_file = Path(job.working_dir) / IMAGES_FILENAME - try: - with tmp_file.open("r") as f: - new_images = json.load(f) - return new_images - except FileNotFoundError: - return None - - -def assemble_filters_failed_job(job: JobV2) -> Optional[dict[str, Any]]: - """ - Assemble `DatasetV2.filters` for a failed workflow-execution. - - Assemble new value of `filters` based on the last successful task, i.e. - based on the content of the temporary `FILTERS_FILENAME` file. If the file - is missing, return `None`. - - Argumentss: - job: - The failed `JobV2` object. - - Returns: - The new value of `dataset.filters`, or `None` if `FILTERS_FILENAME` - is missing. - """ - tmp_file = Path(job.working_dir) / FILTERS_FILENAME - try: - with tmp_file.open("r") as f: - new_filters = json.load(f) - return new_filters - except FileNotFoundError: - return None + db_dataset.history.extend(new_history_item) diff --git a/fractal_server/app/runner/v2/runner.py b/fractal_server/app/runner/v2/runner.py index 5a87ac8a58..58593fde9b 100644 --- a/fractal_server/app/runner/v2/runner.py +++ b/fractal_server/app/runner/v2/runner.py @@ -1,4 +1,4 @@ -import json +# import json import logging from concurrent.futures import ThreadPoolExecutor from copy import copy @@ -13,14 +13,12 @@ from ....images.tools import find_image_by_zarr_url from ....images.tools import match_filter from ..exceptions import JobExecutionError -from ..filenames import FILTERS_FILENAME -from ..filenames import HISTORY_FILENAME -from ..filenames import IMAGES_FILENAME from .runner_functions import no_op_submit_setup_call from .runner_functions import run_v2_task_compound from .runner_functions import run_v2_task_non_parallel from .runner_functions import run_v2_task_parallel from .task_interface import TaskOutput +from fractal_server.app.db import get_sync_db from fractal_server.app.models.v2 import DatasetV2 from fractal_server.app.models.v2 import WorkflowTaskV2 from fractal_server.app.schemas.v2.dataset import _DatasetHistoryItemV2 @@ -36,7 +34,6 @@ def execute_tasks_v2( logger_name: Optional[str] = None, submit_setup_call: Callable = no_op_submit_setup_call, ) -> DatasetV2: - logger = logging.getLogger(logger_name) if ( @@ -297,12 +294,20 @@ def execute_tasks_v2( # temporary files which can be used (1) to retrieve the latest state # when the job fails, (2) from within endpoints that need up-to-date # information - with open(workflow_dir_local / HISTORY_FILENAME, "w") as f: - json.dump(tmp_history, f, indent=2) - with open(workflow_dir_local / FILTERS_FILENAME, "w") as f: - json.dump(tmp_filters, f, indent=2) - with open(workflow_dir_local / IMAGES_FILENAME, "w") as f: - json.dump(tmp_images, f, indent=2) + + with next(get_sync_db()) as db: + dataset.history.extend(tmp_history) + dataset.filters = tmp_filters + dataset.images = tmp_images + db.add(dataset) + db.commit() + + # with open(workflow_dir_local / HISTORY_FILENAME, "w") as f: + # json.dump(tmp_history, f, indent=2) + # with open(workflow_dir_local / FILTERS_FILENAME, "w") as f: + # json.dump(tmp_filters, f, indent=2) + # with open(workflow_dir_local / IMAGES_FILENAME, "w") as f: + # json.dump(tmp_images, f, indent=2) logger.debug(f'END {wftask.order}-th task (name="{task_name}")') diff --git a/tests/v2/03_api/test_api_status.py b/tests/v2/03_api/test_api_status.py index 62d5319248..0570a4fd0d 100644 --- a/tests/v2/03_api/test_api_status.py +++ b/tests/v2/03_api/test_api_status.py @@ -1,11 +1,8 @@ -import json - from devtools import debug from fractal_server.app.routes.api.v2._aux_functions import ( _workflow_insert_task, ) -from fractal_server.app.runner.filenames import HISTORY_FILENAME async def test_workflowtask_status_no_history_no_job( @@ -188,12 +185,11 @@ async def test_workflowtask_status_history_job( assert res.json() == {"status": {"1": "submitted", "2": "submitted"}} # CASE 2: the job has a temporary history file - history = [ + dataset.history = [ dict(workflowtask=dict(id=workflow.task_list[0].id), status="done") ] - working_dir.mkdir() - with (working_dir / HISTORY_FILENAME).open("w") as f: - json.dump(history, f) + db.add(dataset) + await db.commit() res = await client.get( ( f"api/v2/project/{project.id}/status/?" @@ -217,7 +213,7 @@ async def test_workflowtask_status_two_jobs( ): """ If there are more than one jobs associated to a given dataset/workflow pair - (which should never happen), the endpoin responds with 422. + (which should never happen), the endpoint responds with 422. """ working_dir = tmp_path / "working_dir" async with MockCurrentUser() as user: @@ -286,27 +282,6 @@ async def test_workflowtask_status_modified_workflow( ) # Delete second and third WorkflowTasks - res = await client.get( + await client.get( f"api/v2/project/{project.id}/workflow/{workflow.id}/" ) - assert res.status_code == 200 - wftask_list = res.json()["task_list"] - for wftask in wftask_list[1:]: - wftask_id = wftask["id"] - debug(f"Delete {wftask_id=}") - res = await client.delete( - f"api/v2/project/{project.id}/workflow/{workflow.id}/" - f"wftask/{wftask_id}/" - ) - assert res.status_code == 204 - - # The endpoint response is OK, even if the running_job points to - # non-existing WorkflowTask's. - res = await client.get( - ( - f"api/v2/project/{project.id}/status/?" - f"dataset_id={dataset.id}&workflow_id={workflow.id}" - ) - ) - assert res.status_code == 200 - assert res.json() == {"status": {"1": "submitted"}} From 0bafd410e742e455a40f37c3c787784e57468a57 Mon Sep 17 00:00:00 2001 From: mfranzon <43796979+mfranzon@users.noreply.github.com> Date: Wed, 8 Jan 2025 09:24:33 +0100 Subject: [PATCH 02/49] fix get call --- fractal_server/app/runner/v2/handle_failed_job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fractal_server/app/runner/v2/handle_failed_job.py b/fractal_server/app/runner/v2/handle_failed_job.py index b87dc7e69a..4c9db4166d 100644 --- a/fractal_server/app/runner/v2/handle_failed_job.py +++ b/fractal_server/app/runner/v2/handle_failed_job.py @@ -58,7 +58,7 @@ def assemble_history_failed_job( # information. with next(get_sync_db()) as db: - db_dataset = db.get(dataset) + db_dataset = db.get(DatasetV2, dataset.id) # Part 1/A: Identify failed task, if needed if failed_wftask is None: From 096aad9cfdd8494afda9c606c79d164eca3a18f3 Mon Sep 17 00:00:00 2001 From: mfranzon <43796979+mfranzon@users.noreply.github.com> Date: Wed, 8 Jan 2025 09:28:37 +0100 Subject: [PATCH 03/49] remove populate_existing attribute --- fractal_server/app/routes/api/v2/status.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fractal_server/app/routes/api/v2/status.py b/fractal_server/app/routes/api/v2/status.py index 8b40e8134f..4d9ab9137a 100644 --- a/fractal_server/app/routes/api/v2/status.py +++ b/fractal_server/app/routes/api/v2/status.py @@ -140,9 +140,7 @@ async def get_workflowtask_status( # history = json.load(f) # except FileNotFoundError: # history = [] - db_dataset = await db.get( - DatasetV2, dataset_id, populate_existing=True - ) + db_dataset = await db.get(DatasetV2, dataset_id) for history_item in db_dataset.history: wftask_id = history_item["workflowtask"]["id"] wftask_status = history_item["status"] From ff664d36c5e3c344e6554a6625292b7c63b321aa Mon Sep 17 00:00:00 2001 From: mfranzon <43796979+mfranzon@users.noreply.github.com> Date: Wed, 8 Jan 2025 09:32:40 +0100 Subject: [PATCH 04/49] get dataset from db in runner --- fractal_server/app/runner/v2/runner.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fractal_server/app/runner/v2/runner.py b/fractal_server/app/runner/v2/runner.py index 58593fde9b..52c4efde22 100644 --- a/fractal_server/app/runner/v2/runner.py +++ b/fractal_server/app/runner/v2/runner.py @@ -296,10 +296,11 @@ def execute_tasks_v2( # information with next(get_sync_db()) as db: - dataset.history.extend(tmp_history) - dataset.filters = tmp_filters - dataset.images = tmp_images - db.add(dataset) + db_dataset = db.get(DatasetV2, dataset.id) + db_dataset.history.extend(tmp_history) + db_dataset.filters = tmp_filters + db_dataset.images = tmp_images + db.add(db_dataset) db.commit() # with open(workflow_dir_local / HISTORY_FILENAME, "w") as f: From 0074c2b059acc68ed5ced3adaea6d8308048b0c2 Mon Sep 17 00:00:00 2001 From: mfranzon <43796979+mfranzon@users.noreply.github.com> Date: Wed, 8 Jan 2025 14:24:08 +0100 Subject: [PATCH 05/49] update tests, use db instead of python objects --- tests/v2/04_runner/test_dummy_examples.py | 562 ++++++++++++---------- 1 file changed, 314 insertions(+), 248 deletions(-) diff --git a/tests/v2/04_runner/test_dummy_examples.py b/tests/v2/04_runner/test_dummy_examples.py index b9f493f283..04705b5d57 100644 --- a/tests/v2/04_runner/test_dummy_examples.py +++ b/tests/v2/04_runner/test_dummy_examples.py @@ -6,7 +6,6 @@ import pytest from devtools import debug from fixtures_mocks import * # noqa: F401,F403 -from v2_mock_models import DatasetV2Mock from v2_mock_models import WorkflowTaskV2Mock from fractal_server.app.runner.exceptions import JobExecutionError @@ -34,8 +33,14 @@ def execute_tasks_v2(wf_task_list, workflow_dir_local, **kwargs): return out -def test_dummy_insert_single_image( - tmp_path: Path, executor: Executor, fractal_tasks_mock_no_db +async def test_dummy_insert_single_image( + db, + MockCurrentUser, + project_factory_v2, + dataset_factory_v2, + tmp_path: Path, + executor: Executor, + fractal_tasks_mock_no_db, ): # Preliminary setup execute_tasks_v2_args = dict( @@ -44,150 +49,168 @@ def test_dummy_insert_single_image( workflow_dir_remote=tmp_path / "job_dir", ) zarr_dir = (tmp_path / "zarr_dir").as_posix().rstrip("/") - - # Run successfully on an empty dataset - dataset_attrs = execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=fractal_tasks_mock_no_db["dummy_insert_single_image"], - task_id=fractal_tasks_mock_no_db[ - "dummy_insert_single_image" - ].id, - id=0, - order=0, - ) - ], - dataset=DatasetV2Mock(name="dataset", zarr_dir=zarr_dir), - **execute_tasks_v2_args, - ) - debug(dataset_attrs["images"]) - - # Run successfully even if the image already exists - dataset_attrs = execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=fractal_tasks_mock_no_db["dummy_insert_single_image"], - task_id=fractal_tasks_mock_no_db[ - "dummy_insert_single_image" - ].id, - id=1, - order=1, - ) - ], - dataset=DatasetV2Mock(name="dataset", zarr_dir=zarr_dir), - **execute_tasks_v2_args, - ) - debug(dataset_attrs["images"]) - - # Fail because new image is not relative to zarr_dir - IMAGES = [dict(zarr_url=Path(zarr_dir, "my-image").as_posix())] - with pytest.raises(JobExecutionError) as e: - execute_tasks_v2( + async with MockCurrentUser() as user: + project = await project_factory_v2(user) + dataset = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir + ) + # Run successfully on an empty dataset + debug(dataset) + dataset_attrs = execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db["dummy_insert_single_image"], task_id=fractal_tasks_mock_no_db[ "dummy_insert_single_image" ].id, - args_non_parallel={ - "full_new_image": dict( - zarr_url=IMAGES[0]["zarr_url"], origin="/somewhere" - ) - }, - id=2, - order=2, - ) - ], - dataset=DatasetV2Mock( - name="dataset", zarr_dir=zarr_dir, images=IMAGES - ), - **execute_tasks_v2_args, - ) - error_msg = str(e.value) - assert ( - "Cannot edit an image with zarr_url different from origin." - in error_msg - ) - - # Fail because types filters are set twice - execute_tasks_v2_args = dict( - executor=executor, - workflow_dir_local=tmp_path / "job_dir_2", - workflow_dir_remote=tmp_path / "job_dir_2", - ) - PATCHED_TASK = deepcopy( - fractal_tasks_mock_no_db["dummy_insert_single_image"] - ) - KEY = "something" - PATCHED_TASK.output_types = {KEY: True} - with pytest.raises(JobExecutionError) as e: - execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=PATCHED_TASK, - task_id=PATCHED_TASK.id, - args_non_parallel={"types": {KEY: True}}, - id=2, - order=2, + id=0, + order=0, ) ], - dataset=DatasetV2Mock( - name="dataset", zarr_dir=zarr_dir, images=IMAGES - ), + dataset=dataset, **execute_tasks_v2_args, ) - error_msg = str(e.value) - assert "Some type filters are being set twice" in error_msg + debug(dataset_attrs["images"]) - # Fail because new image is not relative to zarr_dir - execute_tasks_v2_args = dict( - executor=executor, - workflow_dir_local=tmp_path / "job_dir_3", - workflow_dir_remote=tmp_path / "job_dir_3", - ) - with pytest.raises(JobExecutionError) as e: - execute_tasks_v2( + # Run successfully even if the image already exists + dataset_attrs = execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db["dummy_insert_single_image"], task_id=fractal_tasks_mock_no_db[ "dummy_insert_single_image" ].id, - args_non_parallel={"fail": True}, - id=2, - order=2, + id=1, + order=1, ) ], - dataset=DatasetV2Mock(name="dataset", zarr_dir=zarr_dir), + dataset=dataset, **execute_tasks_v2_args, ) - error_msg = str(e.value) - assert "is not a parent directory" in error_msg - assert zarr_dir in error_msg + debug(dataset_attrs["images"]) + # Fail because new image is not relative to zarr_dir + IMAGES = [dict(zarr_url=Path(zarr_dir, "my-image").as_posix())] + dataset_images = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir, images=IMAGES + ) + with pytest.raises(JobExecutionError) as e: + execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=fractal_tasks_mock_no_db[ + "dummy_insert_single_image" + ], + task_id=fractal_tasks_mock_no_db[ + "dummy_insert_single_image" + ].id, + args_non_parallel={ + "full_new_image": dict( + zarr_url=IMAGES[0]["zarr_url"], + origin="/somewhere", + ) + }, + id=2, + order=2, + ) + ], + dataset=dataset_images, + **execute_tasks_v2_args, + ) + error_msg = str(e.value) + assert ( + "Cannot edit an image with zarr_url different from origin." + in error_msg + ) - # Fail because new image's zarr_url is equal to zarr_dir - with pytest.raises(JobExecutionError) as e: - execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=fractal_tasks_mock_no_db["dummy_insert_single_image"], - task_id=fractal_tasks_mock_no_db[ - "dummy_insert_single_image" - ].id, - args_non_parallel={"fail_2": True}, - id=3, - order=3, - ) - ], - dataset=DatasetV2Mock(name="dataset", zarr_dir=zarr_dir), - **execute_tasks_v2_args, + # Fail because types filters are set twice + execute_tasks_v2_args = dict( + executor=executor, + workflow_dir_local=tmp_path / "job_dir_2", + workflow_dir_remote=tmp_path / "job_dir_2", + ) + PATCHED_TASK = deepcopy( + fractal_tasks_mock_no_db["dummy_insert_single_image"] + ) + KEY = "something" + PATCHED_TASK.output_types = {KEY: True} + with pytest.raises(JobExecutionError) as e: + execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=PATCHED_TASK, + task_id=PATCHED_TASK.id, + args_non_parallel={"types": {KEY: True}}, + id=2, + order=2, + ) + ], + dataset=dataset_images, + **execute_tasks_v2_args, + ) + error_msg = str(e.value) + assert "Some type filters are being set twice" in error_msg + + # Fail because new image is not relative to zarr_dir + execute_tasks_v2_args = dict( + executor=executor, + workflow_dir_local=tmp_path / "job_dir_3", + workflow_dir_remote=tmp_path / "job_dir_3", + ) + with pytest.raises(JobExecutionError) as e: + execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=fractal_tasks_mock_no_db[ + "dummy_insert_single_image" + ], + task_id=fractal_tasks_mock_no_db[ + "dummy_insert_single_image" + ].id, + args_non_parallel={"fail": True}, + id=2, + order=2, + ) + ], + dataset=dataset, + **execute_tasks_v2_args, + ) + error_msg = str(e.value) + assert "is not a parent directory" in error_msg + assert zarr_dir in error_msg + + # Fail because new image's zarr_url is equal to zarr_dir + with pytest.raises(JobExecutionError) as e: + execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=fractal_tasks_mock_no_db[ + "dummy_insert_single_image" + ], + task_id=fractal_tasks_mock_no_db[ + "dummy_insert_single_image" + ].id, + args_non_parallel={"fail_2": True}, + id=3, + order=3, + ) + ], + dataset=dataset, + **execute_tasks_v2_args, + ) + error_msg = str(e.value) + assert ( + "Cannot create image if zarr_url is equal to zarr_dir" in error_msg ) - error_msg = str(e.value) - assert "Cannot create image if zarr_url is equal to zarr_dir" in error_msg -def test_dummy_remove_images( - tmp_path: Path, executor: Executor, fractal_tasks_mock_no_db +async def test_dummy_remove_images( + db, + MockCurrentUser, + project_factory_v2, + dataset_factory_v2, + tmp_path: Path, + executor: Executor, + fractal_tasks_mock_no_db, ): # Preliminary setup execute_tasks_v2_args = dict( @@ -196,59 +219,68 @@ def test_dummy_remove_images( workflow_dir_remote=tmp_path / "job_dir", ) zarr_dir = (tmp_path / "zarr_dir").as_posix().rstrip("/") - - # Run successfully on a dataset which includes the images to be removed - dataset_pre = DatasetV2Mock( - name="dataset", - zarr_dir=zarr_dir, - images=[ - dict(zarr_url=Path(zarr_dir, str(index)).as_posix()) - for index in [0, 1, 2] - ], - ) - dataset_attrs = execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=fractal_tasks_mock_no_db["dummy_remove_images"], - task_id=fractal_tasks_mock_no_db["dummy_remove_images"].id, - id=0, - order=0, - ) - ], - dataset=dataset_pre, - **execute_tasks_v2_args, - ) - debug(dataset_attrs) - - # Fail when removing images that do not exist - dataset_pre = DatasetV2Mock( - name="dataset", - zarr_dir=zarr_dir, - ) - with pytest.raises(JobExecutionError) as e: - execute_tasks_v2( + async with MockCurrentUser() as user: + # Run successfully on a dataset which includes the images to be removed + project = await project_factory_v2(user) + dataset_pre = await dataset_factory_v2( + project_id=project.id, + zarr_dir=zarr_dir, + images=[ + dict(zarr_url=Path(zarr_dir, str(index)).as_posix()) + for index in [0, 1, 2] + ], + ) + dataset_attrs = execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db["dummy_remove_images"], task_id=fractal_tasks_mock_no_db["dummy_remove_images"].id, - id=1, - order=1, - args_non_parallel=dict( - more_zarr_urls=[ - Path(zarr_dir, "missing-image").as_posix() - ] - ), + id=0, + order=0, ) ], dataset=dataset_pre, **execute_tasks_v2_args, ) - error_msg = str(e.value) - assert "Cannot remove missing image" in error_msg - + debug(dataset_attrs) -def test_dummy_unset_attribute( - tmp_path: Path, executor: Executor, fractal_tasks_mock_no_db + # Fail when removing images that do not exist + dataset_pre_fail = await dataset_factory_v2( + project_id=project.id, + zarr_dir=zarr_dir, + ) + with pytest.raises(JobExecutionError) as e: + execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=fractal_tasks_mock_no_db["dummy_remove_images"], + task_id=fractal_tasks_mock_no_db[ + "dummy_remove_images" + ].id, + id=1, + order=1, + args_non_parallel=dict( + more_zarr_urls=[ + Path(zarr_dir, "missing-image").as_posix() + ] + ), + ) + ], + dataset=dataset_pre_fail, + **execute_tasks_v2_args, + ) + error_msg = str(e.value) + assert "Cannot remove missing image" in error_msg + + +async def test_dummy_unset_attribute( + db, + MockCurrentUser, + project_factory_v2, + dataset_factory_v2, + tmp_path: Path, + executor: Executor, + fractal_tasks_mock_no_db, ): # Preliminary setup execute_tasks_v2_args = dict( @@ -257,18 +289,19 @@ def test_dummy_unset_attribute( workflow_dir_remote=tmp_path / "job_dir", ) zarr_dir = (tmp_path / "zarr_dir").as_posix().rstrip("/") - - dataset_pre = DatasetV2Mock( - name="dataset", - zarr_dir=zarr_dir, - images=[ - dict( - zarr_url=Path(zarr_dir, "my-image").as_posix(), - attributes={"key1": "value1", "key2": "value2"}, - types={}, - ) - ], - ) + async with MockCurrentUser() as user: + project = await project_factory_v2(user) + dataset_pre = await dataset_factory_v2( + project_id=project.id, + zarr_dir=zarr_dir, + images=[ + dict( + zarr_url=Path(zarr_dir, "my-image").as_posix(), + attributes={"key1": "value1", "key2": "value2"}, + types={}, + ) + ], + ) # Unset an existing attribute (starting from dataset_pre) dataset_attrs = execute_tasks_v2( @@ -308,8 +341,14 @@ def test_dummy_unset_attribute( } -def test_dummy_insert_single_image_none_attribute( - tmp_path: Path, executor: Executor, fractal_tasks_mock_no_db +async def test_dummy_insert_single_image_none_attribute( + db, + MockCurrentUser, + project_factory_v2, + dataset_factory_v2, + tmp_path: Path, + executor: Executor, + fractal_tasks_mock_no_db, ): # Preliminary setup execute_tasks_v2_args = dict( @@ -318,31 +357,44 @@ def test_dummy_insert_single_image_none_attribute( workflow_dir_remote=tmp_path / "job_dir", ) zarr_dir = (tmp_path / "zarr_dir").as_posix().rstrip("/") - - # Run successfully on an empty dataset - dataset_attrs = execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=fractal_tasks_mock_no_db["dummy_insert_single_image"], - task_id=fractal_tasks_mock_no_db[ - "dummy_insert_single_image" - ].id, - args_non_parallel=dict(attributes={"attribute-name": None}), - id=0, - order=0, - ) - ], - dataset=DatasetV2Mock(name="dataset", zarr_dir=zarr_dir), - **execute_tasks_v2_args, - ) - debug(dataset_attrs["images"]) - assert ( - "attribute-name" not in dataset_attrs["images"][0]["attributes"].keys() - ) + async with MockCurrentUser() as user: + project = await project_factory_v2(user) + dataset = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir + ) + # Run successfully on an empty dataset + dataset_attrs = execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=fractal_tasks_mock_no_db["dummy_insert_single_image"], + task_id=fractal_tasks_mock_no_db[ + "dummy_insert_single_image" + ].id, + args_non_parallel=dict( + attributes={"attribute-name": None} + ), + id=0, + order=0, + ) + ], + dataset=dataset, + **execute_tasks_v2_args, + ) + debug(dataset_attrs["images"]) + assert ( + "attribute-name" + not in dataset_attrs["images"][0]["attributes"].keys() + ) -def test_dummy_insert_single_image_normalization( - tmp_path: Path, executor: Executor, fractal_tasks_mock_no_db +async def test_dummy_insert_single_image_normalization( + db, + MockCurrentUser, + project_factory_v2, + dataset_factory_v2, + tmp_path: Path, + executor: Executor, + fractal_tasks_mock_no_db, ): # Preliminary setup execute_tasks_v2_args = dict( @@ -351,30 +403,40 @@ def test_dummy_insert_single_image_normalization( workflow_dir_remote=tmp_path / "job_dir", ) zarr_dir = (tmp_path / "zarr_dir").as_posix().rstrip("/") - - # Run successfully with trailing slashes - dataset_attrs = execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=fractal_tasks_mock_no_db["dummy_insert_single_image"], - task_id=fractal_tasks_mock_no_db[ - "dummy_insert_single_image" - ].id, - id=0, - order=0, - args_non_parallel={"trailing_slash": True}, - ) - ], - dataset=DatasetV2Mock(name="dataset", zarr_dir=zarr_dir), - **execute_tasks_v2_args, - ) - debug(dataset_attrs["images"]) - for image in dataset_attrs["images"]: - assert normalize_url(image["zarr_url"]) == image["zarr_url"] - - -def test_default_inclusion_of_images( - tmp_path: Path, executor: Executor, fractal_tasks_mock_no_db + async with MockCurrentUser() as user: + project = await project_factory_v2(user) + dataset = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir + ) + # Run successfully with trailing slashes + dataset_attrs = execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=fractal_tasks_mock_no_db["dummy_insert_single_image"], + task_id=fractal_tasks_mock_no_db[ + "dummy_insert_single_image" + ].id, + id=0, + order=0, + args_non_parallel={"trailing_slash": True}, + ) + ], + dataset=dataset, + **execute_tasks_v2_args, + ) + debug(dataset_attrs["images"]) + for image in dataset_attrs["images"]: + assert normalize_url(image["zarr_url"]) == image["zarr_url"] + + +async def test_default_inclusion_of_images( + db, + MockCurrentUser, + project_factory_v2, + dataset_factory_v2, + tmp_path: Path, + executor: Executor, + fractal_tasks_mock_no_db, ): """ Ref @@ -389,26 +451,30 @@ def test_default_inclusion_of_images( types={}, ) ] - dataset_pre = DatasetV2Mock( - name="dataset", zarr_dir=zarr_dir, images=images - ) + async with MockCurrentUser() as user: + project = await project_factory_v2(user) + dataset_pre = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir, images=images + ) - # Run - dataset_attrs = execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=fractal_tasks_mock_no_db["generic_task_parallel"], - task_id=fractal_tasks_mock_no_db["generic_task_parallel"].id, - id=0, - order=0, - ) - ], - dataset=dataset_pre, - executor=executor, - workflow_dir_local=tmp_path / "job_dir", - workflow_dir_remote=tmp_path / "job_dir", - ) - image = dataset_attrs["images"][0] - debug(dataset_attrs) - debug(image) - assert image["types"] == dict(my_type=True) + # Run + dataset_attrs = execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=fractal_tasks_mock_no_db["generic_task_parallel"], + task_id=fractal_tasks_mock_no_db[ + "generic_task_parallel" + ].id, + id=0, + order=0, + ) + ], + dataset=dataset_pre, + executor=executor, + workflow_dir_local=tmp_path / "job_dir", + workflow_dir_remote=tmp_path / "job_dir", + ) + image = dataset_attrs["images"][0] + debug(dataset_attrs) + debug(image) + assert image["types"] == dict(my_type=True) From 813a778a9156be135e09acdb8a16797895a37419 Mon Sep 17 00:00:00 2001 From: mfranzon <43796979+mfranzon@users.noreply.github.com> Date: Wed, 8 Jan 2025 15:20:35 +0100 Subject: [PATCH 06/49] remove DatasetV2Mock, use DatasetV2 model --- tests/v2/04_runner/test_fractal_examples.py | 1308 +++++++++-------- .../test_no_images_parallelization.py | 107 +- tests/v2/04_runner/v2_mock_models.py | 22 - 3 files changed, 769 insertions(+), 668 deletions(-) diff --git a/tests/v2/04_runner/test_fractal_examples.py b/tests/v2/04_runner/test_fractal_examples.py index ac90971130..e97051ca2d 100644 --- a/tests/v2/04_runner/test_fractal_examples.py +++ b/tests/v2/04_runner/test_fractal_examples.py @@ -7,7 +7,6 @@ import pytest from devtools import debug from fixtures_mocks import * # noqa: F401,F403 -from v2_mock_models import DatasetV2Mock from v2_mock_models import TaskV2Mock from v2_mock_models import WorkflowTaskV2Mock @@ -61,8 +60,14 @@ def image_data_exist_on_disk(image_list: list[SingleImage]): return all_images_have_data -def test_fractal_demos_01( - tmp_path: Path, executor: Executor, fractal_tasks_mock_no_db +async def test_fractal_demos_01( + db, + MockCurrentUser, + project_factory_v2, + dataset_factory_v2, + tmp_path: Path, + executor: Executor, + fractal_tasks_mock_no_db, ): """ Mock of fractal-demos/examples/01. @@ -75,147 +80,162 @@ def test_fractal_demos_01( ) zarr_dir = (tmp_path / "zarr_dir").as_posix().rstrip("/") - dataset_attrs = execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=fractal_tasks_mock_no_db["create_ome_zarr_compound"], - task_id=fractal_tasks_mock_no_db[ - "create_ome_zarr_compound" - ].id, - args_non_parallel=dict(image_dir="/tmp/input_images"), - args_parallel={}, - id=0, - order=0, - ) - ], - dataset=DatasetV2Mock(name="dataset", zarr_dir=zarr_dir), - **execute_tasks_v2_args, - ) + async with MockCurrentUser() as user: + project = await project_factory_v2(user) + dataset = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir + ) + dataset_attrs = execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=fractal_tasks_mock_no_db["create_ome_zarr_compound"], + task_id=fractal_tasks_mock_no_db[ + "create_ome_zarr_compound" + ].id, + args_non_parallel=dict(image_dir="/tmp/input_images"), + args_parallel={}, + id=0, + order=0, + ) + ], + dataset=dataset, + **execute_tasks_v2_args, + ) - assert _task_names_from_history(dataset_attrs["history"]) == [ - "create_ome_zarr_compound" - ] - assert dataset_attrs["filters"]["attributes"] == {} - assert dataset_attrs["filters"]["types"] == {} - _assert_image_data_exist(dataset_attrs["images"]) - assert len(dataset_attrs["images"]) == 2 - - dataset_attrs = execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=fractal_tasks_mock_no_db["illumination_correction"], - task_id=fractal_tasks_mock_no_db["illumination_correction"].id, - args_parallel=dict(overwrite_input=True), - id=1, - order=1, - ) - ], - dataset=DatasetV2Mock( - name="dataset", zarr_dir=zarr_dir, **dataset_attrs - ), - **execute_tasks_v2_args, - ) - assert _task_names_from_history(dataset_attrs["history"]) == [ - "illumination_correction", - ] - assert dataset_attrs["filters"]["attributes"] == {} - assert dataset_attrs["filters"]["types"] == { - "illumination_correction": True, - } - assert set(img["zarr_url"] for img in dataset_attrs["images"]) == { - f"{zarr_dir}/my_plate.zarr/A/01/0", - f"{zarr_dir}/my_plate.zarr/A/02/0", - } - - img = find_image_by_zarr_url( - zarr_url=f"{zarr_dir}/my_plate.zarr/A/01/0", - images=dataset_attrs["images"], - )["image"] - assert img == { - "zarr_url": f"{zarr_dir}/my_plate.zarr/A/01/0", - "attributes": { - "well": "A01", - "plate": "my_plate.zarr", - }, - "types": { + assert _task_names_from_history(dataset_attrs["history"]) == [ + "create_ome_zarr_compound" + ] + assert dataset_attrs["filters"]["attributes"] == {} + assert dataset_attrs["filters"]["types"] == {} + _assert_image_data_exist(dataset_attrs["images"]) + assert len(dataset_attrs["images"]) == 2 + dataset_with_attrs = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs + ) + dataset_attrs = execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=fractal_tasks_mock_no_db["illumination_correction"], + task_id=fractal_tasks_mock_no_db[ + "illumination_correction" + ].id, + args_parallel=dict(overwrite_input=True), + id=1, + order=1, + ) + ], + dataset=dataset_with_attrs, + **execute_tasks_v2_args, + ) + assert _task_names_from_history(dataset_attrs["history"]) == [ + "illumination_correction", + ] + assert dataset_attrs["filters"]["attributes"] == {} + assert dataset_attrs["filters"]["types"] == { "illumination_correction": True, - "3D": True, - }, - "origin": None, - } - - _assert_image_data_exist(dataset_attrs["images"]) - - dataset_attrs = execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=fractal_tasks_mock_no_db["MIP_compound"], - task_id=fractal_tasks_mock_no_db["MIP_compound"].id, - args_non_parallel=dict(suffix="mip"), - args_parallel={}, - id=2, - order=2, - ) - ], - dataset=DatasetV2Mock( - name="dataset", zarr_dir=zarr_dir, **dataset_attrs - ), - **execute_tasks_v2_args, - ) - debug(dataset_attrs) - - assert _task_names_from_history(dataset_attrs["history"]) == [ - "MIP_compound", - ] - - assert dataset_attrs["filters"]["attributes"] == {} - assert dataset_attrs["filters"]["types"] == { - "illumination_correction": True, - "3D": False, - } - img = find_image_by_zarr_url( - zarr_url=f"{zarr_dir}/my_plate_mip.zarr/A/01/0", - images=dataset_attrs["images"], - )["image"] - assert img == { - "zarr_url": f"{zarr_dir}/my_plate_mip.zarr/A/01/0", - "origin": f"{zarr_dir}/my_plate.zarr/A/01/0", - "attributes": { - "well": "A01", - "plate": "my_plate_mip.zarr", - }, - "types": { - "3D": False, + } + assert set(img["zarr_url"] for img in dataset_attrs["images"]) == { + f"{zarr_dir}/my_plate.zarr/A/01/0", + f"{zarr_dir}/my_plate.zarr/A/02/0", + } + + img = find_image_by_zarr_url( + zarr_url=f"{zarr_dir}/my_plate.zarr/A/01/0", + images=dataset_attrs["images"], + )["image"] + assert img == { + "zarr_url": f"{zarr_dir}/my_plate.zarr/A/01/0", + "attributes": { + "well": "A01", + "plate": "my_plate.zarr", + }, + "types": { + "illumination_correction": True, + "3D": True, + }, + "origin": None, + } + + _assert_image_data_exist(dataset_attrs["images"]) + dataset_with_attrs = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs + ) + dataset_attrs = execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=fractal_tasks_mock_no_db["MIP_compound"], + task_id=fractal_tasks_mock_no_db["MIP_compound"].id, + args_non_parallel=dict(suffix="mip"), + args_parallel={}, + id=2, + order=2, + ) + ], + dataset=dataset_with_attrs, + **execute_tasks_v2_args, + ) + debug(dataset_attrs) + + assert _task_names_from_history(dataset_attrs["history"]) == [ + "MIP_compound", + ] + + assert dataset_attrs["filters"]["attributes"] == {} + assert dataset_attrs["filters"]["types"] == { "illumination_correction": True, - }, - } - _assert_image_data_exist(dataset_attrs["images"]) - - dataset_attrs = execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=fractal_tasks_mock_no_db["cellpose_segmentation"], - task_id=fractal_tasks_mock_no_db["cellpose_segmentation"].id, - args_parallel={}, - id=3, - order=3, - ) - ], - dataset=DatasetV2Mock( - name="dataset", zarr_dir=zarr_dir, **dataset_attrs - ), - **execute_tasks_v2_args, - ) + "3D": False, + } + img = find_image_by_zarr_url( + zarr_url=f"{zarr_dir}/my_plate_mip.zarr/A/01/0", + images=dataset_attrs["images"], + )["image"] + assert img == { + "zarr_url": f"{zarr_dir}/my_plate_mip.zarr/A/01/0", + "origin": f"{zarr_dir}/my_plate.zarr/A/01/0", + "attributes": { + "well": "A01", + "plate": "my_plate_mip.zarr", + }, + "types": { + "3D": False, + "illumination_correction": True, + }, + } + _assert_image_data_exist(dataset_attrs["images"]) + dataset_with_attrs = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs + ) + dataset_attrs = execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=fractal_tasks_mock_no_db["cellpose_segmentation"], + task_id=fractal_tasks_mock_no_db[ + "cellpose_segmentation" + ].id, + args_parallel={}, + id=3, + order=3, + ) + ], + dataset=dataset_with_attrs, + **execute_tasks_v2_args, + ) - debug(dataset_attrs) + debug(dataset_attrs) - assert _task_names_from_history(dataset_attrs["history"]) == [ - "cellpose_segmentation", - ] + assert _task_names_from_history(dataset_attrs["history"]) == [ + "cellpose_segmentation", + ] -def test_fractal_demos_01_no_overwrite( - tmp_path: Path, executor: Executor, fractal_tasks_mock_no_db +async def test_fractal_demos_01_no_overwrite( + db, + MockCurrentUser, + project_factory_v2, + dataset_factory_v2, + tmp_path: Path, + executor: Executor, + fractal_tasks_mock_no_db, ): """ Similar to fractal-demos/examples/01, but illumination @@ -229,194 +249,209 @@ def test_fractal_demos_01_no_overwrite( workflow_dir_local=tmp_path / "job_dir", workflow_dir_remote=tmp_path / "job_dir", ) + async with MockCurrentUser() as user: + project = await project_factory_v2(user) + dataset = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir + ) + dataset_attrs = execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=fractal_tasks_mock_no_db["create_ome_zarr_compound"], + task_id=fractal_tasks_mock_no_db[ + "create_ome_zarr_compound" + ].id, + args_non_parallel=dict(image_dir="/tmp/input_images"), + id=0, + order=0, + ) + ], + dataset=dataset, + **execute_tasks_v2_args, + ) + assert [img["zarr_url"] for img in dataset_attrs["images"]] == [ + f"{zarr_dir}/my_plate.zarr/A/01/0", + f"{zarr_dir}/my_plate.zarr/A/02/0", + ] + + _assert_image_data_exist(dataset_attrs["images"]) + # Run illumination correction with overwrite_input=False + dataset_with_attrs = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs + ) + dataset_attrs = execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=fractal_tasks_mock_no_db["illumination_correction"], + task_id=fractal_tasks_mock_no_db[ + "illumination_correction" + ].id, + args_parallel=dict(overwrite_input=False), + id=1, + order=1, + ) + ], + dataset=dataset_with_attrs, + **execute_tasks_v2_args, + ) - dataset_attrs = execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=fractal_tasks_mock_no_db["create_ome_zarr_compound"], - task_id=fractal_tasks_mock_no_db[ - "create_ome_zarr_compound" - ].id, - args_non_parallel=dict(image_dir="/tmp/input_images"), - id=0, - order=0, - ) - ], - dataset=DatasetV2Mock(name="dataset", zarr_dir=zarr_dir), - **execute_tasks_v2_args, - ) - assert [img["zarr_url"] for img in dataset_attrs["images"]] == [ - f"{zarr_dir}/my_plate.zarr/A/01/0", - f"{zarr_dir}/my_plate.zarr/A/02/0", - ] - - _assert_image_data_exist(dataset_attrs["images"]) - # Run illumination correction with overwrite_input=False - dataset_attrs = execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=fractal_tasks_mock_no_db["illumination_correction"], - task_id=fractal_tasks_mock_no_db["illumination_correction"].id, - args_parallel=dict(overwrite_input=False), - id=1, - order=1, - ) - ], - dataset=DatasetV2Mock( - name="dataset", zarr_dir=zarr_dir, **dataset_attrs - ), - **execute_tasks_v2_args, - ) - - assert _task_names_from_history(dataset_attrs["history"]) == [ - "illumination_correction", - ] - assert dataset_attrs["filters"]["attributes"] == {} - assert dataset_attrs["filters"]["types"] == { - "illumination_correction": True, - } - assert [img["zarr_url"] for img in dataset_attrs["images"]] == [ - f"{zarr_dir}/my_plate.zarr/A/01/0", - f"{zarr_dir}/my_plate.zarr/A/02/0", - f"{zarr_dir}/my_plate.zarr/A/01/0_corr", - f"{zarr_dir}/my_plate.zarr/A/02/0_corr", - ] - assert dataset_attrs["images"][0] == { - "zarr_url": f"{zarr_dir}/my_plate.zarr/A/01/0", - "origin": None, - "attributes": { - "well": "A01", - "plate": "my_plate.zarr", - }, - "types": { - "3D": True, - }, - } - assert dataset_attrs["images"][1] == { - "zarr_url": f"{zarr_dir}/my_plate.zarr/A/02/0", - "origin": None, - "attributes": { - "well": "A02", - "plate": "my_plate.zarr", - }, - "types": { - "3D": True, - }, - } - assert dataset_attrs["images"][2] == { - "zarr_url": f"{zarr_dir}/my_plate.zarr/A/01/0_corr", - "origin": f"{zarr_dir}/my_plate.zarr/A/01/0", - "attributes": { - "well": "A01", - "plate": "my_plate.zarr", - }, - "types": { + assert _task_names_from_history(dataset_attrs["history"]) == [ + "illumination_correction", + ] + assert dataset_attrs["filters"]["attributes"] == {} + assert dataset_attrs["filters"]["types"] == { "illumination_correction": True, - "3D": True, - }, - } - assert dataset_attrs["images"][3] == { - "zarr_url": f"{zarr_dir}/my_plate.zarr/A/02/0_corr", - "origin": f"{zarr_dir}/my_plate.zarr/A/02/0", - "attributes": { - "well": "A02", - "plate": "my_plate.zarr", - }, - "types": { - "3D": True, - "illumination_correction": True, - }, - } - _assert_image_data_exist(dataset_attrs["images"]) - - dataset_attrs = execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=fractal_tasks_mock_no_db["MIP_compound"], - task_id=fractal_tasks_mock_no_db["MIP_compound"].id, - args_non_parallel=dict(suffix="mip"), - id=2, - order=2, - ) - ], - dataset=DatasetV2Mock( - name="dataset", zarr_dir=zarr_dir, **dataset_attrs - ), - **execute_tasks_v2_args, - ) + } + assert [img["zarr_url"] for img in dataset_attrs["images"]] == [ + f"{zarr_dir}/my_plate.zarr/A/01/0", + f"{zarr_dir}/my_plate.zarr/A/02/0", + f"{zarr_dir}/my_plate.zarr/A/01/0_corr", + f"{zarr_dir}/my_plate.zarr/A/02/0_corr", + ] + assert dataset_attrs["images"][0] == { + "zarr_url": f"{zarr_dir}/my_plate.zarr/A/01/0", + "origin": None, + "attributes": { + "well": "A01", + "plate": "my_plate.zarr", + }, + "types": { + "3D": True, + }, + } + assert dataset_attrs["images"][1] == { + "zarr_url": f"{zarr_dir}/my_plate.zarr/A/02/0", + "origin": None, + "attributes": { + "well": "A02", + "plate": "my_plate.zarr", + }, + "types": { + "3D": True, + }, + } + assert dataset_attrs["images"][2] == { + "zarr_url": f"{zarr_dir}/my_plate.zarr/A/01/0_corr", + "origin": f"{zarr_dir}/my_plate.zarr/A/01/0", + "attributes": { + "well": "A01", + "plate": "my_plate.zarr", + }, + "types": { + "illumination_correction": True, + "3D": True, + }, + } + assert dataset_attrs["images"][3] == { + "zarr_url": f"{zarr_dir}/my_plate.zarr/A/02/0_corr", + "origin": f"{zarr_dir}/my_plate.zarr/A/02/0", + "attributes": { + "well": "A02", + "plate": "my_plate.zarr", + }, + "types": { + "3D": True, + "illumination_correction": True, + }, + } + _assert_image_data_exist(dataset_attrs["images"]) + dataset_with_attrs = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs + ) + dataset_attrs = execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=fractal_tasks_mock_no_db["MIP_compound"], + task_id=fractal_tasks_mock_no_db["MIP_compound"].id, + args_non_parallel=dict(suffix="mip"), + id=2, + order=2, + ) + ], + dataset=dataset_with_attrs, + **execute_tasks_v2_args, + ) - assert _task_names_from_history(dataset_attrs["history"]) == [ - "MIP_compound", - ] - assert dataset_attrs["filters"]["attributes"] == {} - assert dataset_attrs["filters"]["types"] == { - "3D": False, - "illumination_correction": True, - } - assert [img["zarr_url"] for img in dataset_attrs["images"]] == [ - f"{zarr_dir}/my_plate.zarr/A/01/0", - f"{zarr_dir}/my_plate.zarr/A/02/0", - f"{zarr_dir}/my_plate.zarr/A/01/0_corr", - f"{zarr_dir}/my_plate.zarr/A/02/0_corr", - f"{zarr_dir}/my_plate_mip.zarr/A/01/0_corr", - f"{zarr_dir}/my_plate_mip.zarr/A/02/0_corr", - ] - - assert dataset_attrs["images"][4] == { - "zarr_url": f"{zarr_dir}/my_plate_mip.zarr/A/01/0_corr", - "origin": f"{zarr_dir}/my_plate.zarr/A/01/0_corr", - "attributes": { - "well": "A01", - "plate": "my_plate_mip.zarr", - }, - "types": { + assert _task_names_from_history(dataset_attrs["history"]) == [ + "MIP_compound", + ] + assert dataset_attrs["filters"]["attributes"] == {} + assert dataset_attrs["filters"]["types"] == { "3D": False, "illumination_correction": True, - }, - } - assert dataset_attrs["images"][5] == { - "zarr_url": f"{zarr_dir}/my_plate_mip.zarr/A/02/0_corr", - "origin": f"{zarr_dir}/my_plate.zarr/A/02/0_corr", - "attributes": { - "well": "A02", - "plate": "my_plate_mip.zarr", - }, - "types": { + } + assert [img["zarr_url"] for img in dataset_attrs["images"]] == [ + f"{zarr_dir}/my_plate.zarr/A/01/0", + f"{zarr_dir}/my_plate.zarr/A/02/0", + f"{zarr_dir}/my_plate.zarr/A/01/0_corr", + f"{zarr_dir}/my_plate.zarr/A/02/0_corr", + f"{zarr_dir}/my_plate_mip.zarr/A/01/0_corr", + f"{zarr_dir}/my_plate_mip.zarr/A/02/0_corr", + ] + + assert dataset_attrs["images"][4] == { + "zarr_url": f"{zarr_dir}/my_plate_mip.zarr/A/01/0_corr", + "origin": f"{zarr_dir}/my_plate.zarr/A/01/0_corr", + "attributes": { + "well": "A01", + "plate": "my_plate_mip.zarr", + }, + "types": { + "3D": False, + "illumination_correction": True, + }, + } + assert dataset_attrs["images"][5] == { + "zarr_url": f"{zarr_dir}/my_plate_mip.zarr/A/02/0_corr", + "origin": f"{zarr_dir}/my_plate.zarr/A/02/0_corr", + "attributes": { + "well": "A02", + "plate": "my_plate_mip.zarr", + }, + "types": { + "3D": False, + "illumination_correction": True, + }, + } + + assert dataset_attrs["filters"]["attributes"] == {} + assert dataset_attrs["filters"]["types"] == { "3D": False, "illumination_correction": True, - }, - } - - assert dataset_attrs["filters"]["attributes"] == {} - assert dataset_attrs["filters"]["types"] == { - "3D": False, - "illumination_correction": True, - } - - _assert_image_data_exist(dataset_attrs["images"]) - - dataset_attrs = execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=fractal_tasks_mock_no_db["cellpose_segmentation"], - task_id=fractal_tasks_mock_no_db["cellpose_segmentation"].id, - id=3, - order=3, - ) - ], - dataset=DatasetV2Mock( - name="dataset", zarr_dir=zarr_dir, **dataset_attrs - ), - **execute_tasks_v2_args, - ) + } - assert _task_names_from_history(dataset_attrs["history"]) == [ - "cellpose_segmentation", - ] + _assert_image_data_exist(dataset_attrs["images"]) + dataset_with_attrs = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs + ) + dataset_attrs = execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=fractal_tasks_mock_no_db["cellpose_segmentation"], + task_id=fractal_tasks_mock_no_db[ + "cellpose_segmentation" + ].id, + id=3, + order=3, + ) + ], + dataset=dataset_with_attrs, + **execute_tasks_v2_args, + ) + assert _task_names_from_history(dataset_attrs["history"]) == [ + "cellpose_segmentation", + ] -def test_registration_no_overwrite( - tmp_path: Path, executor: Executor, fractal_tasks_mock_no_db + +async def test_registration_no_overwrite( + db, + MockCurrentUser, + project_factory_v2, + dataset_factory_v2, + tmp_path: Path, + executor: Executor, + fractal_tasks_mock_no_db, ): """ Test registration workflow, based on four tasks. @@ -428,104 +463,125 @@ def test_registration_no_overwrite( workflow_dir_remote=tmp_path / "job_dir", ) zarr_dir = (tmp_path / "zarr_dir").as_posix().rstrip("/") - dataset_attrs = execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=fractal_tasks_mock_no_db[ - "create_ome_zarr_multiplex_compound" - ], - task_id=fractal_tasks_mock_no_db[ - "create_ome_zarr_multiplex_compound" - ].id, - args_non_parallel=dict(image_dir="/tmp/input_images"), - id=0, - order=0, - ), - ], - dataset=DatasetV2Mock(name="dataset", zarr_dir=zarr_dir), - **execute_tasks_v2_args, - ) + async with MockCurrentUser() as user: + project = await project_factory_v2(user) + dataset = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir + ) + dataset_attrs = execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=fractal_tasks_mock_no_db[ + "create_ome_zarr_multiplex_compound" + ], + task_id=fractal_tasks_mock_no_db[ + "create_ome_zarr_multiplex_compound" + ].id, + args_non_parallel=dict(image_dir="/tmp/input_images"), + id=0, + order=0, + ), + ], + dataset=dataset, + **execute_tasks_v2_args, + ) + # Run init registration + dataset_with_attrs = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs + ) + dataset_attrs = execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=fractal_tasks_mock_no_db[ + "calculate_registration_compound" + ], + task_id=fractal_tasks_mock_no_db[ + "calculate_registration_compound" + ].id, + args_non_parallel={"ref_acquisition": 0}, + id=1, + order=1, + ) + ], + dataset=dataset_with_attrs, + **execute_tasks_v2_args, + ) - # Run init registration - dataset_attrs = execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=fractal_tasks_mock_no_db[ - "calculate_registration_compound" - ], - task_id=fractal_tasks_mock_no_db[ - "calculate_registration_compound" - ].id, - args_non_parallel={"ref_acquisition": 0}, - id=1, - order=1, - ) - ], - dataset=DatasetV2Mock( - name="dataset", zarr_dir=zarr_dir, **dataset_attrs - ), - **execute_tasks_v2_args, - ) + # In all non-reference-cycle images, a certain table was updated + for image in dataset_attrs["images"]: + if image["attributes"]["acquisition"] == 0: + assert not os.path.isfile( + f"{image['zarr_url']}/registration_table" + ) + else: + assert os.path.isfile( + f"{image['zarr_url']}/registration_table" + ) - # In all non-reference-cycle images, a certain table was updated - for image in dataset_attrs["images"]: - if image["attributes"]["acquisition"] == 0: - assert not os.path.isfile( - f"{image['zarr_url']}/registration_table" - ) - else: - assert os.path.isfile(f"{image['zarr_url']}/registration_table") - - # Run find_registration_consensus - dataset_attrs = execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=fractal_tasks_mock_no_db["find_registration_consensus"], - task_id=fractal_tasks_mock_no_db[ - "find_registration_consensus" - ].id, - id=2, - order=2, - ) - ], - dataset=DatasetV2Mock( - name="dataset", zarr_dir=zarr_dir, **dataset_attrs - ), - **execute_tasks_v2_args, - ) + # Run find_registration_consensus + dataset_with_attrs = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs + ) + dataset_attrs = execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=fractal_tasks_mock_no_db[ + "find_registration_consensus" + ], + task_id=fractal_tasks_mock_no_db[ + "find_registration_consensus" + ].id, + id=2, + order=2, + ) + ], + dataset=dataset_with_attrs, + **execute_tasks_v2_args, + ) - # In all images, a certain (post-consensus) table was updated - for image in dataset_attrs["images"]: - assert os.path.isfile(f"{image['zarr_url']}/registration_table_final") - - # The image list still has the original six images only - assert len(dataset_attrs["images"]) == 6 - - # Run apply_registration_to_image - dataset_attrs = execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=fractal_tasks_mock_no_db["apply_registration_to_image"], - task_id=fractal_tasks_mock_no_db[ - "apply_registration_to_image" - ].id, - args_parallel={"overwrite_input": False}, - id=3, - order=3, + # In all images, a certain (post-consensus) table was updated + for image in dataset_attrs["images"]: + assert os.path.isfile( + f"{image['zarr_url']}/registration_table_final" ) - ], - dataset=DatasetV2Mock( - name="dataset", zarr_dir=zarr_dir, **dataset_attrs - ), - **execute_tasks_v2_args, - ) - # A new copy of each image was created - assert len(dataset_attrs["images"]) == 12 + # The image list still has the original six images only + assert len(dataset_attrs["images"]) == 6 + # Run apply_registration_to_image + dataset_with_attrs = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs + ) + dataset_attrs = execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=fractal_tasks_mock_no_db[ + "apply_registration_to_image" + ], + task_id=fractal_tasks_mock_no_db[ + "apply_registration_to_image" + ].id, + args_parallel={"overwrite_input": False}, + id=3, + order=3, + ) + ], + dataset=dataset_with_attrs, + **execute_tasks_v2_args, + ) + + # A new copy of each image was created + assert len(dataset_attrs["images"]) == 12 -def test_registration_overwrite( - tmp_path: Path, executor: Executor, fractal_tasks_mock_no_db + +async def test_registration_overwrite( + db, + MockCurrentUser, + project_factory_v2, + dataset_factory_v2, + tmp_path: Path, + executor: Executor, + fractal_tasks_mock_no_db, ): """ Test registration workflow, based on four tasks. @@ -538,106 +594,128 @@ def test_registration_overwrite( ) zarr_dir = (tmp_path / "zarr_dir").as_posix().rstrip("/") - dataset_attrs = execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=fractal_tasks_mock_no_db[ - "create_ome_zarr_multiplex_compound" - ], - task_id=fractal_tasks_mock_no_db[ - "create_ome_zarr_multiplex_compound" - ].id, - args_non_parallel=dict(image_dir="/tmp/input_images"), - id=0, - order=0, - ), - ], - dataset=DatasetV2Mock(name="dataset", zarr_dir=zarr_dir), - **execute_tasks_v2_args, - ) + async with MockCurrentUser() as user: + project = await project_factory_v2(user) + dataset = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir + ) + dataset_attrs = execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=fractal_tasks_mock_no_db[ + "create_ome_zarr_multiplex_compound" + ], + task_id=fractal_tasks_mock_no_db[ + "create_ome_zarr_multiplex_compound" + ].id, + args_non_parallel=dict(image_dir="/tmp/input_images"), + id=0, + order=0, + ), + ], + dataset=dataset, + **execute_tasks_v2_args, + ) - # Run init registration - dataset_attrs = execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=fractal_tasks_mock_no_db[ - "calculate_registration_compound" - ], - task_id=fractal_tasks_mock_no_db[ - "calculate_registration_compound" - ].id, - args_non_parallel={"ref_acquisition": 0}, - order=1, - id=1, - ) - ], - dataset=DatasetV2Mock( - name="dataset", zarr_dir=zarr_dir, **dataset_attrs - ), - **execute_tasks_v2_args, - ) + # Run init registration + dataset_with_attrs = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs + ) + dataset_attrs = execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=fractal_tasks_mock_no_db[ + "calculate_registration_compound" + ], + task_id=fractal_tasks_mock_no_db[ + "calculate_registration_compound" + ].id, + args_non_parallel={"ref_acquisition": 0}, + order=1, + id=1, + ) + ], + dataset=dataset_with_attrs, + **execute_tasks_v2_args, + ) - # In all non-reference-cycle images, a certain table was updated - for image in dataset_attrs["images"]: - if image["attributes"]["acquisition"] == 0: - assert not os.path.isfile( - f"{image['zarr_url']}/registration_table" - ) - else: - assert os.path.isfile(f"{image['zarr_url']}/registration_table") - - # Run find_registration_consensus - dataset_attrs = execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=fractal_tasks_mock_no_db["find_registration_consensus"], - task_id=fractal_tasks_mock_no_db[ - "find_registration_consensus" - ].id, - id=2, - order=2, - ) - ], - dataset=DatasetV2Mock( - name="dataset", zarr_dir=zarr_dir, **dataset_attrs - ), - **execute_tasks_v2_args, - ) + # In all non-reference-cycle images, a certain table was updated + for image in dataset_attrs["images"]: + if image["attributes"]["acquisition"] == 0: + assert not os.path.isfile( + f"{image['zarr_url']}/registration_table" + ) + else: + assert os.path.isfile( + f"{image['zarr_url']}/registration_table" + ) + + # Run find_registration_consensus + dataset_with_attrs = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs + ) + dataset_attrs = execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=fractal_tasks_mock_no_db[ + "find_registration_consensus" + ], + task_id=fractal_tasks_mock_no_db[ + "find_registration_consensus" + ].id, + id=2, + order=2, + ) + ], + dataset=dataset_with_attrs, + **execute_tasks_v2_args, + ) - # In all images, a certain (post-consensus) table was updated - for image in dataset_attrs["images"]: - assert os.path.isfile(f"{image['zarr_url']}/registration_table_final") - - # The image list still has the original six images only - assert len(dataset_attrs["images"]) == 6 - - # Run apply_registration_to_image - dataset_attrs = execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=fractal_tasks_mock_no_db["apply_registration_to_image"], - task_id=fractal_tasks_mock_no_db[ - "apply_registration_to_image" - ].id, - args_parallel={"overwrite_input": True}, - id=3, - order=3, + # In all images, a certain (post-consensus) table was updated + for image in dataset_attrs["images"]: + assert os.path.isfile( + f"{image['zarr_url']}/registration_table_final" ) - ], - dataset=DatasetV2Mock( - name="dataset", zarr_dir=zarr_dir, **dataset_attrs - ), - **execute_tasks_v2_args, - ) - # Images are still the same number, but they are marked as registered - assert len(dataset_attrs["images"]) == 6 - for image in dataset_attrs["images"]: - assert image["types"]["registration"] is True + # The image list still has the original six images only + assert len(dataset_attrs["images"]) == 6 + + # Run apply_registration_to_image + dataset_with_attrs = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs + ) + dataset_attrs = execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=fractal_tasks_mock_no_db[ + "apply_registration_to_image" + ], + task_id=fractal_tasks_mock_no_db[ + "apply_registration_to_image" + ].id, + args_parallel={"overwrite_input": True}, + id=3, + order=3, + ) + ], + dataset=dataset_with_attrs, + **execute_tasks_v2_args, + ) + + # Images are still the same number, but they are marked as registered + assert len(dataset_attrs["images"]) == 6 + for image in dataset_attrs["images"]: + assert image["types"]["registration"] is True -def test_channel_parallelization_with_overwrite( - tmp_path: Path, executor: Executor, fractal_tasks_mock_no_db +async def test_channel_parallelization_with_overwrite( + db, + MockCurrentUser, + project_factory_v2, + dataset_factory_v2, + tmp_path: Path, + executor: Executor, + fractal_tasks_mock_no_db, ): zarr_dir = (tmp_path / "zarr_dir").as_posix().rstrip("/") @@ -646,51 +724,63 @@ def test_channel_parallelization_with_overwrite( workflow_dir_local=tmp_path / "job_dir", workflow_dir_remote=tmp_path / "job_dir", ) - # Run create_ome_zarr+yokogawa_to_zarr - dataset_attrs = execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=fractal_tasks_mock_no_db["create_ome_zarr_compound"], - task_id=fractal_tasks_mock_no_db[ - "create_ome_zarr_compound" - ].id, - args_non_parallel=dict(image_dir="/tmp/input_images"), - id=0, - order=0, - ), - ], - dataset=DatasetV2Mock(name="dataset", zarr_dir=zarr_dir), - **execute_tasks_v2_args, - ) + async with MockCurrentUser() as user: + project = await project_factory_v2(user) + dataset = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir + ) + # Run create_ome_zarr+yokogawa_to_zarr + dataset_attrs = execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=fractal_tasks_mock_no_db["create_ome_zarr_compound"], + task_id=fractal_tasks_mock_no_db[ + "create_ome_zarr_compound" + ].id, + args_non_parallel=dict(image_dir="/tmp/input_images"), + id=0, + order=0, + ), + ], + dataset=dataset, + **execute_tasks_v2_args, + ) - # Run illumination_correction_compound - dataset_attrs = execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=fractal_tasks_mock_no_db[ - "illumination_correction_compound" - ], - task_id=fractal_tasks_mock_no_db[ - "illumination_correction_compound" - ].id, - args_non_parallel=dict(overwrite_input=True), - args_parallel=dict(another_argument="something"), - id=1, - order=1, - ), - ], - dataset=DatasetV2Mock( - name="dataset", zarr_dir=zarr_dir, **dataset_attrs - ), - **execute_tasks_v2_args, - ) + # Run illumination_correction_compound + dataset_with_attrs = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs + ) + dataset_attrs = execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=fractal_tasks_mock_no_db[ + "illumination_correction_compound" + ], + task_id=fractal_tasks_mock_no_db[ + "illumination_correction_compound" + ].id, + args_non_parallel=dict(overwrite_input=True), + args_parallel=dict(another_argument="something"), + id=1, + order=1, + ), + ], + dataset=dataset_with_attrs, + **execute_tasks_v2_args, + ) - # Check that there are now 2 images - assert len(dataset_attrs["images"]) == 2 + # Check that there are now 2 images + assert len(dataset_attrs["images"]) == 2 -def test_channel_parallelization_no_overwrite( - tmp_path: Path, executor: Executor, fractal_tasks_mock_no_db +async def test_channel_parallelization_no_overwrite( + db, + MockCurrentUser, + project_factory_v2, + dataset_factory_v2, + tmp_path: Path, + executor: Executor, + fractal_tasks_mock_no_db, ): zarr_dir = (tmp_path / "zarr_dir").as_posix().rstrip("/") @@ -699,50 +789,60 @@ def test_channel_parallelization_no_overwrite( workflow_dir_local=tmp_path / "job_dir", workflow_dir_remote=tmp_path / "job_dir", ) - # Run create_ome_zarr+yokogawa_to_zarr - dataset_attrs = execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=fractal_tasks_mock_no_db["create_ome_zarr_compound"], - task_id=fractal_tasks_mock_no_db[ - "create_ome_zarr_compound" - ].id, - args_non_parallel=dict(image_dir="/tmp/input_images"), - id=0, - order=0, - ), - ], - dataset=DatasetV2Mock(name="dataset", zarr_dir=zarr_dir), - **execute_tasks_v2_args, - ) + async with MockCurrentUser() as user: + project = await project_factory_v2(user) + dataset = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir + ) + # Run create_ome_zarr+yokogawa_to_zarr + dataset_attrs = execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=fractal_tasks_mock_no_db["create_ome_zarr_compound"], + task_id=fractal_tasks_mock_no_db[ + "create_ome_zarr_compound" + ].id, + args_non_parallel=dict(image_dir="/tmp/input_images"), + id=0, + order=0, + ), + ], + dataset=dataset, + **execute_tasks_v2_args, + ) - # Run illumination_correction_compound - dataset_attrs = execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=fractal_tasks_mock_no_db[ - "illumination_correction_compound" - ], - task_id=fractal_tasks_mock_no_db[ - "illumination_correction_compound" - ].id, - args_non_parallel=dict(overwrite_input=False), - args_parallel=dict(another_argument="something"), - id=1, - order=1, - ), - ], - dataset=DatasetV2Mock( - name="dataset", zarr_dir=zarr_dir, **dataset_attrs - ), - **execute_tasks_v2_args, - ) + # Run illumination_correction_compound + dataset_with_attrs = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs + ) + dataset_attrs = execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=fractal_tasks_mock_no_db[ + "illumination_correction_compound" + ], + task_id=fractal_tasks_mock_no_db[ + "illumination_correction_compound" + ].id, + args_non_parallel=dict(overwrite_input=False), + args_parallel=dict(another_argument="something"), + id=1, + order=1, + ), + ], + dataset=dataset_with_attrs, + **execute_tasks_v2_args, + ) - # Check that there are now 4 images - assert len(dataset_attrs["images"]) == 4 + # Check that there are now 4 images + assert len(dataset_attrs["images"]) == 4 -def test_invalid_filtered_image_list( +async def test_invalid_filtered_image_list( + db, + MockCurrentUser, + project_factory_v2, + dataset_factory_v2, tmp_path: Path, executor: Executor, ): @@ -759,27 +859,29 @@ def test_invalid_filtered_image_list( zarr_dir = (tmp_path / "zarr_dir").as_posix().rstrip("/") zarr_url = Path(zarr_dir, "my_image").as_posix() image = SingleImage(zarr_url=zarr_url, attributes={}, types={}).dict() - - with pytest.raises(JobExecutionError) as e: - execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=TaskV2Mock( - id=0, - name="name", - source="source", - command_non_parallel="cmd", - type="non_parallel", - input_types=dict(invalid=True), - ), - task_id=0, - id=0, - order=0, - ) - ], - dataset=DatasetV2Mock( - name="dataset", zarr_dir=zarr_dir, images=[image] - ), - **execute_tasks_v2_args, + async with MockCurrentUser() as user: + project = await project_factory_v2(user) + dataset = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir, images=[image] ) - assert "Invalid filtered image list" in str(e.value) + with pytest.raises(JobExecutionError) as e: + execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=TaskV2Mock( + id=0, + name="name", + source="source", + command_non_parallel="cmd", + type="non_parallel", + input_types=dict(invalid=True), + ), + task_id=0, + id=0, + order=0, + ) + ], + dataset=dataset, + **execute_tasks_v2_args, + ) + assert "Invalid filtered image list" in str(e.value) diff --git a/tests/v2/04_runner/test_no_images_parallelization.py b/tests/v2/04_runner/test_no_images_parallelization.py index b744176cc6..8e82120250 100644 --- a/tests/v2/04_runner/test_no_images_parallelization.py +++ b/tests/v2/04_runner/test_no_images_parallelization.py @@ -3,7 +3,6 @@ from pathlib import Path from fixtures_mocks import * # noqa: F401,F403 -from v2_mock_models import DatasetV2Mock from v2_mock_models import TaskV2Mock from v2_mock_models import WorkflowTaskV2Mock @@ -29,7 +28,14 @@ def execute_tasks_v2(wf_task_list, workflow_dir_local, **kwargs): return out -def test_parallelize_on_no_images(tmp_path: Path, executor: Executor): +async def test_parallelize_on_no_images( + db, + MockCurrentUser, + project_factory_v2, + dataset_factory_v2, + tmp_path: Path, + executor: Executor, +): """ Run a parallel task on a dataset with no images. """ @@ -40,29 +46,40 @@ def test_parallelize_on_no_images(tmp_path: Path, executor: Executor): workflow_dir_remote=tmp_path / "job_dir", ) zarr_dir = (tmp_path / "zarr_dir").as_posix().rstrip("/") - - # Run successfully on an empty dataset - execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=TaskV2Mock( - name="name", - type="parallel", - command_parallel="echo", + async with MockCurrentUser() as user: + project = await project_factory_v2(user) + dataset = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir + ) + # Run successfully on an empty dataset + execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=TaskV2Mock( + name="name", + type="parallel", + command_parallel="echo", + id=0, + source="source", + ), + task_id=0, id=0, - source="source", - ), - task_id=0, - id=0, - order=0, - ) - ], - dataset=DatasetV2Mock(name="dataset", zarr_dir=zarr_dir), - **execute_tasks_v2_args, - ) + order=0, + ) + ], + dataset=dataset, + **execute_tasks_v2_args, + ) -def test_parallelize_on_no_images_compound(tmp_path: Path, executor: Executor): +async def test_parallelize_on_no_images_compound( + db, + MockCurrentUser, + project_factory_v2, + dataset_factory_v2, + tmp_path: Path, + executor: Executor, +): """ Run a compound task with an empty parallelization list. """ @@ -73,25 +90,29 @@ def test_parallelize_on_no_images_compound(tmp_path: Path, executor: Executor): workflow_dir_remote=tmp_path / "job_dir", ) zarr_dir = (tmp_path / "zarr_dir").as_posix().rstrip("/") - - # Run successfully on an empty dataset - execute_tasks_v2( - wf_task_list=[ - WorkflowTaskV2Mock( - task=TaskV2Mock( - name="name", - type="compound", - # this produces an empty parallelization list - command_non_parallel="echo", - command_parallel="echo", + async with MockCurrentUser() as user: + project = await project_factory_v2(user) + dataset = await dataset_factory_v2( + project_id=project.id, zarr_dir=zarr_dir + ) + # Run successfully on an empty dataset + execute_tasks_v2( + wf_task_list=[ + WorkflowTaskV2Mock( + task=TaskV2Mock( + name="name", + type="compound", + # this produces an empty parallelization list + command_non_parallel="echo", + command_parallel="echo", + id=0, + source="source", + ), + task_id=0, id=0, - source="source", - ), - task_id=0, - id=0, - order=0, - ) - ], - dataset=DatasetV2Mock(name="dataset", zarr_dir=zarr_dir), - **execute_tasks_v2_args, - ) + order=0, + ) + ], + dataset=dataset, + **execute_tasks_v2_args, + ) diff --git a/tests/v2/04_runner/v2_mock_models.py b/tests/v2/04_runner/v2_mock_models.py index dd5b6e6f51..4321f2f9ec 100644 --- a/tests/v2/04_runner/v2_mock_models.py +++ b/tests/v2/04_runner/v2_mock_models.py @@ -1,5 +1,4 @@ from typing import Any -from typing import Literal from typing import Optional from pydantic import BaseModel @@ -8,27 +7,6 @@ from pydantic import validator -class DatasetV2Mock(BaseModel): - id: Optional[int] = None - name: str - zarr_dir: str - images: list[dict[str, Any]] = Field(default_factory=list) - filters: dict[Literal["types", "attributes"], dict[str, Any]] = Field( - default_factory=dict - ) - history: list = Field(default_factory=list) - - @property - def image_zarr_urls(self) -> list[str]: - return [image["zarr_urls"] for image in self.images] - - @validator("filters", always=True) - def _default_filters(cls, value): - if value == {}: - return {"types": {}, "attributes": {}} - return value - - class TaskV2Mock(BaseModel): id: int name: str From a97c086b4f1567a866a8989c50b6befc0853cebb Mon Sep 17 00:00:00 2001 From: mfranzon <43796979+mfranzon@users.noreply.github.com> Date: Thu, 9 Jan 2025 10:02:23 +0100 Subject: [PATCH 07/49] add database logic for history update --- fractal_server/app/runner/v2/handle_failed_job.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fractal_server/app/runner/v2/handle_failed_job.py b/fractal_server/app/runner/v2/handle_failed_job.py index 4c9db4166d..1c6475b6ae 100644 --- a/fractal_server/app/runner/v2/handle_failed_job.py +++ b/fractal_server/app/runner/v2/handle_failed_job.py @@ -15,6 +15,8 @@ import logging from typing import Optional +from sqlalchemy.orm.attributes import flag_modified + from ...models.v2 import DatasetV2 from ...models.v2 import JobV2 from ...models.v2 import WorkflowTaskV2 @@ -90,4 +92,8 @@ def assemble_history_failed_job( status=WorkflowTaskStatusTypeV2.FAILED, parallelization=dict(), # FIXME: re-include parallelization ) - db_dataset.history.extend(new_history_item) + db_dataset.history.append(new_history_item) + + flag_modified(db_dataset, "history") + db.merge(db_dataset) + db.commit() From 9e360b67f93ea5b7e8adc3bfec80b8c87bdd2865 Mon Sep 17 00:00:00 2001 From: mfranzon <43796979+mfranzon@users.noreply.github.com> Date: Thu, 9 Jan 2025 10:03:52 +0100 Subject: [PATCH 08/49] add database logic for attrs update, move tmp_history, inside loop --- fractal_server/app/runner/v2/runner.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fractal_server/app/runner/v2/runner.py b/fractal_server/app/runner/v2/runner.py index 52c4efde22..1468ff4f14 100644 --- a/fractal_server/app/runner/v2/runner.py +++ b/fractal_server/app/runner/v2/runner.py @@ -7,6 +7,8 @@ from typing import Callable from typing import Optional +from sqlalchemy.orm.attributes import flag_modified + from ....images import Filters from ....images import SingleImage from ....images.tools import filter_image_list @@ -45,9 +47,9 @@ def execute_tasks_v2( zarr_dir = dataset.zarr_dir tmp_images = deepcopy(dataset.images) tmp_filters = deepcopy(dataset.filters) - tmp_history = [] for wftask in wf_task_list: + tmp_history = [] task = wftask.task task_name = task.name logger.debug(f'SUBMIT {wftask.order}-th task (name="{task_name}")') @@ -289,7 +291,6 @@ def execute_tasks_v2( ), ).dict() tmp_history.append(history_item) - # Write current dataset attributes (history, images, filters) into # temporary files which can be used (1) to retrieve the latest state # when the job fails, (2) from within endpoints that need up-to-date @@ -300,7 +301,9 @@ def execute_tasks_v2( db_dataset.history.extend(tmp_history) db_dataset.filters = tmp_filters db_dataset.images = tmp_images - db.add(db_dataset) + for attribute_name in ["filters", "history", "images"]: + flag_modified(db_dataset, attribute_name) + db.merge(db_dataset) db.commit() # with open(workflow_dir_local / HISTORY_FILENAME, "w") as f: From 42b03c9654dc164c60c671116fd07405a27f52fa Mon Sep 17 00:00:00 2001 From: mfranzon <43796979+mfranzon@users.noreply.github.com> Date: Thu, 9 Jan 2025 10:05:46 +0100 Subject: [PATCH 09/49] remove history,filters,image local files --- tests/v2/08_full_workflow/common_functions.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/tests/v2/08_full_workflow/common_functions.py b/tests/v2/08_full_workflow/common_functions.py index bf94580795..a4dd1e1ec6 100644 --- a/tests/v2/08_full_workflow/common_functions.py +++ b/tests/v2/08_full_workflow/common_functions.py @@ -7,9 +7,6 @@ from devtools import debug from fractal_server.app.models.v2 import TaskV2 -from fractal_server.app.runner.filenames import FILTERS_FILENAME -from fractal_server.app.runner.filenames import HISTORY_FILENAME -from fractal_server.app.runner.filenames import IMAGES_FILENAME from fractal_server.app.runner.filenames import WORKFLOW_LOG_FILENAME PREFIX = "/api/v2" @@ -130,7 +127,6 @@ async def full_workflow( ) assert res.status_code == 200 dataset = res.json() - debug(dataset) assert len(dataset["history"]) == 2 assert dataset["filters"]["types"] == {"3D": False} res = await client.post( @@ -169,9 +165,9 @@ async def full_workflow( with zipfile.ZipFile(f"{working_dir}.zip", "r") as zip_ref: actual_files = zip_ref.namelist() expected_files = [ - HISTORY_FILENAME, - FILTERS_FILENAME, - IMAGES_FILENAME, + # HISTORY_FILENAME, + # FILTERS_FILENAME, + # IMAGES_FILENAME, WORKFLOW_LOG_FILENAME, ] assert set(expected_files) < set(actual_files) @@ -204,7 +200,6 @@ async def full_workflow_TaskExecutionError( user_kwargs: Optional[dict] = None, user_settings_dict: Optional[dict] = None, ): - if user_kwargs is None: user_kwargs = {} @@ -596,9 +591,9 @@ async def workflow_with_non_python_task( must_exist = [ "0.log", "0.args.json", - IMAGES_FILENAME, - HISTORY_FILENAME, - FILTERS_FILENAME, + # IMAGES_FILENAME, + # HISTORY_FILENAME, + # FILTERS_FILENAME, WORKFLOW_LOG_FILENAME, ] From d41516c32599168eec6fe23321fca56b2e0ab9c2 Mon Sep 17 00:00:00 2001 From: mfranzon <43796979+mfranzon@users.noreply.github.com> Date: Thu, 9 Jan 2025 13:17:31 +0100 Subject: [PATCH 10/49] [skip ci] BROKEN - add submitted status in history, improve history usage --- fractal_server/app/routes/api/v2/status.py | 13 +++--- .../app/runner/v2/handle_failed_job.py | 29 +++++++------ fractal_server/app/runner/v2/runner.py | 43 +++++++++++++------ 3 files changed, 54 insertions(+), 31 deletions(-) diff --git a/fractal_server/app/routes/api/v2/status.py b/fractal_server/app/routes/api/v2/status.py index 4d9ab9137a..cbe2e25112 100644 --- a/fractal_server/app/routes/api/v2/status.py +++ b/fractal_server/app/routes/api/v2/status.py @@ -8,7 +8,6 @@ from .....logger import set_logger from ....db import AsyncSession from ....db import get_async_db -from ....models.v2 import DatasetV2 from ....models.v2 import JobV2 from ....schemas.v2.dataset import WorkflowTaskStatusTypeV2 from ....schemas.v2.status import StatusReadV2 @@ -140,12 +139,12 @@ async def get_workflowtask_status( # history = json.load(f) # except FileNotFoundError: # history = [] - db_dataset = await db.get(DatasetV2, dataset_id) - for history_item in db_dataset.history: - wftask_id = history_item["workflowtask"]["id"] - wftask_status = history_item["status"] - workflow_tasks_status_dict[wftask_id] = wftask_status - + # db_dataset = await db.get(DatasetV2, dataset_id) + # for history_item in db_dataset.history: + # wftask_id = history_item["workflowtask"]["id"] + # wftask_status = history_item["status"] + # workflow_tasks_status_dict[wftask_id] = wftask_status + # # Based on previously-gathered information, clean up the response body clean_workflow_tasks_status_dict = {} for wf_task in workflow.task_list: diff --git a/fractal_server/app/runner/v2/handle_failed_job.py b/fractal_server/app/runner/v2/handle_failed_job.py index 1c6475b6ae..d2b7e0a376 100644 --- a/fractal_server/app/runner/v2/handle_failed_job.py +++ b/fractal_server/app/runner/v2/handle_failed_job.py @@ -62,11 +62,11 @@ def assemble_history_failed_job( with next(get_sync_db()) as db: db_dataset = db.get(DatasetV2, dataset.id) + job_wftasks = workflow.task_list[ + job.first_task_index : (job.last_task_index + 1) # noqa + ] # Part 1/A: Identify failed task, if needed if failed_wftask is None: - job_wftasks = workflow.task_list[ - job.first_task_index : (job.last_task_index + 1) # noqa - ] tmp_file_wftasks = [ history_item["workflowtask"] for history_item in db_dataset.history @@ -87,13 +87,18 @@ def assemble_history_failed_job( if failed_wftask is not None: failed_wftask_dump = failed_wftask.model_dump(exclude={"task"}) failed_wftask_dump["task"] = failed_wftask.task.model_dump() - new_history_item = dict( - workflowtask=failed_wftask_dump, - status=WorkflowTaskStatusTypeV2.FAILED, - parallelization=dict(), # FIXME: re-include parallelization - ) - db_dataset.history.append(new_history_item) - flag_modified(db_dataset, "history") - db.merge(db_dataset) - db.commit() + for ind, history_item in enumerate(db_dataset.history): + if ( + history_item["workflowtask"]["task"] + == failed_wftask_dump["task"] + ): + history_item["status"] = WorkflowTaskStatusTypeV2.FAILED + db_dataset.history[ind] = history_item + flag_modified(db_dataset, "history") + db.merge(db_dataset) + db.commit() + break + + else: + pass diff --git a/fractal_server/app/runner/v2/runner.py b/fractal_server/app/runner/v2/runner.py index 1468ff4f14..d4f8e27002 100644 --- a/fractal_server/app/runner/v2/runner.py +++ b/fractal_server/app/runner/v2/runner.py @@ -77,6 +77,18 @@ def execute_tasks_v2( f'Image types: {image["types"]}\n' ) + with next(get_sync_db()) as db: + db_dataset = db.get(DatasetV2, dataset.id) + new_history_item = _DatasetHistoryItemV2( + workflowtask=wftask, + status=WorkflowTaskStatusTypeV2.SUBMITTED, + parallelization=dict(), # FIXME: re-include parallelization + ).dict() + db_dataset.history.append(new_history_item) + flag_modified(db_dataset, "history") + db.merge(db_dataset) + db.commit() + # TASK EXECUTION (V2) if task.type == "non_parallel": current_task_output = run_v2_task_non_parallel( @@ -280,17 +292,6 @@ def execute_tasks_v2( # Update filters.types tmp_filters["types"].update(types_from_manifest) tmp_filters["types"].update(types_from_task) - - # Update history (based on _DatasetHistoryItemV2) - history_item = _DatasetHistoryItemV2( - workflowtask=wftask, - status=WorkflowTaskStatusTypeV2.DONE, - parallelization=dict( - # task_type=wftask.task.type, # FIXME: breaks for V1 tasks - # component_list=fil, #FIXME - ), - ).dict() - tmp_history.append(history_item) # Write current dataset attributes (history, images, filters) into # temporary files which can be used (1) to retrieve the latest state # when the job fails, (2) from within endpoints that need up-to-date @@ -298,7 +299,25 @@ def execute_tasks_v2( with next(get_sync_db()) as db: db_dataset = db.get(DatasetV2, dataset.id) - db_dataset.history.extend(tmp_history) + for ind, history_item in enumerate(db_dataset.history): + if history_item["workflowtask"]["task"] == wftask["task"]: + history_item["status"] = WorkflowTaskStatusTypeV2.DONE + db_dataset.history[ind] = history_item + break + + else: + pass + # Update history (based on _DatasetHistoryItemV2) + # history_item = _DatasetHistoryItemV2( + # workflowtask=wftask, + # status=WorkflowTaskStatusTypeV2.DONE, + # parallelization=dict( + # # task_type=wftask.task.type,# FIXME: breaks for V1 tasks + # # component_list=fil, #FIXME + # ), + # ).dict() + # tmp_history.append(history_item) + # db_dataset.history.extend(tmp_history) db_dataset.filters = tmp_filters db_dataset.images = tmp_images for attribute_name in ["filters", "history", "images"]: From de29ce9d76f665ee6a6209194438a6784b51db14 Mon Sep 17 00:00:00 2001 From: mfranzon <43796979+mfranzon@users.noreply.github.com> Date: Thu, 9 Jan 2025 16:40:37 +0100 Subject: [PATCH 11/49] add submitted status in history, improve history usage --- fractal_server/app/runner/v2/handle_failed_job.py | 5 +++-- fractal_server/app/runner/v2/runner.py | 9 ++------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/fractal_server/app/runner/v2/handle_failed_job.py b/fractal_server/app/runner/v2/handle_failed_job.py index d2b7e0a376..d100183d46 100644 --- a/fractal_server/app/runner/v2/handle_failed_job.py +++ b/fractal_server/app/runner/v2/handle_failed_job.py @@ -89,9 +89,10 @@ def assemble_history_failed_job( failed_wftask_dump["task"] = failed_wftask.task.model_dump() for ind, history_item in enumerate(db_dataset.history): + if ( - history_item["workflowtask"]["task"] - == failed_wftask_dump["task"] + history_item["workflowtask"]["task"]["id"] + == failed_wftask_dump["task"]["id"] ): history_item["status"] = WorkflowTaskStatusTypeV2.FAILED db_dataset.history[ind] = history_item diff --git a/fractal_server/app/runner/v2/runner.py b/fractal_server/app/runner/v2/runner.py index d4f8e27002..5c50f885c7 100644 --- a/fractal_server/app/runner/v2/runner.py +++ b/fractal_server/app/runner/v2/runner.py @@ -296,17 +296,12 @@ def execute_tasks_v2( # temporary files which can be used (1) to retrieve the latest state # when the job fails, (2) from within endpoints that need up-to-date # information - with next(get_sync_db()) as db: db_dataset = db.get(DatasetV2, dataset.id) for ind, history_item in enumerate(db_dataset.history): - if history_item["workflowtask"]["task"] == wftask["task"]: - history_item["status"] = WorkflowTaskStatusTypeV2.DONE - db_dataset.history[ind] = history_item - break + history_item["status"] = WorkflowTaskStatusTypeV2.DONE + db_dataset.history[ind] = history_item - else: - pass # Update history (based on _DatasetHistoryItemV2) # history_item = _DatasetHistoryItemV2( # workflowtask=wftask, From 15d485ea35389275966d7d3157ea7f1bc13c0b38 Mon Sep 17 00:00:00 2001 From: mfranzon <43796979+mfranzon@users.noreply.github.com> Date: Thu, 9 Jan 2025 23:01:55 +0100 Subject: [PATCH 12/49] add logic if failed_wftask is None, set last wftask to failed if it is submitted --- fractal_server/app/runner/v2/__init__.py | 2 +- fractal_server/app/runner/v2/handle_failed_job.py | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/fractal_server/app/runner/v2/__init__.py b/fractal_server/app/runner/v2/__init__.py index 237c6842a0..c0c38be389 100644 --- a/fractal_server/app/runner/v2/__init__.py +++ b/fractal_server/app/runner/v2/__init__.py @@ -363,6 +363,7 @@ async def submit_workflow( # Read dataset attributes produced by the last successful task, and # update the DB dataset accordingly + failed_wftask = db_sync.get(WorkflowTaskV2, e.workflow_task_id) # dataset.history = assemble_history_failed_job( @@ -392,7 +393,6 @@ async def submit_workflow( except JobExecutionError as e: logger.debug(f'FAILED workflow "{workflow.name}", JobExecutionError.') logger.info(f'Workflow "{workflow.name}" failed (JobExecutionError).') - # Read dataset attributes produced by the last successful task, and # update the DB dataset accordingly # dataset.history = diff --git a/fractal_server/app/runner/v2/handle_failed_job.py b/fractal_server/app/runner/v2/handle_failed_job.py index d100183d46..b7f547b518 100644 --- a/fractal_server/app/runner/v2/handle_failed_job.py +++ b/fractal_server/app/runner/v2/handle_failed_job.py @@ -58,7 +58,6 @@ def assemble_history_failed_job( # The final value of the history attribute should include up to three # parts, coming from: the database, the temporary file, the failed-task # information. - with next(get_sync_db()) as db: db_dataset = db.get(DatasetV2, dataset.id) @@ -89,7 +88,6 @@ def assemble_history_failed_job( failed_wftask_dump["task"] = failed_wftask.task.model_dump() for ind, history_item in enumerate(db_dataset.history): - if ( history_item["workflowtask"]["task"]["id"] == failed_wftask_dump["task"]["id"] @@ -100,6 +98,11 @@ def assemble_history_failed_job( db.merge(db_dataset) db.commit() break - - else: - pass + if ( + db_dataset.history[-1]["status"] + == WorkflowTaskStatusTypeV2.SUBMITTED + ): + db_dataset.history[-1]["status"] = WorkflowTaskStatusTypeV2.FAILED + flag_modified(db_dataset, "history") + db.merge(db_dataset) + db.commit() From cd4351fbcfd6d7682e105eef7dd46da14a9ebbd1 Mon Sep 17 00:00:00 2001 From: mfranzon <43796979+mfranzon@users.noreply.github.com> Date: Thu, 9 Jan 2025 23:21:36 +0100 Subject: [PATCH 13/49] add condition if wf has no task --- fractal_server/app/runner/v2/handle_failed_job.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fractal_server/app/runner/v2/handle_failed_job.py b/fractal_server/app/runner/v2/handle_failed_job.py index b7f547b518..9146973401 100644 --- a/fractal_server/app/runner/v2/handle_failed_job.py +++ b/fractal_server/app/runner/v2/handle_failed_job.py @@ -99,7 +99,8 @@ def assemble_history_failed_job( db.commit() break if ( - db_dataset.history[-1]["status"] + len(db_dataset.history) > 0 + and db_dataset.history[-1]["status"] == WorkflowTaskStatusTypeV2.SUBMITTED ): db_dataset.history[-1]["status"] = WorkflowTaskStatusTypeV2.FAILED From 69ac3fff0d9dee38ba644f2f6af9b1394dad9d91 Mon Sep 17 00:00:00 2001 From: mfranzon <43796979+mfranzon@users.noreply.github.com> Date: Fri, 10 Jan 2025 00:16:23 +0100 Subject: [PATCH 14/49] use db data for runner response, adapt tests --- fractal_server/app/runner/v2/runner.py | 31 +++++++++------------ tests/v2/04_runner/test_fractal_examples.py | 14 +++++++++- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/fractal_server/app/runner/v2/runner.py b/fractal_server/app/runner/v2/runner.py index 5c50f885c7..8366ff697d 100644 --- a/fractal_server/app/runner/v2/runner.py +++ b/fractal_server/app/runner/v2/runner.py @@ -49,7 +49,6 @@ def execute_tasks_v2( tmp_filters = deepcopy(dataset.filters) for wftask in wf_task_list: - tmp_history = [] task = wftask.task task_name = task.name logger.debug(f'SUBMIT {wftask.order}-th task (name="{task_name}")') @@ -76,7 +75,6 @@ def execute_tasks_v2( f'Image zarr_url: {image["zarr_url"]}\n' f'Image types: {image["types"]}\n' ) - with next(get_sync_db()) as db: db_dataset = db.get(DatasetV2, dataset.id) new_history_item = _DatasetHistoryItemV2( @@ -88,7 +86,6 @@ def execute_tasks_v2( flag_modified(db_dataset, "history") db.merge(db_dataset) db.commit() - # TASK EXECUTION (V2) if task.type == "non_parallel": current_task_output = run_v2_task_non_parallel( @@ -319,22 +316,20 @@ def execute_tasks_v2( flag_modified(db_dataset, attribute_name) db.merge(db_dataset) db.commit() + db.refresh(db_dataset) - # with open(workflow_dir_local / HISTORY_FILENAME, "w") as f: - # json.dump(tmp_history, f, indent=2) - # with open(workflow_dir_local / FILTERS_FILENAME, "w") as f: - # json.dump(tmp_filters, f, indent=2) - # with open(workflow_dir_local / IMAGES_FILENAME, "w") as f: - # json.dump(tmp_images, f, indent=2) + # with open(workflow_dir_local / HISTORY_FILENAME, "w") as f: + # json.dump(tmp_history, f, indent=2) + # with open(workflow_dir_local / FILTERS_FILENAME, "w") as f: + # json.dump(tmp_filters, f, indent=2) + # with open(workflow_dir_local / IMAGES_FILENAME, "w") as f: + # json.dump(tmp_images, f, indent=2) - logger.debug(f'END {wftask.order}-th task (name="{task_name}")') + logger.debug(f'END {wftask.order}-th task (name="{task_name}")') - # NOTE: tmp_history only contains the newly-added history items (to be - # appended to the original history), while tmp_filters and tmp_images - # represent the new attributes (to replace the original ones) - result = dict( - history=tmp_history, - filters=tmp_filters, - images=tmp_images, - ) + result = dict( + history=db_dataset.history, + filters=db_dataset.filters, + images=db_dataset.images, + ) return result diff --git a/tests/v2/04_runner/test_fractal_examples.py b/tests/v2/04_runner/test_fractal_examples.py index e97051ca2d..3ff5decc42 100644 --- a/tests/v2/04_runner/test_fractal_examples.py +++ b/tests/v2/04_runner/test_fractal_examples.py @@ -101,7 +101,7 @@ async def test_fractal_demos_01( dataset=dataset, **execute_tasks_v2_args, ) - + debug(dataset_attrs["history"]) assert _task_names_from_history(dataset_attrs["history"]) == [ "create_ome_zarr_compound" ] @@ -128,6 +128,7 @@ async def test_fractal_demos_01( **execute_tasks_v2_args, ) assert _task_names_from_history(dataset_attrs["history"]) == [ + "create_ome_zarr_compound", "illumination_correction", ] assert dataset_attrs["filters"]["attributes"] == {} @@ -177,6 +178,8 @@ async def test_fractal_demos_01( debug(dataset_attrs) assert _task_names_from_history(dataset_attrs["history"]) == [ + "create_ome_zarr_compound", + "illumination_correction", "MIP_compound", ] @@ -224,6 +227,9 @@ async def test_fractal_demos_01( debug(dataset_attrs) assert _task_names_from_history(dataset_attrs["history"]) == [ + "create_ome_zarr_compound", + "illumination_correction", + "MIP_compound", "cellpose_segmentation", ] @@ -296,6 +302,7 @@ async def test_fractal_demos_01_no_overwrite( ) assert _task_names_from_history(dataset_attrs["history"]) == [ + "create_ome_zarr_compound", "illumination_correction", ] assert dataset_attrs["filters"]["attributes"] == {} @@ -373,6 +380,8 @@ async def test_fractal_demos_01_no_overwrite( ) assert _task_names_from_history(dataset_attrs["history"]) == [ + "create_ome_zarr_compound", + "illumination_correction", "MIP_compound", ] assert dataset_attrs["filters"]["attributes"] == {} @@ -440,6 +449,9 @@ async def test_fractal_demos_01_no_overwrite( ) assert _task_names_from_history(dataset_attrs["history"]) == [ + "create_ome_zarr_compound", + "illumination_correction", + "MIP_compound", "cellpose_segmentation", ] From 64dd9915bbf3ebf661656f3ba3117cd18cbe2761 Mon Sep 17 00:00:00 2001 From: mfranzon <43796979+mfranzon@users.noreply.github.com> Date: Fri, 10 Jan 2025 10:38:41 +0100 Subject: [PATCH 15/49] fix tests with new status endpoint logic --- fractal_server/app/routes/api/v2/status.py | 12 ++++++++---- tests/v2/03_api/test_api_status.py | 16 ++++++++++++---- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/fractal_server/app/routes/api/v2/status.py b/fractal_server/app/routes/api/v2/status.py index cbe2e25112..d6ad90b97f 100644 --- a/fractal_server/app/routes/api/v2/status.py +++ b/fractal_server/app/routes/api/v2/status.py @@ -87,6 +87,7 @@ async def get_workflowtask_status( # Lowest priority: read status from DB, which corresponds to jobs that are # not running history = dataset.history + for history_item in history: wftask_id = history_item["workflowtask"]["id"] wftask_status = history_item["status"] @@ -95,8 +96,8 @@ async def get_workflowtask_status( if running_job is None: # If no job is running, the chronological-last history item is also the # positional-last workflow task to be included in the response. - if len(dataset.history) > 0: - last_valid_wftask_id = dataset.history[-1]["workflowtask"]["id"] + if len(history) > 0: + last_valid_wftask_id = history[-1]["workflowtask"]["id"] else: last_valid_wftask_id = None else: @@ -106,11 +107,14 @@ async def get_workflowtask_status( # as "submitted" start = running_job.first_task_index end = running_job.last_task_index + 1 - for wftask in workflow.task_list[start:end]: + + all_wf_task_list = workflow.task_list[start:end] + for wftask in all_wf_task_list[ + len(workflow_tasks_status_dict) : # noqa: E203 + ]: workflow_tasks_status_dict[ wftask.id ] = WorkflowTaskStatusTypeV2.SUBMITTED - # The last workflow task that is included in the submitted job is also # the positional-last workflow task to be included in the response. try: diff --git a/tests/v2/03_api/test_api_status.py b/tests/v2/03_api/test_api_status.py index 0570a4fd0d..ecd9999e03 100644 --- a/tests/v2/03_api/test_api_status.py +++ b/tests/v2/03_api/test_api_status.py @@ -1,8 +1,10 @@ from devtools import debug +from sqlalchemy.orm.attributes import flag_modified from fractal_server.app.routes.api.v2._aux_functions import ( _workflow_insert_task, ) +from fractal_server.app.schemas.v2.dataset import WorkflowTaskStatusTypeV2 async def test_workflowtask_status_no_history_no_job( @@ -150,7 +152,9 @@ async def test_workflowtask_status_history_job( there is a running job associated to a given dataset/workflow pair. """ working_dir = tmp_path / "working_dir" - history = [dict(workflowtask=dict(id=3), status="done")] + history = [ + dict(workflowtask=dict(id=1), status=WorkflowTaskStatusTypeV2.DONE) + ] async with MockCurrentUser() as user: project = await project_factory_v2(user) dataset = await dataset_factory_v2( @@ -182,13 +186,17 @@ async def test_workflowtask_status_history_job( ) ) assert res.status_code == 200 - assert res.json() == {"status": {"1": "submitted", "2": "submitted"}} + assert res.json() == {"status": {"1": "done", "2": "submitted"}} # CASE 2: the job has a temporary history file dataset.history = [ - dict(workflowtask=dict(id=workflow.task_list[0].id), status="done") + dict( + workflowtask=dict(id=workflow.task_list[0].id), + status=WorkflowTaskStatusTypeV2.DONE, + ) ] - db.add(dataset) + flag_modified(dataset, "history") + await db.merge(dataset) await db.commit() res = await client.get( ( From 81f72679340acd430a06b0f208c37fa283d33c7c Mon Sep 17 00:00:00 2001 From: mfranzon <43796979+mfranzon@users.noreply.github.com> Date: Mon, 13 Jan 2025 10:53:44 +0100 Subject: [PATCH 16/49] review status endpoint logic, add arg to flake8 --- .pre-commit-config.yaml | 2 +- fractal_server/app/routes/api/v2/status.py | 36 +++++++++++++++++----- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 231b9203ca..7e02c5f835 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: rev: 7.1.1 hooks: - id: flake8 - args: ["--exclude", "examples/*"] + args: ["--exclude", "examples/*", "--ignore=E203,W503"] - repo: https://github.com/PyCQA/bandit rev: '1.7.4' hooks: diff --git a/fractal_server/app/routes/api/v2/status.py b/fractal_server/app/routes/api/v2/status.py index d6ad90b97f..f831a3c6be 100644 --- a/fractal_server/app/routes/api/v2/status.py +++ b/fractal_server/app/routes/api/v2/status.py @@ -87,8 +87,9 @@ async def get_workflowtask_status( # Lowest priority: read status from DB, which corresponds to jobs that are # not running history = dataset.history - + wftask_history_id = [] for history_item in history: + wftask_history_id.append(history_item["workflowtask"]["id"]) wftask_id = history_item["workflowtask"]["id"] wftask_status = history_item["status"] workflow_tasks_status_dict[wftask_id] = wftask_status @@ -109,12 +110,33 @@ async def get_workflowtask_status( end = running_job.last_task_index + 1 all_wf_task_list = workflow.task_list[start:end] - for wftask in all_wf_task_list[ - len(workflow_tasks_status_dict) : # noqa: E203 - ]: - workflow_tasks_status_dict[ - wftask.id - ] = WorkflowTaskStatusTypeV2.SUBMITTED + # for wftask in all_wf_task_list[ + # len(workflow_tasks_status_dict) : # noqa: E203 + # ]: + for ind, wftask in enumerate(all_wf_task_list): + if ( + wftask.id in wftask_history_id + and workflow_tasks_status_dict[wftask_id] + == WorkflowTaskStatusTypeV2.SUBMITTED + ): + workflow_tasks_status_dict.update( + { + sub_wftask.id: WorkflowTaskStatusTypeV2.SUBMITTED + for sub_wftask in all_wf_task_list[ind:] + } + ) + break + elif ( + wftask.id in wftask_history_id + and workflow_tasks_status_dict[wftask_id] + == WorkflowTaskStatusTypeV2.DONE + ): + pass + else: + workflow_tasks_status_dict.update( + {wftask.id: WorkflowTaskStatusTypeV2.SUBMITTED} + ) + # The last workflow task that is included in the submitted job is also # the positional-last workflow task to be included in the response. try: From e2cbfdc1627db1b102628bebd3da5539dec99d7d Mon Sep 17 00:00:00 2001 From: mfranzon <43796979+mfranzon@users.noreply.github.com> Date: Mon, 13 Jan 2025 11:59:42 +0100 Subject: [PATCH 17/49] update test for coverage --- fractal_server/app/routes/api/v2/status.py | 3 --- tests/v2/03_api/test_api_status.py | 10 ++++++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/fractal_server/app/routes/api/v2/status.py b/fractal_server/app/routes/api/v2/status.py index f831a3c6be..e20939120d 100644 --- a/fractal_server/app/routes/api/v2/status.py +++ b/fractal_server/app/routes/api/v2/status.py @@ -110,9 +110,6 @@ async def get_workflowtask_status( end = running_job.last_task_index + 1 all_wf_task_list = workflow.task_list[start:end] - # for wftask in all_wf_task_list[ - # len(workflow_tasks_status_dict) : # noqa: E203 - # ]: for ind, wftask in enumerate(all_wf_task_list): if ( wftask.id in wftask_history_id diff --git a/tests/v2/03_api/test_api_status.py b/tests/v2/03_api/test_api_status.py index ecd9999e03..d51e596354 100644 --- a/tests/v2/03_api/test_api_status.py +++ b/tests/v2/03_api/test_api_status.py @@ -153,7 +153,9 @@ async def test_workflowtask_status_history_job( """ working_dir = tmp_path / "working_dir" history = [ - dict(workflowtask=dict(id=1), status=WorkflowTaskStatusTypeV2.DONE) + dict( + workflowtask=dict(id=1), status=WorkflowTaskStatusTypeV2.SUBMITTED + ) ] async with MockCurrentUser() as user: project = await project_factory_v2(user) @@ -178,7 +180,7 @@ async def test_workflowtask_status_history_job( last_task_index=1, ) - # CASE 1: the job has no temporary history file + # CASE 1: first submitted res = await client.get( ( f"api/v2/project/{project.id}/status/?" @@ -186,9 +188,9 @@ async def test_workflowtask_status_history_job( ) ) assert res.status_code == 200 - assert res.json() == {"status": {"1": "done", "2": "submitted"}} + assert res.json() == {"status": {"1": "submitted", "2": "submitted"}} - # CASE 2: the job has a temporary history file + # CASE 2: first done dataset.history = [ dict( workflowtask=dict(id=workflow.task_list[0].id), From 4da4d1c90bcfb0cdd93e55c7071ca323229c423b Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 13 Jan 2025 14:14:07 +0100 Subject: [PATCH 18/49] Possible other way of finding SUBMITTED wftasks --- fractal_server/app/routes/api/v2/status.py | 39 +++++++++------------- 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/fractal_server/app/routes/api/v2/status.py b/fractal_server/app/routes/api/v2/status.py index e20939120d..79d62cd755 100644 --- a/fractal_server/app/routes/api/v2/status.py +++ b/fractal_server/app/routes/api/v2/status.py @@ -108,31 +108,22 @@ async def get_workflowtask_status( # as "submitted" start = running_job.first_task_index end = running_job.last_task_index + 1 + running_job_wftasks = workflow.task_list[start:end] + running_job_statuses = [ + workflow_tasks_status_dict.get(wft.id) + for wft in running_job_wftasks + ] + try: + first_submitted_index = running_job_statuses.index( + WorkflowTaskStatusTypeV2.SUBMITTED + ) + except ValueError: + first_submitted_index = 0 - all_wf_task_list = workflow.task_list[start:end] - for ind, wftask in enumerate(all_wf_task_list): - if ( - wftask.id in wftask_history_id - and workflow_tasks_status_dict[wftask_id] - == WorkflowTaskStatusTypeV2.SUBMITTED - ): - workflow_tasks_status_dict.update( - { - sub_wftask.id: WorkflowTaskStatusTypeV2.SUBMITTED - for sub_wftask in all_wf_task_list[ind:] - } - ) - break - elif ( - wftask.id in wftask_history_id - and workflow_tasks_status_dict[wftask_id] - == WorkflowTaskStatusTypeV2.DONE - ): - pass - else: - workflow_tasks_status_dict.update( - {wftask.id: WorkflowTaskStatusTypeV2.SUBMITTED} - ) + for wftask in enumerate(running_job_wftasks[first_submitted_index:]): + workflow_tasks_status_dict[ + wftask.id + ] = WorkflowTaskStatusTypeV2.SUBMITTED # The last workflow task that is included in the submitted job is also # the positional-last workflow task to be included in the response. From 3b0c1acc4e05ca59b8f5a6ede8fff76781240cbd Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 13 Jan 2025 14:19:31 +0100 Subject: [PATCH 19/49] Remove spurious `enumerate` --- fractal_server/app/routes/api/v2/status.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fractal_server/app/routes/api/v2/status.py b/fractal_server/app/routes/api/v2/status.py index 79d62cd755..3463d45f2a 100644 --- a/fractal_server/app/routes/api/v2/status.py +++ b/fractal_server/app/routes/api/v2/status.py @@ -120,7 +120,7 @@ async def get_workflowtask_status( except ValueError: first_submitted_index = 0 - for wftask in enumerate(running_job_wftasks[first_submitted_index:]): + for wftask in running_job_wftasks[first_submitted_index:]: workflow_tasks_status_dict[ wftask.id ] = WorkflowTaskStatusTypeV2.SUBMITTED From 45f00a844f5838cf7b54cdf6c74993995341c491 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 13 Jan 2025 14:28:16 +0100 Subject: [PATCH 20/49] Style --- tests/v2/03_api/test_api_status.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/v2/03_api/test_api_status.py b/tests/v2/03_api/test_api_status.py index d51e596354..741772a5ca 100644 --- a/tests/v2/03_api/test_api_status.py +++ b/tests/v2/03_api/test_api_status.py @@ -154,7 +154,8 @@ async def test_workflowtask_status_history_job( working_dir = tmp_path / "working_dir" history = [ dict( - workflowtask=dict(id=1), status=WorkflowTaskStatusTypeV2.SUBMITTED + workflowtask=dict(id=1), + status=WorkflowTaskStatusTypeV2.SUBMITTED, ) ] async with MockCurrentUser() as user: @@ -188,7 +189,7 @@ async def test_workflowtask_status_history_job( ) ) assert res.status_code == 200 - assert res.json() == {"status": {"1": "submitted", "2": "submitted"}} + assert res.json()["status"] == {"1": "submitted", "2": "submitted"} # CASE 2: first done dataset.history = [ @@ -207,7 +208,7 @@ async def test_workflowtask_status_history_job( ) ) assert res.status_code == 200 - assert res.json() == {"status": {"1": "done", "2": "submitted"}} + assert res.json()["status"] == {"1": "done", "2": "submitted"} async def test_workflowtask_status_two_jobs( From e3c5460477234653ac7a90dffcdc3c55a39cf75a Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 13 Jan 2025 14:28:35 +0100 Subject: [PATCH 21/49] Fix test --- tests/v2/03_api/test_api_status.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/v2/03_api/test_api_status.py b/tests/v2/03_api/test_api_status.py index 741772a5ca..6733610c78 100644 --- a/tests/v2/03_api/test_api_status.py +++ b/tests/v2/03_api/test_api_status.py @@ -196,7 +196,11 @@ async def test_workflowtask_status_history_job( dict( workflowtask=dict(id=workflow.task_list[0].id), status=WorkflowTaskStatusTypeV2.DONE, - ) + ), + dict( + workflowtask=dict(id=workflow.task_list[1].id), + status=WorkflowTaskStatusTypeV2.SUBMITTED, + ), ] flag_modified(dataset, "history") await db.merge(dataset) From cf266932370f5d77084c0c45ac8b4be6e7caa70b Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 13 Jan 2025 14:31:22 +0100 Subject: [PATCH 22/49] Add log --- fractal_server/app/routes/api/v2/status.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fractal_server/app/routes/api/v2/status.py b/fractal_server/app/routes/api/v2/status.py index 3463d45f2a..2648d07855 100644 --- a/fractal_server/app/routes/api/v2/status.py +++ b/fractal_server/app/routes/api/v2/status.py @@ -118,6 +118,10 @@ async def get_workflowtask_status( WorkflowTaskStatusTypeV2.SUBMITTED ) except ValueError: + logger.info( + f"Job {running_job.id} is submitted but it task list does " + f"not contain a {WorkflowTaskStatusTypeV2.SUBMITTED} task." + ) first_submitted_index = 0 for wftask in running_job_wftasks[first_submitted_index:]: From 8cb3d19e8fcdda117b9d28decd3262cdcf6b19b8 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 13 Jan 2025 15:07:05 +0100 Subject: [PATCH 23/49] Remove obsolete variable --- fractal_server/app/routes/api/v2/status.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fractal_server/app/routes/api/v2/status.py b/fractal_server/app/routes/api/v2/status.py index 2648d07855..e1432380f9 100644 --- a/fractal_server/app/routes/api/v2/status.py +++ b/fractal_server/app/routes/api/v2/status.py @@ -87,9 +87,7 @@ async def get_workflowtask_status( # Lowest priority: read status from DB, which corresponds to jobs that are # not running history = dataset.history - wftask_history_id = [] for history_item in history: - wftask_history_id.append(history_item["workflowtask"]["id"]) wftask_id = history_item["workflowtask"]["id"] wftask_status = history_item["status"] workflow_tasks_status_dict[wftask_id] = wftask_status @@ -108,9 +106,10 @@ async def get_workflowtask_status( # as "submitted" start = running_job.first_task_index end = running_job.last_task_index + 1 + running_job_wftasks = workflow.task_list[start:end] running_job_statuses = [ - workflow_tasks_status_dict.get(wft.id) + workflow_tasks_status_dict.get(wft.id, None) for wft in running_job_wftasks ] try: @@ -119,7 +118,7 @@ async def get_workflowtask_status( ) except ValueError: logger.info( - f"Job {running_job.id} is submitted but it task list does " + f"Job {running_job.id} is submitted but its task list does " f"not contain a {WorkflowTaskStatusTypeV2.SUBMITTED} task." ) first_submitted_index = 0 From 6a10497367ca4226fd3acb6687485cd4ac15bf5c Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 13 Jan 2025 15:07:45 +0100 Subject: [PATCH 24/49] style --- tests/v2/03_api/test_api_status.py | 63 ++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 21 deletions(-) diff --git a/tests/v2/03_api/test_api_status.py b/tests/v2/03_api/test_api_status.py index 6733610c78..593bbb6e72 100644 --- a/tests/v2/03_api/test_api_status.py +++ b/tests/v2/03_api/test_api_status.py @@ -1,4 +1,3 @@ -from devtools import debug from sqlalchemy.orm.attributes import flag_modified from fractal_server.app.routes.api.v2._aux_functions import ( @@ -7,7 +6,7 @@ from fractal_server.app.schemas.v2.dataset import WorkflowTaskStatusTypeV2 -async def test_workflowtask_status_no_history_no_job( +async def test_status_no_history_no_running_job( db, MockCurrentUser, project_factory_v2, @@ -17,8 +16,8 @@ async def test_workflowtask_status_no_history_no_job( client, ): """ - Test the status endpoint when there is information in the DB and no running - job associated to a given dataset/workflow pair. + GIVEN A database with no jobs and no history + THEN The status-endpoint response is empty """ async with MockCurrentUser() as user: project = await project_factory_v2(user) @@ -40,7 +39,7 @@ async def test_workflowtask_status_no_history_no_job( assert res.json() == {"status": {}} -async def test_workflowtask_status_history_no_job( +async def test_status_yes_history_no_running_job( db, MockCurrentUser, project_factory_v2, @@ -50,8 +49,8 @@ async def test_workflowtask_status_history_no_job( client, ): """ - Test the status endpoint when there is a non-empty history in the DB but - no running job associated to a given dataset/workflow pair. + GIVEN A database with non-empty dataset.history and no running jobs + THEN The status-endpoint response is empty """ async with MockCurrentUser() as user: project = await project_factory_v2(user) @@ -136,7 +135,7 @@ async def test_workflowtask_status_history_no_job( assert res.json() == {"status": {"3": "done"}} -async def test_workflowtask_status_history_job( +async def test_status_yes_history_yes_running_job( db, MockCurrentUser, tmp_path, @@ -152,17 +151,9 @@ async def test_workflowtask_status_history_job( there is a running job associated to a given dataset/workflow pair. """ working_dir = tmp_path / "working_dir" - history = [ - dict( - workflowtask=dict(id=1), - status=WorkflowTaskStatusTypeV2.SUBMITTED, - ) - ] async with MockCurrentUser() as user: project = await project_factory_v2(user) - dataset = await dataset_factory_v2( - project_id=project.id, history=history - ) + dataset = await dataset_factory_v2(project_id=project.id, history=[]) task = await task_factory_v2( user_id=user.id, name="task1", source="task1" ) @@ -182,6 +173,15 @@ async def test_workflowtask_status_history_job( ) # CASE 1: first submitted + dataset.history = [ + dict( + workflowtask=dict(id=workflow.task_list[0].id), + status=WorkflowTaskStatusTypeV2.SUBMITTED, + ), + ] + flag_modified(dataset, "history") + await db.merge(dataset) + await db.commit() res = await client.get( ( f"api/v2/project/{project.id}/status/?" @@ -214,6 +214,29 @@ async def test_workflowtask_status_history_job( assert res.status_code == 200 assert res.json()["status"] == {"1": "done", "2": "submitted"} + # CASE 3: no "SUBMITTED" in the task list + dataset.history = [ + dict( + workflowtask=dict(id=workflow.task_list[0].id), + status=WorkflowTaskStatusTypeV2.DONE, + ), + dict( + workflowtask=dict(id=workflow.task_list[1].id), + status=WorkflowTaskStatusTypeV2.FAILED, + ), + ] + flag_modified(dataset, "history") + await db.merge(dataset) + await db.commit() + res = await client.get( + ( + f"api/v2/project/{project.id}/status/?" + f"dataset_id={dataset.id}&workflow_id={workflow.id}" + ) + ) + assert res.status_code == 200 + assert res.json()["status"] == {"1": "submitted", "2": "submitted"} + async def test_workflowtask_status_two_jobs( db, @@ -248,14 +271,12 @@ async def test_workflowtask_status_two_jobs( dataset_id=dataset.id, working_dir=str(working_dir), ) - res = await client.get( ( f"api/v2/project/{project.id}/status/?" f"dataset_id={dataset.id}&workflow_id={workflow.id}" ) ) - debug(res.json()) assert res.status_code == 422 @@ -272,8 +293,8 @@ async def test_workflowtask_status_modified_workflow( ): """ Test the status endpoint when there is a running job associated to a given - dataset/workflow pair, but the workflow is modified after the job was - created. + dataset/workflow pair, but the workflow has been modified after the job + was created. """ working_dir = tmp_path / "working_dir" async with MockCurrentUser() as user: From 9e3376e5229775b7a250535d3615cb347c564515 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 13 Jan 2025 15:17:21 +0100 Subject: [PATCH 25/49] Reintroduce test_workflowtask_status_modified_workflow --- tests/v2/03_api/test_api_status.py | 68 ++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 4 deletions(-) diff --git a/tests/v2/03_api/test_api_status.py b/tests/v2/03_api/test_api_status.py index 593bbb6e72..53c68ad3ad 100644 --- a/tests/v2/03_api/test_api_status.py +++ b/tests/v2/03_api/test_api_status.py @@ -293,13 +293,12 @@ async def test_workflowtask_status_modified_workflow( ): """ Test the status endpoint when there is a running job associated to a given - dataset/workflow pair, but the workflow has been modified after the job - was created. + dataset/workflow pair, but the workflow is modified after the job was + created. """ working_dir = tmp_path / "working_dir" async with MockCurrentUser() as user: project = await project_factory_v2(user) - dataset = await dataset_factory_v2(project_id=project.id, history=[]) task = await task_factory_v2( user_id=user.id, name="task1", source="task1" ) @@ -308,6 +307,23 @@ async def test_workflowtask_status_modified_workflow( await _workflow_insert_task( workflow_id=workflow.id, task_id=task.id, db=db ) + dataset = await dataset_factory_v2( + project_id=project.id, + history=[ + dict( + workflowtask=dict(id=workflow.task_list[0].id), + status=WorkflowTaskStatusTypeV2.DONE, + ), + dict( + workflowtask=dict(id=workflow.task_list[1].id), + status=WorkflowTaskStatusTypeV2.DONE, + ), + dict( + workflowtask=dict(id=workflow.task_list[2].id), + status=WorkflowTaskStatusTypeV2.SUBMITTED, + ), + ], + ) await job_factory_v2( project_id=project.id, workflow_id=workflow.id, @@ -318,6 +334,50 @@ async def test_workflowtask_status_modified_workflow( ) # Delete second and third WorkflowTasks - await client.get( + res = await client.get( f"api/v2/project/{project.id}/workflow/{workflow.id}/" ) + assert res.status_code == 200 + wftask_list = res.json()["task_list"] + for wftask in wftask_list[1:]: + wftask_id = wftask["id"] + res = await client.delete( + f"api/v2/project/{project.id}/workflow/{workflow.id}/" + f"wftask/{wftask_id}/" + ) + assert res.status_code == 204 + + # The endpoint response is OK, even if the running_job points to + # non-existing WorkflowTask's. + res = await client.get( + ( + f"api/v2/project/{project.id}/status/?" + f"dataset_id={dataset.id}&workflow_id={workflow.id}" + ) + ) + assert res.status_code == 200 + assert res.json() == {"status": {"1": "submitted"}} + + # Delete last remaining task + res = await client.get( + f"api/v2/project/{project.id}/workflow/{workflow.id}/" + ) + assert res.status_code == 200 + for wftask in res.json()["task_list"]: + wftask_id = wftask["id"] + res = await client.delete( + f"api/v2/project/{project.id}/workflow/{workflow.id}/" + f"wftask/{wftask_id}/" + ) + assert res.status_code == 204 + + # The endpoint response is OK, even if the running_job points to + # non-existing WorkflowTask's. + res = await client.get( + ( + f"api/v2/project/{project.id}/status/?" + f"dataset_id={dataset.id}&workflow_id={workflow.id}" + ) + ) + assert res.status_code == 200 + assert res.json() == {"status": {}} From fe1403144dedef7339be2e8a192cc8157795d32d Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 13 Jan 2025 15:17:29 +0100 Subject: [PATCH 26/49] info -> warning --- fractal_server/app/routes/api/v2/status.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fractal_server/app/routes/api/v2/status.py b/fractal_server/app/routes/api/v2/status.py index e1432380f9..1e1882c240 100644 --- a/fractal_server/app/routes/api/v2/status.py +++ b/fractal_server/app/routes/api/v2/status.py @@ -117,7 +117,7 @@ async def get_workflowtask_status( WorkflowTaskStatusTypeV2.SUBMITTED ) except ValueError: - logger.info( + logger.warning( f"Job {running_job.id} is submitted but its task list does " f"not contain a {WorkflowTaskStatusTypeV2.SUBMITTED} task." ) From daabe56d35eed7e2d14153fd6e03c8dc93bb8813 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Mon, 13 Jan 2025 15:18:07 +0100 Subject: [PATCH 27/49] Remove comment --- fractal_server/app/routes/api/v2/status.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/fractal_server/app/routes/api/v2/status.py b/fractal_server/app/routes/api/v2/status.py index 1e1882c240..69a7e38522 100644 --- a/fractal_server/app/routes/api/v2/status.py +++ b/fractal_server/app/routes/api/v2/status.py @@ -147,21 +147,6 @@ async def get_workflowtask_status( last_valid_wftask_id = None logger.warning(f"Now setting {last_valid_wftask_id=}.") - # Highest priority: Read status updates coming from the running-job - # temporary file. Note: this file only contains information on - # WorkflowTask's that ran through successfully. - # tmp_file = Path(running_job.working_dir) / HISTORY_FILENAME - # try: - # with tmp_file.open("r") as f: - # history = json.load(f) - # except FileNotFoundError: - # history = [] - # db_dataset = await db.get(DatasetV2, dataset_id) - # for history_item in db_dataset.history: - # wftask_id = history_item["workflowtask"]["id"] - # wftask_status = history_item["status"] - # workflow_tasks_status_dict[wftask_id] = wftask_status - # # Based on previously-gathered information, clean up the response body clean_workflow_tasks_status_dict = {} for wf_task in workflow.task_list: From 90546cf0a6cbbeaac0858d1a3f41145c18807b9f Mon Sep 17 00:00:00 2001 From: mfranzon <43796979+mfranzon@users.noreply.github.com> Date: Mon, 13 Jan 2025 22:29:11 +0100 Subject: [PATCH 28/49] [skip ci] remove/update comments --- fractal_server/app/runner/v2/runner.py | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/fractal_server/app/runner/v2/runner.py b/fractal_server/app/runner/v2/runner.py index 8366ff697d..32038bdd4e 100644 --- a/fractal_server/app/runner/v2/runner.py +++ b/fractal_server/app/runner/v2/runner.py @@ -75,6 +75,7 @@ def execute_tasks_v2( f'Image zarr_url: {image["zarr_url"]}\n' f'Image types: {image["types"]}\n' ) + # First, set status SUBMITTED in dataset.history for each wftask with next(get_sync_db()) as db: db_dataset = db.get(DatasetV2, dataset.id) new_history_item = _DatasetHistoryItemV2( @@ -289,8 +290,8 @@ def execute_tasks_v2( # Update filters.types tmp_filters["types"].update(types_from_manifest) tmp_filters["types"].update(types_from_task) - # Write current dataset attributes (history, images, filters) into - # temporary files which can be used (1) to retrieve the latest state + # Write current dataset attributes (history, images, filters) into the + # database. They can be used (1) to retrieve the latest state # when the job fails, (2) from within endpoints that need up-to-date # information with next(get_sync_db()) as db: @@ -299,17 +300,6 @@ def execute_tasks_v2( history_item["status"] = WorkflowTaskStatusTypeV2.DONE db_dataset.history[ind] = history_item - # Update history (based on _DatasetHistoryItemV2) - # history_item = _DatasetHistoryItemV2( - # workflowtask=wftask, - # status=WorkflowTaskStatusTypeV2.DONE, - # parallelization=dict( - # # task_type=wftask.task.type,# FIXME: breaks for V1 tasks - # # component_list=fil, #FIXME - # ), - # ).dict() - # tmp_history.append(history_item) - # db_dataset.history.extend(tmp_history) db_dataset.filters = tmp_filters db_dataset.images = tmp_images for attribute_name in ["filters", "history", "images"]: @@ -318,13 +308,6 @@ def execute_tasks_v2( db.commit() db.refresh(db_dataset) - # with open(workflow_dir_local / HISTORY_FILENAME, "w") as f: - # json.dump(tmp_history, f, indent=2) - # with open(workflow_dir_local / FILTERS_FILENAME, "w") as f: - # json.dump(tmp_filters, f, indent=2) - # with open(workflow_dir_local / IMAGES_FILENAME, "w") as f: - # json.dump(tmp_images, f, indent=2) - logger.debug(f'END {wftask.order}-th task (name="{task_name}")') result = dict( From 47d01bffd384f0d4cc4a7c3b2eaca4f83e946fa7 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Tue, 14 Jan 2025 08:45:59 +0100 Subject: [PATCH 29/49] Review filenames.py --- fractal_server/app/routes/api/v1/dataset.py | 4 ++-- fractal_server/app/runner/filenames.py | 6 ++---- fractal_server/app/runner/v1/_common.py | 8 ++++---- fractal_server/app/runner/v1/handle_failed_job.py | 8 ++++---- fractal_server/app/runner/v2/handle_failed_job.py | 2 +- tests/v1/04_api/test_history_v1.py | 10 +++++----- tests/v2/08_full_workflow/common_functions.py | 6 ------ 7 files changed, 18 insertions(+), 26 deletions(-) diff --git a/fractal_server/app/routes/api/v1/dataset.py b/fractal_server/app/routes/api/v1/dataset.py index c12f04b688..202985b522 100644 --- a/fractal_server/app/routes/api/v1/dataset.py +++ b/fractal_server/app/routes/api/v1/dataset.py @@ -17,7 +17,7 @@ from ....models.v1 import Dataset from ....models.v1 import Project from ....models.v1 import Resource -from ....runner.filenames import HISTORY_FILENAME +from ....runner.filenames import HISTORY_FILENAME_V1 from ....schemas.v1 import DatasetCreateV1 from ....schemas.v1 import DatasetReadV1 from ....schemas.v1 import DatasetStatusReadV1 @@ -511,7 +511,7 @@ async def get_workflowtask_status( # Highest priority: Read status updates coming from the running-job # temporary file. Note: this file only contains information on # WorkflowTask's that ran through successfully - tmp_file = Path(running_job.working_dir) / HISTORY_FILENAME + tmp_file = Path(running_job.working_dir) / HISTORY_FILENAME_V1 try: with tmp_file.open("r") as f: history = json.load(f) diff --git a/fractal_server/app/runner/filenames.py b/fractal_server/app/runner/filenames.py index eaa8a123c0..22cdcbfcad 100644 --- a/fractal_server/app/runner/filenames.py +++ b/fractal_server/app/runner/filenames.py @@ -1,6 +1,4 @@ -HISTORY_FILENAME = "history.json" -FILTERS_FILENAME = "filters.json" -IMAGES_FILENAME = "images.json" -METADATA_FILENAME = "metadata.json" +HISTORY_FILENAME_V1 = "history.json" +METADATA_FILENAME_V1 = "metadata.json" SHUTDOWN_FILENAME = "shutdown" WORKFLOW_LOG_FILENAME = "workflow.log" diff --git a/fractal_server/app/runner/v1/_common.py b/fractal_server/app/runner/v1/_common.py index 5bcf1f0f40..46187e0187 100644 --- a/fractal_server/app/runner/v1/_common.py +++ b/fractal_server/app/runner/v1/_common.py @@ -28,8 +28,8 @@ from ..exceptions import TaskExecutionError from .common import TaskParameters from .common import write_args_file -from fractal_server.app.runner.filenames import HISTORY_FILENAME -from fractal_server.app.runner.filenames import METADATA_FILENAME +from fractal_server.app.runner.filenames import HISTORY_FILENAME_V1 +from fractal_server.app.runner.filenames import METADATA_FILENAME_V1 from fractal_server.app.runner.task_files import get_task_file_paths from fractal_server.string_tools import validate_cmd @@ -610,11 +610,11 @@ def execute_tasks( ) # Write most recent metadata to METADATA_FILENAME - with open(workflow_dir_local / METADATA_FILENAME, "w") as f: + with open(workflow_dir_local / METADATA_FILENAME_V1, "w") as f: json.dump(current_task_pars.metadata, f, indent=2) # Write most recent metadata to HISTORY_FILENAME - with open(workflow_dir_local / HISTORY_FILENAME, "w") as f: + with open(workflow_dir_local / HISTORY_FILENAME_V1, "w") as f: json.dump(current_task_pars.history, f, indent=2) return current_task_pars diff --git a/fractal_server/app/runner/v1/handle_failed_job.py b/fractal_server/app/runner/v1/handle_failed_job.py index 92c910f882..658f31fdfc 100644 --- a/fractal_server/app/runner/v1/handle_failed_job.py +++ b/fractal_server/app/runner/v1/handle_failed_job.py @@ -24,8 +24,8 @@ from ...models.v1 import Workflow from ...models.v1 import WorkflowTask from ...schemas.v1 import WorkflowTaskStatusTypeV1 -from ..filenames import HISTORY_FILENAME -from ..filenames import METADATA_FILENAME +from ..filenames import HISTORY_FILENAME_V1 +from ..filenames import METADATA_FILENAME_V1 def assemble_history_failed_job( @@ -64,7 +64,7 @@ def assemble_history_failed_job( new_history = output_dataset.history # Part 2: Extend history based on tmp_metadata_file - tmp_history_file = Path(job.working_dir) / HISTORY_FILENAME + tmp_history_file = Path(job.working_dir) / HISTORY_FILENAME_V1 try: with tmp_history_file.open("r") as f: tmp_file_history = json.load(f) @@ -129,7 +129,7 @@ def assemble_meta_failed_job( """ new_meta = deepcopy(output_dataset.meta) - metadata_file = Path(job.working_dir) / METADATA_FILENAME + metadata_file = Path(job.working_dir) / METADATA_FILENAME_V1 try: with metadata_file.open("r") as f: metadata_update = json.load(f) diff --git a/fractal_server/app/runner/v2/handle_failed_job.py b/fractal_server/app/runner/v2/handle_failed_job.py index 9146973401..7103c62274 100644 --- a/fractal_server/app/runner/v2/handle_failed_job.py +++ b/fractal_server/app/runner/v2/handle_failed_job.py @@ -46,7 +46,7 @@ def assemble_history_failed_job( failed_wftask: If set, append it to `history` during step 3; if `None`, infer it by comparing the job task list and the one in - `HISTORY_FILENAME`. + `HISTORY_FILENAME`. FIXME Returns: The new value of `history`, to be merged into diff --git a/tests/v1/04_api/test_history_v1.py b/tests/v1/04_api/test_history_v1.py index 8b11eb955e..f5aa7898c7 100644 --- a/tests/v1/04_api/test_history_v1.py +++ b/tests/v1/04_api/test_history_v1.py @@ -6,7 +6,7 @@ from fractal_server.app.routes.api.v1._aux_functions import ( _workflow_insert_task, ) -from fractal_server.app.runner.filenames import HISTORY_FILENAME +from fractal_server.app.runner.filenames import HISTORY_FILENAME_V1 from fractal_server.app.runner.v1.handle_failed_job import ( assemble_history_failed_job, ) # noqa @@ -46,9 +46,9 @@ async def test_get_workflowtask_status( working_dir = tmp_path / "working_dir" working_dir.mkdir() - with (working_dir / HISTORY_FILENAME).open("w") as f: + with (working_dir / HISTORY_FILENAME_V1).open("w") as f: json.dump(history, f) - debug(working_dir / HISTORY_FILENAME) + debug(working_dir / HISTORY_FILENAME_V1) async with MockCurrentUser() as user: project = await project_factory(user) @@ -315,7 +315,7 @@ async def test_assemble_history_failed_job_fail( Path(job.working_dir).mkdir() tmp_history = [dict(workflowtask={"id": wftask.id})] - with (Path(job.working_dir) / HISTORY_FILENAME).open("w") as fp: + with (Path(job.working_dir) / HISTORY_FILENAME_V1).open("w") as fp: json.dump(tmp_history, fp) logger = logging.getLogger(None) @@ -341,7 +341,7 @@ async def test_json_decode_error( history = "NOT A VALID JSON" working_dir = tmp_path / "working_dir" working_dir.mkdir() - with (working_dir / HISTORY_FILENAME).open("w") as f: + with (working_dir / HISTORY_FILENAME_V1).open("w") as f: f.write(history) async with MockCurrentUser() as user: diff --git a/tests/v2/08_full_workflow/common_functions.py b/tests/v2/08_full_workflow/common_functions.py index a4dd1e1ec6..5cfc2932cb 100644 --- a/tests/v2/08_full_workflow/common_functions.py +++ b/tests/v2/08_full_workflow/common_functions.py @@ -165,9 +165,6 @@ async def full_workflow( with zipfile.ZipFile(f"{working_dir}.zip", "r") as zip_ref: actual_files = zip_ref.namelist() expected_files = [ - # HISTORY_FILENAME, - # FILTERS_FILENAME, - # IMAGES_FILENAME, WORKFLOW_LOG_FILENAME, ] assert set(expected_files) < set(actual_files) @@ -591,9 +588,6 @@ async def workflow_with_non_python_task( must_exist = [ "0.log", "0.args.json", - # IMAGES_FILENAME, - # HISTORY_FILENAME, - # FILTERS_FILENAME, WORKFLOW_LOG_FILENAME, ] From 4d640de7e9739e352563a0726b0a5e6029a68b26 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Tue, 14 Jan 2025 08:48:07 +0100 Subject: [PATCH 30/49] Remove comments [skip ci] --- fractal_server/app/runner/v2/__init__.py | 33 +----------------------- 1 file changed, 1 insertion(+), 32 deletions(-) diff --git a/fractal_server/app/runner/v2/__init__.py b/fractal_server/app/runner/v2/__init__.py index c0c38be389..758f06396b 100644 --- a/fractal_server/app/runner/v2/__init__.py +++ b/fractal_server/app/runner/v2/__init__.py @@ -339,14 +339,6 @@ async def submit_workflow( f"more logs at {str(log_file_path)}" ) logger.debug(f'END workflow "{workflow.name}"') - # - # # Update dataset attributes, in case of successful execution - # dataset.history.extend(new_dataset_attributes["history"]) - # dataset.filters = new_dataset_attributes["filters"] - # dataset.images = new_dataset_attributes["images"] - # for attribute_name in ["filters", "history", "images"]: - # flag_modified(dataset, attribute_name) - # db_sync.merge(dataset) # Update job DB entry job.status = JobStatusTypeV2.DONE @@ -365,7 +357,6 @@ async def submit_workflow( # update the DB dataset accordingly failed_wftask = db_sync.get(WorkflowTaskV2, e.workflow_task_id) - # dataset.history = assemble_history_failed_job( job, dataset, @@ -373,13 +364,6 @@ async def submit_workflow( logger_name=logger_name, failed_wftask=failed_wftask, ) - # latest_filters = assemble_filters_failed_job(job) - # if latest_filters is not None: - # dataset.filters = latest_filters - # latest_images = assemble_images_failed_job(job) - # if latest_images is not None: - # dataset.images = latest_images - # db_sync.merge(dataset) exception_args_string = "\n".join(e.args) log_msg = ( @@ -395,20 +379,12 @@ async def submit_workflow( logger.info(f'Workflow "{workflow.name}" failed (JobExecutionError).') # Read dataset attributes produced by the last successful task, and # update the DB dataset accordingly - # dataset.history = assemble_history_failed_job( job, dataset, workflow, logger_name=logger_name, ) - # latest_filters = assemble_filters_failed_job(job) - # if latest_filters is not None: - # dataset.filters = latest_filters - # latest_images = assemble_images_failed_job(job) - # if latest_images is not None: - # dataset.images = latest_images - # db_sync.merge(dataset) fail_job( db=db_sync, @@ -428,20 +404,13 @@ async def submit_workflow( # Read dataset attributes produced by the last successful task, and # update the DB dataset accordingly - # dataset.history = assemble_history_failed_job( job, dataset, workflow, logger_name=logger_name, ) - # latest_filters = assemble_filters_failed_job(job) - # if latest_filters is not None: - # dataset.filters = latest_filters - # latest_images = assemble_images_failed_job(job) - # if latest_images is not None: - # dataset.images = latest_images - # db_sync.merge(dataset) + fail_job( db=db_sync, job=job, From e826ac97d3e3cf1d86277194c3307342c73913b6 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Tue, 14 Jan 2025 09:17:31 +0100 Subject: [PATCH 31/49] Do not return dataset attributes from execute_tasks_v2 (and process-workflow) functions --- fractal_server/app/runner/v2/__init__.py | 1 - .../app/runner/v2/_local/__init__.py | 15 ++++----------- .../runner/v2/_local_experimental/__init__.py | 16 ++++------------ .../app/runner/v2/_slurm_ssh/__init__.py | 11 ++++------- .../app/runner/v2/_slurm_sudo/__init__.py | 15 ++++----------- fractal_server/app/runner/v2/runner.py | 19 +++++++------------ 6 files changed, 23 insertions(+), 54 deletions(-) diff --git a/fractal_server/app/runner/v2/__init__.py b/fractal_server/app/runner/v2/__init__.py index 758f06396b..38b4f48ce9 100644 --- a/fractal_server/app/runner/v2/__init__.py +++ b/fractal_server/app/runner/v2/__init__.py @@ -321,7 +321,6 @@ async def submit_workflow( db_sync = next(DB.get_sync_db()) db_sync.close() - # new_dataset_attributes = await process_workflow( workflow=workflow, dataset=dataset, diff --git a/fractal_server/app/runner/v2/_local/__init__.py b/fractal_server/app/runner/v2/_local/__init__.py index ac068368ac..2a967b7c19 100644 --- a/fractal_server/app/runner/v2/_local/__init__.py +++ b/fractal_server/app/runner/v2/_local/__init__.py @@ -39,7 +39,7 @@ def _process_workflow( workflow_dir_local: Path, first_task_index: int, last_task_index: int, -) -> dict: +) -> None: """ Internal processing routine @@ -47,7 +47,7 @@ def _process_workflow( """ with FractalThreadPoolExecutor() as executor: - new_dataset_attributes = execute_tasks_v2( + execute_tasks_v2( wf_task_list=workflow.task_list[ first_task_index : (last_task_index + 1) # noqa ], # noqa @@ -58,7 +58,6 @@ def _process_workflow( logger_name=logger_name, submit_setup_call=_local_submit_setup, ) - return new_dataset_attributes async def process_workflow( @@ -75,7 +74,7 @@ async def process_workflow( slurm_user: Optional[str] = None, slurm_account: Optional[str] = None, worker_init: Optional[str] = None, -) -> dict: +) -> None: """ Run a workflow @@ -127,11 +126,6 @@ async def process_workflow( (positive exit codes). JobExecutionError: wrapper for errors raised by the tasks' executors (negative exit codes). - - Returns: - output_dataset_metadata: - The updated metadata for the dataset, as returned by the last task - of the workflow """ if workflow_dir_remote and (workflow_dir_remote != workflow_dir_local): @@ -148,7 +142,7 @@ async def process_workflow( last_task_index=last_task_index, ) - new_dataset_attributes = await async_wrap(_process_workflow)( + await async_wrap(_process_workflow)( workflow=workflow, dataset=dataset, logger_name=logger_name, @@ -156,4 +150,3 @@ async def process_workflow( first_task_index=first_task_index, last_task_index=last_task_index, ) - return new_dataset_attributes diff --git a/fractal_server/app/runner/v2/_local_experimental/__init__.py b/fractal_server/app/runner/v2/_local_experimental/__init__.py index cb87a01a80..197ae74f75 100644 --- a/fractal_server/app/runner/v2/_local_experimental/__init__.py +++ b/fractal_server/app/runner/v2/_local_experimental/__init__.py @@ -21,7 +21,7 @@ def _process_workflow( workflow_dir_local: Path, first_task_index: int, last_task_index: int, -) -> dict: +) -> None: """ Internal processing routine @@ -35,7 +35,7 @@ def _process_workflow( shutdown_file=workflow_dir_local / SHUTDOWN_FILENAME ) as executor: try: - new_dataset_attributes = execute_tasks_v2( + execute_tasks_v2( wf_task_list=workflow.task_list[ first_task_index : (last_task_index + 1) # noqa ], @@ -54,8 +54,6 @@ def _process_workflow( ) ) - return new_dataset_attributes - async def process_workflow( *, @@ -71,7 +69,7 @@ async def process_workflow( slurm_user: Optional[str] = None, slurm_account: Optional[str] = None, worker_init: Optional[str] = None, -) -> dict: +) -> None: """ Run a workflow @@ -123,11 +121,6 @@ async def process_workflow( (positive exit codes). JobExecutionError: wrapper for errors raised by the tasks' executors (negative exit codes). - - Returns: - output_dataset_metadata: - The updated metadata for the dataset, as returned by the last task - of the workflow """ if workflow_dir_remote and (workflow_dir_remote != workflow_dir_local): @@ -144,7 +137,7 @@ async def process_workflow( last_task_index=last_task_index, ) - new_dataset_attributes = await async_wrap(_process_workflow)( + await async_wrap(_process_workflow)( workflow=workflow, dataset=dataset, logger_name=logger_name, @@ -152,4 +145,3 @@ async def process_workflow( first_task_index=first_task_index, last_task_index=last_task_index, ) - return new_dataset_attributes diff --git a/fractal_server/app/runner/v2/_slurm_ssh/__init__.py b/fractal_server/app/runner/v2/_slurm_ssh/__init__.py index 3816a779f0..e80cc532cd 100644 --- a/fractal_server/app/runner/v2/_slurm_ssh/__init__.py +++ b/fractal_server/app/runner/v2/_slurm_ssh/__init__.py @@ -17,7 +17,6 @@ Executor objects. """ from pathlib import Path -from typing import Any from typing import Optional from typing import Union @@ -47,7 +46,7 @@ def _process_workflow( last_task_index: int, fractal_ssh: FractalSSH, worker_init: Optional[Union[str, list[str]]] = None, -) -> dict[str, Any]: +) -> None: """ Internal processing routine for the SLURM backend @@ -80,7 +79,7 @@ def _process_workflow( workflow_dir_remote=workflow_dir_remote, common_script_lines=worker_init, ) as executor: - new_dataset_attributes = execute_tasks_v2( + execute_tasks_v2( wf_task_list=workflow.task_list[ first_task_index : (last_task_index + 1) # noqa ], # noqa @@ -91,7 +90,6 @@ def _process_workflow( logger_name=logger_name, submit_setup_call=_slurm_submit_setup, ) - return new_dataset_attributes async def process_workflow( @@ -109,7 +107,7 @@ async def process_workflow( slurm_user: Optional[str] = None, slurm_account: Optional[str] = None, worker_init: Optional[str] = None, -) -> dict: +) -> None: """ Process workflow (SLURM backend public interface) """ @@ -122,7 +120,7 @@ async def process_workflow( last_task_index=last_task_index, ) - new_dataset_attributes = await async_wrap(_process_workflow)( + await async_wrap(_process_workflow)( workflow=workflow, dataset=dataset, logger_name=logger_name, @@ -133,4 +131,3 @@ async def process_workflow( worker_init=worker_init, fractal_ssh=fractal_ssh, ) - return new_dataset_attributes diff --git a/fractal_server/app/runner/v2/_slurm_sudo/__init__.py b/fractal_server/app/runner/v2/_slurm_sudo/__init__.py index b2ee5f1d71..936b2647b2 100644 --- a/fractal_server/app/runner/v2/_slurm_sudo/__init__.py +++ b/fractal_server/app/runner/v2/_slurm_sudo/__init__.py @@ -17,7 +17,6 @@ Executor objects. """ from pathlib import Path -from typing import Any from typing import Optional from typing import Union @@ -43,16 +42,13 @@ def _process_workflow( slurm_account: Optional[str] = None, user_cache_dir: str, worker_init: Optional[Union[str, list[str]]] = None, -) -> dict[str, Any]: +) -> None: """ Internal processing routine for the SLURM backend This function initialises the a FractalSlurmExecutor, setting logging, workflow working dir and user to impersonate. It then schedules the workflow tasks and returns the new dataset attributes - - Returns: - new_dataset_attributes: """ if not slurm_user: @@ -73,7 +69,7 @@ def _process_workflow( common_script_lines=worker_init, slurm_account=slurm_account, ) as executor: - new_dataset_attributes = execute_tasks_v2( + execute_tasks_v2( wf_task_list=workflow.task_list[ first_task_index : (last_task_index + 1) # noqa ], # noqa @@ -84,7 +80,6 @@ def _process_workflow( logger_name=logger_name, submit_setup_call=_slurm_submit_setup, ) - return new_dataset_attributes async def process_workflow( @@ -101,7 +96,7 @@ async def process_workflow( slurm_user: Optional[str] = None, slurm_account: Optional[str] = None, worker_init: Optional[str] = None, -) -> dict: +) -> None: """ Process workflow (SLURM backend public interface). """ @@ -113,8 +108,7 @@ async def process_workflow( first_task_index=first_task_index, last_task_index=last_task_index, ) - - new_dataset_attributes = await async_wrap(_process_workflow)( + await async_wrap(_process_workflow)( workflow=workflow, dataset=dataset, logger_name=logger_name, @@ -127,4 +121,3 @@ async def process_workflow( slurm_account=slurm_account, worker_init=worker_init, ) - return new_dataset_attributes diff --git a/fractal_server/app/runner/v2/runner.py b/fractal_server/app/runner/v2/runner.py index 32038bdd4e..f46bf8a239 100644 --- a/fractal_server/app/runner/v2/runner.py +++ b/fractal_server/app/runner/v2/runner.py @@ -35,12 +35,14 @@ def execute_tasks_v2( workflow_dir_remote: Optional[Path] = None, logger_name: Optional[str] = None, submit_setup_call: Callable = no_op_submit_setup_call, -) -> DatasetV2: +) -> None: logger = logging.getLogger(logger_name) - if ( - not workflow_dir_local.exists() - ): # FIXME: this should have already happened + if not workflow_dir_local.exists(): + logger.warning( + f"Now creating {workflow_dir_local}, " + "but it should have already happened." + ) workflow_dir_local.mkdir() # Initialize local dataset attributes @@ -308,11 +310,4 @@ def execute_tasks_v2( db.commit() db.refresh(db_dataset) - logger.debug(f'END {wftask.order}-th task (name="{task_name}")') - - result = dict( - history=db_dataset.history, - filters=db_dataset.filters, - images=db_dataset.images, - ) - return result + logger.debug(f'END {wftask.order}-th task (name="{task_name}")') From 9201e7d6a6f63f73bb055060f1ef104ee404c8fd Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Tue, 14 Jan 2025 09:17:58 +0100 Subject: [PATCH 32/49] BROKEN - start updating tests/v2/04_runner/test_fractal_examples.py --- tests/v2/04_runner/test_fractal_examples.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tests/v2/04_runner/test_fractal_examples.py b/tests/v2/04_runner/test_fractal_examples.py index 3ff5decc42..f65496aac5 100644 --- a/tests/v2/04_runner/test_fractal_examples.py +++ b/tests/v2/04_runner/test_fractal_examples.py @@ -10,6 +10,7 @@ from v2_mock_models import TaskV2Mock from v2_mock_models import WorkflowTaskV2Mock +from fractal_server.app.models import DatasetV2 from fractal_server.app.runner.exceptions import JobExecutionError from fractal_server.images import SingleImage from fractal_server.images.tools import find_image_by_zarr_url @@ -60,6 +61,15 @@ def image_data_exist_on_disk(image_list: list[SingleImage]): return all_images_have_data +async def _get_dataset_attrs(db, dataset_id) -> dict[str, Any]: + await db.close() + db_dataset = await db.get(DatasetV2, dataset_id) + dataset_attrs = db_dataset.model_dump( + include={"filters", "history", "images"} + ) + return dataset_attrs + + async def test_fractal_demos_01( db, MockCurrentUser, @@ -85,7 +95,7 @@ async def test_fractal_demos_01( dataset = await dataset_factory_v2( project_id=project.id, zarr_dir=zarr_dir ) - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db["create_ome_zarr_compound"], @@ -101,6 +111,7 @@ async def test_fractal_demos_01( dataset=dataset, **execute_tasks_v2_args, ) + dataset_attrs = await _get_dataset_attrs(db, dataset.id) debug(dataset_attrs["history"]) assert _task_names_from_history(dataset_attrs["history"]) == [ "create_ome_zarr_compound" @@ -109,10 +120,11 @@ async def test_fractal_demos_01( assert dataset_attrs["filters"]["types"] == {} _assert_image_data_exist(dataset_attrs["images"]) assert len(dataset_attrs["images"]) == 2 + dataset_with_attrs = await dataset_factory_v2( project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs ) - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db["illumination_correction"], @@ -127,6 +139,7 @@ async def test_fractal_demos_01( dataset=dataset_with_attrs, **execute_tasks_v2_args, ) + dataset_attrs = await _get_dataset_attrs(db, dataset.id) assert _task_names_from_history(dataset_attrs["history"]) == [ "create_ome_zarr_compound", "illumination_correction", From ee978f8d9c90d83ad820c8a0cd9d3c8f6217da8c Mon Sep 17 00:00:00 2001 From: mfranzon <43796979+mfranzon@users.noreply.github.com> Date: Tue, 14 Jan 2025 09:22:59 +0100 Subject: [PATCH 33/49] refactor tests --- tests/v2/04_runner/test_dummy_examples.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tests/v2/04_runner/test_dummy_examples.py b/tests/v2/04_runner/test_dummy_examples.py index 04705b5d57..1df71fea9e 100644 --- a/tests/v2/04_runner/test_dummy_examples.py +++ b/tests/v2/04_runner/test_dummy_examples.py @@ -12,7 +12,7 @@ from fractal_server.urls import normalize_url -def execute_tasks_v2(wf_task_list, workflow_dir_local, **kwargs): +def execute_tasks_v2(wf_task_list, workflow_dir_local, **kwargs) -> None: from fractal_server.app.runner.task_files import task_subfolder_name from fractal_server.app.runner.v2.runner import ( execute_tasks_v2 as raw_execute_tasks_v2, @@ -25,12 +25,11 @@ def execute_tasks_v2(wf_task_list, workflow_dir_local, **kwargs): logging.info(f"Now creating {subfolder.as_posix()}") subfolder.mkdir(parents=True) - out = raw_execute_tasks_v2( + raw_execute_tasks_v2( wf_task_list=wf_task_list, workflow_dir_local=workflow_dir_local, **kwargs, ) - return out async def test_dummy_insert_single_image( @@ -56,7 +55,7 @@ async def test_dummy_insert_single_image( ) # Run successfully on an empty dataset debug(dataset) - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db["dummy_insert_single_image"], @@ -70,10 +69,9 @@ async def test_dummy_insert_single_image( dataset=dataset, **execute_tasks_v2_args, ) - debug(dataset_attrs["images"]) # Run successfully even if the image already exists - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db["dummy_insert_single_image"], @@ -87,7 +85,6 @@ async def test_dummy_insert_single_image( dataset=dataset, **execute_tasks_v2_args, ) - debug(dataset_attrs["images"]) # Fail because new image is not relative to zarr_dir IMAGES = [dict(zarr_url=Path(zarr_dir, "my-image").as_posix())] dataset_images = await dataset_factory_v2( From 8e16efd53f1c34835632bc059dabb215c6c62492 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Tue, 14 Jan 2025 09:23:02 +0100 Subject: [PATCH 34/49] BROKEN - start updating tests/v2/04_runner/test_fractal_examples.py --- tests/v2/04_runner/aux_get_dataset_attrs.py | 12 ++++++ tests/v2/04_runner/test_fractal_examples.py | 46 ++++++++++----------- 2 files changed, 34 insertions(+), 24 deletions(-) create mode 100644 tests/v2/04_runner/aux_get_dataset_attrs.py diff --git a/tests/v2/04_runner/aux_get_dataset_attrs.py b/tests/v2/04_runner/aux_get_dataset_attrs.py new file mode 100644 index 0000000000..6fed8b0185 --- /dev/null +++ b/tests/v2/04_runner/aux_get_dataset_attrs.py @@ -0,0 +1,12 @@ +from typing import Any + +from fractal_server.app.models import DatasetV2 + + +async def _get_dataset_attrs(db, dataset_id) -> dict[str, Any]: + await db.close() + db_dataset = await db.get(DatasetV2, dataset_id) + dataset_attrs = db_dataset.model_dump( + include={"filters", "history", "images"} + ) + return dataset_attrs diff --git a/tests/v2/04_runner/test_fractal_examples.py b/tests/v2/04_runner/test_fractal_examples.py index f65496aac5..f2dd8ab202 100644 --- a/tests/v2/04_runner/test_fractal_examples.py +++ b/tests/v2/04_runner/test_fractal_examples.py @@ -5,12 +5,12 @@ from typing import Any import pytest +from aux_get_dataset_attrs import _get_dataset_attrs from devtools import debug from fixtures_mocks import * # noqa: F401,F403 from v2_mock_models import TaskV2Mock from v2_mock_models import WorkflowTaskV2Mock -from fractal_server.app.models import DatasetV2 from fractal_server.app.runner.exceptions import JobExecutionError from fractal_server.images import SingleImage from fractal_server.images.tools import find_image_by_zarr_url @@ -61,15 +61,6 @@ def image_data_exist_on_disk(image_list: list[SingleImage]): return all_images_have_data -async def _get_dataset_attrs(db, dataset_id) -> dict[str, Any]: - await db.close() - db_dataset = await db.get(DatasetV2, dataset_id) - dataset_attrs = db_dataset.model_dump( - include={"filters", "history", "images"} - ) - return dataset_attrs - - async def test_fractal_demos_01( db, MockCurrentUser, @@ -139,7 +130,7 @@ async def test_fractal_demos_01( dataset=dataset_with_attrs, **execute_tasks_v2_args, ) - dataset_attrs = await _get_dataset_attrs(db, dataset.id) + dataset_attrs = await _get_dataset_attrs(db, dataset_with_attrs.id) assert _task_names_from_history(dataset_attrs["history"]) == [ "create_ome_zarr_compound", "illumination_correction", @@ -174,7 +165,7 @@ async def test_fractal_demos_01( dataset_with_attrs = await dataset_factory_v2( project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs ) - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db["MIP_compound"], @@ -188,6 +179,7 @@ async def test_fractal_demos_01( dataset=dataset_with_attrs, **execute_tasks_v2_args, ) + dataset_attrs = await _get_dataset_attrs(db, dataset_with_attrs.id) debug(dataset_attrs) assert _task_names_from_history(dataset_attrs["history"]) == [ @@ -221,7 +213,7 @@ async def test_fractal_demos_01( dataset_with_attrs = await dataset_factory_v2( project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs ) - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db["cellpose_segmentation"], @@ -236,7 +228,7 @@ async def test_fractal_demos_01( dataset=dataset_with_attrs, **execute_tasks_v2_args, ) - + dataset_attrs = await _get_dataset_attrs(db, dataset_with_attrs.id) debug(dataset_attrs) assert _task_names_from_history(dataset_attrs["history"]) == [ @@ -273,7 +265,7 @@ async def test_fractal_demos_01_no_overwrite( dataset = await dataset_factory_v2( project_id=project.id, zarr_dir=zarr_dir ) - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db["create_ome_zarr_compound"], @@ -288,6 +280,7 @@ async def test_fractal_demos_01_no_overwrite( dataset=dataset, **execute_tasks_v2_args, ) + dataset_attrs = await _get_dataset_attrs(db, dataset.id) assert [img["zarr_url"] for img in dataset_attrs["images"]] == [ f"{zarr_dir}/my_plate.zarr/A/01/0", f"{zarr_dir}/my_plate.zarr/A/02/0", @@ -298,7 +291,7 @@ async def test_fractal_demos_01_no_overwrite( dataset_with_attrs = await dataset_factory_v2( project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs ) - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db["illumination_correction"], @@ -313,7 +306,7 @@ async def test_fractal_demos_01_no_overwrite( dataset=dataset_with_attrs, **execute_tasks_v2_args, ) - + dataset_attrs = await _get_dataset_attrs(db, dataset_with_attrs.id) assert _task_names_from_history(dataset_attrs["history"]) == [ "create_ome_zarr_compound", "illumination_correction", @@ -378,7 +371,7 @@ async def test_fractal_demos_01_no_overwrite( dataset_with_attrs = await dataset_factory_v2( project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs ) - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db["MIP_compound"], @@ -391,6 +384,7 @@ async def test_fractal_demos_01_no_overwrite( dataset=dataset_with_attrs, **execute_tasks_v2_args, ) + dataset_attrs = await _get_dataset_attrs(db, dataset_with_attrs.id) assert _task_names_from_history(dataset_attrs["history"]) == [ "create_ome_zarr_compound", @@ -446,7 +440,7 @@ async def test_fractal_demos_01_no_overwrite( dataset_with_attrs = await dataset_factory_v2( project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs ) - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db["cellpose_segmentation"], @@ -460,7 +454,7 @@ async def test_fractal_demos_01_no_overwrite( dataset=dataset_with_attrs, **execute_tasks_v2_args, ) - + dataset_attrs = await _get_dataset_attrs(db, dataset_with_attrs.id) assert _task_names_from_history(dataset_attrs["history"]) == [ "create_ome_zarr_compound", "illumination_correction", @@ -493,7 +487,7 @@ async def test_registration_no_overwrite( dataset = await dataset_factory_v2( project_id=project.id, zarr_dir=zarr_dir ) - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db[ @@ -510,11 +504,12 @@ async def test_registration_no_overwrite( dataset=dataset, **execute_tasks_v2_args, ) + dataset_attrs = await _get_dataset_attrs(db, dataset.id) # Run init registration dataset_with_attrs = await dataset_factory_v2( project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs ) - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db[ @@ -531,6 +526,7 @@ async def test_registration_no_overwrite( dataset=dataset_with_attrs, **execute_tasks_v2_args, ) + dataset_attrs = await _get_dataset_attrs(db, dataset_with_attrs.id) # In all non-reference-cycle images, a certain table was updated for image in dataset_attrs["images"]: @@ -547,7 +543,7 @@ async def test_registration_no_overwrite( dataset_with_attrs = await dataset_factory_v2( project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs ) - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db[ @@ -563,6 +559,7 @@ async def test_registration_no_overwrite( dataset=dataset_with_attrs, **execute_tasks_v2_args, ) + dataset_attrs = await _get_dataset_attrs(db, dataset_with_attrs.id) # In all images, a certain (post-consensus) table was updated for image in dataset_attrs["images"]: @@ -577,7 +574,7 @@ async def test_registration_no_overwrite( dataset_with_attrs = await dataset_factory_v2( project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs ) - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db[ @@ -594,6 +591,7 @@ async def test_registration_no_overwrite( dataset=dataset_with_attrs, **execute_tasks_v2_args, ) + dataset_attrs = await _get_dataset_attrs(db, dataset_with_attrs.id) # A new copy of each image was created assert len(dataset_attrs["images"]) == 12 From ac2e956131d88ee58f0690623d05c1a8dbf30b56 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Tue, 14 Jan 2025 09:26:11 +0100 Subject: [PATCH 35/49] Fix tests/v2/04_runner/test_fractal_examples.py --- tests/v2/04_runner/test_fractal_examples.py | 24 ++++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/tests/v2/04_runner/test_fractal_examples.py b/tests/v2/04_runner/test_fractal_examples.py index f2dd8ab202..25a22d8f70 100644 --- a/tests/v2/04_runner/test_fractal_examples.py +++ b/tests/v2/04_runner/test_fractal_examples.py @@ -622,7 +622,7 @@ async def test_registration_overwrite( dataset = await dataset_factory_v2( project_id=project.id, zarr_dir=zarr_dir ) - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db[ @@ -639,12 +639,13 @@ async def test_registration_overwrite( dataset=dataset, **execute_tasks_v2_args, ) + dataset_attrs = await _get_dataset_attrs(db, dataset.id) # Run init registration dataset_with_attrs = await dataset_factory_v2( project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs ) - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db[ @@ -661,6 +662,7 @@ async def test_registration_overwrite( dataset=dataset_with_attrs, **execute_tasks_v2_args, ) + dataset_attrs = await _get_dataset_attrs(db, dataset_with_attrs.id) # In all non-reference-cycle images, a certain table was updated for image in dataset_attrs["images"]: @@ -677,7 +679,7 @@ async def test_registration_overwrite( dataset_with_attrs = await dataset_factory_v2( project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs ) - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db[ @@ -693,6 +695,7 @@ async def test_registration_overwrite( dataset=dataset_with_attrs, **execute_tasks_v2_args, ) + dataset_attrs = await _get_dataset_attrs(db, dataset_with_attrs.id) # In all images, a certain (post-consensus) table was updated for image in dataset_attrs["images"]: @@ -707,7 +710,7 @@ async def test_registration_overwrite( dataset_with_attrs = await dataset_factory_v2( project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs ) - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db[ @@ -724,6 +727,7 @@ async def test_registration_overwrite( dataset=dataset_with_attrs, **execute_tasks_v2_args, ) + dataset_attrs = await _get_dataset_attrs(db, dataset_with_attrs.id) # Images are still the same number, but they are marked as registered assert len(dataset_attrs["images"]) == 6 @@ -753,7 +757,7 @@ async def test_channel_parallelization_with_overwrite( project_id=project.id, zarr_dir=zarr_dir ) # Run create_ome_zarr+yokogawa_to_zarr - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db["create_ome_zarr_compound"], @@ -768,12 +772,13 @@ async def test_channel_parallelization_with_overwrite( dataset=dataset, **execute_tasks_v2_args, ) + dataset_attrs = await _get_dataset_attrs(db, dataset.id) # Run illumination_correction_compound dataset_with_attrs = await dataset_factory_v2( project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs ) - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db[ @@ -791,6 +796,7 @@ async def test_channel_parallelization_with_overwrite( dataset=dataset_with_attrs, **execute_tasks_v2_args, ) + dataset_attrs = await _get_dataset_attrs(db, dataset_with_attrs.id) # Check that there are now 2 images assert len(dataset_attrs["images"]) == 2 @@ -818,7 +824,7 @@ async def test_channel_parallelization_no_overwrite( project_id=project.id, zarr_dir=zarr_dir ) # Run create_ome_zarr+yokogawa_to_zarr - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db["create_ome_zarr_compound"], @@ -833,12 +839,13 @@ async def test_channel_parallelization_no_overwrite( dataset=dataset, **execute_tasks_v2_args, ) + dataset_attrs = await _get_dataset_attrs(db, dataset.id) # Run illumination_correction_compound dataset_with_attrs = await dataset_factory_v2( project_id=project.id, zarr_dir=zarr_dir, **dataset_attrs ) - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db[ @@ -856,6 +863,7 @@ async def test_channel_parallelization_no_overwrite( dataset=dataset_with_attrs, **execute_tasks_v2_args, ) + dataset_attrs = await _get_dataset_attrs(db, dataset_with_attrs.id) # Check that there are now 4 images assert len(dataset_attrs["images"]) == 4 From 6425917877500e868a82c17970088f6a5778a15e Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Tue, 14 Jan 2025 09:30:54 +0100 Subject: [PATCH 36/49] Style/docstrings/comments --- fractal_server/app/runner/v2/__init__.py | 2 -- fractal_server/app/runner/v2/_local/__init__.py | 9 +++------ .../app/runner/v2/_local_experimental/__init__.py | 10 ++-------- fractal_server/app/runner/v2/_slurm_ssh/__init__.py | 9 +++------ fractal_server/app/runner/v2/_slurm_sudo/__init__.py | 6 +++--- 5 files changed, 11 insertions(+), 25 deletions(-) diff --git a/fractal_server/app/runner/v2/__init__.py b/fractal_server/app/runner/v2/__init__.py index 38b4f48ce9..432f64c322 100644 --- a/fractal_server/app/runner/v2/__init__.py +++ b/fractal_server/app/runner/v2/__init__.py @@ -41,8 +41,6 @@ from fractal_server import __VERSION__ from fractal_server.app.models import UserSettings -# from .handle_failed_job import assemble_filters_failed_job -# from .handle_failed_job import assemble_images_failed_job _backends = {} _backends["local"] = local_process_workflow diff --git a/fractal_server/app/runner/v2/_local/__init__.py b/fractal_server/app/runner/v2/_local/__init__.py index 2a967b7c19..74ca9fc78a 100644 --- a/fractal_server/app/runner/v2/_local/__init__.py +++ b/fractal_server/app/runner/v2/_local/__init__.py @@ -41,16 +41,13 @@ def _process_workflow( last_task_index: int, ) -> None: """ - Internal processing routine - - Schedules the workflow using a `FractalThreadPoolExecutor`. + Run the workflow using a `FractalThreadPoolExecutor`. """ - with FractalThreadPoolExecutor() as executor: execute_tasks_v2( wf_task_list=workflow.task_list[ - first_task_index : (last_task_index + 1) # noqa - ], # noqa + first_task_index : (last_task_index + 1) + ], dataset=dataset, executor=executor, workflow_dir_local=workflow_dir_local, diff --git a/fractal_server/app/runner/v2/_local_experimental/__init__.py b/fractal_server/app/runner/v2/_local_experimental/__init__.py index 197ae74f75..c298779ec2 100644 --- a/fractal_server/app/runner/v2/_local_experimental/__init__.py +++ b/fractal_server/app/runner/v2/_local_experimental/__init__.py @@ -23,13 +23,7 @@ def _process_workflow( last_task_index: int, ) -> None: """ - Internal processing routine - - Schedules the workflow using a `FractalProcessPoolExecutor`. - - Cf. - [process_workflow][fractal_server.app.runner.v2._local_experimental.process_workflow] - for the call signature. + Run the workflow using a `FractalProcessPoolExecutor`. """ with FractalProcessPoolExecutor( shutdown_file=workflow_dir_local / SHUTDOWN_FILENAME @@ -37,7 +31,7 @@ def _process_workflow( try: execute_tasks_v2( wf_task_list=workflow.task_list[ - first_task_index : (last_task_index + 1) # noqa + first_task_index : (last_task_index + 1) ], dataset=dataset, executor=executor, diff --git a/fractal_server/app/runner/v2/_slurm_ssh/__init__.py b/fractal_server/app/runner/v2/_slurm_ssh/__init__.py index e80cc532cd..def74eb560 100644 --- a/fractal_server/app/runner/v2/_slurm_ssh/__init__.py +++ b/fractal_server/app/runner/v2/_slurm_ssh/__init__.py @@ -48,14 +48,11 @@ def _process_workflow( worker_init: Optional[Union[str, list[str]]] = None, ) -> None: """ - Internal processing routine for the SLURM backend + Run the workflow using a `FractalSlurmSSHExecutor`. This function initialises the a FractalSlurmExecutor, setting logging, workflow working dir and user to impersonate. It then schedules the workflow tasks and returns the new dataset attributes - - Returns: - new_dataset_attributes: """ if isinstance(worker_init, str): @@ -81,8 +78,8 @@ def _process_workflow( ) as executor: execute_tasks_v2( wf_task_list=workflow.task_list[ - first_task_index : (last_task_index + 1) # noqa - ], # noqa + first_task_index : (last_task_index + 1) + ], dataset=dataset, executor=executor, workflow_dir_local=workflow_dir_local, diff --git a/fractal_server/app/runner/v2/_slurm_sudo/__init__.py b/fractal_server/app/runner/v2/_slurm_sudo/__init__.py index 936b2647b2..2fafc67ef5 100644 --- a/fractal_server/app/runner/v2/_slurm_sudo/__init__.py +++ b/fractal_server/app/runner/v2/_slurm_sudo/__init__.py @@ -44,7 +44,7 @@ def _process_workflow( worker_init: Optional[Union[str, list[str]]] = None, ) -> None: """ - Internal processing routine for the SLURM backend + Run the workflow using a `FractalSlurmExecutor`. This function initialises the a FractalSlurmExecutor, setting logging, workflow working dir and user to impersonate. It then schedules the @@ -71,8 +71,8 @@ def _process_workflow( ) as executor: execute_tasks_v2( wf_task_list=workflow.task_list[ - first_task_index : (last_task_index + 1) # noqa - ], # noqa + first_task_index : (last_task_index + 1) + ], dataset=dataset, executor=executor, workflow_dir_local=workflow_dir_local, From 57fabf318e1811e2a92b778b9f0ffbca02f07982 Mon Sep 17 00:00:00 2001 From: mfranzon <43796979+mfranzon@users.noreply.github.com> Date: Tue, 14 Jan 2025 09:33:22 +0100 Subject: [PATCH 37/49] refactor test_dummy_example --- tests/v2/04_runner/test_dummy_examples.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tests/v2/04_runner/test_dummy_examples.py b/tests/v2/04_runner/test_dummy_examples.py index 1df71fea9e..0cc994f4e5 100644 --- a/tests/v2/04_runner/test_dummy_examples.py +++ b/tests/v2/04_runner/test_dummy_examples.py @@ -4,6 +4,7 @@ from pathlib import Path import pytest +from aux_get_dataset_attrs import _get_dataset_attrs from devtools import debug from fixtures_mocks import * # noqa: F401,F403 from v2_mock_models import WorkflowTaskV2Mock @@ -227,7 +228,7 @@ async def test_dummy_remove_images( for index in [0, 1, 2] ], ) - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db["dummy_remove_images"], @@ -239,7 +240,6 @@ async def test_dummy_remove_images( dataset=dataset_pre, **execute_tasks_v2_args, ) - debug(dataset_attrs) # Fail when removing images that do not exist dataset_pre_fail = await dataset_factory_v2( @@ -301,7 +301,7 @@ async def test_dummy_unset_attribute( ) # Unset an existing attribute (starting from dataset_pre) - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db["dummy_unset_attribute"], @@ -314,6 +314,7 @@ async def test_dummy_unset_attribute( dataset=dataset_pre, **execute_tasks_v2_args, ) + dataset_attrs = await _get_dataset_attrs(db, dataset_pre.id) debug(dataset_attrs["images"]) assert "key2" not in dataset_attrs["images"][0]["attributes"].keys() @@ -331,7 +332,7 @@ async def test_dummy_unset_attribute( dataset=dataset_pre, **execute_tasks_v2_args, ) - debug(dataset_attrs["images"]) + dataset_attrs = await _get_dataset_attrs(db, dataset_pre.id) assert dataset_attrs["images"][0]["attributes"] == { "key1": "value1", "key2": "value2", @@ -360,7 +361,7 @@ async def test_dummy_insert_single_image_none_attribute( project_id=project.id, zarr_dir=zarr_dir ) # Run successfully on an empty dataset - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db["dummy_insert_single_image"], @@ -377,6 +378,7 @@ async def test_dummy_insert_single_image_none_attribute( dataset=dataset, **execute_tasks_v2_args, ) + dataset_attrs = await _get_dataset_attrs(db, dataset.id) debug(dataset_attrs["images"]) assert ( "attribute-name" @@ -406,7 +408,7 @@ async def test_dummy_insert_single_image_normalization( project_id=project.id, zarr_dir=zarr_dir ) # Run successfully with trailing slashes - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db["dummy_insert_single_image"], @@ -421,6 +423,7 @@ async def test_dummy_insert_single_image_normalization( dataset=dataset, **execute_tasks_v2_args, ) + dataset_attrs = await _get_dataset_attrs(db, dataset.id) debug(dataset_attrs["images"]) for image in dataset_attrs["images"]: assert normalize_url(image["zarr_url"]) == image["zarr_url"] @@ -455,7 +458,7 @@ async def test_default_inclusion_of_images( ) # Run - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db["generic_task_parallel"], @@ -471,6 +474,7 @@ async def test_default_inclusion_of_images( workflow_dir_local=tmp_path / "job_dir", workflow_dir_remote=tmp_path / "job_dir", ) + dataset_attrs = await _get_dataset_attrs(db, dataset_pre.id) image = dataset_attrs["images"][0] debug(dataset_attrs) debug(image) From ef76dafcc9380f7cbbf2d9e1b2c64fbeb8bb718e Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Tue, 14 Jan 2025 09:35:59 +0100 Subject: [PATCH 38/49] Remove some output of execute_tasks_v2 --- tests/v2/04_runner/test_fractal_examples.py | 3 +-- tests/v2/04_runner/test_no_images_parallelization.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/v2/04_runner/test_fractal_examples.py b/tests/v2/04_runner/test_fractal_examples.py index 25a22d8f70..1882a87902 100644 --- a/tests/v2/04_runner/test_fractal_examples.py +++ b/tests/v2/04_runner/test_fractal_examples.py @@ -29,12 +29,11 @@ def execute_tasks_v2(wf_task_list, workflow_dir_local, **kwargs): logging.info(f"Now creating {subfolder.as_posix()}") subfolder.mkdir(parents=True) - out = raw_execute_tasks_v2( + raw_execute_tasks_v2( wf_task_list=wf_task_list, workflow_dir_local=workflow_dir_local, **kwargs, ) - return out def _assert_image_data_exist(image_list: list[dict]): diff --git a/tests/v2/04_runner/test_no_images_parallelization.py b/tests/v2/04_runner/test_no_images_parallelization.py index 8e82120250..3ae0653d98 100644 --- a/tests/v2/04_runner/test_no_images_parallelization.py +++ b/tests/v2/04_runner/test_no_images_parallelization.py @@ -20,12 +20,11 @@ def execute_tasks_v2(wf_task_list, workflow_dir_local, **kwargs): logging.info(f"Now creating {subfolder.as_posix()}") subfolder.mkdir(parents=True) - out = raw_execute_tasks_v2( + raw_execute_tasks_v2( wf_task_list=wf_task_list, workflow_dir_local=workflow_dir_local, **kwargs, ) - return out async def test_parallelize_on_no_images( From 60f4eddc01c0f612804a021161e679563b02f174 Mon Sep 17 00:00:00 2001 From: mfranzon <43796979+mfranzon@users.noreply.github.com> Date: Tue, 14 Jan 2025 09:36:42 +0100 Subject: [PATCH 39/49] refactor test_dummy_example --- tests/v2/04_runner/test_dummy_examples.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/v2/04_runner/test_dummy_examples.py b/tests/v2/04_runner/test_dummy_examples.py index 0cc994f4e5..4c535e5ae0 100644 --- a/tests/v2/04_runner/test_dummy_examples.py +++ b/tests/v2/04_runner/test_dummy_examples.py @@ -319,7 +319,7 @@ async def test_dummy_unset_attribute( assert "key2" not in dataset_attrs["images"][0]["attributes"].keys() # Unset a missing attribute (starting from dataset_pre) - dataset_attrs = execute_tasks_v2( + execute_tasks_v2( wf_task_list=[ WorkflowTaskV2Mock( task=fractal_tasks_mock_no_db["dummy_unset_attribute"], @@ -465,6 +465,7 @@ async def test_default_inclusion_of_images( task_id=fractal_tasks_mock_no_db[ "generic_task_parallel" ].id, + rder=0, id=0, order=0, ) From ce8b73f9c9bffe3ba5fe543d786bee3dd4a48520 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Tue, 14 Jan 2025 10:00:19 +0100 Subject: [PATCH 40/49] Review how to set task status to failed --- fractal_server/app/runner/v2/__init__.py | 31 +------ .../app/runner/v2/handle_failed_job.py | 93 ++++--------------- fractal_server/app/runner/v2/runner.py | 6 +- 3 files changed, 25 insertions(+), 105 deletions(-) diff --git a/fractal_server/app/runner/v2/__init__.py b/fractal_server/app/runner/v2/__init__.py index 432f64c322..f902260e56 100644 --- a/fractal_server/app/runner/v2/__init__.py +++ b/fractal_server/app/runner/v2/__init__.py @@ -23,7 +23,6 @@ from ...db import DB from ...models.v2 import DatasetV2 from ...models.v2 import JobV2 -from ...models.v2 import WorkflowTaskV2 from ...models.v2 import WorkflowV2 from ...schemas.v2 import JobStatusTypeV2 from ..exceptions import JobExecutionError @@ -37,7 +36,7 @@ ) from ._slurm_ssh import process_workflow as slurm_ssh_process_workflow from ._slurm_sudo import process_workflow as slurm_sudo_process_workflow -from .handle_failed_job import assemble_history_failed_job +from .handle_failed_job import mark_last_wftask_as_failed from fractal_server import __VERSION__ from fractal_server.app.models import UserSettings @@ -350,18 +349,10 @@ async def submit_workflow( logger.debug(f'FAILED workflow "{workflow.name}", TaskExecutionError.') logger.info(f'Workflow "{workflow.name}" failed (TaskExecutionError).') - # Read dataset attributes produced by the last successful task, and - # update the DB dataset accordingly - - failed_wftask = db_sync.get(WorkflowTaskV2, e.workflow_task_id) - assemble_history_failed_job( - job, - dataset, + mark_last_wftask_as_failed( workflow, logger_name=logger_name, - failed_wftask=failed_wftask, ) - exception_args_string = "\n".join(e.args) log_msg = ( f"TASK ERROR: " @@ -374,15 +365,10 @@ async def submit_workflow( except JobExecutionError as e: logger.debug(f'FAILED workflow "{workflow.name}", JobExecutionError.') logger.info(f'Workflow "{workflow.name}" failed (JobExecutionError).') - # Read dataset attributes produced by the last successful task, and - # update the DB dataset accordingly - assemble_history_failed_job( - job, + mark_last_wftask_as_failed( dataset, - workflow, logger_name=logger_name, ) - fail_job( db=db_sync, job=job, @@ -396,18 +382,11 @@ async def submit_workflow( except Exception: logger.debug(f'FAILED workflow "{workflow.name}", unknown error.') logger.info(f'Workflow "{workflow.name}" failed (unkwnon error).') - - current_traceback = traceback.format_exc() - - # Read dataset attributes produced by the last successful task, and - # update the DB dataset accordingly - assemble_history_failed_job( - job, + mark_last_wftask_as_failed( dataset, - workflow, logger_name=logger_name, ) - + current_traceback = traceback.format_exc() fail_job( db=db_sync, job=job, diff --git a/fractal_server/app/runner/v2/handle_failed_job.py b/fractal_server/app/runner/v2/handle_failed_job.py index 7103c62274..7c94031c2e 100644 --- a/fractal_server/app/runner/v2/handle_failed_job.py +++ b/fractal_server/app/runner/v2/handle_failed_job.py @@ -13,97 +13,40 @@ Helper functions to handle Dataset history. """ import logging -from typing import Optional from sqlalchemy.orm.attributes import flag_modified from ...models.v2 import DatasetV2 -from ...models.v2 import JobV2 -from ...models.v2 import WorkflowTaskV2 -from ...models.v2 import WorkflowV2 from ...schemas.v2 import WorkflowTaskStatusTypeV2 from fractal_server.app.db import get_sync_db -def assemble_history_failed_job( - job: JobV2, +def mark_last_wftask_as_failed( dataset: DatasetV2, - workflow: WorkflowV2, - logger_name: Optional[str] = None, - failed_wftask: Optional[WorkflowTaskV2] = None, + logger_name: str, ) -> None: """ - Assemble `history` after a workflow-execution job fails. + Edit dataset history, by marking last item as failed. Args: - job: - The failed `JobV2` object. - dataset: - The `DatasetV2` object associated to `job`. - workflow: - The `WorkflowV2` object associated to `job`. + dataset: The `DatasetV2` object logger_name: A logger name. - failed_wftask: - If set, append it to `history` during step 3; if `None`, infer - it by comparing the job task list and the one in - `HISTORY_FILENAME`. FIXME - - Returns: - The new value of `history`, to be merged into - `dataset.meta`. """ logger = logging.getLogger(logger_name) - - # The final value of the history attribute should include up to three - # parts, coming from: the database, the temporary file, the failed-task - # information. with next(get_sync_db()) as db: db_dataset = db.get(DatasetV2, dataset.id) - - job_wftasks = workflow.task_list[ - job.first_task_index : (job.last_task_index + 1) # noqa - ] - # Part 1/A: Identify failed task, if needed - if failed_wftask is None: - tmp_file_wftasks = [ - history_item["workflowtask"] - for history_item in db_dataset.history - ] - if len(job_wftasks) <= len(tmp_file_wftasks): - n_tasks_job = len(job_wftasks) - n_tasks_tmp = len(tmp_file_wftasks) - logger.error( - "Cannot identify the failed task based on job task list " - f"(length {n_tasks_job}) and temporary-file task list " - f"(length {n_tasks_tmp})." - ) - logger.error("Failed task not appended to history.") - else: - failed_wftask = job_wftasks[len(tmp_file_wftasks)] - - # Part 1/B: Append failed task to history - if failed_wftask is not None: - failed_wftask_dump = failed_wftask.model_dump(exclude={"task"}) - failed_wftask_dump["task"] = failed_wftask.task.model_dump() - - for ind, history_item in enumerate(db_dataset.history): - if ( - history_item["workflowtask"]["task"]["id"] - == failed_wftask_dump["task"]["id"] - ): - history_item["status"] = WorkflowTaskStatusTypeV2.FAILED - db_dataset.history[ind] = history_item - flag_modified(db_dataset, "history") - db.merge(db_dataset) - db.commit() - break - if ( - len(db_dataset.history) > 0 - and db_dataset.history[-1]["status"] - == WorkflowTaskStatusTypeV2.SUBMITTED - ): - db_dataset.history[-1]["status"] = WorkflowTaskStatusTypeV2.FAILED - flag_modified(db_dataset, "history") - db.merge(db_dataset) - db.commit() + workflowtask_id = db_dataset.history[-1]["workflowtask"]["id"] + last_item_status = db_dataset.history[-1]["status"] + if last_item_status != WorkflowTaskStatusTypeV2.SUBMITTED: + logger.warning( + "Unexpected branch: " + f"Last history item, for {workflowtask_id=}, " + "has status {status}. Skip." + ) + return + logger.info(f"Setting history item for {workflowtask_id=} to failed.") + db_dataset.history[-1]["status"] = WorkflowTaskStatusTypeV2.FAILED + flag_modified(db_dataset, "history") + db.merge(db_dataset) + db.commit() diff --git a/fractal_server/app/runner/v2/runner.py b/fractal_server/app/runner/v2/runner.py index f46bf8a239..772e9acb3e 100644 --- a/fractal_server/app/runner/v2/runner.py +++ b/fractal_server/app/runner/v2/runner.py @@ -292,16 +292,14 @@ def execute_tasks_v2( # Update filters.types tmp_filters["types"].update(types_from_manifest) tmp_filters["types"].update(types_from_task) + # Write current dataset attributes (history, images, filters) into the # database. They can be used (1) to retrieve the latest state # when the job fails, (2) from within endpoints that need up-to-date # information with next(get_sync_db()) as db: db_dataset = db.get(DatasetV2, dataset.id) - for ind, history_item in enumerate(db_dataset.history): - history_item["status"] = WorkflowTaskStatusTypeV2.DONE - db_dataset.history[ind] = history_item - + db_dataset.history[-1]["status"] = WorkflowTaskStatusTypeV2.DONE db_dataset.filters = tmp_filters db_dataset.images = tmp_images for attribute_name in ["filters", "history", "images"]: From 23e515157659aeb9eed8a37f570a08b6ad2e119c Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Tue, 14 Jan 2025 10:01:15 +0100 Subject: [PATCH 41/49] Style [skip ci] --- fractal_server/app/runner/v2/runner.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fractal_server/app/runner/v2/runner.py b/fractal_server/app/runner/v2/runner.py index 772e9acb3e..4a2f073d1e 100644 --- a/fractal_server/app/runner/v2/runner.py +++ b/fractal_server/app/runner/v2/runner.py @@ -1,4 +1,3 @@ -# import json import logging from concurrent.futures import ThreadPoolExecutor from copy import copy From 1a6e86ae6694ce14388c147b144772dc7a0531c1 Mon Sep 17 00:00:00 2001 From: mfranzon <43796979+mfranzon@users.noreply.github.com> Date: Tue, 14 Jan 2025 11:15:34 +0100 Subject: [PATCH 42/49] fix arg, update test --- fractal_server/app/runner/v2/__init__.py | 6 +++--- fractal_server/app/runner/v2/handle_failed_job.py | 4 ++-- tests/v2/03_api/test_submission_job_list_v2.py | 4 +--- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/fractal_server/app/runner/v2/__init__.py b/fractal_server/app/runner/v2/__init__.py index f902260e56..fee385c738 100644 --- a/fractal_server/app/runner/v2/__init__.py +++ b/fractal_server/app/runner/v2/__init__.py @@ -350,7 +350,7 @@ async def submit_workflow( logger.info(f'Workflow "{workflow.name}" failed (TaskExecutionError).') mark_last_wftask_as_failed( - workflow, + dataset_id=dataset_id, logger_name=logger_name, ) exception_args_string = "\n".join(e.args) @@ -366,7 +366,7 @@ async def submit_workflow( logger.debug(f'FAILED workflow "{workflow.name}", JobExecutionError.') logger.info(f'Workflow "{workflow.name}" failed (JobExecutionError).') mark_last_wftask_as_failed( - dataset, + dataset_id=dataset_id, logger_name=logger_name, ) fail_job( @@ -383,7 +383,7 @@ async def submit_workflow( logger.debug(f'FAILED workflow "{workflow.name}", unknown error.') logger.info(f'Workflow "{workflow.name}" failed (unkwnon error).') mark_last_wftask_as_failed( - dataset, + dataset_id=dataset_id, logger_name=logger_name, ) current_traceback = traceback.format_exc() diff --git a/fractal_server/app/runner/v2/handle_failed_job.py b/fractal_server/app/runner/v2/handle_failed_job.py index 7c94031c2e..9b98866e89 100644 --- a/fractal_server/app/runner/v2/handle_failed_job.py +++ b/fractal_server/app/runner/v2/handle_failed_job.py @@ -22,7 +22,7 @@ def mark_last_wftask_as_failed( - dataset: DatasetV2, + dataset_id: int, logger_name: str, ) -> None: """ @@ -35,7 +35,7 @@ def mark_last_wftask_as_failed( logger = logging.getLogger(logger_name) with next(get_sync_db()) as db: - db_dataset = db.get(DatasetV2, dataset.id) + db_dataset = db.get(DatasetV2, dataset_id) workflowtask_id = db_dataset.history[-1]["workflowtask"]["id"] last_item_status = db_dataset.history[-1]["status"] if last_item_status != WorkflowTaskStatusTypeV2.SUBMITTED: diff --git a/tests/v2/03_api/test_submission_job_list_v2.py b/tests/v2/03_api/test_submission_job_list_v2.py index 14e31ef1b7..4375ffe803 100644 --- a/tests/v2/03_api/test_submission_job_list_v2.py +++ b/tests/v2/03_api/test_submission_job_list_v2.py @@ -18,7 +18,6 @@ async def test_clean_app_job_list_v2( job_factory_v2, override_settings_factory, ): - # Check that app fixture starts in a clean state assert app.state.jobsV1 == [] assert app.state.jobsV2 == [] @@ -27,10 +26,9 @@ async def test_clean_app_job_list_v2( override_settings_factory(FRACTAL_API_MAX_JOB_LIST_LENGTH=0) async with MockCurrentUser(user_kwargs=dict(is_verified=True)) as user: - # Create DB objects task = await task_factory_v2( - user_id=user.id, name="task", command="echo" + user_id=user.id, name="task", command_non_parallel="echo" ) project = await project_factory_v2(user) workflow = await workflow_factory_v2(project_id=project.id) From b7ef0ecf04e4478f41c4a2a6584e9e1364d2eb3a Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Tue, 14 Jan 2025 11:16:03 +0100 Subject: [PATCH 43/49] CHANGELOG [skip ci] --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8982c9db98..dffa68557a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ **Note**: Numbers like (\#1234) point to closed Pull Requests on the fractal-server repository. +# 2.11.0 (unreleased) + +* Runner + * Integrate database write access in runner component (\#2169). +* API: + * Update and simplify `/api/v2/project/{project_id}/status/` (\#2169). + # 2.10.5 * App: From d2ebb5a3c32e7d20a74259c88150d5d287c7d96a Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Tue, 14 Jan 2025 11:18:33 +0100 Subject: [PATCH 44/49] Trigger GHA workflows also for `dev-2.11` branch --- .github/workflows/benchmarks.yaml | 4 ++-- .github/workflows/ci.yml | 4 ++-- .github/workflows/documentation.yaml | 4 ++-- .github/workflows/migrations.yml | 4 ++-- .github/workflows/oauth.yaml | 4 ++-- .github/workflows/pip_install.yml | 4 ++-- .github/workflows/poetry_build.yml | 4 ++-- .github/workflows/precommit.yml | 4 ++-- 8 files changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/benchmarks.yaml b/.github/workflows/benchmarks.yaml index 7129b77c9f..9faa0e7c9b 100644 --- a/.github/workflows/benchmarks.yaml +++ b/.github/workflows/benchmarks.yaml @@ -2,9 +2,9 @@ name: Benchmarks on: push: - branches: ["main"] + branches: ["main", "dev-2.11"] pull_request: - branches: ["main"] + branches: ["main", "dev-2.11"] jobs: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3072893810..8045a489b7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,9 +2,9 @@ name: ci on: push: - branches: ["main"] + branches: ["main", "dev-2.11"] pull_request: - branches: ["main"] + branches: ["main", "dev-2.11"] jobs: diff --git a/.github/workflows/documentation.yaml b/.github/workflows/documentation.yaml index ecb7e407d6..6df17afeba 100644 --- a/.github/workflows/documentation.yaml +++ b/.github/workflows/documentation.yaml @@ -2,9 +2,9 @@ name: docs on: push: - branches: ["main"] + branches: ["main", "dev-2.11"] pull_request: - branches: ["main"] + branches: ["main", "dev-2.11"] jobs: diff --git a/.github/workflows/migrations.yml b/.github/workflows/migrations.yml index 5bfe363c1d..358939d0d4 100644 --- a/.github/workflows/migrations.yml +++ b/.github/workflows/migrations.yml @@ -9,9 +9,9 @@ name: migrations on: push: - branches: ["main"] + branches: ["main", "dev-2.11"] pull_request: - branches: ["main"] + branches: ["main", "dev-2.11"] env: diff --git a/.github/workflows/oauth.yaml b/.github/workflows/oauth.yaml index 717c565694..4a24944947 100644 --- a/.github/workflows/oauth.yaml +++ b/.github/workflows/oauth.yaml @@ -2,9 +2,9 @@ name: OAuth2-OIDC on: push: - branches: ["main"] + branches: ["main", "dev-2.11"] pull_request: - branches: ["main"] + branches: ["main", "dev-2.11"] jobs: diff --git a/.github/workflows/pip_install.yml b/.github/workflows/pip_install.yml index c88b303171..ab900bc4d5 100644 --- a/.github/workflows/pip_install.yml +++ b/.github/workflows/pip_install.yml @@ -2,9 +2,9 @@ name: pip-install on: push: - branches: ["main"] + branches: ["main", "dev-2.11"] pull_request: - branches: ["main"] + branches: ["main", "dev-2.11"] jobs: diff --git a/.github/workflows/poetry_build.yml b/.github/workflows/poetry_build.yml index 7fa759648c..058697ad91 100644 --- a/.github/workflows/poetry_build.yml +++ b/.github/workflows/poetry_build.yml @@ -2,9 +2,9 @@ name: Build package on: push: - branches: ["main"] + branches: ["main", "dev-2.11"] pull_request: - branches: ["main"] + branches: ["main", "dev-2.11"] jobs: build: diff --git a/.github/workflows/precommit.yml b/.github/workflows/precommit.yml index 1e2ce952c9..8ee60dc5e9 100644 --- a/.github/workflows/precommit.yml +++ b/.github/workflows/precommit.yml @@ -2,9 +2,9 @@ name: precommit on: push: - branches: ["main"] + branches: ["main","dev-2.11"] pull_request: - branches: ["main"] + branches: ["main","dev-2.11"] jobs: precommit: From 95f6bf75f1187e71ff30b0e3238545b85aa110c7 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Tue, 14 Jan 2025 11:48:08 +0100 Subject: [PATCH 45/49] Handle empty-history case in mark_last_wftask_as_failed --- fractal_server/app/runner/v2/handle_failed_job.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fractal_server/app/runner/v2/handle_failed_job.py b/fractal_server/app/runner/v2/handle_failed_job.py index 9b98866e89..6b63327091 100644 --- a/fractal_server/app/runner/v2/handle_failed_job.py +++ b/fractal_server/app/runner/v2/handle_failed_job.py @@ -36,6 +36,13 @@ def mark_last_wftask_as_failed( logger = logging.getLogger(logger_name) with next(get_sync_db()) as db: db_dataset = db.get(DatasetV2, dataset_id) + if len(db_dataset.history) == 0: + logger.warning( + f"History for {dataset_id=} is empty. Likely reason: the job " + "failed before its first task was marked as SUBMITTED. " + "Continue." + ) + return workflowtask_id = db_dataset.history[-1]["workflowtask"]["id"] last_item_status = db_dataset.history[-1]["status"] if last_item_status != WorkflowTaskStatusTypeV2.SUBMITTED: From 30ab975b52f5290754766f202e29c52a7d45e5de Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Tue, 14 Jan 2025 11:48:59 +0100 Subject: [PATCH 46/49] Fix comment --- tests/v2/03_api/test_api_status.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/v2/03_api/test_api_status.py b/tests/v2/03_api/test_api_status.py index 53c68ad3ad..5533b6c4c6 100644 --- a/tests/v2/03_api/test_api_status.py +++ b/tests/v2/03_api/test_api_status.py @@ -49,8 +49,8 @@ async def test_status_yes_history_no_running_job( client, ): """ - GIVEN A database with non-empty dataset.history and no running jobs - THEN The status-endpoint response is empty + Test the case of the database with non-empty dataset.history and no + running jobs. """ async with MockCurrentUser() as user: project = await project_factory_v2(user) From 34879b766a2bd04006417be161fe17b0876d5805 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Tue, 14 Jan 2025 11:49:27 +0100 Subject: [PATCH 47/49] Fix f string --- fractal_server/app/runner/v2/handle_failed_job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fractal_server/app/runner/v2/handle_failed_job.py b/fractal_server/app/runner/v2/handle_failed_job.py index 6b63327091..023d04c11d 100644 --- a/fractal_server/app/runner/v2/handle_failed_job.py +++ b/fractal_server/app/runner/v2/handle_failed_job.py @@ -49,7 +49,7 @@ def mark_last_wftask_as_failed( logger.warning( "Unexpected branch: " f"Last history item, for {workflowtask_id=}, " - "has status {status}. Skip." + f"has status {last_item_status}. Skip." ) return logger.info(f"Setting history item for {workflowtask_id=} to failed.") From 3d56db2dbc8b7512c3870ae3fcd6b51bf28cc510 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Tue, 14 Jan 2025 11:49:57 +0100 Subject: [PATCH 48/49] Remove db.refresh --- fractal_server/app/runner/v2/runner.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fractal_server/app/runner/v2/runner.py b/fractal_server/app/runner/v2/runner.py index 4a2f073d1e..93c645d6c3 100644 --- a/fractal_server/app/runner/v2/runner.py +++ b/fractal_server/app/runner/v2/runner.py @@ -305,6 +305,5 @@ def execute_tasks_v2( flag_modified(db_dataset, attribute_name) db.merge(db_dataset) db.commit() - db.refresh(db_dataset) logger.debug(f'END {wftask.order}-th task (name="{task_name}")') From 74dfe65fcc3320740328eac9481c3d7ec6c4bcb6 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Tue, 14 Jan 2025 12:11:55 +0100 Subject: [PATCH 49/49] Add test_unit_mark_last_wftask_as_failed --- .../test_unit_mark_last_wftask_as_failed.py | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 tests/v2/04_runner/test_unit_mark_last_wftask_as_failed.py diff --git a/tests/v2/04_runner/test_unit_mark_last_wftask_as_failed.py b/tests/v2/04_runner/test_unit_mark_last_wftask_as_failed.py new file mode 100644 index 0000000000..1f1e446f32 --- /dev/null +++ b/tests/v2/04_runner/test_unit_mark_last_wftask_as_failed.py @@ -0,0 +1,43 @@ +from fractal_server.app.runner.v2.handle_failed_job import ( + mark_last_wftask_as_failed, +) + + +async def test_unit_mark_last_wftask_as_failed( + db, + dataset_factory_v2, + project_factory_v2, + MockCurrentUser, + caplog, +): + async with MockCurrentUser() as user: + project = await project_factory_v2(user=user) + dataset_no_history = await dataset_factory_v2( + project_id=project.id, + name="name", + history=[], + ) + + caplog.clear() + mark_last_wftask_as_failed( + dataset_id=dataset_no_history.id, logger_name="logger" + ) + print(caplog.text) + assert "is empty. Likely reason" in caplog.text + + dataset_wrong_history = await dataset_factory_v2( + name="name", + history=[ + { + "workflowtask": {"id": 123}, + "status": "done", + } + ], + ) + + caplog.clear() + mark_last_wftask_as_failed( + dataset_id=dataset_wrong_history.id, logger_name="logger" + ) + print(caplog.text) + assert "Unexpected branch: Last history item" in caplog.text