Skip to content

Commit

Permalink
feat: use dask threads scheduler with default values. (#1168)
Browse files: browse the repository at this point in the history
  • Loading branch information
Bento007 authored Jan 15, 2025
1 parent 093e4ae commit 6067311
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 13 deletions.
5 changes: 2 additions & 3 deletions cellxgene_schema_cli/cellxgene_schema/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,7 @@ def schema_cli(verbose):
type=click.Path(exists=False, dir_okay=False, writable=True),
)
@click.option("-i", "--ignore-labels", help="Ignore ontology labels when validating", is_flag=True)
@click.option("-n", "--num-workers", help="Number of workers to use for parallel processing", default=1, type=int)
def schema_validate(h5ad_file, add_labels_file, ignore_labels, num_workers):
def schema_validate(h5ad_file, add_labels_file, ignore_labels):
# Imports are very slow so we defer loading until Click arg validation has passed
logger.info("Loading dependencies")
try:
Expand All @@ -48,7 +47,7 @@ def schema_validate(h5ad_file, add_labels_file, ignore_labels, num_workers):
logger.info("Loading validator modules")
from .validate import validate

is_valid, _, _ = validate(h5ad_file, add_labels_file, ignore_labels=ignore_labels, n_workers=num_workers)
is_valid, _, _ = validate(h5ad_file, add_labels_file, ignore_labels=ignore_labels)
if is_valid:
sys.exit(0)
else:
Expand Down
11 changes: 2 additions & 9 deletions cellxgene_schema_cli/cellxgene_schema/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2110,7 +2110,6 @@ def validate(
h5ad_path: Union[str, bytes, os.PathLike],
add_labels_file: str = None,
ignore_labels: bool = False,
n_workers: int = 1,
) -> (bool, list, bool):
from .write_labels import AnnDataLabelAppender

Expand All @@ -2130,14 +2129,8 @@ def validate(
validator = Validator(
ignore_labels=ignore_labels,
)
with dask.config.set(
{
"num_workers": n_workers,
"threads_per_worker": 1,
"distributed.worker.memory.limit": "6GB",
"scheduler": "threads",
}
):

with dask.config.set({"scheduler": "threads"}):
validator.validate_adata(h5ad_path)
logger.info(f"Validation complete in {datetime.now() - start} with status is_valid={validator.is_valid}")

Expand Down
2 changes: 1 addition & 1 deletion cellxgene_schema_cli/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ anndata==0.11.2
cellxgene-ontology-guide==1.3.0 # update before a schema migration
click<9
Cython<4
dask==2024.12.0
dask[array]==2024.12.0
numpy<3
pandas>2,<3
PyYAML<7
Expand Down

0 comments on commit 6067311

Please sign in to comment.