Skip to content

Commit

Permalink
fix getting the length of a generator (#377)
Browse files Browse the repository at this point in the history
* fix getting the length of a generator

* copy generator to keep from destroying it

* fix linting
  • Loading branch information
rzlim08 authored Jul 29, 2024
1 parent d443456 commit c49eed2
Showing 1 changed file with 10 additions and 1 deletion.
11 changes: 10 additions & 1 deletion lib/idseq_utils/idseq_utils/batch_run_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
from typing import Dict, List, Optional
from urllib.parse import urlparse

from itertools import tee

from idseq_utils.diamond_scatter import blastx_join
from idseq_utils.minimap2_scatter import minimap2_merge

Expand Down Expand Up @@ -296,6 +298,12 @@ def _db_chunks(bucket: str, prefix):
yield obj["Key"]


def count_generator(gen):
    """Count the items of an iterable without losing them.

    Generators have no ``len()`` and counting one consumes it, so the
    items are buffered in memory once and handed back for later use.

    Args:
        gen: Any iterable, typically a generator.

    Returns:
        A ``(count, iterator)`` tuple: the number of items *gen* produced
        and a fresh single-pass iterator over those same items, in their
        original order.
    """
    # tee() followed by fully draining one branch buffers every item
    # anyway; the itertools docs recommend list() for that access pattern.
    items = list(gen)
    # Return an iterator (not the list) so callers keep seeing a
    # single-pass iterable, as before.
    return len(items), iter(items)


def run_alignment(
input_dir: str,
db_path: str,
Expand All @@ -321,7 +329,8 @@ def run_alignment(
]
for chunk_id, db_chunk in enumerate(_db_chunks(db_bucket, db_prefix))
)
with Pool(len(chunks)) as p:
chunk_length, chunks = count_generator(chunks)
with Pool(chunk_length) as p:
p.starmap(_run_chunk, chunks)
run(["s3parcp", "--recursive", chunk_dir, "chunks"], check=True)
if os.path.exists(os.path.join("chunks", "cache")):
Expand Down

0 comments on commit c49eed2

Please sign in to comment.