From 3a2475ca4cfd6b062d5395d579628f8dc25427c4 Mon Sep 17 00:00:00 2001 From: idalindegaard Date: Thu, 13 Jun 2024 12:54:01 +0200 Subject: [PATCH 01/16] New script and updated file --- .../udf/calculate/adjust_missing_reads.py | 47 +++++++++++++++++++ cg_lims/options.py | 31 ++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 cg_lims/EPPs/udf/calculate/adjust_missing_reads.py diff --git a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py new file mode 100644 index 00000000..24817ac1 --- /dev/null +++ b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py @@ -0,0 +1,47 @@ +import logging +import sys + +import click +from cg_lims.exceptions import LimsError, MissingCgFieldError, MissingUDFsError +from cg_lims.get.artifacts import get_artifacts +from cg_lims.get.samples import get_one_sample_from_artifact +from cg_lims.status_db_api import StatusDBAPI +from genologics.entities import Artifact +from requests.exceptions import ConnectionError + +LOG = logging.getLogger(__name__) + +def calculate_adjusted_reads(artifact: Artifact, factor: float) -> float: + """A function to calculate the adjusted reads to sequence for each artifact with the desired apptag""" + + reads = sample.udf.get("Reads to sequence (M)") + adjusted_reads = reads*factor + return adjusted_reads + +@click.command() +@options.apptag(help="String of UDF Sequencing Analysis, also known as apptag") +@options.factor(help="Factor to multiply Reads to sequence (M) with") +@options.threshold_reads(help="Threshold for determining which factor to adjust Reads to sequence (M) with for WGS topup samples") +@click.pass_context +def adjust_missing_reads( + ctx: click.Context, + apptag: str, + factor: float, + threshold_reads: float, +): + """Script to calculate the adjusted Reads to sequence (M) with a specific factor for specific apptags, + specified in the command line""" + + process = ctx.obj["process"] + lims = ctx.obj["lims"] + + try: + artifacts: List[Artifact] = get_artifacts(process=process, measurement=True) + for artifact in artifacts: + if sample.udf.get("Sequencing Analysis") == apptag: + adjusted_reads = calculate_adjusted_reads(artifact=artifact, factor=factor) + artifact.udf["Reads to sequence (M)"] = adjusted_reads + artifact.put() + click.echo("Udfs have been updated on all samples.") + except LimsError as e: + sys.exit(e.message) diff --git a/cg_lims/options.py b/cg_lims/options.py index a5695b22..af31875c 100644 --- a/cg_lims/options.py +++ b/cg_lims/options.py @@ -423,3 +423,34 @@ def well_udf(help: str = "UDF name for artifact well.") -> click.option: def container_name_udf(help: str = "UDF name for container name.") -> click.option: return click.option("--container-name-udf", required=False, default=None, help=help) + + +def apptag( + help: str = "String of UDF Sequencing Analysis, also known as apptag", +) -> click.option: + return click.option( + "--apptag", + required=True, + multiple=True, + help=help, + ) + +def factor( + help: str = "Factor to multiply Reads to sequence (M) with", +) -> click.option: + return click.option( + "--factor", + required=True, + multiple=True, + help=help, + ) + +def threshold_reads( + help: str = "Threshold for determining which factor to adjust Reads to sequence (M) with for WGS topup samples", +) -> click.option: + return click.option( + "--threshold-reads", + required=False, + multiple=True, + help=help, + ) \ No newline at end of file From dc3aeebd0ebba35a2b7a52a93c948d291874b6bb Mon Sep 17 00:00:00 2001 From: idalindegaard Date: Wed, 17 Jul 2024 10:55:58 +0200 Subject: [PATCH 02/16] New EPP adjust missing reads --- .../udf/calculate/adjust_missing_reads.py | 162 ++++++++++++-- cg_lims/EPPs/udf/calculate/base.py | 2 + cg_lims/options.py | 198 +++++++++++++++++- 3 files changed, 343 insertions(+), 19 deletions(-) diff --git a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py index 24817ac1..e4874527 100644 --- a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py +++ b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py @@ -8,26 +8,119 @@ from cg_lims.status_db_api import StatusDBAPI from genologics.entities import Artifact from requests.exceptions import ConnectionError +from cg_lims import options LOG = logging.getLogger(__name__) -def calculate_adjusted_reads(artifact: Artifact, factor: float) -> float: +def calculate_adjusted_reads(artifact: Artifact, factor: str) -> float: """A function to calculate the adjusted reads to sequence for each artifact with the desired apptag""" - reads = sample.udf.get("Reads to sequence (M)") - adjusted_reads = reads*factor - return adjusted_reads + reads = artifact.udf.get("Reads to sequence (M)") + + return round(float(reads)*float(factor), 1) + +def adjust_wgs_topups(artifact: Artifact, factor_wgs_lower: str, factor_wgs_higher: str, threshold_reads: str) -> None: + """A function that calculates adjusted reads to sequence for WGS topups, where the 'topup' factor is determined + by a threshold for the reads to sequence. This is specified in the cli""" + + reads = float(artifact.udf.get("Reads to sequence (M)")) + if reads < float(threshold_reads): + adjusted_reads = round(float(reads)*float(factor_wgs_lower), 1) + else: + adjusted_reads = round(float(reads)*float(factor_wgs_higher), 1) + artifact.udf["Reads to sequence (M)"] = str(adjusted_reads) + artifact.put() + +def reset_microbial_reads(artifact: Artifact, reset_microbial_reads: str) -> None: + """A function that resets the reads to sequence for microbial samples, and the threshold_reads specifies what they are + supposed to be reset to""" + + artifact.udf["Reads to sequence (M)"] = reset_microbial_reads + artifact.put() + +def is_topup(artifact: Artifact) -> bool: + """A function that determines whether an artifact has already been sequenced before or not, and therefore is a topup + sample/artifact or not""" + + output = False + if artifact.samples[0].udf.get("Total Reads (M)"): + output = True + return output + +def is_adjusted(artifact: Artifact) -> bool: + """A function that checks if the process UDF Adjusted Reads to Sequence is set/true. This will + be updated after the EPP to adjust the reads to sequence has run one time""" + + process = artifact.parent_process + output = False + if process.udf.get("Adjusted Reads to Sequence"): + output = True + return output + +def validate_udf_values(artifact: Artifact) -> bool: + """A function checking whether Reads to Sequence (M) has a negative/no value. + Then the function returns the output as 'False' and logs all those sample IDs in the EPP log""" + + output = True + if not artifact.udf["Reads to sequence (M)"] or float(artifact.udf["Reads to sequence (M)"]) < 0: + output = False + LOG.info( + f"Sample {artifact.samples[0].id} has no or a negative value for Reads to sequence (M). Skipping." + ) + return output + +def adjust_reads(artifact: Artifact, apptags: tuple, factor: str) -> None: + """Only artifacts that have passed the validation of acceptable Reads to Sequence (M) values will be adjusted""" + + if validate_udf_values(artifact=artifact): + adjusted_reads = calculate_adjusted_reads(artifact=artifact, factor=factor) + artifact.udf["Reads to sequence (M)"] = str(adjusted_reads) + artifact.put() @click.command() -@options.apptag(help="String of UDF Sequencing Analysis, also known as apptag") -@options.factor(help="Factor to multiply Reads to sequence (M) with") -@options.threshold_reads(help="Threshold for determining which factor to adjust Reads to sequence (M) with for WGS topup samples") +@options.apptag_wgs(help="String of UDF Sequencing Analysis, also known as apptag, for WGS samples") +@options.apptag_wgs_tumor(help="String of UDF Sequencing Analysis, also known as apptag, for WGS tumor samples") +@options.apptag_tga(help="String of UDF Sequencing Analysis, also known as apptag, for TGA samples") +@options.apptag_micro(help="String of UDF Sequencing Analysis, also known as apptag, for micro samples") +@options.apptag_rml(help="String of UDF Sequencing Analysis, also known as apptag, for RML samples") +@options.apptag_virus(help="String of UDF Sequencing Analysis, also known as apptag, for virus samples") +@options.apptag_rna(help="String of UDF Sequencing Analysis, also known as apptag, for RNA samples") +@options.factor_wgs_tumor(help= "Factor to multiply Reads to sequence (M) with for WGS tumor samples") +@options.factor_tga(help= "Factor to multiply Reads to sequence (M) with for TGA samples") +@options.factor_micro(help= "Factor to multiply Reads to sequence (M) with for micro samples") +@options.factor_rml(help= "Factor to multiply Reads to sequence (M) with for RML samples") +@options.factor_rna(help= "Factor to multiply Reads to sequence (M) with for RNA samples") +@options.factor_rna_topups(help= "Factor to multiply Reads to sequence (M) with for RNA topup samples") +@options.factor_rml_topups(help= "Factor to multiply Reads to sequence (M) with for RML topup samples") +@options.factor_tga_topups(help= "Factor to multiply Reads to sequence (M) with for TGA topup samples") +@options.factor_wgs_lower(help= "Lower factor to multiply Reads to sequence (M) with for WGS samples") +@options.factor_wgs_higher(help= "Higher factor to multiply Reads to sequence (M) with for WGS samples") +@options.threshold_reads(help="Threshold for Reads to sequence (M) during adjustment") +@options.reset_micro_reads(help="A value to re-set Reads to sequence (M) for microbial samples") +@options.reset_virus_reads(help="A value to re-set Reads to sequence (M) for virus samples") @click.pass_context def adjust_missing_reads( ctx: click.Context, - apptag: str, - factor: float, - threshold_reads: float, + apptag_wgs: tuple, + apptag_wgs_tumor: tuple, + apptag_tga: tuple, + apptag_micro: tuple, + apptag_rml: tuple, + apptag_virus: tuple, + apptag_rna: tuple, + factor_wgs_tumor: str, + factor_tga: str, + factor_micro: str, + factor_rml: str, + factor_rna: str, + factor_rna_topups: str, + factor_rml_topups: str, + factor_tga_topups: str, + factor_wgs_lower: str, + factor_wgs_higher: str, + threshold_reads: str, + reset_micro_reads: str, + reset_virus_reads: str, ): """Script to calculate the adjusted Reads to sequence (M) with a specific factor for specific apptags, specified in the command line""" @@ -36,12 +129,51 @@ def adjust_missing_reads( lims = ctx.obj["lims"] try: - artifacts: List[Artifact] = get_artifacts(process=process, measurement=True) + artifacts: List[Artifact] = get_artifacts(process=process) for artifact in artifacts: - if sample.udf.get("Sequencing Analysis") == apptag: - adjusted_reads = calculate_adjusted_reads(artifact=artifact, factor=factor) - artifact.udf["Reads to sequence (M)"] = adjusted_reads - artifact.put() + if not is_adjusted(artifact=artifact): + for app in apptag_wgs: + if app in artifact.samples[0].udf.get("Sequencing Analysis") and is_topup(artifact=artifact): + adjust_wgs_topups(artifact=artifact, factor_wgs_lower=factor_wgs_lower, + factor_wgs_higher=factor_wgs_higher, threshold_reads=threshold_reads) + for app in apptag_wgs_tumor: + if app in artifact.samples[0].udf.get("Sequencing Analysis"): + if is_topup(artifact=artifact): + adjust_wgs_topups(artifact=artifact, factor_wgs_lower=factor_wgs_lower, + factor_wgs_higher=factor_wgs_higher, threshold_reads=threshold_reads) + else: + adjust_reads(artifact=artifact, apptags=apptag_wgs_tumor, factor=factor_wgs_tumor) + for app in apptag_tga: + if app in artifact.samples[0].udf.get("Sequencing Analysis"): + if is_topup(artifact=artifact): + adjust_reads(artifact=artifact, apptags=apptag_tga, factor=factor_tga_topups) + else: + adjust_reads(artifact=artifact, apptags=apptag_tga, factor=factor_tga) + for app in apptag_micro: + if app in artifact.samples[0].udf.get("Sequencing Analysis"): + if is_topup(artifact=artifact): + reset_microbial_reads(artifact=artifact, reset_microbial_reads=reset_micro_reads) + else: + adjust_reads(artifact=artifact, apptags=apptag_micro, factor=factor_micro) + for app in apptag_virus: + if app in artifact.samples[0].udf.get("Sequencing Analysis") and is_topup(artifact=artifact): + reset_microbial_reads(artifact=artifact, reset_microbial_reads=reset_virus_reads) + for app in apptag_rml: + if app in artifact.samples[0].udf.get("Sequencing Analysis"): + if is_topup(artifact=artifact): + adjust_reads(artifact=artifact, apptags=apptag_rml, factor=factor_rml_topups) + else: + adjust_reads(artifact=artifact, apptags=apptag_rml, factor=factor_rml) + for app in apptag_rna: + if app in artifact.samples[0].udf.get("Sequencing Analysis"): + if is_topup(artifact=artifact): + adjust_reads(artifact=artifact, apptags=apptag_rna, factor=factor_rna_topups) + else: + adjust_reads(artifact=artifact, apptags=apptag_rna, factor=factor_rna) + if is_adjusted(artifact=artifact): + LOG.info("Samples have already been adjusted.") click.echo("Udfs have been updated on all samples.") + process.udf["Adjusted Reads to Sequence"] = True + process.put() except LimsError as e: sys.exit(e.message) diff --git a/cg_lims/EPPs/udf/calculate/base.py b/cg_lims/EPPs/udf/calculate/base.py index d935d162..2ec3708c 100644 --- a/cg_lims/EPPs/udf/calculate/base.py +++ b/cg_lims/EPPs/udf/calculate/base.py @@ -28,6 +28,7 @@ from cg_lims.EPPs.udf.calculate.sum_missing_reads_in_pool import missing_reads_in_pool from cg_lims.EPPs.udf.calculate.twist_aliquot_amount import twist_aliquot_amount from cg_lims.EPPs.udf.calculate.twist_get_volumes_from_buffer import get_volumes_from_buffer +from cg_lims.EPPs.udf.calculate.adjust_missing_reads import adjust_missing_reads # commands from cg_lims.EPPs.udf.calculate.twist_pool import twist_pool @@ -64,3 +65,4 @@ def calculate(ctx): calculate.add_command(calculate_saphyr_concentration) calculate.add_command(ont_aliquot_volume) calculate.add_command(ont_available_sequencing_reload) +calculate.add_command(adjust_missing_reads) diff --git a/cg_lims/options.py b/cg_lims/options.py index af31875c..9c478a94 100644 --- a/cg_lims/options.py +++ b/cg_lims/options.py @@ -435,22 +435,212 @@ def apptag( help=help, ) +def apptag_wgs( + help: str = "String of UDF Sequencing Analysis, also known as apptag, for WGS samples", +) -> click.option: + return click.option( + "--apptag-wgs", + required=True, + multiple=True, + help=help, + ) + +def apptag_wgs_tumor( + help: str = "String of UDF Sequencing Analysis, also known as apptag, for WGS tumor samples", +) -> click.option: + return click.option( + "--apptag-wgs-tumor", + required=True, + multiple=True, + help=help, + ) + +def apptag_tga( + help: str = "String of UDF Sequencing Analysis, also known as apptag, for TGA samples", +) -> click.option: + return click.option( + "--apptag-tga", + required=True, + multiple=True, + help=help, + ) + +def apptag_micro( + help: str = "String of UDF Sequencing Analysis, also known as apptag, for micro samples", +) -> click.option: + return click.option( + "--apptag-micro", + required=True, + multiple=True, + help=help, + ) + +def apptag_rml( + help: str = "String of UDF Sequencing Analysis, also known as apptag, for RML samples", +) -> click.option: + return click.option( + "--apptag-rml", + required=True, + multiple=True, + help=help, + ) + +def apptag_virus( + help: str = "String of UDF Sequencing Analysis, also known as apptag, for virus samples", +) -> click.option: + return click.option( + "--apptag-virus", + required=True, + multiple=True, + help=help, + ) + +def apptag_rna( + help: str = "String of UDF Sequencing Analysis, also known as apptag, for RNA samples", +) -> click.option: + return click.option( + "--apptag-rna", + required=True, + multiple=True, + help=help, + ) + def factor( help: str = "Factor to multiply Reads to sequence (M) with", ) -> click.option: return click.option( "--factor", required=True, - multiple=True, + multiple=False, + help=help, + ) + +def factor_wgs_tumor( + help: str = "Factor to multiply Reads to sequence (M) with for WGS tumor samples", +) -> click.option: + return click.option( + "--factor-wgs-tumor", + required=True, + multiple=False, + help=help, + ) + +def factor_tga( + help: str = "Factor to multiply Reads to sequence (M) with for TGA samples", +) -> click.option: + return click.option( + "--factor-tga", + required=True, + multiple=False, + help=help, + ) + +def factor_micro( + help: str = "Factor to multiply Reads to sequence (M) with for micro samples", +) -> click.option: + return click.option( + "--factor-micro", + required=True, + multiple=False, + help=help, + ) + +def factor_rml( + help: str = "Factor to multiply Reads to sequence (M) with for RML samples", +) -> click.option: + return click.option( + "--factor-rml", + required=True, + multiple=False, + help=help, + ) + +def factor_rna( + help: str = "Factor to multiply Reads to sequence (M) with for RNA samples", +) -> click.option: + return click.option( + "--factor-rna", + required=True, + multiple=False, + help=help, + ) + +def factor_rna_topups( + help: str = "Factor to multiply Reads to sequence (M) with for RNA topup samples", +) -> click.option: + return click.option( + "--factor-rna-topups", + required=True, + multiple=False, + help=help, + ) + +def factor_rml_topups( + help: str = "Factor to multiply Reads to sequence (M) with for RML topup samples", +) -> click.option: + return click.option( + "--factor-rml-topups", + required=True, + multiple=False, + help=help, + ) + +def factor_tga_topups( + help: str = "Factor to multiply Reads to sequence (M) with for TGA topup samples", +) -> click.option: + return click.option( + "--factor-tga-topups", + required=True, + multiple=False, + help=help, + ) + +def factor_wgs_lower( + help: str = "Lower factor to multiply Reads to sequence (M) with for WGS samples", +) -> click.option: + return click.option( + "--factor-wgs-lower", + required=True, + multiple=False, + help=help, + ) + +def factor_wgs_higher( + help: str = "Higher factor to multiply Reads to sequence (M) with for WGS samples", +) -> click.option: + return click.option( + "--factor-wgs-higher", + required=True, + multiple=False, help=help, ) def threshold_reads( - help: str = "Threshold for determining which factor to adjust Reads to sequence (M) with for WGS topup samples", + help: str = "Threshold for Reads to sequence (M) during adjustment", ) -> click.option: return click.option( "--threshold-reads", - required=False, - multiple=True, + required=True, + multiple=False, + help=help, + ) + +def reset_micro_reads( + help: str = "A value to re-set Reads to sequence (M) for microbial samples", +) -> click.option: + return click.option( + "--reset-micro-reads", + required=True, + multiple=False, + help=help, + ) + +def reset_virus_reads( + help: str = "A value to re-set Reads to sequence (M) for virus samples", +) -> click.option: + return click.option( + "--reset-virus-reads", + required=True, + multiple=False, help=help, ) \ No newline at end of file From afc2162115f4c65dfe0827963cf3e7429f1a3671 Mon Sep 17 00:00:00 2001 From: idalindegaard Date: Wed, 21 Aug 2024 10:44:15 +0200 Subject: [PATCH 03/16] updated validate_udf_values --- .../udf/calculate/adjust_missing_reads.py | 28 ++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py index e4874527..b2678ff3 100644 --- a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py +++ b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py @@ -23,20 +23,26 @@ def adjust_wgs_topups(artifact: Artifact, factor_wgs_lower: str, factor_wgs_high """A function that calculates adjusted reads to sequence for WGS topups, where the 'topup' factor is determined by a threshold for the reads to sequence. This is specified in the cli""" - reads = float(artifact.udf.get("Reads to sequence (M)")) - if reads < float(threshold_reads): - adjusted_reads = round(float(reads)*float(factor_wgs_lower), 1) - else: - adjusted_reads = round(float(reads)*float(factor_wgs_higher), 1) - artifact.udf["Reads to sequence (M)"] = str(adjusted_reads) - artifact.put() + valid_value = validate_udf_values(artifact=artifact) + + if valid_value: + reads = float(artifact.udf.get("Reads to sequence (M)")) + if reads < float(threshold_reads): + adjusted_reads = round(float(reads)*float(factor_wgs_lower), 1) + else: + adjusted_reads = round(float(reads)*float(factor_wgs_higher), 1) + artifact.udf["Reads to sequence (M)"] = str(adjusted_reads) + artifact.put() def reset_microbial_reads(artifact: Artifact, reset_microbial_reads: str) -> None: """A function that resets the reads to sequence for microbial samples, and the threshold_reads specifies what they are supposed to be reset to""" - artifact.udf["Reads to sequence (M)"] = reset_microbial_reads - artifact.put() + valid_value = validate_udf_values(artifact=artifact) + + if valid_value: + artifact.udf["Reads to sequence (M)"] = reset_microbial_reads + artifact.put() def is_topup(artifact: Artifact) -> bool: """A function that determines whether an artifact has already been sequenced before or not, and therefore is a topup @@ -72,7 +78,9 @@ def validate_udf_values(artifact: Artifact) -> bool: def adjust_reads(artifact: Artifact, apptags: tuple, factor: str) -> None: """Only artifacts that have passed the validation of acceptable Reads to Sequence (M) values will be adjusted""" - if validate_udf_values(artifact=artifact): + valid_value = validate_udf_values(artifact=artifact) + + if valid_value: adjusted_reads = calculate_adjusted_reads(artifact=artifact, factor=factor) artifact.udf["Reads to sequence (M)"] = str(adjusted_reads) artifact.put() From 92cc19071d9d23a19571a804e5c58746cfba17b7 Mon Sep 17 00:00:00 2001 From: idalindegaard Date: Wed, 21 Aug 2024 11:51:34 +0200 Subject: [PATCH 04/16] update validate_udf_values --- cg_lims/EPPs/udf/calculate/adjust_missing_reads.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py index b2678ff3..db53584c 100644 --- a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py +++ b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py @@ -68,7 +68,7 @@ def validate_udf_values(artifact: Artifact) -> bool: Then the function returns the output as 'False' and logs all those sample IDs in the EPP log""" output = True - if not artifact.udf["Reads to sequence (M)"] or float(artifact.udf["Reads to sequence (M)"]) < 0: + if not artifact.udf.get("Reads to sequence (M)") or float(artifact.udf["Reads to sequence (M)"]) < 0: output = False LOG.info( f"Sample {artifact.samples[0].id} has no or a negative value for Reads to sequence (M). Skipping." From b8e81eb8309c989b267345dd74a9ade102b879da Mon Sep 17 00:00:00 2001 From: idalindegaard Date: Wed, 21 Aug 2024 12:19:07 +0200 Subject: [PATCH 05/16] update validate_udf_values --- cg_lims/EPPs/udf/calculate/adjust_missing_reads.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py index db53584c..9798e96f 100644 --- a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py +++ b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py @@ -68,7 +68,7 @@ def validate_udf_values(artifact: Artifact) -> bool: Then the function returns the output as 'False' and logs all those sample IDs in the EPP log""" output = True - if not artifact.udf.get("Reads to sequence (M)") or float(artifact.udf["Reads to sequence (M)"]) < 0: + if not artifact.udf.get("Reads to sequence (M)") or float(artifact.udf.get("Reads to sequence (M)")) < 0: output = False LOG.info( f"Sample {artifact.samples[0].id} has no or a negative value for Reads to sequence (M). Skipping." From 54ff59c186b2f5d454cdf1181761228f4e304f3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20Sv=C3=A4rd?= Date: Thu, 29 Aug 2024 14:04:55 +0200 Subject: [PATCH 06/16] Format with black and isort --- .../files/sample_sheet/create_sample_sheet.py | 4 +- .../udf/calculate/adjust_missing_reads.py | 131 +++++++++++++----- cg_lims/EPPs/udf/calculate/base.py | 2 +- cg_lims/options.py | 29 +++- 4 files changed, 123 insertions(+), 43 deletions(-) diff --git a/cg_lims/EPPs/files/sample_sheet/create_sample_sheet.py b/cg_lims/EPPs/files/sample_sheet/create_sample_sheet.py index bb768c70..3ed0a022 100644 --- a/cg_lims/EPPs/files/sample_sheet/create_sample_sheet.py +++ b/cg_lims/EPPs/files/sample_sheet/create_sample_sheet.py @@ -152,7 +152,9 @@ def calculate_index_hamming_distance( return string_hamming_distance( index_1=index_1.sequence[-len(index_2.sequence) :], index_2=index_2.sequence ) - message: str = f"Non-supported index type identified for indexes {index_1.sequence} and {index_2.sequence}: '{index_1.type}'." + message: str = ( + f"Non-supported index type identified for indexes {index_1.sequence} and {index_2.sequence}: '{index_1.type}'." + ) LOG.error(message) raise InvalidValueError(message) diff --git a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py index 9798e96f..5c33420c 100644 --- a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py +++ b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py @@ -2,38 +2,43 @@ import sys import click +from cg_lims import options from cg_lims.exceptions import LimsError, MissingCgFieldError, MissingUDFsError from cg_lims.get.artifacts import get_artifacts from cg_lims.get.samples import get_one_sample_from_artifact from cg_lims.status_db_api import StatusDBAPI from genologics.entities import Artifact from requests.exceptions import ConnectionError -from cg_lims import options LOG = logging.getLogger(__name__) + def calculate_adjusted_reads(artifact: Artifact, factor: str) -> float: """A function to calculate the adjusted reads to sequence for each artifact with the desired apptag""" reads = artifact.udf.get("Reads to sequence (M)") - return round(float(reads)*float(factor), 1) + return round(float(reads) * float(factor), 1) -def adjust_wgs_topups(artifact: Artifact, factor_wgs_lower: str, factor_wgs_higher: str, threshold_reads: str) -> None: - """A function that calculates adjusted reads to sequence for WGS topups, where the 'topup' factor is determined + +def adjust_wgs_topups( + artifact: Artifact, factor_wgs_lower: str, factor_wgs_higher: str, threshold_reads: str +) -> None: + """A function that calculates adjusted reads to sequence for WGS topups, where the 'topup' factor is determined by a threshold for the reads to sequence. This is specified in the cli""" - + valid_value = validate_udf_values(artifact=artifact) if valid_value: reads = float(artifact.udf.get("Reads to sequence (M)")) if reads < float(threshold_reads): - adjusted_reads = round(float(reads)*float(factor_wgs_lower), 1) + adjusted_reads = round(float(reads) * float(factor_wgs_lower), 1) else: - adjusted_reads = round(float(reads)*float(factor_wgs_higher), 1) + adjusted_reads = round(float(reads) * float(factor_wgs_higher), 1) artifact.udf["Reads to sequence (M)"] = str(adjusted_reads) artifact.put() + def reset_microbial_reads(artifact: Artifact, reset_microbial_reads: str) -> None: """A function that resets the reads to sequence for microbial samples, and the threshold_reads specifies what they are supposed to be reset to""" @@ -44,8 +49,9 @@ def reset_microbial_reads(artifact: Artifact, reset_microbial_reads: str) -> Non artifact.udf["Reads to sequence (M)"] = reset_microbial_reads artifact.put() + def is_topup(artifact: Artifact) -> bool: - """A function that determines whether an artifact has already been sequenced before or not, and therefore is a topup + """A function that determines whether an artifact has already been sequenced before or not, and therefore is a topup sample/artifact or not""" output = False @@ -53,6 +59,7 @@ def is_topup(artifact: Artifact) -> bool: output = True return output + def is_adjusted(artifact: Artifact) -> bool: """A function that checks if the process UDF Adjusted Reads to Sequence is set/true. This will be updated after the EPP to adjust the reads to sequence has run one time""" @@ -63,18 +70,23 @@ def is_adjusted(artifact: Artifact) -> bool: output = True return output + def validate_udf_values(artifact: Artifact) -> bool: """A function checking whether Reads to Sequence (M) has a negative/no value. Then the function returns the output as 'False' and logs all those sample IDs in the EPP log""" output = True - if not artifact.udf.get("Reads to sequence (M)") or float(artifact.udf.get("Reads to sequence (M)")) < 0: + if ( + not artifact.udf.get("Reads to sequence (M)") + or float(artifact.udf.get("Reads to sequence (M)")) < 0 + ): output = False LOG.info( f"Sample {artifact.samples[0].id} has no or a negative value for Reads to sequence (M). Skipping." ) return output + def adjust_reads(artifact: Artifact, apptags: tuple, factor: str) -> None: """Only artifacts that have passed the validation of acceptable Reads to Sequence (M) values will be adjusted""" @@ -85,24 +97,43 @@ def adjust_reads(artifact: Artifact, apptags: tuple, factor: str) -> None: artifact.udf["Reads to sequence (M)"] = str(adjusted_reads) artifact.put() + @click.command() @options.apptag_wgs(help="String of UDF Sequencing Analysis, also known as apptag, for WGS samples") -@options.apptag_wgs_tumor(help="String of UDF Sequencing Analysis, also known as apptag, for WGS tumor samples") +@options.apptag_wgs_tumor( + help="String of UDF Sequencing Analysis, also known as apptag, for WGS tumor samples" +) @options.apptag_tga(help="String of UDF Sequencing Analysis, also known as apptag, for TGA samples") -@options.apptag_micro(help="String of UDF Sequencing Analysis, also known as apptag, for micro samples") +@options.apptag_micro( + help="String of UDF Sequencing Analysis, also known as apptag, for micro samples" +) @options.apptag_rml(help="String of UDF Sequencing Analysis, also known as apptag, for RML samples") -@options.apptag_virus(help="String of UDF Sequencing Analysis, also known as apptag, for virus samples") +@options.apptag_virus( + help="String of UDF Sequencing Analysis, also known as apptag, for virus samples" +) @options.apptag_rna(help="String of UDF Sequencing Analysis, also known as apptag, for RNA samples") -@options.factor_wgs_tumor(help= "Factor to multiply Reads to sequence (M) with for WGS tumor samples") -@options.factor_tga(help= "Factor to multiply Reads to sequence (M) with for TGA samples") -@options.factor_micro(help= "Factor to multiply Reads to sequence (M) with for micro samples") -@options.factor_rml(help= "Factor to multiply Reads to sequence (M) with for RML samples") -@options.factor_rna(help= "Factor to multiply Reads to sequence (M) with for RNA samples") -@options.factor_rna_topups(help= "Factor to multiply Reads to sequence (M) with for RNA topup samples") -@options.factor_rml_topups(help= "Factor to multiply Reads to sequence (M) with for RML topup samples") -@options.factor_tga_topups(help= "Factor to multiply Reads to sequence (M) with for TGA topup samples") -@options.factor_wgs_lower(help= "Lower factor to multiply Reads to sequence (M) with for WGS samples") -@options.factor_wgs_higher(help= "Higher factor to multiply Reads to sequence (M) with for WGS samples") +@options.factor_wgs_tumor( + help="Factor to multiply Reads to sequence (M) with for WGS tumor samples" +) +@options.factor_tga(help="Factor to multiply Reads to sequence (M) with for TGA samples") +@options.factor_micro(help="Factor to multiply Reads to sequence (M) with for micro samples") +@options.factor_rml(help="Factor to multiply Reads to sequence (M) with for RML samples") +@options.factor_rna(help="Factor to multiply Reads to sequence (M) with for RNA samples") +@options.factor_rna_topups( + help="Factor to multiply Reads to sequence (M) with for RNA topup samples" +) +@options.factor_rml_topups( + help="Factor to multiply Reads to sequence (M) with for RML topup samples" +) +@options.factor_tga_topups( + help="Factor to multiply Reads to sequence (M) with for TGA topup samples" +) +@options.factor_wgs_lower( + help="Lower factor to multiply Reads to sequence (M) with for WGS samples" +) +@options.factor_wgs_higher( + help="Higher factor to multiply Reads to sequence (M) with for WGS samples" +) @options.threshold_reads(help="Threshold for Reads to sequence (M) during adjustment") @options.reset_micro_reads(help="A value to re-set Reads to sequence (M) for microbial samples") @options.reset_virus_reads(help="A value to re-set Reads to sequence (M) for virus samples") @@ -130,7 +161,7 @@ def adjust_missing_reads( reset_micro_reads: str, reset_virus_reads: str, ): - """Script to calculate the adjusted Reads to sequence (M) with a specific factor for specific apptags, + """Script to calculate the adjusted Reads to sequence (M) with a specific factor for specific apptags, specified in the command line""" process = ctx.obj["process"] @@ -141,41 +172,67 @@ def adjust_missing_reads( for artifact in artifacts: if not is_adjusted(artifact=artifact): for app in apptag_wgs: - if app in artifact.samples[0].udf.get("Sequencing Analysis") and is_topup(artifact=artifact): - adjust_wgs_topups(artifact=artifact, factor_wgs_lower=factor_wgs_lower, - factor_wgs_higher=factor_wgs_higher, threshold_reads=threshold_reads) + if app in artifact.samples[0].udf.get("Sequencing Analysis") and is_topup( + artifact=artifact + ): + adjust_wgs_topups( + artifact=artifact, + factor_wgs_lower=factor_wgs_lower, + factor_wgs_higher=factor_wgs_higher, + threshold_reads=threshold_reads, + ) for app in apptag_wgs_tumor: if app in artifact.samples[0].udf.get("Sequencing Analysis"): if is_topup(artifact=artifact): - adjust_wgs_topups(artifact=artifact, factor_wgs_lower=factor_wgs_lower, - factor_wgs_higher=factor_wgs_higher, threshold_reads=threshold_reads) - else: - adjust_reads(artifact=artifact, apptags=apptag_wgs_tumor, factor=factor_wgs_tumor) + adjust_wgs_topups( + artifact=artifact, + factor_wgs_lower=factor_wgs_lower, + factor_wgs_higher=factor_wgs_higher, + threshold_reads=threshold_reads, + ) + else: + adjust_reads( + artifact=artifact, apptags=apptag_wgs_tumor, factor=factor_wgs_tumor + ) for app in apptag_tga: if app in artifact.samples[0].udf.get("Sequencing Analysis"): if is_topup(artifact=artifact): - adjust_reads(artifact=artifact, apptags=apptag_tga, factor=factor_tga_topups) + adjust_reads( + artifact=artifact, apptags=apptag_tga, factor=factor_tga_topups + ) else: adjust_reads(artifact=artifact, apptags=apptag_tga, factor=factor_tga) for app in apptag_micro: if app in artifact.samples[0].udf.get("Sequencing Analysis"): if is_topup(artifact=artifact): - reset_microbial_reads(artifact=artifact, reset_microbial_reads=reset_micro_reads) + reset_microbial_reads( + artifact=artifact, reset_microbial_reads=reset_micro_reads + ) else: - adjust_reads(artifact=artifact, apptags=apptag_micro, factor=factor_micro) + adjust_reads( + artifact=artifact, apptags=apptag_micro, factor=factor_micro + ) for app in apptag_virus: - if app in artifact.samples[0].udf.get("Sequencing Analysis") and is_topup(artifact=artifact): - reset_microbial_reads(artifact=artifact, reset_microbial_reads=reset_virus_reads) + if app in artifact.samples[0].udf.get("Sequencing Analysis") and is_topup( + artifact=artifact + ): + reset_microbial_reads( + artifact=artifact, reset_microbial_reads=reset_virus_reads + ) for app in apptag_rml: if app in artifact.samples[0].udf.get("Sequencing Analysis"): if is_topup(artifact=artifact): - adjust_reads(artifact=artifact, apptags=apptag_rml, factor=factor_rml_topups) + adjust_reads( + artifact=artifact, apptags=apptag_rml, factor=factor_rml_topups + ) else: adjust_reads(artifact=artifact, apptags=apptag_rml, factor=factor_rml) for app in apptag_rna: if app in artifact.samples[0].udf.get("Sequencing Analysis"): if is_topup(artifact=artifact): - adjust_reads(artifact=artifact, apptags=apptag_rna, factor=factor_rna_topups) + adjust_reads( + artifact=artifact, apptags=apptag_rna, factor=factor_rna_topups + ) else: adjust_reads(artifact=artifact, apptags=apptag_rna, factor=factor_rna) if is_adjusted(artifact=artifact): diff --git a/cg_lims/EPPs/udf/calculate/base.py b/cg_lims/EPPs/udf/calculate/base.py index 2ec3708c..3a7fcf95 100644 --- a/cg_lims/EPPs/udf/calculate/base.py +++ b/cg_lims/EPPs/udf/calculate/base.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import click +from cg_lims.EPPs.udf.calculate.adjust_missing_reads import adjust_missing_reads from cg_lims.EPPs.udf.calculate.aliquot_volume import aliquot_volume from cg_lims.EPPs.udf.calculate.calculate_amount_ng import calculate_amount_ng from cg_lims.EPPs.udf.calculate.calculate_amount_ng_fmol import calculate_amount_ng_fmol @@ -28,7 +29,6 @@ from cg_lims.EPPs.udf.calculate.sum_missing_reads_in_pool import missing_reads_in_pool from cg_lims.EPPs.udf.calculate.twist_aliquot_amount import twist_aliquot_amount from cg_lims.EPPs.udf.calculate.twist_get_volumes_from_buffer import get_volumes_from_buffer -from cg_lims.EPPs.udf.calculate.adjust_missing_reads import adjust_missing_reads # commands from cg_lims.EPPs.udf.calculate.twist_pool import twist_pool diff --git a/cg_lims/options.py b/cg_lims/options.py index 9c478a94..fb4369ca 100644 --- a/cg_lims/options.py +++ b/cg_lims/options.py @@ -435,6 +435,7 @@ def apptag( help=help, ) + def apptag_wgs( help: str = "String of UDF Sequencing Analysis, also known as apptag, for WGS samples", ) -> click.option: @@ -445,6 +446,7 @@ def apptag_wgs( help=help, ) + def apptag_wgs_tumor( help: str = "String of UDF Sequencing Analysis, also known as apptag, for WGS tumor samples", ) -> click.option: @@ -455,6 +457,7 @@ def apptag_wgs_tumor( help=help, ) + def apptag_tga( help: str = "String of UDF Sequencing Analysis, also known as apptag, for TGA samples", ) -> click.option: @@ -465,6 +468,7 @@ def apptag_tga( help=help, ) + def apptag_micro( help: str = "String of UDF Sequencing Analysis, also known as apptag, for micro samples", ) -> click.option: @@ -475,6 +479,7 @@ def apptag_micro( help=help, ) + def apptag_rml( help: str = "String of UDF Sequencing Analysis, also known as apptag, for RML samples", ) -> click.option: @@ -485,6 +490,7 @@ def apptag_rml( help=help, ) + def apptag_virus( help: str = "String of UDF Sequencing Analysis, also known as apptag, for virus samples", ) -> click.option: @@ -495,6 +501,7 @@ def apptag_virus( help=help, ) + def apptag_rna( help: str = "String of UDF Sequencing Analysis, also known as apptag, for RNA samples", ) -> click.option: @@ -505,6 +512,7 @@ def apptag_rna( help=help, ) + def factor( help: str = "Factor to multiply Reads to sequence (M) with", ) -> click.option: @@ -515,6 +523,7 @@ def factor( help=help, ) + def factor_wgs_tumor( help: str = "Factor to multiply Reads to sequence (M) with for WGS tumor samples", ) -> click.option: @@ -525,6 +534,7 @@ def factor_wgs_tumor( help=help, ) + def factor_tga( help: str = "Factor to multiply Reads to sequence (M) with for TGA samples", ) -> click.option: @@ -535,6 +545,7 @@ def factor_tga( help=help, ) + def factor_micro( help: str = "Factor to multiply Reads to sequence (M) with for micro samples", ) -> click.option: @@ -545,6 +556,7 @@ def factor_micro( help=help, ) + def factor_rml( help: str = "Factor to multiply Reads to sequence (M) with for RML samples", ) -> click.option: @@ -555,6 +567,7 @@ def factor_rml( help=help, ) + def factor_rna( help: str = "Factor to multiply Reads to sequence (M) with for RNA samples", ) -> click.option: @@ -565,8 +578,9 @@ def factor_rna( help=help, ) + def factor_rna_topups( - help: str = "Factor to multiply Reads to sequence (M) with for RNA topup samples", + help: str = "Factor to multiply Reads to sequence (M) with for RNA topup samples", ) -> click.option: return click.option( "--factor-rna-topups", @@ -575,8 +589,9 @@ def factor_rna_topups( help=help, ) + def factor_rml_topups( - help: str = "Factor to multiply Reads to sequence (M) with for RML topup samples", + help: str = "Factor to multiply Reads to sequence (M) with for RML topup samples", ) -> click.option: return click.option( "--factor-rml-topups", @@ -585,8 +600,9 @@ def factor_rml_topups( help=help, ) + def factor_tga_topups( - help: str = "Factor to multiply Reads to sequence (M) with for TGA topup samples", + help: str = "Factor to multiply Reads to sequence (M) with for TGA topup samples", ) -> click.option: return click.option( "--factor-tga-topups", @@ -595,6 +611,7 @@ def factor_tga_topups( help=help, ) + def factor_wgs_lower( help: str = "Lower factor to multiply Reads to sequence (M) with for WGS samples", ) -> click.option: @@ -605,6 +622,7 @@ def factor_wgs_lower( help=help, ) + def factor_wgs_higher( help: str = "Higher factor to multiply Reads to sequence (M) with for WGS samples", ) -> click.option: @@ -615,6 +633,7 @@ def factor_wgs_higher( help=help, ) + def threshold_reads( help: str = "Threshold for Reads to sequence (M) during adjustment", ) -> click.option: @@ -625,6 +644,7 @@ def threshold_reads( help=help, ) + def reset_micro_reads( help: str = "A value to re-set Reads to sequence (M) for microbial samples", ) -> click.option: @@ -635,6 +655,7 @@ def reset_micro_reads( help=help, ) + def reset_virus_reads( help: str = "A value to re-set Reads to sequence (M) for virus samples", ) -> click.option: @@ -643,4 +664,4 @@ def reset_virus_reads( required=True, multiple=False, help=help, - ) \ No newline at end of file + ) From 4bd84c5d9d6be69bae1a3ac092e78d6ed6dbd0bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20Sv=C3=A4rd?= Date: Thu, 29 Aug 2024 14:11:52 +0200 Subject: [PATCH 07/16] remove unused imports and variables --- cg_lims/EPPs/udf/calculate/adjust_missing_reads.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py index 5c33420c..3670e1d9 100644 --- a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py +++ b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py @@ -3,12 +3,9 @@ import click from cg_lims import options -from cg_lims.exceptions import LimsError, MissingCgFieldError, MissingUDFsError +from cg_lims.exceptions import LimsError from cg_lims.get.artifacts import get_artifacts -from cg_lims.get.samples import get_one_sample_from_artifact -from cg_lims.status_db_api import StatusDBAPI from genologics.entities import Artifact -from requests.exceptions import ConnectionError LOG = logging.getLogger(__name__) @@ -17,7 +14,6 @@ def calculate_adjusted_reads(artifact: Artifact, factor: str) -> float: """A function to calculate the adjusted reads to sequence for each artifact with the desired apptag""" reads = artifact.udf.get("Reads to sequence (M)") - return round(float(reads) * float(factor), 1) @@ -165,7 +161,6 @@ def adjust_missing_reads( specified in the command line""" process = ctx.obj["process"] - lims = ctx.obj["lims"] try: artifacts: List[Artifact] = get_artifacts(process=process) From a1c35bd90b5951bcc04c590da6d1687eb55f0cf6 Mon Sep 17 00:00:00 2001 From: Ida Lindegaard <82438141+idalindegaard@users.noreply.github.com> Date: Thu, 29 Aug 2024 16:40:00 +0200 Subject: [PATCH 08/16] Update cg_lims/EPPs/udf/calculate/adjust_missing_reads.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Karl Svärd <60181709+Karl-Svard@users.noreply.github.com> --- cg_lims/EPPs/udf/calculate/adjust_missing_reads.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py index 3670e1d9..832ea9b3 100644 --- a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py +++ b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py @@ -48,7 +48,7 @@ def reset_microbial_reads(artifact: Artifact, reset_microbial_reads: str) -> Non def is_topup(artifact: Artifact) -> bool: """A function that determines whether an artifact has already been sequenced before or not, and therefore is a topup - sample/artifact or not""" + sample/artifact""" output = False if artifact.samples[0].udf.get("Total Reads (M)"): From 23d3ba81af797fb993f1587284020760c4e5ecad Mon Sep 17 00:00:00 2001 From: Ida Lindegaard <82438141+idalindegaard@users.noreply.github.com> Date: Thu, 29 Aug 2024 16:48:55 +0200 Subject: [PATCH 09/16] Update cg_lims/EPPs/udf/calculate/adjust_missing_reads.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Karl Svärd <60181709+Karl-Svard@users.noreply.github.com> --- cg_lims/EPPs/udf/calculate/adjust_missing_reads.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py index 832ea9b3..869823f4 100644 --- a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py +++ b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py @@ -51,7 +51,7 @@ def is_topup(artifact: Artifact) -> bool: sample/artifact""" output = False - if artifact.samples[0].udf.get("Total Reads (M)"): + if artifact.samples[0].udf.get("Total Reads (M)") and artifact.samples[0].udf.get("Total Reads (M)") != 0: output = True return output From 49c5e650b3731fe4d34219bb626d92d44115d574 Mon Sep 17 00:00:00 2001 From: Ida Lindegaard <82438141+idalindegaard@users.noreply.github.com> Date: Fri, 30 Aug 2024 11:41:51 +0200 Subject: [PATCH 10/16] Update adjust_missing_reads.py - Added type hints - removed unused variable in function adjust_reads - renamed variable reset_microbial_reads to reset_reads so it wouldn't have the same name as the function it was used in --- .../udf/calculate/adjust_missing_reads.py | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py index 869823f4..7a8dcd13 100644 --- a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py +++ b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py @@ -13,7 +13,7 @@ def calculate_adjusted_reads(artifact: Artifact, factor: str) -> float: """A function to calculate the adjusted reads to sequence for each artifact with the desired apptag""" - reads = artifact.udf.get("Reads to sequence (M)") + reads: str = artifact.udf.get("Reads to sequence (M)") return round(float(reads) * float(factor), 1) @@ -23,7 +23,7 @@ def adjust_wgs_topups( """A function that calculates adjusted reads to sequence for WGS topups, where the 'topup' factor is determined by a threshold for the reads to sequence. This is specified in the cli""" - valid_value = validate_udf_values(artifact=artifact) + valid_value: bool = validate_udf_values(artifact=artifact) if valid_value: reads = float(artifact.udf.get("Reads to sequence (M)")) @@ -35,14 +35,14 @@ def adjust_wgs_topups( artifact.put() -def reset_microbial_reads(artifact: Artifact, reset_microbial_reads: str) -> None: +def reset_microbial_reads(artifact: Artifact, reset_reads: str) -> None: """A function that resets the reads to sequence for microbial samples, and the threshold_reads specifies what they are supposed to be reset to""" - valid_value = validate_udf_values(artifact=artifact) + valid_value: bool = validate_udf_values(artifact=artifact) if valid_value: - artifact.udf["Reads to sequence (M)"] = reset_microbial_reads + artifact.udf["Reads to sequence (M)"] = reset_reads artifact.put() @@ -50,7 +50,7 @@ def is_topup(artifact: Artifact) -> bool: """A function that determines whether an artifact has already been sequenced before or not, and therefore is a topup sample/artifact""" - output = False + output: bool = False if artifact.samples[0].udf.get("Total Reads (M)") and artifact.samples[0].udf.get("Total Reads (M)") != 0: output = True return output @@ -61,7 +61,7 @@ def is_adjusted(artifact: Artifact) -> bool: be updated after the EPP to adjust the reads to sequence has run one time""" process = artifact.parent_process - output = False + output: bool = False if process.udf.get("Adjusted Reads to Sequence"): output = True return output @@ -71,7 +71,7 @@ def validate_udf_values(artifact: Artifact) -> bool: """A function checking whether Reads to Sequence (M) has a negative/no value. Then the function returns the output as 'False' and logs all those sample IDs in the EPP log""" - output = True + output: bool = True if ( not artifact.udf.get("Reads to sequence (M)") or float(artifact.udf.get("Reads to sequence (M)")) < 0 @@ -83,10 +83,10 @@ def validate_udf_values(artifact: Artifact) -> bool: return output -def adjust_reads(artifact: Artifact, apptags: tuple, factor: str) -> None: +def adjust_reads(artifact: Artifact, factor: str) -> None: """Only artifacts that have passed the validation of acceptable Reads to Sequence (M) values will be adjusted""" - valid_value = validate_udf_values(artifact=artifact) + valid_value: bool = validate_udf_values(artifact=artifact) if valid_value: adjusted_reads = calculate_adjusted_reads(artifact=artifact, factor=factor) @@ -187,49 +187,49 @@ def adjust_missing_reads( ) else: adjust_reads( - artifact=artifact, apptags=apptag_wgs_tumor, factor=factor_wgs_tumor + artifact=artifact, factor=factor_wgs_tumor ) for app in apptag_tga: if app in artifact.samples[0].udf.get("Sequencing Analysis"): if is_topup(artifact=artifact): adjust_reads( - artifact=artifact, apptags=apptag_tga, factor=factor_tga_topups + artifact=artifact, factor=factor_tga_topups ) else: - adjust_reads(artifact=artifact, apptags=apptag_tga, factor=factor_tga) + adjust_reads(artifact=artifact, factor=factor_tga) for app in apptag_micro: if app in artifact.samples[0].udf.get("Sequencing Analysis"): if is_topup(artifact=artifact): reset_microbial_reads( - artifact=artifact, reset_microbial_reads=reset_micro_reads + artifact=artifact, reset_reads=reset_micro_reads ) else: adjust_reads( - artifact=artifact, apptags=apptag_micro, factor=factor_micro + artifact=artifact, factor=factor_micro ) for app in apptag_virus: if app in artifact.samples[0].udf.get("Sequencing Analysis") and is_topup( artifact=artifact ): reset_microbial_reads( - artifact=artifact, reset_microbial_reads=reset_virus_reads + artifact=artifact, reset_reads=reset_virus_reads ) for app in apptag_rml: if app in artifact.samples[0].udf.get("Sequencing Analysis"): if is_topup(artifact=artifact): adjust_reads( - artifact=artifact, apptags=apptag_rml, factor=factor_rml_topups + artifact=artifact, factor=factor_rml_topups ) else: - adjust_reads(artifact=artifact, apptags=apptag_rml, factor=factor_rml) + adjust_reads(artifact=artifact, factor=factor_rml) for app in apptag_rna: if app in artifact.samples[0].udf.get("Sequencing Analysis"): if is_topup(artifact=artifact): adjust_reads( - artifact=artifact, apptags=apptag_rna, factor=factor_rna_topups + artifact=artifact, factor=factor_rna_topups ) else: - adjust_reads(artifact=artifact, apptags=apptag_rna, factor=factor_rna) + adjust_reads(artifact=artifact, factor=factor_rna) if is_adjusted(artifact=artifact): LOG.info("Samples have already been adjusted.") click.echo("Udfs have been updated on all samples.") From 5c22fe78958f0a5e0462c8b18a7dc4671e31b936 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20Sv=C3=A4rd?= Date: Fri, 30 Aug 2024 14:26:02 +0200 Subject: [PATCH 11/16] added some more type hints --- .../udf/calculate/adjust_missing_reads.py | 48 ++++++++----------- 1 file changed, 19 insertions(+), 29 deletions(-) diff --git a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py index 7a8dcd13..bf90356d 100644 --- a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py +++ b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py @@ -1,11 +1,12 @@ import logging import sys +from typing import List import click from cg_lims import options from cg_lims.exceptions import LimsError from cg_lims.get.artifacts import get_artifacts -from genologics.entities import Artifact +from genologics.entities import Artifact, Process LOG = logging.getLogger(__name__) @@ -26,11 +27,11 @@ def adjust_wgs_topups( valid_value: bool = validate_udf_values(artifact=artifact) if valid_value: - reads = float(artifact.udf.get("Reads to sequence (M)")) + reads: float = float(artifact.udf.get("Reads to sequence (M)")) if reads < float(threshold_reads): - adjusted_reads = round(float(reads) * float(factor_wgs_lower), 1) + adjusted_reads: float = round(float(reads) * float(factor_wgs_lower), 1) else: - adjusted_reads = round(float(reads) * float(factor_wgs_higher), 1) + adjusted_reads: float = round(float(reads) * float(factor_wgs_higher), 1) artifact.udf["Reads to sequence (M)"] = str(adjusted_reads) artifact.put() @@ -51,7 +52,10 @@ def is_topup(artifact: Artifact) -> bool: sample/artifact""" output: bool = False - if artifact.samples[0].udf.get("Total Reads (M)") and artifact.samples[0].udf.get("Total Reads (M)") != 0: + if ( + artifact.samples[0].udf.get("Total Reads (M)") + and artifact.samples[0].udf.get("Total Reads (M)") != 0 + ): output = True return output @@ -60,7 +64,7 @@ def is_adjusted(artifact: Artifact) -> bool: """A function that checks if the process UDF Adjusted Reads to Sequence is set/true. This will be updated after the EPP to adjust the reads to sequence has run one time""" - process = artifact.parent_process + process: Process = artifact.parent_process output: bool = False if process.udf.get("Adjusted Reads to Sequence"): output = True @@ -89,7 +93,7 @@ def adjust_reads(artifact: Artifact, factor: str) -> None: valid_value: bool = validate_udf_values(artifact=artifact) if valid_value: - adjusted_reads = calculate_adjusted_reads(artifact=artifact, factor=factor) + adjusted_reads: float = calculate_adjusted_reads(artifact=artifact, factor=factor) artifact.udf["Reads to sequence (M)"] = str(adjusted_reads) artifact.put() @@ -160,7 +164,7 @@ def adjust_missing_reads( """Script to calculate the adjusted Reads to sequence (M) with a specific factor for specific apptags, specified in the command line""" - process = ctx.obj["process"] + process: Process = ctx.obj["process"] try: artifacts: List[Artifact] = get_artifacts(process=process) @@ -186,48 +190,34 @@ def adjust_missing_reads( threshold_reads=threshold_reads, ) else: - adjust_reads( - artifact=artifact, factor=factor_wgs_tumor - ) + adjust_reads(artifact=artifact, factor=factor_wgs_tumor) for app in apptag_tga: if app in artifact.samples[0].udf.get("Sequencing Analysis"): if is_topup(artifact=artifact): - adjust_reads( - artifact=artifact, factor=factor_tga_topups - ) + adjust_reads(artifact=artifact, factor=factor_tga_topups) else: adjust_reads(artifact=artifact, factor=factor_tga) for app in apptag_micro: if app in artifact.samples[0].udf.get("Sequencing Analysis"): if is_topup(artifact=artifact): - reset_microbial_reads( - artifact=artifact, reset_reads=reset_micro_reads - ) + reset_microbial_reads(artifact=artifact, reset_reads=reset_micro_reads) else: - adjust_reads( - artifact=artifact, factor=factor_micro - ) + adjust_reads(artifact=artifact, factor=factor_micro) for app in apptag_virus: if app in artifact.samples[0].udf.get("Sequencing Analysis") and is_topup( artifact=artifact ): - reset_microbial_reads( - artifact=artifact, reset_reads=reset_virus_reads - ) + reset_microbial_reads(artifact=artifact, reset_reads=reset_virus_reads) for app in apptag_rml: if app in artifact.samples[0].udf.get("Sequencing Analysis"): if is_topup(artifact=artifact): - adjust_reads( - artifact=artifact, factor=factor_rml_topups - ) + adjust_reads(artifact=artifact, factor=factor_rml_topups) else: adjust_reads(artifact=artifact, factor=factor_rml) for app in apptag_rna: if app in artifact.samples[0].udf.get("Sequencing Analysis"): if is_topup(artifact=artifact): - adjust_reads( - artifact=artifact, factor=factor_rna_topups - ) + adjust_reads(artifact=artifact, factor=factor_rna_topups) else: adjust_reads(artifact=artifact, factor=factor_rna) if is_adjusted(artifact=artifact): From 9f7fa23795a4f0d5c5c286629a7b05807a48bcc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20Sv=C3=A4rd?= Date: Fri, 30 Aug 2024 15:15:16 +0200 Subject: [PATCH 12/16] restructure to decrease indentation --- .../udf/calculate/adjust_missing_reads.py | 94 +++++++++---------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py index bf90356d..696e238b 100644 --- a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py +++ b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py @@ -169,59 +169,59 @@ def adjust_missing_reads( try: artifacts: List[Artifact] = get_artifacts(process=process) for artifact in artifacts: - if not is_adjusted(artifact=artifact): - for app in apptag_wgs: - if app in artifact.samples[0].udf.get("Sequencing Analysis") and is_topup( - artifact=artifact - ): + if is_adjusted(artifact=artifact): + LOG.info("Samples have already been adjusted.") + continue + for app in apptag_wgs: + if app in artifact.samples[0].udf.get("Sequencing Analysis") and is_topup( + artifact=artifact + ): + adjust_wgs_topups( + artifact=artifact, + factor_wgs_lower=factor_wgs_lower, + factor_wgs_higher=factor_wgs_higher, + threshold_reads=threshold_reads, + ) + for app in apptag_wgs_tumor: + if app in artifact.samples[0].udf.get("Sequencing Analysis"): + if is_topup(artifact=artifact): adjust_wgs_topups( artifact=artifact, factor_wgs_lower=factor_wgs_lower, factor_wgs_higher=factor_wgs_higher, threshold_reads=threshold_reads, ) - for app in apptag_wgs_tumor: - if app in artifact.samples[0].udf.get("Sequencing Analysis"): - if is_topup(artifact=artifact): - adjust_wgs_topups( - artifact=artifact, - factor_wgs_lower=factor_wgs_lower, - factor_wgs_higher=factor_wgs_higher, - threshold_reads=threshold_reads, - ) - else: - adjust_reads(artifact=artifact, factor=factor_wgs_tumor) - for app in apptag_tga: - if app in artifact.samples[0].udf.get("Sequencing Analysis"): - if is_topup(artifact=artifact): - adjust_reads(artifact=artifact, factor=factor_tga_topups) - else: - adjust_reads(artifact=artifact, factor=factor_tga) - for app in apptag_micro: - if app in artifact.samples[0].udf.get("Sequencing Analysis"): - if is_topup(artifact=artifact): - reset_microbial_reads(artifact=artifact, reset_reads=reset_micro_reads) - else: - adjust_reads(artifact=artifact, factor=factor_micro) - for app in apptag_virus: - if app in artifact.samples[0].udf.get("Sequencing Analysis") and is_topup( - artifact=artifact - ): - reset_microbial_reads(artifact=artifact, reset_reads=reset_virus_reads) - for app in apptag_rml: - if app in artifact.samples[0].udf.get("Sequencing Analysis"): - if is_topup(artifact=artifact): - adjust_reads(artifact=artifact, factor=factor_rml_topups) - else: - adjust_reads(artifact=artifact, factor=factor_rml) - for app in apptag_rna: - if app in artifact.samples[0].udf.get("Sequencing Analysis"): - if is_topup(artifact=artifact): - adjust_reads(artifact=artifact, factor=factor_rna_topups) - else: - adjust_reads(artifact=artifact, factor=factor_rna) - if is_adjusted(artifact=artifact): - LOG.info("Samples have already been adjusted.") + else: + adjust_reads(artifact=artifact, factor=factor_wgs_tumor) + for app in apptag_tga: + if app in artifact.samples[0].udf.get("Sequencing Analysis"): + if is_topup(artifact=artifact): + adjust_reads(artifact=artifact, factor=factor_tga_topups) + else: + adjust_reads(artifact=artifact, factor=factor_tga) + for app in apptag_micro: + if app in artifact.samples[0].udf.get("Sequencing Analysis"): + if is_topup(artifact=artifact): + reset_microbial_reads(artifact=artifact, reset_reads=reset_micro_reads) + else: + adjust_reads(artifact=artifact, factor=factor_micro) + for app in apptag_virus: + if app in artifact.samples[0].udf.get("Sequencing Analysis") and is_topup( + artifact=artifact + ): + reset_microbial_reads(artifact=artifact, reset_reads=reset_virus_reads) + for app in apptag_rml: + if app in artifact.samples[0].udf.get("Sequencing Analysis"): + if is_topup(artifact=artifact): + adjust_reads(artifact=artifact, factor=factor_rml_topups) + else: + adjust_reads(artifact=artifact, factor=factor_rml) + for app in apptag_rna: + if app in artifact.samples[0].udf.get("Sequencing Analysis"): + if is_topup(artifact=artifact): + adjust_reads(artifact=artifact, factor=factor_rna_topups) + else: + adjust_reads(artifact=artifact, factor=factor_rna) click.echo("Udfs have been updated on all samples.") process.udf["Adjusted Reads to Sequence"] = True process.put() From 5c0ee40b29bddc1044d0ee600f8b9b7095811309 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20Sv=C3=A4rd?= Date: Fri, 30 Aug 2024 15:21:29 +0200 Subject: [PATCH 13/16] save apptag as variable --- .../udf/calculate/adjust_missing_reads.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py index 696e238b..56e6f6a8 100644 --- a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py +++ b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py @@ -172,10 +172,9 @@ def adjust_missing_reads( if is_adjusted(artifact=artifact): LOG.info("Samples have already been adjusted.") continue + sample_apptag: str = artifact.samples[0].udf.get("Sequencing Analysis") for app in apptag_wgs: - if app in artifact.samples[0].udf.get("Sequencing Analysis") and is_topup( - artifact=artifact - ): + if app in sample_apptag and is_topup(artifact=artifact): adjust_wgs_topups( artifact=artifact, factor_wgs_lower=factor_wgs_lower, @@ -183,7 +182,7 @@ def adjust_missing_reads( threshold_reads=threshold_reads, ) for app in apptag_wgs_tumor: - if app in artifact.samples[0].udf.get("Sequencing Analysis"): + if app in sample_apptag: if is_topup(artifact=artifact): adjust_wgs_topups( artifact=artifact, @@ -194,30 +193,28 @@ def adjust_missing_reads( else: adjust_reads(artifact=artifact, factor=factor_wgs_tumor) for app in apptag_tga: - if app in artifact.samples[0].udf.get("Sequencing Analysis"): + if app in sample_apptag: if is_topup(artifact=artifact): adjust_reads(artifact=artifact, factor=factor_tga_topups) else: adjust_reads(artifact=artifact, factor=factor_tga) for app in apptag_micro: - if app in artifact.samples[0].udf.get("Sequencing Analysis"): + if app in sample_apptag: if is_topup(artifact=artifact): reset_microbial_reads(artifact=artifact, reset_reads=reset_micro_reads) else: adjust_reads(artifact=artifact, factor=factor_micro) for app in apptag_virus: - if app in artifact.samples[0].udf.get("Sequencing Analysis") and is_topup( - artifact=artifact - ): + if app in sample_apptag and is_topup(artifact=artifact): reset_microbial_reads(artifact=artifact, reset_reads=reset_virus_reads) for app in apptag_rml: - if app in artifact.samples[0].udf.get("Sequencing Analysis"): + if app in sample_apptag: if is_topup(artifact=artifact): adjust_reads(artifact=artifact, factor=factor_rml_topups) else: adjust_reads(artifact=artifact, factor=factor_rml) for app in apptag_rna: - if app in artifact.samples[0].udf.get("Sequencing Analysis"): + if app in sample_apptag: if is_topup(artifact=artifact): adjust_reads(artifact=artifact, factor=factor_rna_topups) else: From 2b70049820a5d7f7bc21f2dde199a76e98bf5990 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20Sv=C3=A4rd?= Date: Fri, 30 Aug 2024 15:41:32 +0200 Subject: [PATCH 14/16] modified is_adjusted check slighty --- .../EPPs/udf/calculate/adjust_missing_reads.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py index 56e6f6a8..29175e7e 100644 --- a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py +++ b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py @@ -4,7 +4,7 @@ import click from cg_lims import options -from cg_lims.exceptions import LimsError +from cg_lims.exceptions import InvalidValueError, LimsError from cg_lims.get.artifacts import get_artifacts from genologics.entities import Artifact, Process @@ -60,11 +60,10 @@ def is_topup(artifact: Artifact) -> bool: return output -def is_adjusted(artifact: Artifact) -> bool: +def is_adjusted(process: Process) -> bool: """A function that checks if the process UDF Adjusted Reads to Sequence is set/true. This will be updated after the EPP to adjust the reads to sequence has run one time""" - process: Process = artifact.parent_process output: bool = False if process.udf.get("Adjusted Reads to Sequence"): output = True @@ -168,10 +167,11 @@ def adjust_missing_reads( try: artifacts: List[Artifact] = get_artifacts(process=process) + if is_adjusted(process=process): + warning_message = "Samples have already been adjusted!" + LOG.warning(warning_message) + raise InvalidValueError(warning_message) for artifact in artifacts: - if is_adjusted(artifact=artifact): - LOG.info("Samples have already been adjusted.") - continue sample_apptag: str = artifact.samples[0].udf.get("Sequencing Analysis") for app in apptag_wgs: if app in sample_apptag and is_topup(artifact=artifact): @@ -219,8 +219,11 @@ def adjust_missing_reads( adjust_reads(artifact=artifact, factor=factor_rna_topups) else: adjust_reads(artifact=artifact, factor=factor_rna) - click.echo("Udfs have been updated on all samples.") process.udf["Adjusted Reads to Sequence"] = True process.put() + success_message = "Udfs have been updated on all samples." + LOG.info(success_message) + click.echo(success_message) except LimsError as e: + LOG.error(e.message) sys.exit(e.message) From 3544731a6a1c5195497540ff0b5e176829fe3023 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20Sv=C3=A4rd?= Date: Fri, 30 Aug 2024 15:54:14 +0200 Subject: [PATCH 15/16] added more logging --- .../EPPs/udf/calculate/adjust_missing_reads.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py index 29175e7e..5ac8000c 100644 --- a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py +++ b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py @@ -6,6 +6,7 @@ from cg_lims import options from cg_lims.exceptions import InvalidValueError, LimsError from cg_lims.get.artifacts import get_artifacts +from cg_lims.get.samples import get_one_sample_from_artifact from genologics.entities import Artifact, Process LOG = logging.getLogger(__name__) @@ -15,7 +16,11 @@ def calculate_adjusted_reads(artifact: Artifact, factor: str) -> float: """A function to calculate the adjusted reads to sequence for each artifact with the desired apptag""" reads: str = artifact.udf.get("Reads to sequence (M)") - return round(float(reads) * float(factor), 1) + adjusted_reads: float = round(float(reads) * float(factor), 1) + LOG.info( + f"Reads adjusted for sample {get_one_sample_from_artifact(artifact=artifact).id}: {reads}M -> {adjusted_reads}M" + ) + return adjusted_reads def adjust_wgs_topups( @@ -29,9 +34,13 @@ def adjust_wgs_topups( if valid_value: reads: float = float(artifact.udf.get("Reads to sequence (M)")) if reads < float(threshold_reads): - adjusted_reads: float = round(float(reads) * float(factor_wgs_lower), 1) + adjusted_reads: float = calculate_adjusted_reads( + artifact=artifact, factor=factor_wgs_lower + ) else: - adjusted_reads: float = round(float(reads) * float(factor_wgs_higher), 1) + adjusted_reads: float = calculate_adjusted_reads( + artifact=artifact, factor=factor_wgs_higher + ) artifact.udf["Reads to sequence (M)"] = str(adjusted_reads) artifact.put() @@ -43,6 +52,9 @@ def reset_microbial_reads(artifact: Artifact, reset_reads: str) -> None: valid_value: bool = validate_udf_values(artifact=artifact) if valid_value: + LOG.info( + f"Reads reset for sample {get_one_sample_from_artifact(artifact=artifact).id}: {artifact.udf.get('Reads to sequence (M)')}M -> {reset_reads}M" + ) artifact.udf["Reads to sequence (M)"] = reset_reads artifact.put() From 19acb0bdc743a21b0a9b24a0b8994eb7f8b58285 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20Sv=C3=A4rd?= Date: Mon, 2 Sep 2024 18:09:05 +0200 Subject: [PATCH 16/16] refactored main function --- .../udf/calculate/adjust_missing_reads.py | 129 ++++++++++++------ 1 file changed, 88 insertions(+), 41 deletions(-) diff --git a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py index 5ac8000c..51b6e1ed 100644 --- a/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py +++ b/cg_lims/EPPs/udf/calculate/adjust_missing_reads.py @@ -1,12 +1,11 @@ import logging import sys -from typing import List +from typing import List, Optional import click from cg_lims import options from cg_lims.exceptions import InvalidValueError, LimsError from cg_lims.get.artifacts import get_artifacts -from cg_lims.get.samples import get_one_sample_from_artifact from genologics.entities import Artifact, Process LOG = logging.getLogger(__name__) @@ -17,9 +16,7 @@ def calculate_adjusted_reads(artifact: Artifact, factor: str) -> float: reads: str = artifact.udf.get("Reads to sequence (M)") adjusted_reads: float = round(float(reads) * float(factor), 1) - LOG.info( - f"Reads adjusted for sample {get_one_sample_from_artifact(artifact=artifact).id}: {reads}M -> {adjusted_reads}M" - ) + LOG.info(f"Reads adjusted for sample {artifact.samples[0].id}: {reads}M -> {adjusted_reads}M") return adjusted_reads @@ -53,7 +50,7 @@ def reset_microbial_reads(artifact: Artifact, reset_reads: str) -> None: if valid_value: LOG.info( - f"Reads reset for sample {get_one_sample_from_artifact(artifact=artifact).id}: {artifact.udf.get('Reads to sequence (M)')}M -> {reset_reads}M" + f"Reads reset for sample {artifact.samples[0].id}: {artifact.udf.get('Reads to sequence (M)')}M -> {reset_reads}M" ) artifact.udf["Reads to sequence (M)"] = reset_reads artifact.put() @@ -109,6 +106,62 @@ def adjust_reads(artifact: Artifact, factor: str) -> None: artifact.put() +def adjust_wgs_artifact_reads( + artifact: Artifact, + reads_threshold: str, + lower_topup_factor: str, + higher_topup_factor: str, + factor: Optional[str] = None, +) -> None: + """A function for adjusting the reads of a WGS sample. The logic goes: + - Top-ups with read amounts above the 'reads_threshold' value will be adjusted according to 'higher_topup_factor' + - Top-ups with read amount below the 'reads_threshold' value will be adjusted according to 'lower_topup_factor' + - Non top-ups will be adjusted according to the optional 'factor' value + - Non top-up samples are untouched if no 'factor' value is given""" + + if is_topup(artifact=artifact): + adjust_wgs_topups( + artifact=artifact, + factor_wgs_lower=lower_topup_factor, + factor_wgs_higher=higher_topup_factor, + threshold_reads=reads_threshold, + ) + elif factor: + adjust_reads(artifact=artifact, factor=factor) + + +def adjust_artifact_reads( + artifact: Artifact, + topup_factor: str, + factor: Optional[str] = None, +) -> None: + """A function for adjusting the reads of a sample. The logic goes: + - Top-ups will be adjusted according to the 'topup_factor' value + - Non top-ups will be adjusted according to the optional 'factor' value + - Non top-up samples are untouched if no 'factor' value is given""" + + if is_topup(artifact=artifact): + adjust_reads(artifact=artifact, factor=topup_factor) + elif factor: + adjust_reads(artifact=artifact, factor=factor) + + +def adjust_micro_artifact_reads( + artifact: Artifact, + reset_amount: str, + factor: Optional[str] = None, +) -> None: + """A function for adjusting the reads of a microbial sample. The logic goes: + - The reads of a top-up sample gets reset according to the value of 'reset_amount' + - Non top-up samples will be adjusted according to the optional 'factor' value + - Non top-up samples are untouched if no 'factor' value is given""" + + if is_topup(artifact=artifact): + reset_microbial_reads(artifact=artifact, reset_reads=reset_amount) + elif factor: + adjust_reads(artifact=artifact, factor=factor) + + @click.command() @options.apptag_wgs(help="String of UDF Sequencing Analysis, also known as apptag, for WGS samples") @options.apptag_wgs_tumor( @@ -186,51 +239,45 @@ def adjust_missing_reads( for artifact in artifacts: sample_apptag: str = artifact.samples[0].udf.get("Sequencing Analysis") for app in apptag_wgs: - if app in sample_apptag and is_topup(artifact=artifact): - adjust_wgs_topups( + if app in sample_apptag: + adjust_wgs_artifact_reads( artifact=artifact, - factor_wgs_lower=factor_wgs_lower, - factor_wgs_higher=factor_wgs_higher, - threshold_reads=threshold_reads, + lower_topup_factor=factor_wgs_lower, + higher_topup_factor=factor_wgs_higher, + reads_threshold=threshold_reads, ) for app in apptag_wgs_tumor: if app in sample_apptag: - if is_topup(artifact=artifact): - adjust_wgs_topups( - artifact=artifact, - factor_wgs_lower=factor_wgs_lower, - factor_wgs_higher=factor_wgs_higher, - threshold_reads=threshold_reads, - ) - else: - adjust_reads(artifact=artifact, factor=factor_wgs_tumor) + adjust_wgs_artifact_reads( + artifact=artifact, + lower_topup_factor=factor_wgs_lower, + higher_topup_factor=factor_wgs_higher, + reads_threshold=threshold_reads, + factor=factor_wgs_tumor, + ) for app in apptag_tga: if app in sample_apptag: - if is_topup(artifact=artifact): - adjust_reads(artifact=artifact, factor=factor_tga_topups) - else: - adjust_reads(artifact=artifact, factor=factor_tga) - for app in apptag_micro: - if app in sample_apptag: - if is_topup(artifact=artifact): - reset_microbial_reads(artifact=artifact, reset_reads=reset_micro_reads) - else: - adjust_reads(artifact=artifact, factor=factor_micro) - for app in apptag_virus: - if app in sample_apptag and is_topup(artifact=artifact): - reset_microbial_reads(artifact=artifact, reset_reads=reset_virus_reads) + adjust_artifact_reads( + artifact=artifact, topup_factor=factor_tga_topups, factor=factor_tga + ) for app in apptag_rml: if app in sample_apptag: - if is_topup(artifact=artifact): - adjust_reads(artifact=artifact, factor=factor_rml_topups) - else: - adjust_reads(artifact=artifact, factor=factor_rml) + adjust_artifact_reads( + artifact=artifact, topup_factor=factor_rml_topups, factor=factor_rml + ) for app in apptag_rna: if app in sample_apptag: - if is_topup(artifact=artifact): - adjust_reads(artifact=artifact, factor=factor_rna_topups) - else: - adjust_reads(artifact=artifact, factor=factor_rna) + adjust_artifact_reads( + artifact=artifact, topup_factor=factor_rna_topups, factor=factor_rna + ) + for app in apptag_micro: + if app in sample_apptag: + adjust_micro_artifact_reads( + artifact=artifact, reset_amount=reset_micro_reads, factor=factor_micro + ) + for app in apptag_virus: + if app in sample_apptag: + adjust_micro_artifact_reads(artifact=artifact, reset_amount=reset_virus_reads) process.udf["Adjusted Reads to Sequence"] = True process.put() success_message = "Udfs have been updated on all samples."