From 3ff827e9cf2c6cb54acf84f20dc0c765f4213a9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20Sv=C3=A4rd?= <60181709+Karl-Svard@users.noreply.github.com> Date: Wed, 25 Sep 2024 12:03:53 +0200 Subject: [PATCH] Count NTC reads in sequencing QC check (#531)(patch) ### Changed - NTCs now won't fail when having either reads or Q30 values below the given QC thresholds --- cg_lims/EPPs/qc/sequencing_quality_checker.py | 40 ++++++++++++++----- cg_lims/EPPs/qc/sequencing_quality_control.py | 2 +- cg_lims/get/samples.py | 14 +++++++ .../qc/test_sequencing_quality_checker.py | 31 ++++++++------ .../novaseq_standard/samples/ACC9628A2.xml | 1 + 5 files changed, 65 insertions(+), 23 deletions(-) diff --git a/cg_lims/EPPs/qc/sequencing_quality_checker.py b/cg_lims/EPPs/qc/sequencing_quality_checker.py index a175dc77..21044333 100644 --- a/cg_lims/EPPs/qc/sequencing_quality_checker.py +++ b/cg_lims/EPPs/qc/sequencing_quality_checker.py @@ -3,8 +3,12 @@ from cg_lims.EPPs.qc.models import SampleLane, SampleLaneSet from cg_lims.EPPs.qc.sequencing_artifact_manager import SequencingArtifactManager +from cg_lims.exceptions import MissingSampleError +from cg_lims.get.samples import is_negative_control from cg_lims.models.sample_lane_sequencing_metrics import SampleLaneSequencingMetrics from cg_lims.status_db_api import StatusDBAPI +from genologics.entities import Sample +from genologics.lims import Lims LOG = logging.getLogger(__name__) @@ -39,15 +43,17 @@ def _get_sequencing_metrics(self) -> List[SampleLaneSequencingMetrics]: self.metrics = metrics return metrics - def validate_sequencing_quality(self) -> str: + def validate_sequencing_quality(self, lims: Lims) -> str: """Validate the sequencing data for each sample in all lanes on a flow cell based on the number of reads and q30 scores.""" LOG.info(f"Validating sequencing quality for flow cell {self.flow_cell_name}") sequencing_metrics = self._get_sequencing_metrics() for metrics in sequencing_metrics: - passed_qc: bool = 
self._quality_control(metrics) - self._update_sample_with_quality_results(metrics, passed_qc) + passed_qc: bool = self._quality_control(metrics=metrics, lims=lims) + self._update_sample_with_quality_results( + metrics=metrics, passed_quality_control=passed_qc + ) if not passed_qc: self.failed_qc_count += 1 @@ -67,14 +73,28 @@ def _update_sample_with_quality_results( passed_quality_control=passed_quality_control, ) - def _quality_control(self, metrics: SampleLaneSequencingMetrics) -> bool: - return self._passes_quality_thresholds( - reads=metrics.sample_total_reads_in_lane, - q30_score=metrics.sample_base_percentage_passing_q30, - ) + def _quality_control(self, metrics: SampleLaneSequencingMetrics, lims: Lims) -> bool: + try: + sample: Sample = Sample(lims=lims, id=metrics.sample_internal_id) + negative_control: bool = is_negative_control(sample=sample) + return self._passes_quality_thresholds( + reads=metrics.sample_total_reads_in_lane, + q30_score=metrics.sample_base_percentage_passing_q30, + negative_control=negative_control, + ) + except MissingSampleError: + return self._passes_quality_thresholds( + reads=metrics.sample_total_reads_in_lane, + q30_score=metrics.sample_base_percentage_passing_q30, + negative_control=False, + ) - def _passes_quality_thresholds(self, q30_score: float, reads: int) -> bool: - """Check if the provided metrics pass the minimum quality thresholds.""" + def _passes_quality_thresholds( + self, q30_score: float, reads: int, negative_control: bool + ) -> bool: + """Check if the provided metrics pass the minimum quality thresholds. 
Negative controls always pass.""" + if negative_control: + return True passes_q30_threshold = q30_score >= self.q30_threshold passes_read_threshold = reads >= self.READS_MIN_THRESHOLD return passes_q30_threshold and passes_read_threshold diff --git a/cg_lims/EPPs/qc/sequencing_quality_control.py b/cg_lims/EPPs/qc/sequencing_quality_control.py index 9b3f7c5f..6d990dba 100644 --- a/cg_lims/EPPs/qc/sequencing_quality_control.py +++ b/cg_lims/EPPs/qc/sequencing_quality_control.py @@ -26,7 +26,7 @@ def sequencing_quality_control(ctx): cg_api_client=status_db_api, ) - quality_summary: str = quality_checker.validate_sequencing_quality() + quality_summary: str = quality_checker.validate_sequencing_quality(lims=lims) brief_summary: str = quality_checker.get_brief_summary() if quality_checker.samples_failed_quality_control(): diff --git a/cg_lims/get/samples.py b/cg_lims/get/samples.py index c1ce872f..7e3fdcca 100644 --- a/cg_lims/get/samples.py +++ b/cg_lims/get/samples.py @@ -1,5 +1,6 @@ import logging from typing import List +from xml.etree.ElementTree import ParseError from cg_lims.exceptions import MissingSampleError from genologics.entities import Artifact, Process, Sample @@ -37,3 +38,16 @@ def get_one_sample_from_artifact(artifact: Artifact) -> Sample: raise MissingSampleError(message=more_than_one_message) return samples[0] + + +def is_negative_control(sample: Sample) -> bool: + """Check if a given sample is a negative control.""" + try: + control: str = sample.udf.get("Control") + if control == "negative": + return True + return False + except ParseError: + error_message = f"Sample {sample} can't be found in the database." 
+ LOG.error(error_message) + raise MissingSampleError(error_message) diff --git a/tests/EPPs/qc/test_sequencing_quality_checker.py b/tests/EPPs/qc/test_sequencing_quality_checker.py index 95a11e43..224adfb2 100644 --- a/tests/EPPs/qc/test_sequencing_quality_checker.py +++ b/tests/EPPs/qc/test_sequencing_quality_checker.py @@ -1,6 +1,7 @@ from typing import List from cg_lims.EPPs.qc.sequencing_quality_checker import SequencingQualityChecker +from genologics.lims import Lims from mock import Mock @@ -8,12 +9,13 @@ def test_quality_control_of_flow_cell_with_all_passing( sequencing_quality_checker: SequencingQualityChecker, novaseq_passing_metrics_response: Mock, mocker, + lims: Lims, ): - # GIVEN a flow cell where all samples passes the quality control + # GIVEN a flow cell with one negative control where all samples pass the quality control mocker.patch("requests.get", return_value=novaseq_passing_metrics_response) # WHEN validating the sequencing quality - sequencing_quality_checker.validate_sequencing_quality() + sequencing_quality_checker.validate_sequencing_quality(lims=lims) # THEN no samples should fail the quality control assert sequencing_quality_checker.failed_qc_count == 0 @@ -25,15 +27,16 @@ def test_all_samples_fail_q30( novaseq_sample_ids: List[str], novaseq_lanes, mocker, + lims: Lims, ): - # GIVEN a flow cell where all samples fail the quality control on Q30 + # GIVEN a flow cell with one negative control where all samples fail the quality control on Q30 mocker.patch("requests.get", return_value=novaseq_q30_fail_response) # WHEN validating the sequencing quality - sequencing_quality_checker.validate_sequencing_quality() + sequencing_quality_checker.validate_sequencing_quality(lims=lims) # THEN all samples in all lanes should fail the quality control - expected_fails: int = novaseq_lanes * len(novaseq_sample_ids) + expected_fails: int = novaseq_lanes * (len(novaseq_sample_ids) - 1) assert sequencing_quality_checker.failed_qc_count == expected_fails @@ 
-43,15 +46,16 @@ def test_all_samples_have_too_few_reads( novaseq_sample_ids: List[str], novaseq_lanes: int, mocker, + lims: Lims, ): - # GIVEN a flow cell where all samples in all lanes have too few reads + # GIVEN a flow cell with one negative control where all samples in all lanes have too few reads mocker.patch("requests.get", return_value=novaseq_reads_fail_response) # WHEN validating the sequencing quality - sequencing_quality_checker.validate_sequencing_quality() + sequencing_quality_checker.validate_sequencing_quality(lims=lims) # THEN all samples in all lanes should fail the quality control - expected_fails: int = novaseq_lanes * len(novaseq_sample_ids) + expected_fails: int = novaseq_lanes * (len(novaseq_sample_ids) - 1) assert sequencing_quality_checker.failed_qc_count == expected_fails @@ -59,12 +63,13 @@ def test_some_samples_fail_quality_control( sequencing_quality_checker: SequencingQualityChecker, novaseq_two_failing_metrics_response: Mock, mocker, + lims: Lims, ): - # GIVEN a flow cell where some samples fail the quality control + # GIVEN a flow cell with one negative control where some samples (not the NTC) fail the quality control mocker.patch("requests.get", return_value=novaseq_two_failing_metrics_response) # WHEN validating the sequencing quality - sequencing_quality_checker.validate_sequencing_quality() + sequencing_quality_checker.validate_sequencing_quality(lims=lims) # THEN some samples in all lanes should fail the quality control assert sequencing_quality_checker.failed_qc_count == 2 @@ -76,12 +81,13 @@ def test_metrics_missing_for_samples_in_lane( missing_sample_id: str, missing_lane: int, mocker, + lims: Lims, ): # GIVEN metrics missing data for a sample in lims mocker.patch("requests.get", return_value=novaseq_missing_metrics_for_sample_in_lane_response) # WHEN validating the sequencing quality - summary: str = sequencing_quality_checker.validate_sequencing_quality() + summary: str = 
sequencing_quality_checker.validate_sequencing_quality(lims=lims) # THEN the sample with missing metrics should fail qc assert sequencing_quality_checker.failed_qc_count == 1 @@ -96,12 +102,13 @@ def test_sample_missing_in_lims( novaseq_metrics_with_extra_sample_response: Mock, sample_id_missing_in_lims: str, mocker, + lims: Lims, ): # GIVEN metrics with a sample not in lims mocker.patch("requests.get", return_value=novaseq_metrics_with_extra_sample_response) # WHEN validating the sequencing quality - summary: str = sequencing_quality_checker.validate_sequencing_quality() + summary: str = sequencing_quality_checker.validate_sequencing_quality(lims=lims) # THEN all samples pass the quality control assert sequencing_quality_checker.failed_qc_count == 0 diff --git a/tests/fixtures/novaseq_standard/samples/ACC9628A2.xml b/tests/fixtures/novaseq_standard/samples/ACC9628A2.xml index 31e64475..cadb84ee 100644 --- a/tests/fixtures/novaseq_standard/samples/ACC9628A2.xml +++ b/tests/fixtures/novaseq_standard/samples/ACC9628A2.xml @@ -31,4 +31,5 @@ 0 NEXTflex® v2 UDI Barcodes 1 - 96 13 + negative \ No newline at end of file