From 3ff827e9cf2c6cb54acf84f20dc0c765f4213a9f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Karl=20Sv=C3=A4rd?=
<60181709+Karl-Svard@users.noreply.github.com>
Date: Wed, 25 Sep 2024 12:03:53 +0200
Subject: [PATCH] Count NTC reads in sequencing QC check (#531)(patch)
### Changed
- Negative controls (NTCs) no longer fail the sequencing QC check when their reads or Q30 values are below the given QC thresholds
---
cg_lims/EPPs/qc/sequencing_quality_checker.py | 40 ++++++++++++++-----
cg_lims/EPPs/qc/sequencing_quality_control.py | 2 +-
cg_lims/get/samples.py | 14 +++++++
.../qc/test_sequencing_quality_checker.py | 31 ++++++++------
.../novaseq_standard/samples/ACC9628A2.xml | 1 +
5 files changed, 65 insertions(+), 23 deletions(-)
diff --git a/cg_lims/EPPs/qc/sequencing_quality_checker.py b/cg_lims/EPPs/qc/sequencing_quality_checker.py
index a175dc77..21044333 100644
--- a/cg_lims/EPPs/qc/sequencing_quality_checker.py
+++ b/cg_lims/EPPs/qc/sequencing_quality_checker.py
@@ -3,8 +3,12 @@
from cg_lims.EPPs.qc.models import SampleLane, SampleLaneSet
from cg_lims.EPPs.qc.sequencing_artifact_manager import SequencingArtifactManager
+from cg_lims.exceptions import MissingSampleError
+from cg_lims.get.samples import is_negative_control
from cg_lims.models.sample_lane_sequencing_metrics import SampleLaneSequencingMetrics
from cg_lims.status_db_api import StatusDBAPI
+from genologics.entities import Sample
+from genologics.lims import Lims
LOG = logging.getLogger(__name__)
@@ -39,15 +43,17 @@ def _get_sequencing_metrics(self) -> List[SampleLaneSequencingMetrics]:
self.metrics = metrics
return metrics
- def validate_sequencing_quality(self) -> str:
+ def validate_sequencing_quality(self, lims: Lims) -> str:
"""Validate the sequencing data for each sample in all lanes on a flow cell based on the number of reads and q30 scores."""
LOG.info(f"Validating sequencing quality for flow cell {self.flow_cell_name}")
sequencing_metrics = self._get_sequencing_metrics()
for metrics in sequencing_metrics:
- passed_qc: bool = self._quality_control(metrics)
- self._update_sample_with_quality_results(metrics, passed_qc)
+ passed_qc: bool = self._quality_control(metrics=metrics, lims=lims)
+ self._update_sample_with_quality_results(
+ metrics=metrics, passed_quality_control=passed_qc
+ )
if not passed_qc:
self.failed_qc_count += 1
@@ -67,14 +73,28 @@ def _update_sample_with_quality_results(
passed_quality_control=passed_quality_control,
)
- def _quality_control(self, metrics: SampleLaneSequencingMetrics) -> bool:
- return self._passes_quality_thresholds(
- reads=metrics.sample_total_reads_in_lane,
- q30_score=metrics.sample_base_percentage_passing_q30,
- )
+ def _quality_control(self, metrics: SampleLaneSequencingMetrics, lims: Lims) -> bool:
+ try:
+ sample: Sample = Sample(lims=lims, id=metrics.sample_internal_id)
+ negative_control: bool = is_negative_control(sample=sample)
+ return self._passes_quality_thresholds(
+ reads=metrics.sample_total_reads_in_lane,
+ q30_score=metrics.sample_base_percentage_passing_q30,
+ negative_control=negative_control,
+ )
+ except MissingSampleError:
+ return self._passes_quality_thresholds(
+ reads=metrics.sample_total_reads_in_lane,
+ q30_score=metrics.sample_base_percentage_passing_q30,
+ negative_control=False,
+ )
- def _passes_quality_thresholds(self, q30_score: float, reads: int) -> bool:
- """Check if the provided metrics pass the minimum quality thresholds."""
+ def _passes_quality_thresholds(
+ self, q30_score: float, reads: int, negative_control: bool
+ ) -> bool:
+ """Check if the provided metrics pass the minimum quality thresholds. Negative controls always pass."""
+ if negative_control:
+ return True
passes_q30_threshold = q30_score >= self.q30_threshold
passes_read_threshold = reads >= self.READS_MIN_THRESHOLD
return passes_q30_threshold and passes_read_threshold
diff --git a/cg_lims/EPPs/qc/sequencing_quality_control.py b/cg_lims/EPPs/qc/sequencing_quality_control.py
index 9b3f7c5f..6d990dba 100644
--- a/cg_lims/EPPs/qc/sequencing_quality_control.py
+++ b/cg_lims/EPPs/qc/sequencing_quality_control.py
@@ -26,7 +26,7 @@ def sequencing_quality_control(ctx):
cg_api_client=status_db_api,
)
- quality_summary: str = quality_checker.validate_sequencing_quality()
+ quality_summary: str = quality_checker.validate_sequencing_quality(lims=lims)
brief_summary: str = quality_checker.get_brief_summary()
if quality_checker.samples_failed_quality_control():
diff --git a/cg_lims/get/samples.py b/cg_lims/get/samples.py
index c1ce872f..7e3fdcca 100644
--- a/cg_lims/get/samples.py
+++ b/cg_lims/get/samples.py
@@ -1,5 +1,6 @@
import logging
from typing import List
+from xml.etree.ElementTree import ParseError
from cg_lims.exceptions import MissingSampleError
from genologics.entities import Artifact, Process, Sample
@@ -37,3 +38,16 @@ def get_one_sample_from_artifact(artifact: Artifact) -> Sample:
raise MissingSampleError(message=more_than_one_message)
return samples[0]
+
+
+def is_negative_control(sample: Sample) -> bool:
+ """Check if a given sample is a negative control."""
+ try:
+ control: str = sample.udf.get("Control")
+ if control == "negative":
+ return True
+ return False
+ except ParseError:
+ error_message = f"Sample {sample} can't be found in the database."
+ LOG.error(error_message)
+ raise MissingSampleError(error_message)
diff --git a/tests/EPPs/qc/test_sequencing_quality_checker.py b/tests/EPPs/qc/test_sequencing_quality_checker.py
index 95a11e43..224adfb2 100644
--- a/tests/EPPs/qc/test_sequencing_quality_checker.py
+++ b/tests/EPPs/qc/test_sequencing_quality_checker.py
@@ -1,6 +1,7 @@
from typing import List
from cg_lims.EPPs.qc.sequencing_quality_checker import SequencingQualityChecker
+from genologics.lims import Lims
from mock import Mock
@@ -8,12 +9,13 @@ def test_quality_control_of_flow_cell_with_all_passing(
sequencing_quality_checker: SequencingQualityChecker,
novaseq_passing_metrics_response: Mock,
mocker,
+ lims: Lims,
):
- # GIVEN a flow cell where all samples passes the quality control
+ # GIVEN a flow cell with one negative control where all samples pass the quality control
mocker.patch("requests.get", return_value=novaseq_passing_metrics_response)
# WHEN validating the sequencing quality
- sequencing_quality_checker.validate_sequencing_quality()
+ sequencing_quality_checker.validate_sequencing_quality(lims=lims)
# THEN no samples should fail the quality control
assert sequencing_quality_checker.failed_qc_count == 0
@@ -25,15 +27,16 @@ def test_all_samples_fail_q30(
novaseq_sample_ids: List[str],
novaseq_lanes,
mocker,
+ lims: Lims,
):
- # GIVEN a flow cell where all samples fail the quality control on Q30
+ # GIVEN a flow cell with one negative control where all samples fail the quality control on Q30
mocker.patch("requests.get", return_value=novaseq_q30_fail_response)
# WHEN validating the sequencing quality
- sequencing_quality_checker.validate_sequencing_quality()
+ sequencing_quality_checker.validate_sequencing_quality(lims=lims)
# THEN all samples in all lanes should fail the quality control
- expected_fails: int = novaseq_lanes * len(novaseq_sample_ids)
+ expected_fails: int = novaseq_lanes * (len(novaseq_sample_ids) - 1)
assert sequencing_quality_checker.failed_qc_count == expected_fails
@@ -43,15 +46,16 @@ def test_all_samples_have_too_few_reads(
novaseq_sample_ids: List[str],
novaseq_lanes: int,
mocker,
+ lims: Lims,
):
- # GIVEN a flow cell where all samples in all lanes have too few reads
+ # GIVEN a flow cell with one negative control where all samples in all lanes have too few reads
mocker.patch("requests.get", return_value=novaseq_reads_fail_response)
# WHEN validating the sequencing quality
- sequencing_quality_checker.validate_sequencing_quality()
+ sequencing_quality_checker.validate_sequencing_quality(lims=lims)
# THEN all samples in all lanes should fail the quality control
- expected_fails: int = novaseq_lanes * len(novaseq_sample_ids)
+ expected_fails: int = novaseq_lanes * (len(novaseq_sample_ids) - 1)
assert sequencing_quality_checker.failed_qc_count == expected_fails
@@ -59,12 +63,13 @@ def test_some_samples_fail_quality_control(
sequencing_quality_checker: SequencingQualityChecker,
novaseq_two_failing_metrics_response: Mock,
mocker,
+ lims: Lims,
):
- # GIVEN a flow cell where some samples fail the quality control
+ # GIVEN a flow cell with one negative control where some samples (not the NTC) fail the quality control
mocker.patch("requests.get", return_value=novaseq_two_failing_metrics_response)
# WHEN validating the sequencing quality
- sequencing_quality_checker.validate_sequencing_quality()
+ sequencing_quality_checker.validate_sequencing_quality(lims=lims)
# THEN some samples in all lanes should fail the quality control
assert sequencing_quality_checker.failed_qc_count == 2
@@ -76,12 +81,13 @@ def test_metrics_missing_for_samples_in_lane(
missing_sample_id: str,
missing_lane: int,
mocker,
+ lims: Lims,
):
# GIVEN metrics missing data for a sample in lims
mocker.patch("requests.get", return_value=novaseq_missing_metrics_for_sample_in_lane_response)
# WHEN validating the sequencing quality
- summary: str = sequencing_quality_checker.validate_sequencing_quality()
+ summary: str = sequencing_quality_checker.validate_sequencing_quality(lims=lims)
# THEN the sample with missing metrics should fail qc
assert sequencing_quality_checker.failed_qc_count == 1
@@ -96,12 +102,13 @@ def test_sample_missing_in_lims(
novaseq_metrics_with_extra_sample_response: Mock,
sample_id_missing_in_lims: str,
mocker,
+ lims: Lims,
):
# GIVEN metrics with a sample not in lims
mocker.patch("requests.get", return_value=novaseq_metrics_with_extra_sample_response)
# WHEN validating the sequencing quality
- summary: str = sequencing_quality_checker.validate_sequencing_quality()
+ summary: str = sequencing_quality_checker.validate_sequencing_quality(lims=lims)
# THEN all samples pass the quality control
assert sequencing_quality_checker.failed_qc_count == 0
diff --git a/tests/fixtures/novaseq_standard/samples/ACC9628A2.xml b/tests/fixtures/novaseq_standard/samples/ACC9628A2.xml
index 31e64475..cadb84ee 100644
--- a/tests/fixtures/novaseq_standard/samples/ACC9628A2.xml
+++ b/tests/fixtures/novaseq_standard/samples/ACC9628A2.xml
@@ -31,4 +31,5 @@
0
NEXTflex® v2 UDI Barcodes 1 - 96
13
+ negative
\ No newline at end of file