Skip to content

Commit

Permalink
Count NTC reads in sequencing QC check (#531)(patch)
Browse files Browse the repository at this point in the history
### Changed
- NTCs (negative controls) no longer fail the sequencing QC when their read count or Q30 value is below the given QC thresholds
  • Loading branch information
Karl-Svard authored Sep 25, 2024
1 parent 57ffdb8 commit 3ff827e
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 23 deletions.
40 changes: 30 additions & 10 deletions cg_lims/EPPs/qc/sequencing_quality_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,12 @@

from cg_lims.EPPs.qc.models import SampleLane, SampleLaneSet
from cg_lims.EPPs.qc.sequencing_artifact_manager import SequencingArtifactManager
from cg_lims.exceptions import MissingSampleError
from cg_lims.get.samples import is_negative_control
from cg_lims.models.sample_lane_sequencing_metrics import SampleLaneSequencingMetrics
from cg_lims.status_db_api import StatusDBAPI
from genologics.entities import Sample
from genologics.lims import Lims

LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -39,15 +43,17 @@ def _get_sequencing_metrics(self) -> List[SampleLaneSequencingMetrics]:
self.metrics = metrics
return metrics

def validate_sequencing_quality(self) -> str:
def validate_sequencing_quality(self, lims: Lims) -> str:
"""Validate the sequencing data for each sample in all lanes on a flow cell based on the number of reads and q30 scores."""
LOG.info(f"Validating sequencing quality for flow cell {self.flow_cell_name}")

sequencing_metrics = self._get_sequencing_metrics()

for metrics in sequencing_metrics:
passed_qc: bool = self._quality_control(metrics)
self._update_sample_with_quality_results(metrics, passed_qc)
passed_qc: bool = self._quality_control(metrics=metrics, lims=lims)
self._update_sample_with_quality_results(
metrics=metrics, passed_quality_control=passed_qc
)

if not passed_qc:
self.failed_qc_count += 1
Expand All @@ -67,14 +73,28 @@ def _update_sample_with_quality_results(
passed_quality_control=passed_quality_control,
)

def _quality_control(self, metrics: SampleLaneSequencingMetrics) -> bool:
return self._passes_quality_thresholds(
reads=metrics.sample_total_reads_in_lane,
q30_score=metrics.sample_base_percentage_passing_q30,
)
def _quality_control(self, metrics: SampleLaneSequencingMetrics, lims: Lims) -> bool:
    """Decide whether one sample-lane metrics entry passes the quality control.

    The sample is looked up in LIMS through ``metrics.sample_internal_id`` to find
    out whether it is a negative control. If that lookup raises
    ``MissingSampleError``, the sample is handled as a regular (non-control) sample.

    Args:
        metrics: Sequencing metrics holding the read count and Q30 value to check.
        lims: LIMS connection used to look up the sample.

    Returns:
        The result of ``_passes_quality_thresholds`` for the values in ``metrics``.
    """
    # Only the LIMS lookup can raise MissingSampleError, so keep the try body
    # limited to it and run the threshold check once afterwards.
    try:
        sample: Sample = Sample(lims=lims, id=metrics.sample_internal_id)
        negative_control: bool = is_negative_control(sample=sample)
    except MissingSampleError:
        # The control status is unknown without a LIMS sample: apply the thresholds.
        negative_control = False
    return self._passes_quality_thresholds(
        reads=metrics.sample_total_reads_in_lane,
        q30_score=metrics.sample_base_percentage_passing_q30,
        negative_control=negative_control,
    )

def _passes_quality_thresholds(self, q30_score: float, reads: int) -> bool:
"""Check if the provided metrics pass the minimum quality thresholds."""
def _passes_quality_thresholds(
self, q30_score: float, reads: int, negative_control: bool
) -> bool:
"""Check if the provided metrics pass the minimum quality thresholds. Negative controls always pass."""
if negative_control:
return True
passes_q30_threshold = q30_score >= self.q30_threshold
passes_read_threshold = reads >= self.READS_MIN_THRESHOLD
return passes_q30_threshold and passes_read_threshold
Expand Down
2 changes: 1 addition & 1 deletion cg_lims/EPPs/qc/sequencing_quality_control.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def sequencing_quality_control(ctx):
cg_api_client=status_db_api,
)

quality_summary: str = quality_checker.validate_sequencing_quality()
quality_summary: str = quality_checker.validate_sequencing_quality(lims=lims)
brief_summary: str = quality_checker.get_brief_summary()

if quality_checker.samples_failed_quality_control():
Expand Down
14 changes: 14 additions & 0 deletions cg_lims/get/samples.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
from typing import List
from xml.etree.ElementTree import ParseError

from cg_lims.exceptions import MissingSampleError
from genologics.entities import Artifact, Process, Sample
Expand Down Expand Up @@ -37,3 +38,16 @@ def get_one_sample_from_artifact(artifact: Artifact) -> Sample:
raise MissingSampleError(message=more_than_one_message)

return samples[0]


def is_negative_control(sample: Sample) -> bool:
    """Check if a given sample is a negative control.

    A sample counts as a negative control when its "Control" UDF equals "negative".

    Args:
        sample: The LIMS sample to inspect.

    Returns:
        True if the "Control" UDF is "negative"; False for any other value or
        when the UDF is not set.

    Raises:
        MissingSampleError: If reading the sample's UDFs raises a ParseError.
    """
    try:
        # Only the UDF access is guarded; it is the step that may raise ParseError.
        control = sample.udf.get("Control")
    except ParseError as error:
        error_message = f"Sample {sample} can't be found in the database."
        LOG.error(error_message)
        # Chain the original error so the root cause stays visible in tracebacks.
        raise MissingSampleError(message=error_message) from error
    return control == "negative"
31 changes: 19 additions & 12 deletions tests/EPPs/qc/test_sequencing_quality_checker.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
from typing import List

from cg_lims.EPPs.qc.sequencing_quality_checker import SequencingQualityChecker
from genologics.lims import Lims
from mock import Mock


def test_quality_control_of_flow_cell_with_all_passing(
sequencing_quality_checker: SequencingQualityChecker,
novaseq_passing_metrics_response: Mock,
mocker,
lims: Lims,
):
# GIVEN a flow cell where all samples passes the quality control
# GIVEN a flow cell with one negative control where all samples pass the quality control
mocker.patch("requests.get", return_value=novaseq_passing_metrics_response)

# WHEN validating the sequencing quality
sequencing_quality_checker.validate_sequencing_quality()
sequencing_quality_checker.validate_sequencing_quality(lims=lims)

# THEN no samples should fail the quality control
assert sequencing_quality_checker.failed_qc_count == 0
Expand All @@ -25,15 +27,16 @@ def test_all_samples_fail_q30(
novaseq_sample_ids: List[str],
novaseq_lanes,
mocker,
lims: Lims,
):
# GIVEN a flow cell where all samples fail the quality control on Q30
# GIVEN a flow cell with one negative control where all samples fail the quality control on Q30
mocker.patch("requests.get", return_value=novaseq_q30_fail_response)

# WHEN validating the sequencing quality
sequencing_quality_checker.validate_sequencing_quality()
sequencing_quality_checker.validate_sequencing_quality(lims=lims)

# THEN all samples except the negative control should fail the quality control in all lanes
expected_fails: int = novaseq_lanes * len(novaseq_sample_ids)
expected_fails: int = novaseq_lanes * (len(novaseq_sample_ids) - 1)
assert sequencing_quality_checker.failed_qc_count == expected_fails


Expand All @@ -43,28 +46,30 @@ def test_all_samples_have_too_few_reads(
novaseq_sample_ids: List[str],
novaseq_lanes: int,
mocker,
lims: Lims,
):
# GIVEN a flow cell where all samples in all lanes have too few reads
# GIVEN a flow cell with one negative control where all samples in all lanes have too few reads
mocker.patch("requests.get", return_value=novaseq_reads_fail_response)

# WHEN validating the sequencing quality
sequencing_quality_checker.validate_sequencing_quality()
sequencing_quality_checker.validate_sequencing_quality(lims=lims)

# THEN all samples except the negative control should fail the quality control in all lanes
expected_fails: int = novaseq_lanes * len(novaseq_sample_ids)
expected_fails: int = novaseq_lanes * (len(novaseq_sample_ids) - 1)
assert sequencing_quality_checker.failed_qc_count == expected_fails


def test_some_samples_fail_quality_control(
sequencing_quality_checker: SequencingQualityChecker,
novaseq_two_failing_metrics_response: Mock,
mocker,
lims: Lims,
):
# GIVEN a flow cell where some samples fail the quality control
# GIVEN a flow cell with one negative control where some samples (not the NTC) fail the quality control
mocker.patch("requests.get", return_value=novaseq_two_failing_metrics_response)

# WHEN validating the sequencing quality
sequencing_quality_checker.validate_sequencing_quality()
sequencing_quality_checker.validate_sequencing_quality(lims=lims)

# THEN some samples in all lanes should fail the quality control
assert sequencing_quality_checker.failed_qc_count == 2
Expand All @@ -76,12 +81,13 @@ def test_metrics_missing_for_samples_in_lane(
missing_sample_id: str,
missing_lane: int,
mocker,
lims: Lims,
):
# GIVEN metrics missing data for a sample in lims
mocker.patch("requests.get", return_value=novaseq_missing_metrics_for_sample_in_lane_response)

# WHEN validating the sequencing quality
summary: str = sequencing_quality_checker.validate_sequencing_quality()
summary: str = sequencing_quality_checker.validate_sequencing_quality(lims=lims)

# THEN the sample with missing metrics should fail qc
assert sequencing_quality_checker.failed_qc_count == 1
Expand All @@ -96,12 +102,13 @@ def test_sample_missing_in_lims(
novaseq_metrics_with_extra_sample_response: Mock,
sample_id_missing_in_lims: str,
mocker,
lims: Lims,
):
# GIVEN metrics with a sample not in lims
mocker.patch("requests.get", return_value=novaseq_metrics_with_extra_sample_response)

# WHEN validating the sequencing quality
summary: str = sequencing_quality_checker.validate_sequencing_quality()
summary: str = sequencing_quality_checker.validate_sequencing_quality(lims=lims)

# THEN all samples pass the quality control
assert sequencing_quality_checker.failed_qc_count == 0
Expand Down
1 change: 1 addition & 0 deletions tests/fixtures/novaseq_standard/samples/ACC9628A2.xml
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,5 @@
<udf:field type="Numeric" name="Reads missing (M)">0</udf:field>
<udf:field type="String" name="Index type">NEXTflex® v2 UDI Barcodes 1 - 96</udf:field>
<udf:field type="String" name="Index number">13</udf:field>
<udf:field type="String" name="Control">negative</udf:field>
</smp:sample>

0 comments on commit 3ff827e

Please sign in to comment.