Skip to content

Commit

Permalink
Add new EPP for the creation of ONT sample sheets (#483)(minor)
Browse files Browse the repository at this point in the history
### Added
- new EPP for generating ONT sample sheets
- new models for these sample sheets
  • Loading branch information
Karl-Svard authored Mar 25, 2024
1 parent 4575be5 commit e8e630d
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 0 deletions.
2 changes: 2 additions & 0 deletions cg_lims/EPPs/files/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from cg_lims.EPPs.files.ont_json_to_udf import parse_ont_report
from cg_lims.EPPs.files.placement_map.make_96well_placement_map import placement_map
from cg_lims.EPPs.files.pooling_map.make_pooling_map import pool_map
from cg_lims.EPPs.files.sample_sheet.create_ont_sample_sheet import create_ont_sample_sheet
from cg_lims.EPPs.files.sample_sheet.create_sample_sheet import create_sample_sheet
from cg_lims.EPPs.files.xml_to_udf import parse_run_parameters

Expand All @@ -26,6 +27,7 @@ def files(ctx):
# Attach each file-related EPP command to `files` (presumably the click
# group declared just above this fragment — confirm against the full module).
files.add_command(hamilton)
files.add_command(trouble_shoot_kapa)
files.add_command(make_barcode_csv)
files.add_command(create_ont_sample_sheet)
files.add_command(create_sample_sheet)
files.add_command(parse_run_parameters)
files.add_command(parse_ont_report)
106 changes: 106 additions & 0 deletions cg_lims/EPPs/files/sample_sheet/create_ont_sample_sheet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import logging
import sys
from pathlib import Path
from typing import List

import click
from cg_lims import options
from cg_lims.EPPs.files.sample_sheet.models import NanoporeSampleSheetHeader
from cg_lims.exceptions import LimsError, MissingUDFsError
from cg_lims.files.manage_csv_files import build_csv
from cg_lims.get.artifacts import get_artifacts
from cg_lims.get.samples import get_one_sample_from_artifact
from genologics.entities import Artifact, Process

LOG = logging.getLogger(__name__)


def get_flow_cell_id(artifact: Artifact) -> str:
    """Return the flow cell ID of an artifact, taken from its container's name.

    Args:
        artifact: The artifact whose container name is used as flow cell ID.

    Returns:
        The name of the container the artifact is placed in.

    Raises:
        MissingUDFsError: If the artifact has no container, or the container
            has no name.
    """
    container = artifact.container
    # Guard against an unplaced artifact: dereferencing a missing container
    # would raise AttributeError instead of the error type raised below.
    container_name = container.name if container is not None else None
    if not container_name:
        raise MissingUDFsError(f"Artifact {artifact.name} is missing a flow cell ID!")
    return container_name


def get_flow_cell_type(process: Process) -> str:
    """Return the flow cell type used for the sequencing run.

    Args:
        process: The process whose "ONT Flow Cell Type" UDF is read.

    Returns:
        The value of the "ONT Flow Cell Type" UDF.

    Raises:
        MissingUDFsError: If the UDF is unset or empty.
    """
    # Read the UDF once and reuse the value for both the check and the return.
    flow_cell_type: str = process.udf.get("ONT Flow Cell Type")
    if not flow_cell_type:
        raise MissingUDFsError("Sample sheet generation requires a flow cell type!")
    return flow_cell_type


def get_sample_id(artifact: Artifact) -> str:
    """Return the ID of the sample resolved for the given artifact.

    The sample is looked up with `get_one_sample_from_artifact`.
    """
    sample = get_one_sample_from_artifact(artifact=artifact)
    return sample.id


def get_experiment_name(process: Process) -> str:
    """Return the experiment name used for the sequencing run.

    Args:
        process: The process whose "Experiment Name" UDF is read.

    Returns:
        The value of the "Experiment Name" UDF.

    Raises:
        MissingUDFsError: If the UDF is unset or empty.
    """
    # Read the UDF once and reuse the value for both the check and the return.
    experiment_name: str = process.udf.get("Experiment Name")
    if not experiment_name:
        raise MissingUDFsError("Sample sheet generation requires an experiment name!")
    return experiment_name


def get_kit(process: Process) -> str:
    """Build the kit string written to the sample sheet.

    The "ONT Prep Kit" UDF is mandatory. When the "ONT Expansion Kit" UDF is
    also set, it is appended to the prep kit, separated by a single space.

    Raises:
        MissingUDFsError: If the prep kit UDF is unset or empty.
    """
    prep_kit: str = process.udf.get("ONT Prep Kit")
    add_on_kit: str = process.udf.get("ONT Expansion Kit")
    if not prep_kit:
        raise MissingUDFsError("Sample sheet generation requires a library kit name!")
    return f"{prep_kit} {add_on_kit}" if add_on_kit else prep_kit


def get_header() -> List[str]:
    """Return the sample sheet column names, in column order."""
    columns = NanoporeSampleSheetHeader
    return [
        columns.FLOW_CELL_ID,
        columns.FLOW_CELL_PROD_CODE,
        columns.SAMPLE_ID,
        columns.EXPERIMENT_ID,
        columns.KIT,
    ]


def get_row(artifact: Artifact, process: Process) -> List[str]:
    """Assemble the sample sheet row for a single artifact.

    Values are collected in the order: flow cell ID, flow cell type,
    sample ID, experiment name, kit.
    """
    flow_cell_id = get_flow_cell_id(artifact=artifact)
    flow_cell_type = get_flow_cell_type(process=process)
    sample_id = get_sample_id(artifact=artifact)
    experiment_name = get_experiment_name(process=process)
    kit = get_kit(process=process)
    return [flow_cell_id, flow_cell_type, sample_id, experiment_name, kit]


def get_sample_sheet_content(process: Process) -> List[List[str]]:
    """Return one sample sheet row per artifact returned by `get_artifacts`."""
    return [
        get_row(artifact=artifact, process=process)
        for artifact in get_artifacts(process=process)
    ]


@click.command()
@options.file_placeholder(help="File placeholder name.")
@click.pass_context
def create_ont_sample_sheet(ctx, file: str):
    """Create an Oxford Nanopore sample sheet .csv file from an 'ONT Start Sequencing' step."""
    LOG.info(f"Running {ctx.command_path} with params: {ctx.params}")

    process: Process = ctx.obj["process"]

    try:
        columns: List[str] = get_header()
        rows: List[List[str]] = get_sample_sheet_content(process=process)
        # The output name combines the file placeholder with the experiment name.
        experiment_name: str = get_experiment_name(process=process)
        output_path: Path = Path(f"{file}_sample_sheet_{experiment_name}.csv")
        build_csv(rows=rows, headers=columns, file=output_path)
        success_message: str = "The sample sheet was successfully generated."
        LOG.info(success_message)
        click.echo(success_message)
    except LimsError as error:
        # Log the failure and exit with the same message as exit status text.
        LOG.error(error.message)
        sys.exit(error.message)
8 changes: 8 additions & 0 deletions cg_lims/EPPs/files/sample_sheet/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,3 +178,11 @@ def get_bclconversion_data_row(self) -> str:
if self.run_settings.barcode_mismatches:
line = line + f",{self.barcode_mismatch_index_1},{self.barcode_mismatch_index_2}"
return line + "\n"


class NanoporeSampleSheetHeader(StrEnum):
FLOW_CELL_ID: str = "flow_cell_id"
FLOW_CELL_PROD_CODE: str = "flow_cell_product_code"
SAMPLE_ID: str = "sample_id"
EXPERIMENT_ID: str = "experiment_id"
KIT: str = "kit"

0 comments on commit e8e630d

Please sign in to comment.