Skip to content

Commit

Permalink
SMRT Link EPPs (#563)
Browse files Browse the repository at this point in the history
### Added
- New EPP for creating Sample Setup CSVs
- New EPP for creating pooling calculation CSVs
- New EPP for calculating Revio pooling volumes
- New EPP for automatically setting Revio sequencing options
- New models and classes for SMRT Link CSV creation
  • Loading branch information
Karl-Svard authored Dec 12, 2024
1 parent 4a56481 commit a93b5ba
Show file tree
Hide file tree
Showing 15 changed files with 724 additions and 18 deletions.
4 changes: 4 additions & 0 deletions cg_lims/EPPs/files/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from cg_lims.EPPs.files.pooling_map.make_pooling_map import pool_map
from cg_lims.EPPs.files.sample_sheet.create_ont_sample_sheet import create_ont_sample_sheet
from cg_lims.EPPs.files.sample_sheet.create_sample_sheet import create_sample_sheet
from cg_lims.EPPs.files.smrt_link.run_design import create_smrtlink_run_design
from cg_lims.EPPs.files.smrt_link.sample_setup import create_smrtlink_sample_setup
from cg_lims.EPPs.files.xml_to_udf import parse_run_parameters


Expand All @@ -31,3 +33,5 @@ def files(ctx):
files.add_command(create_sample_sheet)
files.add_command(parse_run_parameters)
files.add_command(parse_ont_report)
files.add_command(create_smrtlink_sample_setup)
files.add_command(create_smrtlink_run_design)
15 changes: 1 addition & 14 deletions cg_lims/EPPs/files/sample_sheet/create_sample_sheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,26 +14,13 @@
SampleSheetHeader,
)
from cg_lims.exceptions import InvalidValueError, LimsError
from cg_lims.get.artifacts import get_artifact_lane, get_artifacts
from cg_lims.get.artifacts import get_artifact_lane, get_artifacts, get_non_pooled_artifacts
from genologics.entities import Artifact, Process, ReagentType
from genologics.lims import Lims

LOG = logging.getLogger(__name__)


def get_non_pooled_artifacts(artifact: Artifact) -> List[Artifact]:
    """Collect the single-sample artifacts that the given artifact is built from.

    An artifact with exactly one sample is returned on its own in a one-element list.
    Any other artifact is resolved recursively through its input artifacts, and the
    results are concatenated in the order the inputs are listed. The returned
    artifacts are the ones expected to hold the reagent label.
    """
    # Base case: the artifact is not a pool.
    if len(artifact.samples) == 1:
        return [artifact]

    non_pooled: List[Artifact] = []
    for parent in artifact.input_artifact_list():
        non_pooled += get_non_pooled_artifacts(parent)
    return non_pooled


def get_reagent_label(artifact: Artifact) -> Optional[str]:
"""Return the first and only reagent label from an artifact"""
labels: List[str] = artifact.reagent_labels
Expand Down
Empty file.
306 changes: 306 additions & 0 deletions cg_lims/EPPs/files/smrt_link/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,306 @@
import logging
import re
from typing import Any, Dict, List, Optional, Pattern

import pandas as pd
from cg_lims.enums import StrEnum
from cg_lims.exceptions import MissingUDFsError
from cg_lims.get.artifacts import get_artifacts, get_non_pooled_artifacts
from cg_lims.get.fields import get_smrtbell_adapter_name
from genologics.lims import Artifact, Container, Process

LOG = logging.getLogger(__name__)


# Column names of the Sample Setup CSV, in file order.
# SampleSetup.get_sample_setup_row() emits its values positionally, one entry per
# column listed here, so the two must be kept in sync when columns are added or moved.
SAMPLE_SETUP_CSV_HEADER: List[str] = [
    "Sample Name",
    "Comment",
    "System Name",
    "Binding Kit",
    "Plate",
    "Well",
    "Number of Samples",
    "Application",
    "Available Starting Sample Volume (uL)",
    "Starting Sample Concentration (ng/uL)",
    "Insert Size (bp)",
    "Control Kit",
    "Cleanup Anticipated Yield (%)",
    "On Plate Loading Concentration (pM)",
    "Cells to Bind (cells)",
    "Prepare Entire Sample",
    "Sequencing Primer",
    "Target Annealing Sample Concentration (nM)",
    "Target Annealing Primer Concentration (nM)",
    "Target Binding Concentration (nM)",
    "Target Polymerase Concentration (X)",
    "Binding Time (min)",
    "Cleanup Bead Type",
    "Cleanup Bead Concentration (X)",
    "Minimum Pipetting Volume (uL)",
    "Percent of Annealing Reaction To Use In Binding (%)",
    "AMPure Diluted Bound Complex Volume (uL)",
    "AMPure Diluted Bound Complex Concentration (ng/uL)",
    "AMPure Purified Complex Volume (uL)",
    "AMPure Purified Complex Concentration (ng/uL)",
    "ProNex Diluted Bound Complex Volume (uL)",
    "ProNex Diluted Bound Complex Concentration (ng/uL)",
    "ProNex Purified Complex Volume (uL)",
    "ProNex Purified Complex Concentration (ng/uL)",
    "Requested Cells Alternate (cells)",
    "Requested OPLC Alternate (pM)",
]


# Maps the plate type names read from the "Plate 1 Type" / "Plate 2 Type" process UDFs
# to the value written on the "Plate 1" / "Plate 2" rows of the run design.
# NOTE(review): the values are presumably PacBio part numbers - confirm against the kit inserts.
PLATE_PART_NUMBERS: Dict[str, str] = {
    "Revio sequencing plate": "102118800",
    "Revio sequencing plate - 1rxn": "102412400",
    "Revio SPRQ sequencing plate": "103496700",
}


# Maps the kit names read from the "Binding Kit" process UDF (Sample Setup CSV) and the
# "Polymerase Kit" artifact UDF (run design) to the kit string written to the CSVs.
# NOTE(review): the "Lxxxxx" prefix looks like a placeholder lot number - confirm the
# expected format with SMRT Link.
POLYMERASE_KITS: Dict[str, str] = {
    "Revio polymerase kit": "Lxxxxx102739100123199",
    "Revio SPRQ polymerase kit": "Lxxxxx103496900123199",
}


class RevioIndexSets(StrEnum):
    """Identifiers of the index sets that a Revio run design can refer to."""

    # Written to the "Indexes" row of the [SMRT Cell Settings] section for indexed pools.
    SMRTBELL_INDEX_SET: str = "43f950a9-8bde-3855-6b25-c13368069745"


class RunDesignHeader(StrEnum):
    """Section headers of a SMRT Link run design CSV, listed in the order they are written."""

    # General run information: instrument, run name, comments, plates and CSV version.
    RUN_SETTINGS: str = "[Run Settings]"
    # One column of settings per SMRT Cell well.
    SMRT_CELL_SETTINGS: str = "[SMRT Cell Settings]"
    # One row per sample, with its well and adapters.
    SAMPLES: str = "[Samples]"


class SampleSetup:
    """One row of a SMRT Link Sample Setup CSV, populated from a Clarity LIMS artifact.

    Values are read from UDFs on the artifact and on its parent process. The row
    returned by `get_sample_setup_row` follows the column order of
    SAMPLE_SETUP_CSV_HEADER.
    """

    sample_name: str
    system_name: str
    binding_kit: str
    number_of_samples: int
    application: str
    available_volume: float
    starting_concentration: float
    size: int
    loading_conc: float
    number_of_cells_to_load: int
    prepare_entire_sample: bool
    sequencing_primer: str
    minimum_pipetting_volume: float

    def __init__(self, artifact: Artifact):
        """Read all row values from the artifact and its parent process.

        Raises:
            KeyError: if the "Binding Kit" process UDF is unset or is not a key of
                POLYMERASE_KITS.
        """
        process = artifact.parent_process
        self.sample_name = artifact.samples[0].id
        self.system_name = process.udf.get("Sequencing Instrument")
        self.binding_kit = POLYMERASE_KITS[process.udf.get("Binding Kit")]
        self.number_of_samples = len(artifact.samples)
        self.application = process.udf.get("Revio Application")
        self.available_volume = artifact.udf.get("Volume (ul)")
        self.starting_concentration = artifact.udf.get("Input Concentration (ng/ul)")
        self.size = artifact.udf.get("Size (bp)")
        self.loading_conc = process.udf.get("Loading Concentration (pM)")
        self.number_of_cells_to_load = artifact.udf.get("SMRT Cells to Load")
        self.prepare_entire_sample = False
        self.sequencing_primer = process.udf.get("Sequencing Primer")
        self.minimum_pipetting_volume = 1

    def get_sample_setup_row(self) -> List[Any]:
        """Return the row values for this sample, one entry per Sample Setup CSV column.

        Columns that are not set by this class are emitted as empty strings.
        """
        return [
            self.sample_name,  # Sample Name
            "",  # Comment
            self.system_name,  # System Name
            self.binding_kit,  # Binding Kit
            "",  # Plate
            "",  # Well
            self.number_of_samples,  # Number of Samples
            self.application,  # Application
            self.available_volume,  # Available Starting Sample Volume (uL)
            self.starting_concentration,  # Starting Sample Concentration (ng/uL)
            self.size,  # Insert Size (bp)
            "",  # Control Kit
            "",  # Cleanup Anticipated Yield (%)
            self.loading_conc,  # On Plate Loading Concentration (pM)
            # Previously this column repeated the number of samples, which left the
            # "SMRT Cells to Load" UDF read in __init__ unused.
            self.number_of_cells_to_load,  # Cells to Bind (cells)
            self.prepare_entire_sample,  # Prepare Entire Sample
            self.sequencing_primer,  # Sequencing Primer
            "",  # Target Annealing Sample Concentration (nM)
            "",  # Target Annealing Primer Concentration (nM)
            "",  # Target Binding Concentration (nM)
            "",  # Target Polymerase Concentration (X)
            "",  # Binding Time (min)
            "",  # Cleanup Bead Type
            "",  # Cleanup Bead Concentration (X)
            self.minimum_pipetting_volume,  # Minimum Pipetting Volume (uL)
            "",  # Percent of Annealing Reaction To Use In Binding (%)
            "",  # AMPure Diluted Bound Complex Volume (uL)
            "",  # AMPure Diluted Bound Complex Concentration (ng/uL)
            "",  # AMPure Purified Complex Volume (uL)
            "",  # AMPure Purified Complex Concentration (ng/uL)
            "",  # ProNex Diluted Bound Complex Volume (uL)
            "",  # ProNex Diluted Bound Complex Concentration (ng/uL)
            "",  # ProNex Purified Complex Volume (uL)
            "",  # ProNex Purified Complex Concentration (ng/uL)
            "",  # Requested Cells Alternate (cells)
            "",  # Requested OPLC Alternate (pM)
        ]


def _build_plate_dict(process: Process) -> Dict[Any, Any]:
    """Build a two-way lookup between sequencing plate positions and container names.

    Every output container of the process is matched by name against the "Plate 1"
    and "Plate 2" process UDFs. For each match both directions are stored:
    position (1 or 2) -> container name, and container name -> position.

    Raises:
        MissingUDFsError: if an output container matches neither UDF.
    """
    output_containers: List[Container] = process.output_containers()
    first_plate_name: str = process.udf.get("Plate 1")
    second_plate_name: str = process.udf.get("Plate 2")

    lookup: Dict[Any, Any] = {}
    for container in output_containers:
        if container.name == first_plate_name:
            position = 1
        elif second_plate_name and container.name == second_plate_name:
            position = 2
        else:
            raise MissingUDFsError(f"Error: Container {container.name} is missing from run set up.")
        lookup[position] = container.name
        lookup[container.name] = position
    return lookup


def _convert_well(well: str) -> str:
    """Convert a well from the Clarity LIMS format to the one used in SMRT Link.

    The column is zero-padded to two digits, for example: A:1 -> A01 and A:10 -> A10.
    A value without a ":" separator is returned unchanged.
    """
    row, separator, column = well.partition(":")
    if not separator:
        return well
    # Pad instead of blindly inserting a "0", so two-digit columns stay two digits.
    return f"{row}{column.zfill(2)}"


def _get_smrt_cell_well(pool: Artifact, plate_dict: Dict[Any, Any]) -> str:
    """Return the SMRT Cell well of a pool as '<plate position>_<well>'.

    The plate position is looked up in `plate_dict` by the name of the pool's
    container, and the well is the pool's location converted to the SMRT Link format.
    """
    container: Container = pool.container
    smrt_link_well: str = _convert_well(well=pool.location[1])
    plate_position = plate_dict[container.name]
    return f"{plate_position}_{smrt_link_well}"


def _is_indexed(pool: Artifact) -> bool:
    """Return True if at least one input artifact of the pool carries a reagent label."""
    return any(parent.reagent_labels for parent in pool.input_artifact_list())


def _trim_unsupported_characters(name: str) -> str:
    """Return the name with every unsupported character replaced by a space.

    Supported characters are ASCII letters, digits, space, underscore, colon,
    period and hyphen. Each other character becomes one space, so the length of
    the name is preserved.
    """
    unsupported: Pattern[str] = re.compile(r"[^A-Za-z0-9 _:\.-]")
    return unsupported.sub(" ", name)


class RevioRun:
    """Run design of a Revio sequencing run, built from a Clarity LIMS process.

    The run-level settings are read from process UDFs and the per-cell settings
    from UDFs on the pools of the process. `create_csv` renders everything as a
    SMRT Link run design CSV.
    """

    process: Process
    plates: Dict[Any, Any]
    pools: List[Artifact]
    run_name: str
    instrument_type: str
    plate_1_type: str
    plate_2_type: Optional[str]
    file_version: int = 1
    run_comments: Optional[str]
    adaptive_loading: bool
    base_kinetics: bool
    consensus_mode: str
    data_project: int = 1

    def __init__(self, process: Process):
        """Collect plates, pools and run-level UDFs from the process.

        Raises:
            MissingUDFsError: propagated from `_build_plate_dict` when an output
                container matches neither the "Plate 1" nor the "Plate 2" UDF.
        """
        self.process = process
        self.plates = _build_plate_dict(process=process)
        self.pools = get_artifacts(process=process)
        self.run_name = process.udf.get("Run Name")
        self.instrument_type = process.udf.get("Instrument Type")
        self.plate_1_type = process.udf.get("Plate 1 Type")
        self.plate_2_type = process.udf.get("Plate 2 Type")
        self.run_comments = f"Generated by automation in Clarity LIMS step {process.id}"
        self.adaptive_loading = process.udf.get("Adaptive Loading")
        self.base_kinetics = process.udf.get("Include Base Kinetics")
        self.consensus_mode = process.udf.get("Consensus Mode")

    def _create_run_settings(self) -> str:
        """Return the [Run Settings] section of the run design.

        The "Plate 2" row is only written when a second plate is part of the run.
        """
        plate_rows: str = f"Plate 1,{PLATE_PART_NUMBERS[self.plate_1_type]}\n"
        # The plate dict only holds key 2 when a second plate was matched, so a plain
        # subscript would raise KeyError for single-plate runs.
        if self.plates.get(2):
            plate_rows += f"Plate 2,{PLATE_PART_NUMBERS[self.plate_2_type]}\n"
        return (
            f"{RunDesignHeader.RUN_SETTINGS}\n"
            f"Instrument Type,{self.instrument_type}\n"
            f"Run Name,{self.run_name}\n"
            f"Run Comments,{self.run_comments}\n"
            f"{plate_rows}"
            f"CSV Version,{self.file_version}\n"
        )

    def _create_smrt_cell_settings(self) -> str:
        """Return the [SMRT Cell Settings] section of the run design.

        The section is laid out with one row per setting and one column per SMRT
        Cell well; the first column holds the setting names.
        """
        df: pd.DataFrame = pd.DataFrame(
            {
                RunDesignHeader.SMRT_CELL_SETTINGS: [
                    "Well Name",
                    "Library Type",
                    "Application",
                    "Polymerase Kit",
                    "Movie Acquisition Time (hours)",
                    "Insert Size (bp)",
                    "Library Concentration (pM)",
                    "Use Adaptive Loading",
                    "Include Base Kinetics",
                    "Consensus Mode",
                    "Sample is indexed",
                    "Indexes",
                    "Assign Data To Project",
                ]
            }
        )
        for pool in self.pools:
            well: str = _get_smrt_cell_well(pool=pool, plate_dict=self.plates)
            # Evaluate once per pool; the result feeds both the "Sample is indexed"
            # and the "Indexes" rows.
            is_indexed: bool = _is_indexed(pool=pool)
            # NOTE(review): for non-indexed pools the sample ID ends up on the
            # "Indexes" row - confirm that this is what SMRT Link expects.
            index_set: str = (
                RevioIndexSets.SMRTBELL_INDEX_SET if is_indexed else pool.samples[0].id
            )

            df[well] = [
                _trim_unsupported_characters(name=pool.name),
                pool.udf.get("Library Type"),
                pool.udf.get("Revio Application"),
                POLYMERASE_KITS[pool.udf.get("Polymerase Kit")],
                pool.udf.get("Movie Acquisition Time (hours)"),
                pool.udf.get("Mean Size (bp)"),
                pool.udf.get("Library Concentration (pM)"),
                self.adaptive_loading,
                self.base_kinetics,
                self.consensus_mode,
                is_indexed,
                index_set,
                self.data_project,
            ]
        return df.to_csv(index=False)

    def _get_sample_settings(self) -> str:
        """Return the [Samples] section of the run design.

        One row is written per non-pooled artifact of every pool, holding the
        sample ID, the SMRT Cell well of the pool and the SMRTbell adapter name
        (used for both the Adapter and Adapter2 columns).
        """
        rows: List[str] = ["Bio Sample Name,Plate Well,Adapter,Adapter2"]
        for pool in self.pools:
            # The well is a property of the pool, so it is resolved once per pool.
            well: str = _get_smrt_cell_well(pool=pool, plate_dict=self.plates)
            for artifact in get_non_pooled_artifacts(artifact=pool):
                adapter = get_smrtbell_adapter_name(artifact=artifact)
                rows.append(f"{artifact.samples[0].id},{well},{adapter},{adapter}")
        return RunDesignHeader.SAMPLES + "\n" + "\n".join(rows) + "\n"

    def create_csv(self) -> str:
        """Return the Run Design CSV of a step.

        The three sections are separated by a blank line and written in the order
        [Run Settings], [SMRT Cell Settings], [Samples].
        """
        return (
            self._create_run_settings()
            + "\n"
            + self._create_smrt_cell_settings()
            + "\n"
            + self._get_sample_settings()
        )
29 changes: 29 additions & 0 deletions cg_lims/EPPs/files/smrt_link/run_design.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import logging
import sys

import click
from cg_lims import options
from cg_lims.EPPs.files.smrt_link.models import RevioRun
from cg_lims.exceptions import LimsError
from genologics.entities import Process

LOG = logging.getLogger(__name__)


@click.command()
@options.file_placeholder()
@click.pass_context
def create_smrtlink_run_design(ctx, file: str):
    """Create a run design .csv file for SMRT Link import."""
    LOG.info(f"Running {ctx.command_path} with params: {ctx.params}")

    process: Process = ctx.obj["process"]

    # Only building the run design can raise a LimsError; exit with its message
    # so that it is the text reported back by the script.
    try:
        run_design: str = RevioRun(process=process).create_csv()
    except LimsError as error:
        sys.exit(error.message)

    # The placeholder given by --file is used as the prefix of the output file name.
    with open(f"{file}_run_design.csv", "w") as csv_file:
        csv_file.write(run_design)
    click.echo("The run design CSV was successfully generated.")
Loading

0 comments on commit a93b5ba

Please sign in to comment.