From b33bc3381d78549080743566b796c6ce7ef80991 Mon Sep 17 00:00:00 2001 From: jjtimmons Date: Sat, 12 Nov 2022 10:25:24 -0500 Subject: [PATCH] Add json output (--json) --- README.md | 11 +- primers/main.py | 37 +++- primers/{offtargets.py => off_targets.py} | 6 +- primers/primers.py | 174 ++++++++++++------ ...offtargets_test.py => off_targets_test.py} | 8 +- tests/primers_test.py | 9 +- 6 files changed, 165 insertions(+), 80 deletions(-) rename primers/{offtargets.py => off_targets.py} (89%) rename tests/{offtargets_test.py => off_targets_test.py} (77%) diff --git a/README.md b/README.md index 6e84218..526876f 100644 --- a/README.md +++ b/README.md @@ -38,9 +38,10 @@ print(fwd.tm_total) # 70.0 ### CLI ```txt -$ dir tm ttm dg pen seq - FWD 60.8 67.0 -1.86 5.93 GGTCTCAATGAGACAATAGCACACAC - REV 60.8 65.8 0 3.2 GAAGACTTTCGTATGCTGACCTAG +$ primers AATGAGACAATAGCACACACAGCTAGGTCAGCATACGAAA -f GGTCTC -r GAAGAC + dir tm ttm dg pen seq + FWD 61.8 67.6 -1.86 5.23 GGTCTCAATGAGACAATAGCACACACA + REV 61.9 66.5 -0.88 4.85 GAAGACTTTCGTATGCTGACCTAGC ``` ```txt @@ -50,14 +51,14 @@ usage: primers [-h] [-f SEQ] [-fl INT INT] [-r SEQ] [-rl INT INT] [-t SEQ] [--ve Create PCR primers for a DNA sequence. Logs the FWD and REV primer with columns: - dir, tm, ttm, dg, pen, seq + dir, tm, ttm, dg, p, seq Where: dir = FWD or REV. tm = Melting temperature of the annealing/binding part of the primer (Celsius). ttm = The total melting temperature of the primer with added seq (Celsius). dg = The minimum free energy of the primer's secondary structure (kcal/mol). - pen = The primer's penalty score. Lower is better. + p = The primer's penalty score. Lower is better. seq = The sequence of the primer in the 5' to the 3' direction. positional arguments: diff --git a/primers/main.py b/primers/main.py index 250bdf6..2db7110 100644 --- a/primers/main.py +++ b/primers/main.py @@ -1,11 +1,14 @@ """Console entrypoint for creating PCR primers""" import argparse +import json import sys from typing import List from . import __version__, primers -from .primers import PRIMER_FMT + + +"""{fwd} {tm} {tm_total} {gc} {dg} {penalty} {seq}""" def run(): @@ -25,9 +28,24 @@ def run(): offtarget_check=args.t, ) - print(PRIMER_FMT.format("dir", "tm", "ttm", "dg", "pen", "seq")) - print(fwd) - print(rev) + if args.json: + print(json.dumps([fwd.dict(), rev.dict()])) + else: + table_fmt = "{:>5} {:>5} {:>5} {:>3} {:>6} {:>5} {}" + print(table_fmt.format("dir", "tm", "ttm", "gc", "dg", "p", "seq")) + + for p in [fwd, rev]: + print( + table_fmt.format( + "FWD" if p.fwd else "REV", + p.tm, + p.tm_total, + p.gc, + p.dg, + p.scoring.penalty, + p.seq, + ) + ) def parse_args(args: List[str]) -> argparse.Namespace: @@ -46,13 +64,14 @@ def parse_args(args: List[str]) -> argparse.Namespace: parser = argparse.ArgumentParser( description="""Create PCR primers for a DNA sequence. -Logs the FWD and REV primer with columns: - dir, tm, ttm, dg, pen, seq +By default, the primers are logged in table format in rows: + dir, tm, ttm, gc, dg, pen, seq Where: dir = FWD or REV. tm = The melting temperature of the annealing portion of the primer (Celsius). ttm = The total melting temperature of the primer with added seq (Celsius). + gc = The GC ratio of the primer. dg = The minimum free energy of the primer (kcal/mol). pen = The primer's penalty score. Lower is better. seq = The sequence of the primer in the 5' to the 3' direction. @@ -98,6 +117,12 @@ def parse_args(args: List[str]) -> argparse.Namespace: default="", metavar="SEQ", ) + parser.add_argument( + "-j", + "--json", + action=argparse.BooleanOptionalAction, + help="whether to write the primers in a JSON array", + ) parser.add_argument( "--version", action="version", version="seqfold {ver}".format(ver=__version__) ) diff --git a/primers/offtargets.py b/primers/off_targets.py similarity index 89% rename from primers/offtargets.py rename to primers/off_targets.py index 4e587e7..bc87a99 100644 --- a/primers/offtargets.py +++ b/primers/off_targets.py @@ -1,12 +1,12 @@ -"""Find offtargets. +"""Find off-target binding sites. """ from collections import defaultdict from typing import List, Dict -def offtargets(seq: str, check_seq: str) -> List[int]: - """Return a list of offtarget counts for primers whose end is that index. +def off_targets(seq: str, check_seq: str) -> List[int]: + """Return a list of off-target counts for primers whose end is that index. For example, offtarget_cache[20] -> returns the number of offtarget binding sites whose last bp ends in the 20th index of `seq` diff --git a/primers/primers.py b/primers/primers.py index 7f8cedd..5e0d909 100644 --- a/primers/primers.py +++ b/primers/primers.py @@ -28,82 +28,117 @@ import heapq from logging import warning -from typing import Tuple, NamedTuple, List, Optional +from typing import Any, Dict, Tuple, NamedTuple, List, Optional from seqfold import gc_cache, dg_cache, tm_cache -from .offtargets import offtargets +from .off_targets import off_targets LEN_MIN = 15 # min length of the annealing portion of primers LEN_MAX = 32 # max length of the annealing portion of primers, based on IDT guidelines -PRIMER_FMT: str = "{:>5} {:>5} {:>5} {:>6} {:>5} {}" -"""{fwd} {tm} {tm_total} {dg} {penalty} {seq}""" + +class Scoring(NamedTuple): + """A scoring for a single Primer.""" + + penalty: float + """The high-level penalty for this primer""" + + penalty_tm: float + """Penalty for each degree of tm suboptimality (diff from optimal)""" + + penalty_tm_diff: float + """Penalty for each degree of tm difference between primers in a pair""" + + penalty_gc: float + """Penalty for each percentage point of GC suboptimality (diff from optional)""" + + penalty_len: float + """Penalty for each base pair length of suboptimality (diff from optimal)""" + + penalty_dg: float + """Penalty for every kcal/mol of free energy""" + + penalty_off_target: float + """Penalty for each off-target binding site""" class Primer(NamedTuple): - """A single Primer for PCR amplification of a DNA sequence. - - Attributes: - seq: The DNA sequence of the primer; 5' to 3' - tm: The melting temperature of the primer (Celsius): - for the binding region pre-addition of added sequence - tm_total: The melting temperature of the total primer (Celsius): - the tm of the primer with the binding and added sequence - gc: The GC percentage of the primer - dg: The minimum free energy of the primer - fwd: Whether the primer anneals in the FWD - direction of the template sequence - penalty: The penalty score for this primer - """ + """A single Primer for PCR amplification of a DNA sequence.""" seq: str + """The DNA sequence of the primer; 5' to 3'""" + + len: int + """The length of the seq""" + tm: float + """The melting temperature of the primer (Celsius) for the + binding region pre-addition of added sequence""" + tm_total: float + """The melting temperature of the total primer (Celsius): + the tm of the primer with the binding and added sequence""" + gc: float + """The GC ratio of the primer""" + dg: float + """The minimum free energy of the primer (kcal/mol)""" + fwd: bool - offtargets: int - penalty: float + """Whether the primer anneals in the FWD direction of the template sequence""" - def __str__(self) -> str: - """Create a string representation of the primer.""" + off_target_count: int + """The count of off-targets in the primer""" - return PRIMER_FMT.format( - "FWD" if self.fwd else "REV", - self.tm, - self.tm_total, - round(self.dg, 2), - round(self.penalty, 2), - self.seq, - ) + scoring: Scoring + """Scoring of this primer (contains penalty)""" + + @property + def penalty(self) -> float: + """Penalty of the primer.""" + return self.scoring.penalty + + def dict(self) -> Dict[str, Any]: + j = self._asdict() + j["scoring"] = self.scoring._asdict() + return j class PrimerFactory(NamedTuple): """A factory for creating Primers with penalties. Holds the optimal values for a primer and the penalty for differences - between primers' properties and those optimal values. - - Attributes: - optimal_tm: Optimal tm of a primer - optimal_gc: Optimal GC ratio of a primer - optimal_len: Optimal length of a primer - penalty_tm: Penalty for a large tm difference - penalty_tm_diff: Penalty for differences between primers in a pair - penalty_dg: Penalty for very negative free energies - penalty_offtarget: Penalty for offtargets + between primers' properties and optimal values. """ optimal_tm: float + """Optimal tm of a primer""" + optimal_gc: float + """Optimal GC ratio of a primer""" + optimal_len: int + """Optimal length of a primer""" + penalty_tm: float + """Penalty for each degree of tm suboptimality (diff from optimal)""" + + penalty_tm_diff: float + """Penalty for each degree of tm difference between primers in a pair""" + penalty_gc: float + """Penalty for each percentage point of GC suboptimality (diff from optional)""" + penalty_len: float - penalty_tm_diff: float + """Penalty for each base pair length of suboptimality (diff from optimal)""" + penalty_dg: float - penalty_offtarget: float + """Penalty for every kcal/mol of free energy""" + + penalty_off_target: float + """Penalty for each off-target binding site""" def build( self, @@ -113,7 +148,7 @@ def build( gc: float, dg: float, fwd: bool, - offtargets: int, + off_target_count: int, ) -> Primer: """Create a Primer with a scored penalty. @@ -124,7 +159,7 @@ def build( gc: GC ratio of the created primer dg: Minimum free energy (kcal/mol) of the folded DNA sequence fwd: Whether this is a FWD primer - offtargets: The number of offtarget binding sites in the template sequence + off_target_count: The number of offtarget binding sites in the template sequence Returns: Primer: A Primer with a penalty score @@ -135,18 +170,29 @@ def build( penalty_gc = abs(gc - self.optimal_gc) * self.penalty_gc * 100 penalty_len = abs(len(seq) - self.optimal_len) * self.penalty_len penalty_dg = abs(dg) * self.penalty_dg - penalty_offtarget = offtargets * self.penalty_offtarget - penalty = penalty_tm + penalty_gc + penalty_len + penalty_dg + penalty_offtarget + penalty_off_target = off_target_count * self.penalty_off_target + penalty = ( + penalty_tm + penalty_gc + penalty_len + penalty_dg + penalty_off_target + ) return Primer( seq=seq, + len=len(seq), tm=tm, tm_total=tm_total, - gc=gc, - dg=dg, + gc=round(gc, 2), + dg=round(dg, 2), fwd=fwd, - offtargets=offtargets, - penalty=penalty, + off_target_count=off_target_count, + scoring=Scoring( + penalty_tm=round(penalty_tm, 2), + penalty_tm_diff=0, # unknown at this point + penalty_len=penalty_len, + penalty_gc=penalty_gc, + penalty_dg=round(penalty_dg, 2), + penalty_off_target=penalty_off_target, + penalty=round(penalty, 2), + ), ) def build_pair(self, fwd: Primer, rev: Primer) -> Tuple[Primer, Primer]: @@ -162,8 +208,18 @@ def build_pair(self, fwd: Primer, rev: Primer) -> Tuple[Primer, Primer]: penalty_tm_diff = abs(fwd.tm - rev.tm) * self.penalty_tm_diff - new_fwd = fwd._replace(penalty=fwd.penalty + penalty_tm_diff) - new_rev = rev._replace(penalty=rev.penalty + penalty_tm_diff) + new_fwd = fwd._replace( + scoring=fwd.scoring._replace( + penalty=fwd.scoring.penalty + penalty_tm_diff, + penalty_tm_diff=penalty_tm_diff, + ) + ) + new_rev = rev._replace( + scoring=rev.scoring._replace( + penalty=rev.scoring.penalty + penalty_tm_diff, + penalty_tm_diff=penalty_tm_diff, + ) + ) return new_fwd, new_rev @@ -183,7 +239,7 @@ def primers( penalty_len: float = 0.5, penalty_tm_diff: float = 1.0, penalty_dg: float = 2.0, - penalty_offtarget: float = 20.0, + penalty_off_target: float = 20.0, ) -> Tuple[Primer, Primer]: """Create primers for PCR amplification of the sequence. @@ -209,7 +265,7 @@ def primers( penalty_len: Penalty for differences in primer length penalty_diff_tm: Penalty for tm differences between primers penalty_dg: Penalty for minimum free energy of a primer - penalty_offtarget: Penalty for offtarget binding sites in the `seq` + penalty_off_target: Penalty for offtarget binding sites in the `seq` Returns: (Primer, Primer): Primers for PCR amplification @@ -228,7 +284,7 @@ def primers( penalty_len=penalty_len, penalty_tm_diff=penalty_tm_diff, penalty_dg=penalty_dg, - penalty_offtarget=penalty_offtarget, + penalty_off_target=penalty_off_target, ) # set min/max if additional sequence was provided at FWD/REV @@ -303,7 +359,7 @@ def _primers( gc = gc_cache(seq) tm = tm_cache(seq) dg = dg_cache(seq) - ot = offtargets(seq, offtarget_check) + ot = off_targets(seq, offtarget_check) assert len(gc) == len(tm) == len(dg) @@ -351,13 +407,13 @@ def _choose( for p in row: if not p: continue - heapq.heappush(ranked_fwd, (p.penalty, p)) + heapq.heappush(ranked_fwd, (p.scoring.penalty, p)) for row in rev_primers: for p in row: if not p: continue - heapq.heappush(ranked_rev, (p.penalty, p)) + heapq.heappush(ranked_rev, (p.scoring.penalty, p)) if not ranked_fwd: raise RuntimeError("Failed to create any primers in the FWD direction") @@ -370,7 +426,7 @@ def _choose( for _, fwd in heapq.nsmallest(10, ranked_fwd): for _, rev in heapq.nsmallest(10, ranked_rev): new_fwd, new_rev = factory.build_pair(fwd, rev) - new_penalty = new_fwd.penalty + new_rev.penalty + new_penalty = new_fwd.scoring.penalty + new_rev.scoring.penalty if new_penalty < min_penalty: min_penalty = new_penalty min_fwd, min_rev = fwd, rev diff --git a/tests/offtargets_test.py b/tests/off_targets_test.py similarity index 77% rename from tests/offtargets_test.py rename to tests/off_targets_test.py index 9368f3c..7019603 100644 --- a/tests/offtargets_test.py +++ b/tests/off_targets_test.py @@ -1,19 +1,19 @@ from unittest import TestCase -from primers.offtargets import offtargets +from primers.off_targets import off_targets -class TestOfftargets(TestCase): +class TestOffTargets(TestCase): """Test offtarget detection.""" - def test_offtargets(self): + def test_off_targets(self): """Find and cache offtarget binding sites.""" # GTGGCTAGCC is one by removed from GTGGCTAGGC in seq parent = "CTGACTCTACTTGGAAATGTGGCTAGGCCTTTGCCCACGCACCTGATCGGTCCTGTGGCTAGCCTCGTTTGCTTTTTAGGACCGGATGAACTACAGAGCATTGCAAGAATC" seq = "CTGACTCTACTTGGAAATGTGGCTAGGCCTT" - ot = offtargets(seq, parent) + ot = off_targets(seq, parent) self.assertEqual(0, ot[0]) self.assertEqual(len(seq), len(ot)) diff --git a/tests/primers_test.py b/tests/primers_test.py index e0b8f05..fad3264 100644 --- a/tests/primers_test.py +++ b/tests/primers_test.py @@ -33,8 +33,8 @@ def test_primers(self): self.assertTrue(p2.gc) self.assertTrue(p1.fwd) self.assertFalse(p2.fwd) - self.assertFalse(p1.offtargets) - self.assertFalse(p2.offtargets) + self.assertFalse(p1.off_target_count) + self.assertFalse(p2.off_target_count) self.assertTrue(p1.penalty) self.assertTrue(p2.penalty) @@ -91,6 +91,9 @@ def test_parse_add_len(self): def test_primers_parent(self): """Create primers given a parent with diff-case sequence.""" - ps = primers("AATGAGACAATAGCACACACAGCTAGGTCAGCATACGAAA", offtarget_check="ggaattacgtAATGAGACAATAGCACACACAGCTAGGTCAGCATACGAAAggaccagttacagga") + ps = primers( + "AATGAGACAATAGCACACACAGCTAGGTCAGCATACGAAA", + offtarget_check="ggaattacgtAATGAGACAATAGCACACACAGCTAGGTCAGCATACGAAAggaccagttacagga", + ) self.assertTrue(ps)