From b33bc3381d78549080743566b796c6ce7ef80991 Mon Sep 17 00:00:00 2001
From: jjtimmons <josh.timmons@hashicorp.com>
Date: Sat, 12 Nov 2022 10:25:24 -0500
Subject: [PATCH] Add json output (--json)

---
 README.md                                     |  11 +-
 primers/main.py                               |  37 +++-
 primers/{offtargets.py => off_targets.py}     |   6 +-
 primers/primers.py                            | 174 ++++++++++++------
 ...offtargets_test.py => off_targets_test.py} |   8 +-
 tests/primers_test.py                         |   9 +-
 6 files changed, 165 insertions(+), 80 deletions(-)
 rename primers/{offtargets.py => off_targets.py} (89%)
 rename tests/{offtargets_test.py => off_targets_test.py} (77%)

diff --git a/README.md b/README.md
index 6e84218..526876f 100644
--- a/README.md
+++ b/README.md
@@ -38,9 +38,10 @@ print(fwd.tm_total) # 70.0
 ### CLI
 
 ```txt
-$ dir    tm   ttm     dg   pen  seq
-  FWD  60.8  67.0  -1.86  5.93  GGTCTCAATGAGACAATAGCACACAC
-  REV  60.8  65.8      0   3.2  GAAGACTTTCGTATGCTGACCTAG
+$ primers AATGAGACAATAGCACACACAGCTAGGTCAGCATACGAAA -f GGTCTC -r GAAGAC
+  dir    tm   ttm     dg   pen  seq
+  FWD  61.8  67.6  -1.86  5.23  GGTCTCAATGAGACAATAGCACACACA
+  REV  61.9  66.5  -0.88  4.85  GAAGACTTTCGTATGCTGACCTAGC
 ```
 
 ```txt
@@ -50,14 +51,14 @@ usage: primers [-h] [-f SEQ] [-fl INT INT] [-r SEQ] [-rl INT INT] [-t SEQ] [--ve
 Create PCR primers for a DNA sequence.
 
 Logs the FWD and REV primer with columns:
-    dir, tm, ttm, dg, pen, seq
+    dir, tm, ttm, dg, p, seq
 
 Where:
     dir = FWD or REV.
     tm  = Melting temperature of the annealing/binding part of the primer (Celsius).
     ttm = The total melting temperature of the primer with added seq (Celsius).
     dg  = The minimum free energy of the primer's secondary structure (kcal/mol).
-    pen = The primer's penalty score. Lower is better.
+    p   = The primer's penalty score. Lower is better.
     seq = The sequence of the primer in the 5' to the 3' direction.
 
 positional arguments:
diff --git a/primers/main.py b/primers/main.py
index 250bdf6..2db7110 100644
--- a/primers/main.py
+++ b/primers/main.py
@@ -1,11 +1,14 @@
 """Console entrypoint for creating PCR primers"""
 
 import argparse
+import json
 import sys
 from typing import List
 
 from . import __version__, primers
-from .primers import PRIMER_FMT
+
+
+"""{fwd} {tm} {tm_total} {gc} {dg} {penalty} {seq}"""
 
 
 def run():
@@ -25,9 +28,24 @@ def run():
         offtarget_check=args.t,
     )
 
-    print(PRIMER_FMT.format("dir", "tm", "ttm", "dg", "pen", "seq"))
-    print(fwd)
-    print(rev)
+    if args.json:
+        print(json.dumps([fwd.dict(), rev.dict()]))
+    else:
+        table_fmt = "{:>5} {:>5} {:>5} {:>3} {:>6} {:>5}  {}"
+        print(table_fmt.format("dir", "tm", "ttm", "gc", "dg", "p", "seq"))
+
+        for p in [fwd, rev]:
+            print(
+                table_fmt.format(
+                    "FWD" if p.fwd else "REV",
+                    p.tm,
+                    p.tm_total,
+                    p.gc,
+                    p.dg,
+                    p.scoring.penalty,
+                    p.seq,
+                )
+            )
 
 
 def parse_args(args: List[str]) -> argparse.Namespace:
@@ -46,13 +64,14 @@ def parse_args(args: List[str]) -> argparse.Namespace:
     parser = argparse.ArgumentParser(
         description="""Create PCR primers for a DNA sequence.
 
-Logs the FWD and REV primer with columns:
-    dir, tm, ttm, dg, pen, seq
+By default, the primers are logged in table format with columns:
+    dir, tm, ttm, gc, dg, pen, seq
 
 Where:
     dir = FWD or REV.
     tm  = The melting temperature of the annealing portion of the primer (Celsius).
     ttm = The total melting temperature of the primer with added seq (Celsius).
+    gc  = The GC ratio of the primer.
     dg  = The minimum free energy of the primer (kcal/mol).
     pen = The primer's penalty score. Lower is better.
     seq = The sequence of the primer in the 5' to the 3' direction.
@@ -98,6 +117,12 @@ def parse_args(args: List[str]) -> argparse.Namespace:
         default="",
         metavar="SEQ",
     )
+    parser.add_argument(
+        "-j",
+        "--json",
+        action=argparse.BooleanOptionalAction,
+        help="whether to write the primers in a JSON array",
+    )
     parser.add_argument(
         "--version", action="version", version="seqfold {ver}".format(ver=__version__)
     )
diff --git a/primers/offtargets.py b/primers/off_targets.py
similarity index 89%
rename from primers/offtargets.py
rename to primers/off_targets.py
index 4e587e7..bc87a99 100644
--- a/primers/offtargets.py
+++ b/primers/off_targets.py
@@ -1,12 +1,12 @@
-"""Find offtargets.
+"""Find off-target binding sites.
 """
 
 from collections import defaultdict
 from typing import List, Dict
 
 
-def offtargets(seq: str, check_seq: str) -> List[int]:
-    """Return a list of offtarget counts for primers whose end is that index.
+def off_targets(seq: str, check_seq: str) -> List[int]:
+    """Return a list of off-target counts for primers whose end is that index.
 
     For example, offtarget_cache[20] -> returns the number of offtarget binding
     sites whose last bp ends in the 20th index of `seq`
diff --git a/primers/primers.py b/primers/primers.py
index 7f8cedd..5e0d909 100644
--- a/primers/primers.py
+++ b/primers/primers.py
@@ -28,82 +28,117 @@
 
 import heapq
 from logging import warning
-from typing import Tuple, NamedTuple, List, Optional
+from typing import Any, Dict, Tuple, NamedTuple, List, Optional
 
 from seqfold import gc_cache, dg_cache, tm_cache
-from .offtargets import offtargets
+from .off_targets import off_targets
 
 
 LEN_MIN = 15  # min length of the annealing portion of primers
 LEN_MAX = 32  # max length of the annealing portion of primers, based on IDT guidelines
 
-PRIMER_FMT: str = "{:>5} {:>5} {:>5} {:>6} {:>5}  {}"
-"""{fwd} {tm} {tm_total} {dg} {penalty} {seq}"""
+
+class Scoring(NamedTuple):
+    """A scoring for a single Primer."""
+
+    penalty: float
+    """The high-level penalty for this primer"""
+
+    penalty_tm: float
+    """Penalty for each degree of tm suboptimality (diff from optimal)"""
+
+    penalty_tm_diff: float
+    """Penalty for each degree of tm difference between primers in a pair"""
+
+    penalty_gc: float
+    """Penalty for each percentage point of GC suboptimality (diff from optimal)"""
+
+    penalty_len: float
+    """Penalty for each base pair length of suboptimality (diff from optimal)"""
+
+    penalty_dg: float
+    """Penalty for every kcal/mol of free energy"""
+
+    penalty_off_target: float
+    """Penalty for each off-target binding site"""
 
 
 class Primer(NamedTuple):
-    """A single Primer for PCR amplification of a DNA sequence.
-
-    Attributes:
-        seq: The DNA sequence of the primer; 5' to 3'
-        tm: The melting temperature of the primer (Celsius):
-            for the binding region pre-addition of added sequence
-        tm_total: The melting temperature of the total primer (Celsius):
-            the tm of the primer with the binding and added sequence
-        gc: The GC percentage of the primer
-        dg: The minimum free energy of the primer
-        fwd: Whether the primer anneals in the FWD
-            direction of the template sequence
-        penalty: The penalty score for this primer
-    """
+    """A single Primer for PCR amplification of a DNA sequence."""
 
     seq: str
+    """The DNA sequence of the primer; 5' to 3'"""
+
+    len: int
+    """The length of the seq"""
+
     tm: float
+    """The melting temperature of the primer (Celsius) for the
+    binding region pre-addition of added sequence"""
+
     tm_total: float
+    """The melting temperature of the total primer (Celsius):
+    the tm of the primer with the binding and added sequence"""
+
     gc: float
+    """The GC ratio of the primer"""
+
     dg: float
+    """The minimum free energy of the primer (kcal/mol)"""
+
     fwd: bool
-    offtargets: int
-    penalty: float
+    """Whether the primer anneals in the FWD direction of the template sequence"""
 
-    def __str__(self) -> str:
-        """Create a string representation of the primer."""
+    off_target_count: int
+    """The number of off-target binding sites in the template sequence"""
 
-        return PRIMER_FMT.format(
-            "FWD" if self.fwd else "REV",
-            self.tm,
-            self.tm_total,
-            round(self.dg, 2),
-            round(self.penalty, 2),
-            self.seq,
-        )
+    scoring: Scoring
+    """Scoring of this primer (contains penalty)"""
+
+    @property
+    def penalty(self) -> float:
+        """Penalty of the primer."""
+        return self.scoring.penalty
+
+    def dict(self) -> Dict[str, Any]:
+        j = self._asdict()
+        j["scoring"] = self.scoring._asdict()
+        return j
 
 
 class PrimerFactory(NamedTuple):
     """A factory for creating Primers with penalties.
 
     Holds the optimal values for a primer and the penalty for differences
-    between primers' properties and those optimal values.
-
-    Attributes:
-        optimal_tm: Optimal tm of a primer
-        optimal_gc: Optimal GC ratio of a primer
-        optimal_len: Optimal length of a primer
-        penalty_tm: Penalty for a large tm difference
-        penalty_tm_diff: Penalty for differences between primers in a pair
-        penalty_dg: Penalty for very negative free energies
-        penalty_offtarget: Penalty for offtargets
+    between primers' properties and optimal values.
     """
 
     optimal_tm: float
+    """Optimal tm of a primer"""
+
     optimal_gc: float
+    """Optimal GC ratio of a primer"""
+
     optimal_len: int
+    """Optimal length of a primer"""
+
     penalty_tm: float
+    """Penalty for each degree of tm suboptimality (diff from optimal)"""
+
+    penalty_tm_diff: float
+    """Penalty for each degree of tm difference between primers in a pair"""
+
     penalty_gc: float
+    """Penalty for each percentage point of GC suboptimality (diff from optimal)"""
+
     penalty_len: float
-    penalty_tm_diff: float
+    """Penalty for each base pair length of suboptimality (diff from optimal)"""
+
     penalty_dg: float
-    penalty_offtarget: float
+    """Penalty for every kcal/mol of free energy"""
+
+    penalty_off_target: float
+    """Penalty for each off-target binding site"""
 
     def build(
         self,
@@ -113,7 +148,7 @@ def build(
         gc: float,
         dg: float,
         fwd: bool,
-        offtargets: int,
+        off_target_count: int,
     ) -> Primer:
         """Create a Primer with a scored penalty.
 
@@ -124,7 +159,7 @@ def build(
             gc: GC ratio of the created primer
             dg: Minimum free energy (kcal/mol) of the folded DNA sequence
             fwd: Whether this is a FWD primer
-            offtargets: The number of offtarget binding sites in the template sequence
+            off_target_count: The number of offtarget binding sites in the template sequence
 
         Returns:
             Primer: A Primer with a penalty score
@@ -135,18 +170,29 @@ def build(
         penalty_gc = abs(gc - self.optimal_gc) * self.penalty_gc * 100
         penalty_len = abs(len(seq) - self.optimal_len) * self.penalty_len
         penalty_dg = abs(dg) * self.penalty_dg
-        penalty_offtarget = offtargets * self.penalty_offtarget
-        penalty = penalty_tm + penalty_gc + penalty_len + penalty_dg + penalty_offtarget
+        penalty_off_target = off_target_count * self.penalty_off_target
+        penalty = (
+            penalty_tm + penalty_gc + penalty_len + penalty_dg + penalty_off_target
+        )
 
         return Primer(
             seq=seq,
+            len=len(seq),
             tm=tm,
             tm_total=tm_total,
-            gc=gc,
-            dg=dg,
+            gc=round(gc, 2),
+            dg=round(dg, 2),
             fwd=fwd,
-            offtargets=offtargets,
-            penalty=penalty,
+            off_target_count=off_target_count,
+            scoring=Scoring(
+                penalty_tm=round(penalty_tm, 2),
+                penalty_tm_diff=0,  # unknown at this point
+                penalty_len=penalty_len,
+                penalty_gc=penalty_gc,
+                penalty_dg=round(penalty_dg, 2),
+                penalty_off_target=penalty_off_target,
+                penalty=round(penalty, 2),
+            ),
         )
 
     def build_pair(self, fwd: Primer, rev: Primer) -> Tuple[Primer, Primer]:
@@ -162,8 +208,18 @@ def build_pair(self, fwd: Primer, rev: Primer) -> Tuple[Primer, Primer]:
 
         penalty_tm_diff = abs(fwd.tm - rev.tm) * self.penalty_tm_diff
 
-        new_fwd = fwd._replace(penalty=fwd.penalty + penalty_tm_diff)
-        new_rev = rev._replace(penalty=rev.penalty + penalty_tm_diff)
+        new_fwd = fwd._replace(
+            scoring=fwd.scoring._replace(
+                penalty=fwd.scoring.penalty + penalty_tm_diff,
+                penalty_tm_diff=penalty_tm_diff,
+            )
+        )
+        new_rev = rev._replace(
+            scoring=rev.scoring._replace(
+                penalty=rev.scoring.penalty + penalty_tm_diff,
+                penalty_tm_diff=penalty_tm_diff,
+            )
+        )
 
         return new_fwd, new_rev
 
@@ -183,7 +239,7 @@ def primers(
     penalty_len: float = 0.5,
     penalty_tm_diff: float = 1.0,
     penalty_dg: float = 2.0,
-    penalty_offtarget: float = 20.0,
+    penalty_off_target: float = 20.0,
 ) -> Tuple[Primer, Primer]:
     """Create primers for PCR amplification of the sequence.
 
@@ -209,7 +265,7 @@ def primers(
         penalty_len: Penalty for differences in primer length
         penalty_diff_tm: Penalty for tm differences between primers
         penalty_dg: Penalty for minimum free energy of a primer
-        penalty_offtarget: Penalty for offtarget binding sites in the `seq`
+        penalty_off_target: Penalty for offtarget binding sites in the `seq`
 
     Returns:
         (Primer, Primer): Primers for PCR amplification
@@ -228,7 +284,7 @@ def primers(
         penalty_len=penalty_len,
         penalty_tm_diff=penalty_tm_diff,
         penalty_dg=penalty_dg,
-        penalty_offtarget=penalty_offtarget,
+        penalty_off_target=penalty_off_target,
     )
 
     # set min/max if additional sequence was provided at FWD/REV
@@ -303,7 +359,7 @@ def _primers(
     gc = gc_cache(seq)
     tm = tm_cache(seq)
     dg = dg_cache(seq)
-    ot = offtargets(seq, offtarget_check)
+    ot = off_targets(seq, offtarget_check)
 
     assert len(gc) == len(tm) == len(dg)
 
@@ -351,13 +407,13 @@ def _choose(
         for p in row:
             if not p:
                 continue
-            heapq.heappush(ranked_fwd, (p.penalty, p))
+            heapq.heappush(ranked_fwd, (p.scoring.penalty, p))
 
     for row in rev_primers:
         for p in row:
             if not p:
                 continue
-            heapq.heappush(ranked_rev, (p.penalty, p))
+            heapq.heappush(ranked_rev, (p.scoring.penalty, p))
 
     if not ranked_fwd:
         raise RuntimeError("Failed to create any primers in the FWD direction")
@@ -370,7 +426,7 @@ def _choose(
     for _, fwd in heapq.nsmallest(10, ranked_fwd):
         for _, rev in heapq.nsmallest(10, ranked_rev):
             new_fwd, new_rev = factory.build_pair(fwd, rev)
-            new_penalty = new_fwd.penalty + new_rev.penalty
+            new_penalty = new_fwd.scoring.penalty + new_rev.scoring.penalty
             if new_penalty < min_penalty:
                 min_penalty = new_penalty
                 min_fwd, min_rev = fwd, rev
diff --git a/tests/offtargets_test.py b/tests/off_targets_test.py
similarity index 77%
rename from tests/offtargets_test.py
rename to tests/off_targets_test.py
index 9368f3c..7019603 100644
--- a/tests/offtargets_test.py
+++ b/tests/off_targets_test.py
@@ -1,19 +1,19 @@
 from unittest import TestCase
 
-from primers.offtargets import offtargets
+from primers.off_targets import off_targets
 
 
-class TestOfftargets(TestCase):
+class TestOffTargets(TestCase):
     """Test offtarget detection."""
 
-    def test_offtargets(self):
+    def test_off_targets(self):
         """Find and cache offtarget binding sites."""
 
         # GTGGCTAGCC is one by removed from GTGGCTAGGC in seq
         parent = "CTGACTCTACTTGGAAATGTGGCTAGGCCTTTGCCCACGCACCTGATCGGTCCTGTGGCTAGCCTCGTTTGCTTTTTAGGACCGGATGAACTACAGAGCATTGCAAGAATC"
         seq = "CTGACTCTACTTGGAAATGTGGCTAGGCCTT"
 
-        ot = offtargets(seq, parent)
+        ot = off_targets(seq, parent)
 
         self.assertEqual(0, ot[0])
         self.assertEqual(len(seq), len(ot))
diff --git a/tests/primers_test.py b/tests/primers_test.py
index e0b8f05..fad3264 100644
--- a/tests/primers_test.py
+++ b/tests/primers_test.py
@@ -33,8 +33,8 @@ def test_primers(self):
         self.assertTrue(p2.gc)
         self.assertTrue(p1.fwd)
         self.assertFalse(p2.fwd)
-        self.assertFalse(p1.offtargets)
-        self.assertFalse(p2.offtargets)
+        self.assertFalse(p1.off_target_count)
+        self.assertFalse(p2.off_target_count)
         self.assertTrue(p1.penalty)
         self.assertTrue(p2.penalty)
 
@@ -91,6 +91,9 @@ def test_parse_add_len(self):
     def test_primers_parent(self):
         """Create primers given a parent with diff-case sequence."""
 
-        ps = primers("AATGAGACAATAGCACACACAGCTAGGTCAGCATACGAAA", offtarget_check="ggaattacgtAATGAGACAATAGCACACACAGCTAGGTCAGCATACGAAAggaccagttacagga")
+        ps = primers(
+            "AATGAGACAATAGCACACACAGCTAGGTCAGCATACGAAA",
+            offtarget_check="ggaattacgtAATGAGACAATAGCACACACAGCTAGGTCAGCATACGAAAggaccagttacagga",
+        )
 
         self.assertTrue(ps)