From ffc2767f01076419b8781a10ea88df0587610cc4 Mon Sep 17 00:00:00 2001 From: JJTimmons Date: Tue, 2 Jul 2019 17:23:21 -0400 Subject: [PATCH] fix fragment based design --- README.md | 27 ++++++++++++++++++++++++++- internal/repp/fragments.go | 8 ++++++-- internal/repp/input.go | 5 +++-- test/input/fragments.fa | 4 ++++ 4 files changed, 39 insertions(+), 5 deletions(-) create mode 100644 test/input/fragments.fa diff --git a/README.md b/README.md index db4ff4be5..77be74735 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ See [the docs](https://jjtimmons.github.io/repp/) or use `--help` on any command To design a plasmid based on its expected sequence save it to a FASTA or Genbank file. For example: -``` +```txt >2ndVal_mScarlet-I CAACCTTACCAGAGGGCGCCCCAGCTGGCAATTCCGACGTCTAAGAAACCATTATTATCA... ``` @@ -46,6 +46,31 @@ Then call `repp make sequence` to design it. The following example uses Addgene repp make sequence --in "./2ndVal_mScarlet-I.fa" --addgene --dbs "parts_library.fa" ``` +### Features + +To design a plasmid based on the features it should contain, specify the features by name. By default, these should refer to features that are in REPP's feature database (`~/.repp/features.tsv`). Features can also refer to fragments, as in the following example where a plasmid is specified by its constituent list of iGEM parts: + +```bash +repp make features "BBa_R0062,BBa_B0034,BBa_C0040,BBa_B0010,BBa_B0012" --backbone pSB1C3 --enzymes "EcoRI,PstI" --igem +``` + +### Fragments + +To design a plasmid from its constituent fragments, save them to a multi-FASTA. + +```txt +>GFP +ATGAGTAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGG... +>backbone +TACTAGTAGCGGCCGCTGCAGTCCGGCAAAAAAGGGCAAGGTGTCACCACCCTGCCCTT... 
+``` + +And call the file from `repp make fragments`: + +```bash +repp make fragments --in "./fragments.fa" --out "plasmid.json" +``` + ### Databases `REPP` includes three embedded databases from large public repositories: [Addgene](https://www.addgene.org/), [iGEM](http://parts.igem.org/Main_Page), and [DNASU](https://dnasu.org/DNASU/Home.do). Each embedded database and its file path after installation are as follows: diff --git a/internal/repp/fragments.go b/internal/repp/fragments.go index c8c0e51de..837e8806e 100644 --- a/internal/repp/fragments.go +++ b/internal/repp/fragments.go @@ -33,7 +33,6 @@ func FragmentsCmd(cmd *cobra.Command, args []string) { flags, conf := parseCmdFlags(cmd, args, true) // read in the constituent fragments - var frags []*Frag frags, err := read(flags.in, false) if err != nil { stderr.Fatalln(err) @@ -44,12 +43,17 @@ func FragmentsCmd(cmd *cobra.Command, args []string) { frags = append([]*Frag{flags.backbone}, frags...) } + // set the conf property on each frag + for _, f := range frags { + f.conf = conf + } + target, solution := fragments(frags, conf) // write the single list of fragments as a possible solution to the output file writeJSON( flags.out, - target.ID, + flags.in, target.Seq, [][]*Frag{solution}, len(target.Seq), diff --git a/internal/repp/input.go b/internal/repp/input.go index 6c2bd2d55..bc8ca8cf2 100644 --- a/internal/repp/input.go +++ b/internal/repp/input.go @@ -88,13 +88,14 @@ func NewFlags( // parseCmdFlags gathers the in path, out path, etc from a cobra cmd object // returns Flags and a Config struct for repp.Plasmid or repp.Fragments. 
func parseCmdFlags(cmd *cobra.Command, args []string, strict bool) (*Flags, *config.Config) { + cmdName := strings.ToLower(cmd.Name()) + var err error fs := &Flags{} // parsed flags p := inputParser{} c := config.New() if fs.in, err = cmd.Flags().GetString("in"); fs.in == "" || err != nil { - cmdName := strings.ToLower(cmd.Name()) if cmdName == "features" { fs.in = p.parseFeatureInput(args) } else if cmdName == "sequence" && len(args) > 0 { @@ -143,7 +144,7 @@ func parseCmdFlags(cmd *cobra.Command, args []string, strict bool) (*Flags, *con } filters, err := cmd.Flags().GetString("exclude") - if strict && err != nil { + if strict && err != nil && cmdName != "fragments" { cmd.Help() stderr.Fatalf("failed to parse filters: %v", err) } diff --git a/test/input/fragments.fa b/test/input/fragments.fa new file mode 100644 index 000000000..a4f5b722d --- /dev/null +++ b/test/input/fragments.fa @@ -0,0 +1,4 @@ +>gfp +ATGAGTAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCAACATACGGAAAACTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCAAGATACCCAGATCATATGAAACGGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAAAGAACTATATTTTTCAAAGATGACGGGAACTACAAGACACGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATAGAATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTTGGACACAAATTGGAATACAACTATAACTCACACAATGTATACATCATGGCAGACAAACAAAAGAATGGAATCAAAGTTAACTTCAAAATTAGACACAACATTGAAGATGGAAGCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCCACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGAGAGACCACATGGTCCTTCTTGAGTTTGTAACAGCTGCTGGGATTACACATGGCATGGATGAACTATACAAATAG +>backbone 
+TACTAGTAGCGGCCGCTGCAGTCCGGCAAAAAAGGGCAAGGTGTCACCACCCTGCCCTTTTTCTTTAAAACCGAAAAGATTACTTCGCGTTATGCAGGCTTCCTCGCTCACTGACTCGCTGCGCTCGGTCGTTCGGCTGCGGCGAGCGGTATCAGCTCACTCAAAGGCGGTAATACGGTTATCCACAGAATCAGGGGATAACGCAGGAAAGAACATGTGAGCAAAAGGCCAGCAAAAGGCCAGGAACCGTAAAAAGGCCGCGTTGCTGGCGTTTTTCCACAGGCTCCGCCCCCCTGACGAGCATCACAAAAATCGACGCTCAAGTCAGAGGTGGCGAAACCCGACAGGACTATAAAGATACCAGGCGTTTCCCCCTGGAAGCTCCCTCGTGCGCTCTCCTGTTCCGACCCTGCCGCTTACCGGATACCTGTCCGCCTTTCTCCCTTCGGGAAGCGTGGCGCTTTCTCATAGCTCACGCTGTAGGTATCTCAGTTCGGTGTAGGTCGTTCGCTCCAAGCTGGGCTGTGTGCACGAACCCCCCGTTCAGCCCGACCGCTGCGCCTTATCCGGTAACTATCGTCTTGAGTCCAACCCGGTAAGACACGACTTATCGCCACTGGCAGCAGCCACTGGTAACAGGATTAGCAGAGCGAGGTATGTAGGCGGTGCTACAGAGTTCTTGAAGTGGTGGCCTAACTACGGCTACACTAGAAGAACAGTATTTGGTATCTGCGCTCTGCTGAAGCCAGTTACCTTCGGAAAAAGAGTTGGTAGCTCTTGATCCGGCAAACAAACCACCGCTGGTAGCGGTGGTTTTTTTGTTTGCAAGCAGCAGATTACGCGCAGAAAAAAAGGATCTCAAGAAGATCCTTTGATCTTTTCTACGGGGTCTGACGCTCAGTGGAACGAAAACTCACGTTAAGGGATTTTGGTCATGAGATTATCAAAAAGGATCTTCACCTAGATCCTTTTAAATTAAAAATGAAGTTTTAAATCAATCTAAAGTATATATGAGTAAACTTGGTCTGACAGCTCGAGGCTTGGATTCTCACCAATAAAAAACGCCCGGCGGCAACCGAGCGTTCTGAACAAATCCAGATGGAGTTCTGAGGTCATTACTGGATCTATCAACAGGAGTCCAAGCGAGCTCGATATCAAATTACGCCCCGCCCTGCCACTCATCGCAGTACTGTTGTAATTCATTAAGCATTCTGCCGACATGGAAGCCATCACAAACGGCATGATGAACCTGAATCGCCAGCGGCATCAGCACCTTGTCGCCTTGCGTATAATATTTGCCCATGGTGAAAACGGGGGCGAAGAAGTTGTCCATATTGGCCACGTTTAAATCAAAACTGGTGAAACTCACCCAGGGATTGGCTGAGACGAAAAACATATTCTCAATAAACCCTTTAGGGAAATAGGCCAGGTTTTCACCGTAACACGCCACATCTTGCGAATATATGTGTAGAAACTGCCGGAAATCGTCGTGGTATTCACTCCAGAGCGATGAAAACGTTTCAGTTTGCTCATGGAAAACGGTGTAACAAGGGTGAACACTATCCCATATCACCAGCTCACCGTCTTTCATTGCCATACGAAATTCCGGATGAGCATTCATCAGGCGGGCAAGAATGTGAATAAAGGCCGGATAAAACTTGTGCTTATTTTTCTTTACGGTCTTTAAAAAGGCCGTAATATCCAGCTGAACGGTCTGGTTATAGGTACATTGAGCAACTGACTGAAATGCCTCAAAATGTTCTTTACGATGCCATTGGGATATATCAACGGTGGTATATCCAGTGATTTTTTTCTCCATTTTAGCTTCCTTAGCTCCTGAAAATCTCGATAACTCAAAAAATACGCCCGGTAGTGATCTTATTTCATTATGGTGAAAGTTGGAACCTCTTACGTGCCCGATCAACTCGAGTGCCACCTGACGTCTAAGAAACCATTATTATCATGACATTAACCTATAAAAATAGGCGTATCACGAGGC
AGAATTTCAGATAAAAAAAATCCTTAGCTTTCGCTAAGGATGATTTCTGGAATTCGCGGCCGCTTCTAGAG