Skip to content

Commit

Permalink
Refactor docs (#60)
Browse files Browse the repository at this point in the history
* added a couple of example functions to io_test.go.

* added Gff IO example tests.

* added gbk IO example tests.

* added JSON IO example tests.

* refactored parse functions to accept bytes instead of strings.

* adding sample json file for testing.

* added example tests for primer functions.

* make complementBaseRuneMap private.

* made defaultCodonTable maps private.

* made getCodonFrequency private.

* recommented BoothLeastRotation.

* added example test to hash_test.go.

* modified RotateSequence and made boothLeastRotation private.

* added example tests to translation_test.go.
TimothyStiles authored Oct 31, 2020
1 parent 67bc453 commit 9cd47eb
Showing 12 changed files with 423 additions and 46 deletions.
193 changes: 193 additions & 0 deletions data/sample.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
{
"Meta": {
"Name": "",
"GffVersion": "",
"RegionStart": 0,
"RegionEnd": 0,
"Size": 0,
"Type": "",
"GenbankDivision": "",
"Date": "",
"Definition": "Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p (AXL2) and Rev7p (REV7) genes, complete cds.",
"Accession": "U49845",
"Version": "U49845.1 GI:1293613",
"Keywords": ".",
"Organism": "Saccharomyces cerevisiae Eukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes; Saccharomycetales; Saccharomycetaceae; Saccharomyces.",
"Source": "Saccharomyces cerevisiae (baker's yeast)",
"Origin": "",
"Locus": {
"Name": "SCU49845",
"SequenceLength": "5028",
"MoleculeType": "DNA",
"GenBankDivision": "PLN",
"ModDate": "21-JUN-1999",
"SequenceCoding": "bp",
"Circular": false
},
"References": [
{
"Index": "1",
"Authors": "Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W.",
"Title": "Cloning and sequence of REV7, a gene whose function is required for DNA damage-induced mutagenesis in Saccharomyces cerevisiae",
"Journal": "Yeast 10 (11), 1503-1509 (1994)",
"PubMed": "7871890",
"Remark": "",
"Range": "(bases 1 to 5028)"
},
{
"Index": "2",
"Authors": "Roemer,T., Madden,K., Chang,J. and Snyder,M.",
"Title": "Selection of axial growth sites in yeast requires Axl2p, a novel plasma membrane glycoprotein",
"Journal": "Genes Dev. 10 (7), 777-793 (1996)",
"PubMed": "8846915",
"Remark": "",
"Range": "(bases 1 to 5028)"
},
{
"Index": "3",
"Authors": "Roemer,T.",
"Title": "Direct Submission",
"Journal": "Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, New Haven, CT, USA",
"PubMed": "",
"Remark": "",
"Range": "(bases 1 to 5028)"
}
],
"Primaries": null
},
"Features": [
{
"Name": "",
"Source": "",
"Type": "source",
"Start": 1,
"End": 5028,
"Complement": false,
"FivePrimePartial": false,
"ThreePrimePartial": false,
"Score": "",
"Strand": "",
"Phase": "",
"Attributes": {
"chromosome": "IX",
"db_xref": "taxon:4932",
"map": "9",
"organism": "Saccharomyces cerevisiae"
},
"Location": "1..5028",
"Sequence": ""
},
{
"Name": "",
"Source": "",
"Type": "CDS",
"Start": 1,
"End": 206,
"Complement": false,
"FivePrimePartial": true,
"ThreePrimePartial": false,
"Score": "",
"Strand": "",
"Phase": "",
"Attributes": {
"codon_start": "3",
"db_xref": "GI:1293614",
"product": "TCP1-beta",
"protein_id": "AAA98665.1",
"translation": "SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM"
},
"Location": "\u003c1..206",
"Sequence": ""
},
{
"Name": "",
"Source": "",
"Type": "gene",
"Start": 687,
"End": 3158,
"Complement": false,
"FivePrimePartial": false,
"ThreePrimePartial": false,
"Score": "",
"Strand": "",
"Phase": "",
"Attributes": {
"gene": "AXL2"
},
"Location": "687..3158",
"Sequence": ""
},
{
"Name": "",
"Source": "",
"Type": "CDS",
"Start": 687,
"End": 3158,
"Complement": false,
"FivePrimePartial": false,
"ThreePrimePartial": false,
"Score": "",
"Strand": "",
"Phase": "",
"Attributes": {
"codon_start": "1",
"db_xref": "GI:1293615",
"function": "required for axial budding pattern of S.cerevisiae",
"gene": "AXL2",
"note": "plasma membrane glycoprotein",
"product": "Axl2p",
"protein_id": "AAA98666.1",
"translation": "MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFTFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQVKDIHGRIPEML"
},
"Location": "687..3158",
"Sequence": ""
},
{
"Name": "",
"Source": "",
"Type": "gene",
"Start": 3300,
"End": 4037,
"Complement": true,
"FivePrimePartial": false,
"ThreePrimePartial": false,
"Score": "",
"Strand": "",
"Phase": "",
"Attributes": {
"gene": "REV7"
},
"Location": "complement(3300..4037)",
"Sequence": ""
},
{
"Name": "",
"Source": "",
"Type": "CDS",
"Start": 3300,
"End": 4037,
"Complement": true,
"FivePrimePartial": false,
"ThreePrimePartial": false,
"Score": "",
"Strand": "",
"Phase": "",
"Attributes": {
"codon_start": "1",
"db_xref": "GI:1293616",
"gene": "REV7",
"product": "Rev7p",
"protein_id": "AAA98667.1",
"translation": "MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESIFGSLF"
},
"Location": "complement(3300..4037)",
"Sequence": ""
}
],
"Sequence": {
"Description": "",
"Hash": "",
"HashFunction": "",
"Sequence": "gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgtaacatatttaggatatacctcgaaaataataaaccgccacactgtcattattataattagaaacagaacgcaaaaattatccactatataattcaaagacgcgaaaaaaaaagaacaacgcgtcatagaacttttggcaattcgcgtcacaaataaattttggcaacttatgtttcctcttcgagcagtactcgagccctgtctcaagaatgtaataatacccatcgtaggtatggttaaagatagcatctccacaacctcaaagctccttgccgagagtcgccctcctttgtcgagtaattttcacttttcatatgagaacttattttcttattctttactctcacatcctgtagtgattgacactgcaacagccaccatcactagaagaacagaacaattacttaatagaaaaattatatcttcctcgaaacgatttcctgcttccaacatctacgtatatcaagaagcattcacttaccatgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtgattatacgcaacgatattttgcttaattttattttcctgttttattttttattagtggtttacagataccctatattttatttagtttttatacttagagacatttaattttaattccattcttcaaatttcatttttgcacttaaaacaaagatccaaaaatgctctcgccctcttcatattgagaatacactccattcaaaattttgtcgtcaccgctgattaatttttcactaaactgatgaataatcaaaggccccacgtcagaaccgactaaagaagtgagttttattttaggaggttgaaaaccattattgtctggtaaattttcatcttcttgacatttaacccagtttgaatccctttcaatttctgctttttcctccaaactatcgaccctcctgtttctgtccaacttatgtcctagttccaattcgatcgcattaataactgcttcaaatgttattgtgtcatcgttgactttaggtaatttctccaaatgcataatcaaactatttaaggaagatcggaattcgtcgaacacttcagtttccgtaatgatctgatcgtctttatccacatgttgtaattcactaaaatctaaaacgtatttttcaatgcataaatcgttctttttattaataatgcagatggaaaatctgtaaacgtgcgttaatttagaaagaacatccagtataagttcttctatatagtcaattaaagcaggatgcctattaatgggaacgaactgcggcaagttgaatgactggtaagtagtgtagtcgaatgactgaggtgggtatacatttctataaaataaaatcaaattaatgtagcattttaagtataccctcagccacttctctacccatctattcataaagctgacgcaacgattactattttttttttcttcttggatctcagtcgtcgcaaaaacgtataccttctttttccgaccttttttttagctttctggaaaagtttatattagttaaacagggtctagtcttagtgtgaaagctagtggtttcgattgactgatattaagaaagtggaaattaaattagtagtgtagacgtatatgcatatgtatttctcgcctgtttatgtttctacgtacttttgatttatagcaaggggaaaagaaatacatactattttttggtaaaggtgaaagcataatgtaaaagctagaataaaatggacgaaataaagagaggcttagttcatcttttttccaaaaagcacccaatgataataactaaaatgaaaaggatttgccatctgtcagcaacatcagttgtgtgagcaataataaaatcatcacctccgttgcctttagcgcgtttgtcgtttgtatcttccgtaattttagtcttatcaatgggaatcataaattttccaatgaattagcaatttcgtccaattctttttgagcttcttcatatttgctttggaattcttcgcacttcttttcccattcatctctttcttcttccaaagcaacgatccttctacccatttgctcagagttcaaatcggcctctttcagtttatccattgcttccttcagtttggcttcactgtcttctagctgttgttctagatcctggtttttcttggtgtagttctcattattagatctcaagttattggagtcttcagccaattgctttgtatcagacaattgactctctaacttctccacttcactgtcgagttgctcgtttttagcggacaaagatttaatctcgttttctttttcagtgttagattgctctaattctttgagctgttctctcagctcctcatatttttcttgccatgactcagattctaattttaagctattcaatttctctttgatc"
}
}
19 changes: 13 additions & 6 deletions hash.go
Original file line number Diff line number Diff line change
@@ -36,10 +36,11 @@ import (
// BLAKE2b_384 // import golang.org/x/crypto/blake2b
// BLAKE2b_512 // import golang.org/x/crypto/blake2b

// BoothLeastRotation gets the least rotation of a circular string.
// https://en.wikipedia.org/wiki/Lexicographically_minimal_string_rotation
// this is generally over commented but I'm keeping it this way for now. - Tim
func BoothLeastRotation(sequence string) int {
// boothLeastRotation gets the least rotation of a circular string.
func boothLeastRotation(sequence string) int {

// https://en.wikipedia.org/wiki/Lexicographically_minimal_string_rotation
// this is generally over commented but I'm keeping it this way for now. - Tim

// first concatenate the sequence to itself to avoid modular arithmateic
sequence += sequence // maybe do this as a buffer just for speed? May get annoying with larger sequences.
@@ -89,8 +90,14 @@ func BoothLeastRotation(sequence string) int {

// RotateSequence rotates circular sequences to deterministic point.
func RotateSequence(sequence string) string {
rotationIndex := BoothLeastRotation(sequence)
concatenatedSequence := sequence + sequence
rotationIndex := boothLeastRotation(sequence)
var sequenceBuilder strings.Builder

// writing the same sequence twice. using build incase of very long circular genome.
sequenceBuilder.WriteString(sequence)
sequenceBuilder.WriteString(sequence)

concatenatedSequence := sequenceBuilder.String()
sequence = concatenatedSequence[rotationIndex : rotationIndex+len(sequence)]
return sequence
}
16 changes: 16 additions & 0 deletions hash_test.go
Original file line number Diff line number Diff line change
@@ -9,6 +9,13 @@ import (
"lukechampine.com/blake3"
)

func ExampleHash() {
puc19 := ReadGbk("data/puc19.gbk")
fmt.Println(puc19.Hash(blake3.New(32, nil))) // passing new hash.Hash struct to Hasher

// output: 4b0616d1b3fc632e42d78521deb38b44fba95cca9fde159e01cd567fa996ceb9
}

func TestHashRegression(t *testing.T) {
puc19GbkBlake3Hash := "4b0616d1b3fc632e42d78521deb38b44fba95cca9fde159e01cd567fa996ceb9"
puc19 := ReadGbk("data/puc19.gbk")
@@ -28,6 +35,15 @@ func TestHashRegression(t *testing.T) {
}
}

func ExampleRotateSequence() {
sequence := ReadGbk("data/puc19.gbk")
sequenceLength := len(sequence.Sequence)
testSequence := sequence.Sequence[sequenceLength/2:] + sequence.Sequence[0:sequenceLength/2]

fmt.Println(RotateSequence(sequence.Sequence) == RotateSequence(testSequence))
// output: true
}

func TestLeastRotation(t *testing.T) {
sequence := ReadGbk("data/puc19.gbk")
var sequenceBuffer bytes.Buffer
20 changes: 11 additions & 9 deletions io.go
Original file line number Diff line number Diff line change
@@ -139,7 +139,9 @@ GFF specific IO related things begin here.
******************************************************************************/

// ParseGff Takes in a string representing a gffv3 file and parses it into an Sequence object.
func ParseGff(gff string) Sequence {
func ParseGff(gffBytes []byte) Sequence {

gff := string(gffBytes)
sequence := Sequence{}

lines := strings.Split(gff, "\n")
@@ -327,7 +329,7 @@ func ReadGff(path string) Sequence {
if err != nil {
// return 0, fmt.Errorf("Failed to open file %s for unpack: %s", gzFilePath, err)
} else {
sequence = ParseGff(string(file))
sequence = ParseGff(file)
}
return sequence
}
@@ -392,8 +394,8 @@ FASTA specific IO related things begin here.
******************************************************************************/

// ParseFASTA parses a Sequence struct from a FASTA file and adds appropriate pointers to the structs.
func ParseFASTA(fasta string) Sequence {

func ParseFASTA(fastaBytes []byte) Sequence {
fasta := string(fastaBytes)
var sequence Sequence
var feature Feature
var features []Feature
@@ -500,8 +502,8 @@ func ReadFASTA(path string) Sequence {
if err != nil {
// return 0, fmt.Errorf("Failed to open file %s for unpack: %s", gzFilePath, err)
}
annotatedSequenceArray := ParseFASTA(string(file))
return annotatedSequenceArray
sequence := ParseFASTA(file)
return sequence
}

// WriteFASTA writes a Sequence struct out to FASTA.
@@ -522,8 +524,9 @@ GBK specific IO related things begin here.
******************************************************************************/

// ParseGbk takes in a string representing a gbk/gb/genbank file and parses it into an Sequence object.
func ParseGbk(gbk string) Sequence {
func ParseGbk(gbkBytes []byte) Sequence {

gbk := string(gbkBytes)
lines := strings.Split(gbk, "\n")

// Create meta struct
@@ -716,8 +719,7 @@ func ReadGbk(path string) Sequence {
if err != nil {
// return 0, fmt.Errorf("Failed to open file %s for unpack: %s", gzFilePath, err)
} else {
gbkString := string(file)
sequence = ParseGbk(gbkString)
sequence = ParseGbk(file)

}
return sequence
103 changes: 101 additions & 2 deletions io_test.go
Original file line number Diff line number Diff line change
@@ -29,6 +29,45 @@ Gff related tests and benchmarks begin here.
******************************************************************************/

func ExampleReadGff() {

sequence := ReadGff("data/ecoli-mg1655.gff")
fmt.Println(sequence.Meta.Name)
// Output: U00096.3
}

func ExampleParseGff() {
file, _ := ioutil.ReadFile("data/ecoli-mg1655.gff")
sequence := ParseGff(file)

fmt.Println(sequence.Meta.Name)
// Output: U00096.3
}

func ExampleBuildGff() {

sequence := ReadGff("data/ecoli-mg1655.gff")
gffBytes := BuildGff(sequence)
reparsedSequence := ParseGff(gffBytes)

fmt.Println(reparsedSequence.Meta.Name)
// Output: U00096.3

}

func ExampleWriteGff() {
sequence := ReadGff("data/ecoli-mg1655.gff")
WriteGff(sequence, "data/test.gff")
testSequence := ReadGff("data/test.gff")

os.Remove("data/test.gff")

fmt.Println(testSequence.Meta.Name)
// Output: U00096.3
}

// TODO should delete output files.

func TestGffIO(t *testing.T) {
testInputPath := "data/ecoli-mg1655.gff"
testOutputPath := "data/test.gff"
@@ -65,7 +104,7 @@ func TestGffIO(t *testing.T) {

func BenchmarkReadGff(b *testing.B) {
for i := 0; i < b.N; i++ {
ParseGff("data/ecoli-mg1655.gff")
ReadGff("data/ecoli-mg1655.gff")
}
}

@@ -87,6 +126,40 @@ Gbk/gb/genbank related benchmarks begin here.
******************************************************************************/

func ExampleReadGbk() {
sequence := ReadGbk("data/puc19.gbk")
fmt.Println(sequence.Meta.Locus.ModificationDate)
// Output: 22-OCT-2019
}

func ExampleParseGbk() {
file, _ := ioutil.ReadFile("data/puc19.gbk")
sequence := ParseGbk(file)

fmt.Println(sequence.Meta.Locus.ModificationDate)
// Output: 22-OCT-2019
}

func ExampleBuildGbk() {
sequence := ReadGbk("data/puc19.gbk")
gbkBytes := BuildGbk(sequence)
testSequence := ParseGbk(gbkBytes)

fmt.Println(testSequence.Meta.Locus.ModificationDate)
// Output: 22-OCT-2019
}

func ExampleWriteGbk() {
sequence := ReadGbk("data/puc19.gbk")
WriteGbk(sequence, "data/test.gbk")
testSequence := ReadGbk("data/test.gbk")

os.Remove("data/test.gbk")

fmt.Println(testSequence.Meta.Locus.ModificationDate)
// Output: 22-OCT-2019
}

func TestGbkIO(t *testing.T) {
gbk := ReadGbk("data/puc19.gbk")
WriteGbk(gbk, "data/puc19gbktest.gbk")
@@ -202,6 +275,32 @@ JSON related tests begin here.
******************************************************************************/

func ExampleReadJSON() {
sequence := ReadJSON("data/sample.json")

fmt.Println(sequence.Meta.Source)
//output: Saccharomyces cerevisiae (baker's yeast)
}

func ExampleParseJSON() {
file, _ := ioutil.ReadFile("data/sample.json")
sequence := ParseJSON(file)

fmt.Println(sequence.Meta.Source)
//output: Saccharomyces cerevisiae (baker's yeast)
}

func ExampleWriteJSON() {
sequence := ReadJSON("data/sample.json")
WriteJSON(sequence, "data/test.json")
testSequence := ReadJSON("data/test.json")

os.Remove("data/test.json")

fmt.Println(testSequence.Meta.Source)
//output: Saccharomyces cerevisiae (baker's yeast)
}

func TestJSONIO(t *testing.T) {
testSequence := ReadGbk("data/bsub.gbk")
WriteJSON(testSequence, "data/test.json")
@@ -248,7 +347,7 @@ func ExampleReadFASTA() {

func ExampleParseFASTA() {
file, _ := ioutil.ReadFile("data/base.fasta")
sequence := ParseFASTA(string(file))
sequence := ParseFASTA(file)

fmt.Println(sequence.Features[0].Description)
// Output: gi|5524211|gb|AAD44166.1| cytochrome b [Elephas maximus maximus]
20 changes: 8 additions & 12 deletions poly/commands.go
Original file line number Diff line number Diff line change
@@ -292,9 +292,7 @@ func optimizeCommand(c *cli.Context) error {

if isNumeric(c.String("ct")) {
codonTableNumber, _ := strconv.Atoi(c.String("ct"))
codonTable = poly.DefaultCodonTablesByNumber[codonTableNumber]
} else {
codonTable = poly.DefaultCodonTablesByName[c.String("ct")]
codonTable = poly.GetCodonTable(codonTableNumber)
}

// if a file exists to weigh the table. Weigh it.
@@ -347,9 +345,7 @@ func translateCommand(c *cli.Context) error {

if isNumeric(c.String("ct")) {
codonTableNumber, _ := strconv.Atoi(c.String("ct"))
codonTable = poly.DefaultCodonTablesByNumber[codonTableNumber]
} else {
codonTable = poly.DefaultCodonTablesByName[c.String("ct")]
codonTable = poly.GetCodonTable(codonTableNumber)
}

// uncomment below to parse sequence from pipe
@@ -364,7 +360,7 @@ func translateCommand(c *cli.Context) error {
}

// a simple helper function to convert an *os.File type into a string.
func stdinToString(file io.Reader) string {
func stdinToBytes(file io.Reader) []byte {
var stringBuffer bytes.Buffer
reader := bufio.NewReader(file)
for {
@@ -374,7 +370,7 @@ func stdinToString(file io.Reader) string {
}
stringBuffer.WriteRune(input)
}
return stringBuffer.String()
return stringBuffer.Bytes()
}

// a simple helper function to remove duplicates from a list of strings.
@@ -420,13 +416,13 @@ func parseStdin(c *cli.Context) poly.Sequence {
var sequence poly.Sequence
// logic for determining input format, then parses accordingly.
if c.String("i") == "json" {
json.Unmarshal([]byte(stdinToString(c.App.Reader)), &sequence)
json.Unmarshal([]byte(stdinToBytes(c.App.Reader)), &sequence)
} else if c.String("i") == "gbk" || c.String("i") == "gb" {
sequence = poly.ParseGbk(stdinToString(c.App.Reader))
sequence = poly.ParseGbk(stdinToBytes(c.App.Reader))
} else if c.String("i") == "gff" {
sequence = poly.ParseGff(stdinToString(c.App.Reader))
sequence = poly.ParseGff(stdinToBytes(c.App.Reader))
} else if c.String("i") == "string" {
sequence.Sequence = stdinToString(c.App.Reader)
sequence.Sequence = string(stdinToBytes(c.App.Reader))
}
return sequence
}
2 changes: 1 addition & 1 deletion poly/commands_test.go
Original file line number Diff line number Diff line change
@@ -200,7 +200,7 @@ func TestOptimizeString(t *testing.T) {
// should return codon optimized sequence
optimizeOutputString := strings.TrimSpace(writeBuffer.String())

translation := poly.Translate(optimizeOutputString, poly.DefaultCodonTablesByNumber[1])
translation := poly.Translate(optimizeOutputString, poly.GetCodonTable(1))

if translation != gfpTranslation {
t.Errorf("TestOptimizeCommand for string output has failed. Returned %q, want %q", translation, gfpTranslation)
2 changes: 1 addition & 1 deletion poly/main.go
Original file line number Diff line number Diff line change
@@ -178,7 +178,7 @@ func application() *cli.App {
&cli.StringFlag{
Name: "ct",
Aliases: []string{"--codon-table"},
Value: "Standard",
Value: "1",
Usage: "Specify codon table used for organism. Defaults to Standard NCBI table.",
},
&cli.BoolFlag{
34 changes: 33 additions & 1 deletion primers_test.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,19 @@
package poly

import (
"fmt"
"math"
"testing"
)

func ExampleMarmurDoty() {
sequenceString := "ACGTCCGGACTT"
meltingTemp := MarmurDoty(sequenceString)

fmt.Println(meltingTemp)
// output: 31
}

func TestMarmurDoty(t *testing.T) {
testSeq := "ACGTCCGGACTT"
expectedTM := 31.0
@@ -13,6 +22,19 @@ func TestMarmurDoty(t *testing.T) {
}
}

func ExampleSantaLucia() {

sequenceString := "ACGATGGCAGTAGCATGC" //"GTAAAACGACGGCCAGT" // M13 fwd
testCPrimer := 0.1e-6 // primer concentration
testCNa := 350e-3 // salt concentration
testCMg := 0.0 // magnesium concentration
expectedTM := 62.7 // roughly what we're expecting with a margin of error
meltingTemp, _, _ := SantaLucia(sequenceString, testCPrimer, testCNa, testCMg)
withinMargin := math.Abs(expectedTM-meltingTemp)/expectedTM >= 0.02 // checking margin of error

fmt.Println(withinMargin)
// output: false
}
func TestSantaLucia(t *testing.T) {
testSeq := "ACGATGGCAGTAGCATGC" //"GTAAAACGACGGCCAGT" // M13 fwd
testCPrimer := 0.1e-6
@@ -24,7 +46,17 @@ func TestSantaLucia(t *testing.T) {
}
}

func TestCalcTM(t *testing.T) {
func ExampleMeltingTemp() {
sequenceString := "GTAAAACGACGGCCAGT" // M13 fwd
expectedTM := 52.8
meltingTemp := MeltingTemp(sequenceString)
withinMargin := math.Abs(expectedTM-meltingTemp)/expectedTM >= 0.02

fmt.Println(withinMargin)
// output: false
}

func TestMeltingTemp(t *testing.T) {
testSeq := "GTAAAACGACGGCCAGT" // M13 fwd
expectedTM := 52.8
if calcTM := MeltingTemp(testSeq); math.Abs(expectedTM-calcTM)/expectedTM >= 0.02 {
6 changes: 3 additions & 3 deletions sequence.go
Original file line number Diff line number Diff line change
@@ -5,8 +5,8 @@ import (
"strings"
)

// ComplementBaseRuneMap provides 1:1 mapping between bases and their complements
var ComplementBaseRuneMap = map[rune]rune{
// complementBaseRuneMap provides 1:1 mapping between bases and their complements
var complementBaseRuneMap = map[rune]rune{
65: 84, // A -> T
66: 86, // B -> V
67: 71, // C -> G
@@ -65,7 +65,7 @@ func ReverseComplement(sequence string) string {

// ComplementBase accepts a base pair and returns its complement base pair
func ComplementBase(basePair rune) rune {
return ComplementBaseRuneMap[basePair]
return complementBaseRuneMap[basePair]
}

// getFeatureSequence takes a feature and location object and returns a sequence string.
17 changes: 11 additions & 6 deletions transformations.go
Original file line number Diff line number Diff line change
@@ -116,7 +116,7 @@ func Optimize(aminoAcids string, codonTable CodonTable) string {
func (codonTable CodonTable) CreateWeights(sequence string) CodonTable {

sequence = strings.ToUpper(sequence)
codonFrequencyMap := GetCodonFrequency(sequence)
codonFrequencyMap := getCodonFrequency(sequence)

for aminoAcidIndex, aminoAcid := range codonTable.AminoAcids {

@@ -146,8 +146,8 @@ func (codonTable CodonTable) CreateWeights(sequence string) CodonTable {
return codonTable
}

// GetCodonFrequency takes a DNA sequence and returns a hashmap of its codons and their frequencies.
func GetCodonFrequency(sequence string) map[string]int {
// getCodonFrequency takes a DNA sequence and returns a hashmap of its codons and their frequencies.
func getCodonFrequency(sequence string) map[string]int {

codonFrequencyHashMap := map[string]int{}
var currentCodon strings.Builder
@@ -285,8 +285,13 @@ func generateCodonTable(aminoAcids, starts string) CodonTable {
return CodonTable{startCodons, stopCodons, aminoAcidSlice}
}

// DefaultCodonTablesByNumber stores all codon tables published by NCBI https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi using numbered indeces.
var DefaultCodonTablesByNumber = map[int]CodonTable{
// GetCodonTable takes the index of desired NCBI codon table and returns it.
func GetCodonTable(index int) CodonTable {
return defaultCodonTablesByNumber[index]
}

// defaultCodonTablesByNumber stores all codon tables published by NCBI https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi using numbered indeces.
var defaultCodonTablesByNumber = map[int]CodonTable{
1: generateCodonTable("FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "---M------**--*----M---------------M----------------------------"),
2: generateCodonTable("FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG", "----------**--------------------MMMM----------**---M------------"),
3: generateCodonTable("FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**----------------------MM---------------M------------"),
@@ -314,7 +319,7 @@ var DefaultCodonTablesByNumber = map[int]CodonTable{
33: generateCodonTable("FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG", "---M-------*-------M---------------M---------------M------------")}

// DefaultCodonTablesByName stores all codon tables published by NCBI https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi using named indeces.
var DefaultCodonTablesByName = map[string]CodonTable{
var defaultCodonTablesByName = map[string]CodonTable{
"Standard": generateCodonTable("FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "---M------**--*----M---------------M----------------------------"), // 1
"VertebrateMitochondrial": generateCodonTable("FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG", "----------**--------------------MMMM----------**---M------------"), // 2
"YeastMitochondrial": generateCodonTable("FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**----------------------MM---------------M------------"), // 3
37 changes: 32 additions & 5 deletions transformations_test.go
Original file line number Diff line number Diff line change
@@ -1,22 +1,49 @@
package poly

import (
"fmt"
"strings"
"testing"
)

func ExampleTranslate() {

gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*"
gfpDnaSequence := "ATGGCTAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCTACATACGGAAAGCTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCCCGTTATCCGGATCATATGAAACGGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAACGCACTATATCTTTCAAAGATGACGGGAACTACAAGACGCGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATCGTATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTCGGACACAAACTCGAGTACAACTATAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTAACTTCAAAATTCGCCACAACATTGAAGATGGATCCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCGACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGCGTGACCACATGGTCCTTCTTGAGTTTGTAACTGCTGCTGGGATTACACATGGCATGGATGAGCTCTACAAATAA"
testTranslation := Translate(gfpDnaSequence, GetCodonTable(11)) // need to specify which codons map to which amino acids per NCBI table

fmt.Println(gfpTranslation == testTranslation)
// output: true
}

func TestTranslation(t *testing.T) {
gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*"
gfpDnaSequence := "ATGGCTAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCTACATACGGAAAGCTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCCCGTTATCCGGATCATATGAAACGGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAACGCACTATATCTTTCAAAGATGACGGGAACTACAAGACGCGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATCGTATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTCGGACACAAACTCGAGTACAACTATAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTAACTTCAAAATTCGCCACAACATTGAAGATGGATCCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCGACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGCGTGACCACATGGTCCTTCTTGAGTTTGTAACTGCTGCTGGGATTACACATGGCATGGATGAGCTCTACAAATAA"
if got := Translate(gfpDnaSequence, DefaultCodonTablesByNumber[11]); got != gfpTranslation {
if got := Translate(gfpDnaSequence, GetCodonTable(11)); got != gfpTranslation {
t.Errorf("TestTranslation has failed. Translate has returned %q, want %q", got, gfpTranslation)
}

}

func ExampleOptimize() {

gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*"
codonTable := GetCodonTable(11)

sequence := ReadGbk("data/bsub.gbk")
rawSequence := sequence.Sequence
codonTable.CreateWeights(rawSequence)

optimizedSequence := Optimize(gfpTranslation, codonTable)
optimizedSequenceTranslation := Translate(optimizedSequence, codonTable)

fmt.Println(optimizedSequenceTranslation == gfpTranslation)
// output: true
}

func TestOptimize(t *testing.T) {
gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*"
codonTable := DefaultCodonTablesByNumber[11]
codonTable := GetCodonTable(11)

sequence := ReadGbk("data/bsub.gbk")
rawSequence := sequence.Sequence
@@ -33,7 +60,7 @@ func TestOptimize(t *testing.T) {

func TestGetCodonFrequency(t *testing.T) {

translationTable := DefaultCodonTablesByNumber[11].generateTranslationTable()
translationTable := GetCodonTable(11).generateTranslationTable()

var codons strings.Builder

@@ -51,7 +78,7 @@ func TestGetCodonFrequency(t *testing.T) {
t.Errorf("TestGetCodonFrequency has failed. aggregrated codon string is not the correct length.")
}

codonFrequencyHashMap := GetCodonFrequency(codonString)
codonFrequencyHashMap := getCodonFrequency(codonString)

if len(codonFrequencyHashMap) != 64 {
t.Errorf("TestGetCodonFrequency has failed. codonFrequencyHashMap does not contain every codon.")
@@ -63,7 +90,7 @@ func TestGetCodonFrequency(t *testing.T) {
}
}

doubleCodonFrequencyHashMap := GetCodonFrequency(codonString + codonString)
doubleCodonFrequencyHashMap := getCodonFrequency(codonString + codonString)

if len(doubleCodonFrequencyHashMap) != 64 {
t.Errorf("TestGetCodonFrequency has failed. doubleCodonFrequencyHashMap does not contain every codon.")

0 comments on commit 9cd47eb

Please sign in to comment.