diff --git a/examples/ontology.pct/index.html b/examples/ontology.pct/index.html index 6c12497..4b6da43 100644 --- a/examples/ontology.pct/index.html +++ b/examples/ontology.pct/index.html @@ -1090,7 +1090,7 @@
'0.0.26'+
'0.0.27'
/home/runner/work/picea/picea/picea/ontology.py:32: UserWarning: Accessed GO term by alt ID GO:0008150, returning main GO term with ID GO:0007582 +/home/runner/work/picea/picea/picea/ontology.py:32: UserWarning: Accessed GO term by alt ID GO:0008150, returning main GO term with ID GO:0000004 warnings.warn(f"Accessed GO term by alt ID {ID}, " f"returning main GO term with ID {alt_id}") /home/runner/work/picea/picea/picea/ontology.py:32: UserWarning: Accessed GO term by alt ID GO:0003674, returning main GO term with ID GO:0005554 warnings.warn(f"Accessed GO term by alt ID {ID}, " f"returning main GO term with ID {alt_id}") @@ -1532,7 +1532,7 @@Ontology.pct
[('GO:0009791', ['post-embryonic development'], 5), ('GO:0032501', ['multicellular organismal process'], 1), - ('GO:0007582', ['biological_process'], 0), + ('GO:0000004', ['biological_process'], 0), ('GO:0007275', ['multicellular organism development'], 4), ('GO:0048856', ['anatomical structure development'], 2), ('GO:0032502', ['developmental process'], 1), @@ -1582,7 +1582,7 @@Ontology.pct
<picea.ontology.Ontology at 0x7f3ebc0e9c90>+
<picea.ontology.Ontology at 0x7f4598e2cf10>
Preparing metadata (pyproject.toml) ... -+
Preparing metadata (pyproject.toml) ... - done +
done -Building wheels for collected packages: pygraphviz +Building wheels for collected packages: pygraphviz
{'_ID': 'GO:0010431', '_original_ID': 'GO:0010431', - '_container': <picea.ontology.Ontology at 0x7f3eae711150>, + '_container': <picea.ontology.Ontology at 0x7f4598e2cac0>, '_children': ['GO:0010162', 'GO:1990068'], '_parents': ['GO:0003006', 'GO:0021700', 'GO:0048609', 'GO:0048316'], 'name': ['seed maturation'], @@ -2075,7 +2075,7 @@Ontology.pct
{'_ID': 'GO:0048316', '_original_ID': 'GO:0048316', - '_container': <picea.ontology.Ontology at 0x7f3eae711150>, + '_container': <picea.ontology.Ontology at 0x7f4598e2cac0>, '_children': ['GO:0009793', 'GO:0009960', 'GO:0010214', @@ -2134,7 +2134,7 @@Ontology.pct
[('GO:0036422', ['heptaprenyl diphosphate synthase activity']), - ('GO:0000010', ['heptaprenyl diphosphate synthase activity']), - ('GO:0000022', ['mitotic spindle elongation']), +[('GO:0000010', ['heptaprenyl diphosphate synthase activity']), + ('GO:0036422', ['heptaprenyl diphosphate synthase activity']), ('GO:1905121', ['mitotic spindle elongation']), - ('GO:0000049', ['tRNA binding']), + ('GO:0000022', ['mitotic spindle elongation']), ('GO:0000946', ['tRNA binding']), - ('GO:0006871', ['urea cycle']), + ('GO:0000049', ['tRNA binding']), ('GO:0000050', ['urea cycle']), ('GO:0006594', ['urea cycle']), - ('GO:0000057', ['ribosomal large subunit export from nucleus']), + ('GO:0006871', ['urea cycle']), ('GO:0000055', ['ribosomal large subunit export from nucleus']), - ('GO:0000056', ['ribosomal small subunit export from nucleus']), + ('GO:0000057', ['ribosomal large subunit export from nucleus']), ('GO:0000058', ['ribosomal small subunit export from nucleus']), - ('GO:0000070', ['mitotic sister chromatid segregation']), + ('GO:0000056', ['ribosomal small subunit export from nucleus']), ('GO:0016359', ['mitotic sister chromatid segregation']), - ('GO:0030475', ['initial mitotic spindle pole body separation']), + ('GO:0000070', ['mitotic sister chromatid segregation']), ('GO:0000073', ['initial mitotic spindle pole body separation']), - ('GO:0072395', ['cell cycle checkpoint signaling']), - ('GO:0071779', ['cell cycle checkpoint signaling']), - ('GO:0000075', ['cell cycle checkpoint signaling']), - ('GO:0072404', ['cell cycle checkpoint signaling']), + ('GO:0030475', ['initial mitotic spindle pole body separation']), ('GO:0031576', ['cell cycle checkpoint signaling']), + ('GO:0071779', ['cell cycle checkpoint signaling']), ('GO:0072407', ['cell cycle checkpoint signaling']), + ('GO:0072404', ['cell cycle checkpoint signaling']), + ('GO:0000075', ['cell cycle checkpoint signaling']), + ('GO:0072395', ['cell cycle checkpoint signaling']), ('GO:0000076', ['DNA 
replication checkpoint signaling']), ('GO:0072437', ['DNA replication checkpoint signaling']), ('GO:0072422', ['DNA damage checkpoint signaling']), @@ -2236,38 +2236,38 @@Ontology.pct
['S-adenosyl-L-methionine transmembrane transporter activity']), ('GO:0000095', ['S-adenosyl-L-methionine transmembrane transporter activity']), - ('GO:0000100', ['S-methylmethionine transmembrane transporter activity']), ('GO:0015178', ['S-methylmethionine transmembrane transporter activity']), + ('GO:0000100', ['S-methylmethionine transmembrane transporter activity']), ('GO:0000103', ['sulfate assimilation']), ('GO:0019378', ['sulfate assimilation']), - ('GO:0019739', ['succinate dehydrogenase activity']), ('GO:0000104', ['succinate dehydrogenase activity']), + ('GO:0019739', ['succinate dehydrogenase activity']), + ('GO:0045816', ['negative regulation of transcription by RNA polymerase II']), ('GO:0000122', ['negative regulation of transcription by RNA polymerase II']), ('GO:0010553', ['negative regulation of transcription by RNA polymerase II']), - ('GO:0045816', ['negative regulation of transcription by RNA polymerase II']), - ('GO:0000124', ['SAGA complex']), ('GO:0030914', ['SAGA complex']), + ('GO:0000124', ['SAGA complex']), ('GO:0000125', ['SAGA complex']), - ('GO:0036281', ['flocculation']), - ('GO:0032128', ['flocculation']), - ('GO:0043689', ['flocculation']), + ('GO:0036282', ['flocculation']), ('GO:0000501', ['flocculation']), + ('GO:0032128', ['flocculation']), + ('GO:0036281', ['flocculation']), ('GO:0043690', ['flocculation']), ('GO:0000128', ['flocculation']), - ('GO:0036282', ['flocculation']), - ('GO:0030607', ['establishment of mitotic spindle orientation']), + ('GO:0043689', ['flocculation']), ('GO:0030609', ['establishment of mitotic spindle orientation']), ('GO:0000132', ['establishment of mitotic spindle orientation']), + ('GO:0030607', ['establishment of mitotic spindle orientation']), ('GO:0030898', ['microfilament motor activity']), ('GO:0000146', ['microfilament motor activity']), ('GO:0016548', ['rRNA modification']), ('GO:0000154', ['rRNA modification']), - ('GO:0009096', ['tryptophan biosynthetic process']), ('GO:0000162', 
['tryptophan biosynthetic process']), + ('GO:0009096', ['tryptophan biosynthetic process']), ('GO:0000165', ['MAPK cascade']), ('GO:0007255', ['MAPK cascade']), - ('GO:0000179', ['rRNA (adenine-N6,N6-)-dimethyltransferase activity']), ('GO:0043790', ['rRNA (adenine-N6,N6-)-dimethyltransferase activity']), + ('GO:0000179', ['rRNA (adenine-N6,N6-)-dimethyltransferase activity']), ('GO:0000212', ['meiotic spindle organization']), ('GO:0043147', ['meiotic spindle organization']), ('GO:0000215', ["tRNA 2'-phosphotransferase activity"]), @@ -2275,67 +2275,67 @@Ontology.pct
('GO:0000355', ['spliceosomal tri-snRNP complex assembly']), ('GO:0000244', ['spliceosomal tri-snRNP complex assembly']), ('GO:0000351', ['spliceosomal tri-snRNP complex assembly']), - ('GO:0050576', ['3-keto sterol reductase activity']), ('GO:0000253', ['3-keto sterol reductase activity']), + ('GO:0050576', ['3-keto sterol reductase activity']), ('GO:0005051', ['peroxisome targeting sequence binding']), ('GO:0000268', ['peroxisome targeting sequence binding']), ('GO:0000270', ['peptidoglycan metabolic process']), ('GO:0009284', ['peptidoglycan metabolic process']), - ('GO:0044244', ['polysaccharide catabolic process']), ('GO:0000272', ['polysaccharide catabolic process']), - ('GO:0007067', ['mitotic cell cycle']), + ('GO:0044244', ['polysaccharide catabolic process']), ('GO:0000278', ['mitotic cell cycle']), - ('GO:0000292', ['RNA fragment catabolic process']), + ('GO:0007067', ['mitotic cell cycle']), ('GO:0030452', ['RNA fragment catabolic process']), + ('GO:0000292', ['RNA fragment catabolic process']), ('GO:0000310', ['xanthine phosphoribosyltransferase activity']), ('GO:0009043', ['xanthine phosphoribosyltransferase activity']), - ('GO:0010388', ['protein deneddylation']), ('GO:0000338', ['protein deneddylation']), + ('GO:0010388', ['protein deneddylation']), ('GO:0000371', ['mRNA branch site recognition']), - ('GO:0000348', ['mRNA branch site recognition']), ('GO:0000370', ['mRNA branch site recognition']), - ('GO:0000349', - ['generation of catalytic spliceosome for first transesterification step']), + ('GO:0000348', ['mRNA branch site recognition']), ('GO:0000357', ['generation of catalytic spliceosome for first transesterification step']), ('GO:0000356', ['generation of catalytic spliceosome for first transesterification step']), + ('GO:0000349', + ['generation of catalytic spliceosome for first transesterification step']), ('GO:0000350', ['generation of catalytic spliceosome for second transesterification step']), ('GO:0000358', ['generation of catalytic 
spliceosome for second transesterification step']), ('GO:0000359', ['generation of catalytic spliceosome for second transesterification step']), + ('GO:0000354', ['cis assembly of pre-catalytic spliceosome']), ('GO:0000361', ['cis assembly of pre-catalytic spliceosome']), ('GO:0000360', ['cis assembly of pre-catalytic spliceosome']), - ('GO:0000354', ['cis assembly of pre-catalytic spliceosome']), ('GO:0000375', ['RNA splicing, via transesterification reactions']), ('GO:0031202', ['RNA splicing, via transesterification reactions']), ('GO:0000385', ['RNA splicing, via transesterification reactions']), - ('GO:0000396', - ['spliceosome conformational change to release U4 (or U4atac) and U1 (or U11)']), ('GO:0000397', ['spliceosome conformational change to release U4 (or U4atac) and U1 (or U11)']), + ('GO:0000396', + ['spliceosome conformational change to release U4 (or U4atac) and U1 (or U11)']), ('GO:0000388', ['spliceosome conformational change to release U4 (or U4atac) and U1 (or U11)']), ('GO:0000382', ["mRNA 3'-splice site recognition"]), ('GO:0000389', ["mRNA 3'-splice site recognition"]), ('GO:0000383', ["mRNA 3'-splice site recognition"]), - ('GO:0000390', ['spliceosomal complex disassembly']), ('GO:0000391', ['spliceosomal complex disassembly']), + ('GO:0000390', ['spliceosomal complex disassembly']), ('GO:0000392', ['spliceosomal complex disassembly']), ('GO:0000395', ["mRNA 5'-splice site recognition"]), - ('GO:0000369', ["mRNA 5'-splice site recognition"]), ('GO:0000368', ["mRNA 5'-splice site recognition"]), - ('GO:0006374', ['mRNA splicing, via spliceosome']), + ('GO:0000369', ["mRNA 5'-splice site recognition"]), ('GO:0006375', ['mRNA splicing, via spliceosome']), + ('GO:0006374', ['mRNA splicing, via spliceosome']), ('GO:0000398', ['mRNA splicing, via spliceosome']), - ('GO:0000418', ['RNA polymerase IV complex']), ('GO:0000420', ['RNA polymerase IV complex']), + ('GO:0000418', ['RNA polymerase IV complex']), ('GO:0000419', ['RNA polymerase V 
complex']), ('GO:0080137', ['RNA polymerase V complex']), - ('GO:0000438', ['core TFIIH complex portion of holo TFIIH complex']), ('GO:0000443', ['core TFIIH complex portion of holo TFIIH complex']), + ('GO:0000438', ['core TFIIH complex portion of holo TFIIH complex']), ('GO:0000439', ['transcription factor TFIIH core complex']), ('GO:0000441', ['transcription factor TFIIH core complex']), ('GO:0000440', ['core TFIIH complex portion of NEF3 complex']), @@ -2346,432 +2346,432 @@Ontology.pct
['cleavage in ITS2 between 5.8S rRNA and LSU-rRNA of tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)']), ('GO:0000448', ['cleavage in ITS2 between 5.8S rRNA and LSU-rRNA of tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)']), - ('GO:1990041', - ['maturation of SSU-rRNA from tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)']), ('GO:0000462', ['maturation of SSU-rRNA from tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)']), + ('GO:1990041', + ['maturation of SSU-rRNA from tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)']), ('GO:0000703', ['oxidized pyrimidine nucleobase lesion DNA N-glycosylase activity']), ('GO:0019004', ['oxidized pyrimidine nucleobase lesion DNA N-glycosylase activity']), - ('GO:0000724', ['double-strand break repair via homologous recombination']), ('GO:0016924', ['double-strand break repair via homologous recombination']), - ('GO:0000741', ['karyogamy']), + ('GO:0000724', ['double-strand break repair via homologous recombination']), ('GO:0007335', ['karyogamy']), - ('GO:0000743', - ['nuclear migration involved in conjugation with cellular fusion']), + ('GO:0000741', ['karyogamy']), ('GO:0006946', ['nuclear migration involved in conjugation with cellular fusion']), - ('GO:0000747', ['conjugation with cellular fusion']), - ('GO:0030477', ['conjugation with cellular fusion']), + ('GO:0000743', + ['nuclear migration involved in conjugation with cellular fusion']), ('GO:0030461', ['conjugation with cellular fusion']), - ('GO:0007322', ['conjugation with cellular fusion']), ('GO:0007333', ['conjugation with cellular fusion']), - ('GO:0007328', - ['response to pheromone triggering conjugation with cellular fusion']), + ('GO:0000747', ['conjugation with cellular fusion']), + ('GO:0007322', ['conjugation with cellular fusion']), + ('GO:0030477', ['conjugation with cellular fusion']), ('GO:0030434', ['response to pheromone triggering conjugation with cellular fusion']), + ('GO:0007328', + 
['response to pheromone triggering conjugation with cellular fusion']), ('GO:0000749', ['response to pheromone triggering conjugation with cellular fusion']), - ('GO:0000750', - ['pheromone-dependent signal transduction involved in conjugation with cellular fusion']), ('GO:0007330', ['pheromone-dependent signal transduction involved in conjugation with cellular fusion']), ('GO:0030454', ['pheromone-dependent signal transduction involved in conjugation with cellular fusion']), + ('GO:0000750', + ['pheromone-dependent signal transduction involved in conjugation with cellular fusion']), ('GO:0000751', ['mitotic cell cycle G1 arrest in response to pheromone']), ('GO:0030571', ['mitotic cell cycle G1 arrest in response to pheromone']), - ('GO:0007334', - ['agglutination involved in conjugation with cellular fusion']), ('GO:0000752', ['agglutination involved in conjugation with cellular fusion']), + ('GO:0007334', + ['agglutination involved in conjugation with cellular fusion']), ('GO:0000753', ['cell morphogenesis involved in conjugation with cellular fusion']), ('GO:0007332', ['cell morphogenesis involved in conjugation with cellular fusion']), ('GO:0030453', ['adaptation of signaling pathway by response to pheromone involved in conjugation with cellular fusion']), - ('GO:0007331', - ['adaptation of signaling pathway by response to pheromone involved in conjugation with cellular fusion']), ('GO:0000754', ['adaptation of signaling pathway by response to pheromone involved in conjugation with cellular fusion']), + ('GO:0007331', + ['adaptation of signaling pathway by response to pheromone involved in conjugation with cellular fusion']), ('GO:0000218', ['cytogamy']), - ('GO:0000755', ['cytogamy']), ('GO:0030462', ['cytogamy']), + ('GO:0000755', ['cytogamy']), ('GO:0007325', ['peptide pheromone export']), ('GO:0000770', ['peptide pheromone export']), ('GO:0097521', ['chromosome, centromeric region']), ('GO:0000775', ['chromosome, centromeric region']), - ('GO:0000777', 
['kinetochore']), ('GO:0031617', ['kinetochore']), - ('GO:0005699', ['kinetochore']), ('GO:0000778', ['kinetochore']), + ('GO:0000777', ['kinetochore']), + ('GO:0005699', ['kinetochore']), ('GO:0000776', ['kinetochore']), - ('GO:0000780', ['condensed chromosome, centromeric region']), ('GO:0000779', ['condensed chromosome, centromeric region']), - ('GO:0000784', ['chromosome, telomeric region']), + ('GO:0000780', ['condensed chromosome, centromeric region']), ('GO:0000781', ['chromosome, telomeric region']), - ('GO:0000785', ['chromatin']), - ('GO:0000790', ['chromatin']), + ('GO:0000784', ['chromosome, telomeric region']), ('GO:0000789', ['chromatin']), ('GO:0005717', ['chromatin']), - ('GO:0000786', ['nucleosome']), + ('GO:0000785', ['chromatin']), + ('GO:0000790', ['chromatin']), ('GO:0000787', ['nucleosome']), - ('GO:0000788', ['nucleosome']), ('GO:0005718', ['nucleosome']), - ('GO:0000791', ['euchromatin']), + ('GO:0000788', ['nucleosome']), + ('GO:0000786', ['nucleosome']), ('GO:0005719', ['euchromatin']), ('GO:0035327', ['euchromatin']), - ('GO:0000792', ['heterochromatin']), - ('GO:0035328', ['heterochromatin']), + ('GO:0000791', ['euchromatin']), ('GO:0005720', ['heterochromatin']), + ('GO:0035328', ['heterochromatin']), + ('GO:0000792', ['heterochromatin']), ('GO:0005716', ['synaptonemal complex']), ('GO:0000795', ['synaptonemal complex']), - ('GO:0000797', ['condensin complex']), ('GO:0008620', ['condensin complex']), - ('GO:0005676', ['condensin complex']), - ('GO:0061814', ['condensin complex']), ('GO:0000799', ['condensin complex']), - ('GO:0000796', ['condensin complex']), + ('GO:0000797', ['condensin complex']), ('GO:0008621', ['condensin complex']), - ('GO:0045791', ['cell morphogenesis']), - ('GO:0007148', ['cell morphogenesis']), + ('GO:0061814', ['condensin complex']), + ('GO:0000796', ['condensin complex']), + ('GO:0005676', ['condensin complex']), ('GO:0000902', ['cell morphogenesis']), ('GO:0045790', ['cell morphogenesis']), - ('GO:0007104', 
['cytokinesis']), - ('GO:0033205', ['cytokinesis']), - ('GO:0000910', ['cytokinesis']), + ('GO:0007148', ['cell morphogenesis']), + ('GO:0045791', ['cell morphogenesis']), ('GO:0016288', ['cytokinesis']), + ('GO:0000910', ['cytokinesis']), + ('GO:0033205', ['cytokinesis']), + ('GO:0007104', ['cytokinesis']), + ('GO:0000915', ['actomyosin contractile ring assembly']), ('GO:0045573', ['actomyosin contractile ring assembly']), ('GO:2000708', ['actomyosin contractile ring assembly']), - ('GO:0000915', ['actomyosin contractile ring assembly']), ('GO:0071937', ['division septum assembly']), - ('GO:0000917', ['division septum assembly']), ('GO:1902411', ['division septum assembly']), - ('GO:2000695', ['septum digestion after cytokinesis']), + ('GO:0000917', ['division septum assembly']), ('GO:1902409', ['septum digestion after cytokinesis']), ('GO:0000920', ['septum digestion after cytokinesis']), + ('GO:2000695', ['septum digestion after cytokinesis']), ('GO:0000922', ['spindle pole']), ('GO:0030615', ['spindle pole']), - ('GO:0000929', ['gamma-tubulin ring complex']), - ('GO:0000925', ['gamma-tubulin ring complex']), - ('GO:0055031', ['gamma-tubulin ring complex']), - ('GO:0055033', ['gamma-tubulin ring complex']), ('GO:0061494', ['gamma-tubulin ring complex']), - ('GO:0055032', ['gamma-tubulin ring complex']), - ('GO:0000924', ['gamma-tubulin ring complex']), + ('GO:0000925', ['gamma-tubulin ring complex']), ('GO:0000926', ['gamma-tubulin ring complex']), ('GO:0000931', ['gamma-tubulin ring complex']), + ('GO:0000929', ['gamma-tubulin ring complex']), ('GO:0008274', ['gamma-tubulin ring complex']), - ('GO:0043187', ['division septum']), + ('GO:0055033', ['gamma-tubulin ring complex']), + ('GO:0055031', ['gamma-tubulin ring complex']), + ('GO:0055032', ['gamma-tubulin ring complex']), + ('GO:0000924', ['gamma-tubulin ring complex']), ('GO:0000935', ['division septum']), + ('GO:0043187', ['division septum']), ('GO:0000939', ['inner kinetochore']), ('GO:0000941', ['inner 
kinetochore']), ('GO:0000940', ['outer kinetochore']), ('GO:0000942', ['outer kinetochore']), - ('GO:0000976', ['transcription cis-regulatory region binding']), - ('GO:0000984', ['transcription cis-regulatory region binding']), ('GO:0044212', ['transcription cis-regulatory region binding']), + ('GO:0000984', ['transcription cis-regulatory region binding']), ('GO:0000975', ['transcription cis-regulatory region binding']), + ('GO:0000976', ['transcription cis-regulatory region binding']), ('GO:0001017', ['transcription cis-regulatory region binding']), ('GO:0000977', ['RNA polymerase II transcription regulatory region sequence-specific DNA binding']), ('GO:0001012', ['RNA polymerase II transcription regulatory region sequence-specific DNA binding']), - ('GO:0000978', - ['RNA polymerase II cis-regulatory region sequence-specific DNA binding']), ('GO:0000980', ['RNA polymerase II cis-regulatory region sequence-specific DNA binding']), - ('GO:0000981', - ['DNA-binding transcription factor activity, RNA polymerase II-specific']), - ('GO:0001201', + ('GO:0000978', + ['RNA polymerase II cis-regulatory region sequence-specific DNA binding']), + ('GO:0001203', ['DNA-binding transcription factor activity, RNA polymerase II-specific']), - ('GO:0001133', + ('GO:0000982', ['DNA-binding transcription factor activity, RNA polymerase II-specific']), - ('GO:0001203', + ('GO:0003705', ['DNA-binding transcription factor activity, RNA polymerase II-specific']), ('GO:0001202', ['DNA-binding transcription factor activity, RNA polymerase II-specific']), - ('GO:0003705', + ('GO:0001133', ['DNA-binding transcription factor activity, RNA polymerase II-specific']), - ('GO:0001200', + ('GO:0001201', ['DNA-binding transcription factor activity, RNA polymerase II-specific']), - ('GO:0000982', + ('GO:0000981', + ['DNA-binding transcription factor activity, RNA polymerase II-specific']), + ('GO:0001200', ['DNA-binding transcription factor activity, RNA polymerase II-specific']), - ('GO:0035326', 
['cis-regulatory region sequence-specific DNA binding']), - ('GO:0000987', ['cis-regulatory region sequence-specific DNA binding']), ('GO:0000986', ['cis-regulatory region sequence-specific DNA binding']), ('GO:0001158', ['cis-regulatory region sequence-specific DNA binding']), + ('GO:0035326', ['cis-regulatory region sequence-specific DNA binding']), ('GO:0001159', ['cis-regulatory region sequence-specific DNA binding']), + ('GO:0000987', ['cis-regulatory region sequence-specific DNA binding']), ('GO:0001150', ['cis-regulatory region sequence-specific DNA binding']), - ('GO:0001034', - ['RNA polymerase III general transcription initiation factor activity']), ('GO:0000995', ['RNA polymerase III general transcription initiation factor activity']), + ('GO:0001034', + ['RNA polymerase III general transcription initiation factor activity']), ('GO:0001002', ['RNA polymerase III type 1 promoter sequence-specific DNA binding']), ('GO:0001030', ['RNA polymerase III type 1 promoter sequence-specific DNA binding']), - ('GO:0001031', - ['RNA polymerase III type 2 promoter sequence-specific DNA binding']), ('GO:0001003', ['RNA polymerase III type 2 promoter sequence-specific DNA binding']), - ('GO:0001006', - ['RNA polymerase III type 3 promoter sequence-specific DNA binding']), + ('GO:0001031', + ['RNA polymerase III type 2 promoter sequence-specific DNA binding']), ('GO:0001032', ['RNA polymerase III type 3 promoter sequence-specific DNA binding']), - ('GO:0001045', ['mitochondrial promoter sequence-specific DNA binding']), + ('GO:0001006', + ['RNA polymerase III type 3 promoter sequence-specific DNA binding']), + ('GO:0070363', ['mitochondrial promoter sequence-specific DNA binding']), ('GO:0070364', ['mitochondrial promoter sequence-specific DNA binding']), - ('GO:0001018', ['mitochondrial promoter sequence-specific DNA binding']), + ('GO:0001045', ['mitochondrial promoter sequence-specific DNA binding']), ('GO:0070361', ['mitochondrial promoter sequence-specific DNA 
binding']), - ('GO:0000997', ['mitochondrial promoter sequence-specific DNA binding']), - ('GO:0070363', ['mitochondrial promoter sequence-specific DNA binding']), + ('GO:0001018', ['mitochondrial promoter sequence-specific DNA binding']), ('GO:0001044', ['mitochondrial promoter sequence-specific DNA binding']), ('GO:0070362', ['mitochondrial promoter sequence-specific DNA binding']), + ('GO:0000997', ['mitochondrial promoter sequence-specific DNA binding']), ('GO:0001039', ['RNA polymerase III hybrid type promoter sequence-specific DNA binding']), ('GO:0001037', ['RNA polymerase III hybrid type promoter sequence-specific DNA binding']), + ('GO:0001046', ['core promoter sequence-specific DNA binding']), ('GO:0000985', ['core promoter sequence-specific DNA binding']), ('GO:0001047', ['core promoter sequence-specific DNA binding']), - ('GO:0001046', ['core promoter sequence-specific DNA binding']), ('GO:0001109', ['promoter clearance during DNA-templated transcription']), ('GO:0001122', ['promoter clearance during DNA-templated transcription']), - ('GO:0001112', ['DNA-templated transcription open complex formation']), ('GO:0001127', ['DNA-templated transcription open complex formation']), - ('GO:0001146', - ['transcription termination site sequence-specific DNA binding']), - ('GO:0001147', - ['transcription termination site sequence-specific DNA binding']), + ('GO:0001112', ['DNA-templated transcription open complex formation']), ('GO:0001160', ['transcription termination site sequence-specific DNA binding']), ('GO:0001145', ['transcription termination site sequence-specific DNA binding']), ('GO:0001148', ['transcription termination site sequence-specific DNA binding']), + ('GO:0001147', + ['transcription termination site sequence-specific DNA binding']), + ('GO:0001146', + ['transcription termination site sequence-specific DNA binding']), ('GO:0044213', ['intronic transcription regulatory region sequence-specific DNA binding']), ('GO:0001161', ['intronic 
transcription regulatory region sequence-specific DNA binding']), - ('GO:0001013', - ['RNA polymerase I transcription regulatory region sequence-specific DNA binding']), ('GO:0001163', ['RNA polymerase I transcription regulatory region sequence-specific DNA binding']), - ('GO:0001164', - ['RNA polymerase I core promoter sequence-specific DNA binding']), + ('GO:0001013', + ['RNA polymerase I transcription regulatory region sequence-specific DNA binding']), ('GO:0001187', ['RNA polymerase I core promoter sequence-specific DNA binding']), + ('GO:0001164', + ['RNA polymerase I core promoter sequence-specific DNA binding']), ('GO:0001166', ['RNA polymerase I cis-regulatory region sequence-specific DNA binding']), ('GO:0001165', ['RNA polymerase I cis-regulatory region sequence-specific DNA binding']), - ('GO:0001176', ['DNA-templated transcriptional start site selection']), ('GO:0001173', ['DNA-templated transcriptional start site selection']), - ('GO:0001182', ['RNA polymerase I promoter clearance']), + ('GO:0001176', ['DNA-templated transcriptional start site selection']), ('GO:0001184', ['RNA polymerase I promoter clearance']), + ('GO:0001182', ['RNA polymerase I promoter clearance']), ('GO:0001189', ['RNA polymerase I preinitiation complex assembly']), ('GO:0001188', ['RNA polymerase I preinitiation complex assembly']), - ('GO:0001194', - ['maintenance of transcriptional fidelity during transcription elongation']), ('GO:0001192', ['maintenance of transcriptional fidelity during transcription elongation']), + ('GO:0001194', + ['maintenance of transcriptional fidelity during transcription elongation']), ('GO:0001216', ['DNA-binding transcription activator activity']), - ('GO:0001215', ['DNA-binding transcription activator activity']), ('GO:0001140', ['DNA-binding transcription activator activity']), - ('GO:0001218', ['DNA-binding transcription repressor activity']), - ('GO:0001219', ['DNA-binding transcription repressor activity']), - ('GO:0001141', ['DNA-binding 
transcription repressor activity']), + ('GO:0001215', ['DNA-binding transcription activator activity']), ('GO:0001220', ['DNA-binding transcription repressor activity']), - ('GO:0001217', ['DNA-binding transcription repressor activity']), + ('GO:0001141', ['DNA-binding transcription repressor activity']), + ('GO:0001219', ['DNA-binding transcription repressor activity']), + ('GO:0001217', ['DNA-binding transcription repressor activity']), + ('GO:0001218', ['DNA-binding transcription repressor activity']), ('GO:0001224', ['transcription coregulator binding']), ('GO:0001221', ['transcription coregulator binding']), ('GO:0001226', ['transcription corepressor binding']), ('GO:0001222', ['transcription corepressor binding']), - ('GO:0001225', ['transcription coactivator binding']), ('GO:0001223', ['transcription coactivator binding']), - ('GO:0001206', - ['DNA-binding transcription repressor activity, RNA polymerase II-specific']), + ('GO:0001225', ['transcription coactivator binding']), ('GO:0001227', ['DNA-binding transcription repressor activity, RNA polymerase II-specific']), - ('GO:0001210', + ('GO:0001206', ['DNA-binding transcription repressor activity, RNA polymerase II-specific']), - ('GO:0001078', + ('GO:0001210', ['DNA-binding transcription repressor activity, RNA polymerase II-specific']), ('GO:0001214', ['DNA-binding transcription repressor activity, RNA polymerase II-specific']), - ('GO:0001211', - ['DNA-binding transcription activator activity, RNA polymerase II-specific']), - ('GO:0001212', + ('GO:0001078', + ['DNA-binding transcription repressor activity, RNA polymerase II-specific']), + ('GO:0001205', ['DNA-binding transcription activator activity, RNA polymerase II-specific']), ('GO:0001077', ['DNA-binding transcription activator activity, RNA polymerase II-specific']), - ('GO:0001205', + ('GO:0001211', ['DNA-binding transcription activator activity, RNA polymerase II-specific']), - ('GO:0001209', + ('GO:0001212', ['DNA-binding transcription activator 
activity, RNA polymerase II-specific']), ('GO:0001213', ['DNA-binding transcription activator activity, RNA polymerase II-specific']), + ('GO:0001209', + ['DNA-binding transcription activator activity, RNA polymerase II-specific']), ('GO:0001228', ['DNA-binding transcription activator activity, RNA polymerase II-specific']), ('GO:0055027', ['chlamydospore formation']), ('GO:0001410', ['chlamydospore formation']), ('GO:0042833', ['response to protozoan']), ('GO:0001562', ['response to protozoan']), - ('GO:0001589', + ('GO:0001590', ['dopamine neurotransmitter receptor activity, coupled via Gs']), ('GO:0001588', ['dopamine neurotransmitter receptor activity, coupled via Gs']), - ('GO:0001590', + ('GO:0001589', ['dopamine neurotransmitter receptor activity, coupled via Gs']), - ('GO:0001593', + ('GO:0001591', ['dopamine neurotransmitter receptor activity, coupled via Gi/Go']), ('GO:0001670', ['dopamine neurotransmitter receptor activity, coupled via Gi/Go']), - ('GO:0001591', - ['dopamine neurotransmitter receptor activity, coupled via Gi/Go']), ('GO:0001592', ['dopamine neurotransmitter receptor activity, coupled via Gi/Go']), - ('GO:0008501', ['G protein-coupled adenosine receptor activity']), + ('GO:0001593', + ['dopamine neurotransmitter receptor activity, coupled via Gi/Go']), ('GO:0001610', ['G protein-coupled adenosine receptor activity']), - ('GO:0001611', ['G protein-coupled adenosine receptor activity']), - ('GO:0001609', ['G protein-coupled adenosine receptor activity']), ('GO:0001612', ['G protein-coupled adenosine receptor activity']), ('GO:0001613', ['G protein-coupled adenosine receptor activity']), - ('GO:0035586', ['purinergic nucleotide receptor activity']), + ('GO:0001611', ['G protein-coupled adenosine receptor activity']), + ('GO:0001609', ['G protein-coupled adenosine receptor activity']), + ('GO:0008501', ['G protein-coupled adenosine receptor activity']), ('GO:0001614', ['purinergic nucleotide receptor activity']), - ('GO:0045032', ['G 
protein-coupled ADP receptor activity']), + ('GO:0035586', ['purinergic nucleotide receptor activity']), ('GO:0001621', ['G protein-coupled ADP receptor activity']), - ('GO:0016522', - ['pituitary adenylate cyclase-activating polypeptide receptor activity']), + ('GO:0045032', ['G protein-coupled ADP receptor activity']), ('GO:0001634', ['pituitary adenylate cyclase-activating polypeptide receptor activity']), + ('GO:0016522', + ['pituitary adenylate cyclase-activating polypeptide receptor activity']), ('GO:0001644', ['cAMP receptor activity']), ('GO:0001646', ['cAMP receptor activity']), ('GO:0001654', ['eye development']), ('GO:0042460', ['eye development']), ('GO:0043081', ['male germ cell nucleus']), ('GO:0001673', ['male germ cell nucleus']), - ('GO:0001674', ['female germ cell nucleus']), ('GO:0043080', ['female germ cell nucleus']), - ('GO:0001694', ['histamine biosynthetic process']), + ('GO:0001674', ['female germ cell nucleus']), ('GO:0001693', ['histamine biosynthetic process']), - ('GO:0048276', ['gastrulation with mouth forming second']), - ('GO:0001702', ['gastrulation with mouth forming second']), + ('GO:0001694', ['histamine biosynthetic process']), ('GO:0010003', ['gastrulation with mouth forming second']), - ('GO:0001744', ['insect visual primordium formation']), + ('GO:0001702', ['gastrulation with mouth forming second']), + ('GO:0048276', ['gastrulation with mouth forming second']), ('GO:0007457', ['insect visual primordium formation']), + ('GO:0001744', ['insect visual primordium formation']), ('GO:0048049', ['insect visual primordium development']), ('GO:0001748', ['insect visual primordium development']), ('GO:0007459', ['compound eye photoreceptor fate commitment']), ('GO:0001752', ['compound eye photoreceptor fate commitment']), - ('GO:0001806', ['type IV hypersensitivity']), ('GO:0016069', ['type IV hypersensitivity']), - ('GO:0042089', ['cytokine production']), + ('GO:0001806', ['type IV hypersensitivity']), + ('GO:0050663', ['cytokine 
production']), ('GO:0042032', ['cytokine production']), - ('GO:0042107', ['cytokine production']), + ('GO:0042089', ['cytokine production']), ('GO:0001816', ['cytokine production']), - ('GO:0050663', ['cytokine production']), - ('GO:0001817', ['regulation of cytokine production']), + ('GO:0042107', ['cytokine production']), ('GO:0042035', ['regulation of cytokine production']), ('GO:0050707', ['regulation of cytokine production']), - ('GO:0050710', ['negative regulation of cytokine production']), + ('GO:0001817', ['regulation of cytokine production']), ('GO:0042036', ['negative regulation of cytokine production']), ('GO:0001818', ['negative regulation of cytokine production']), - ('GO:0042108', ['positive regulation of cytokine production']), - ('GO:0001819', ['positive regulation of cytokine production']), + ('GO:0050710', ['negative regulation of cytokine production']), ('GO:0050715', ['positive regulation of cytokine production']), + ('GO:0001819', ['positive regulation of cytokine production']), + ('GO:0042108', ['positive regulation of cytokine production']), ('GO:0001679', ['neural tube formation']), ('GO:0001841', ['neural tube formation']), - ('GO:0080087', ['(1->3)-beta-D-glucan binding']), ('GO:0001872', ['(1->3)-beta-D-glucan binding']), - ('GO:0001942', ['hair follicle development']), + ('GO:0080087', ['(1->3)-beta-D-glucan binding']), ('GO:0001943', ['hair follicle development']), + ('GO:0001942', ['hair follicle development']), ('GO:0002003', ['angiotensin maturation']), ('GO:0002005', ['angiotensin maturation']), ('GO:0002036', ['regulation of L-glutamate import across plasma membrane']), ('GO:1900920', ['regulation of L-glutamate import across plasma membrane']), - ('GO:1900921', - ['negative regulation of L-glutamate import across plasma membrane']), ('GO:0002037', ['negative regulation of L-glutamate import across plasma membrane']), - ('GO:0002038', - ['positive regulation of L-glutamate import across plasma membrane']), + ('GO:1900921', + 
['negative regulation of L-glutamate import across plasma membrane']), ('GO:1900922', ['positive regulation of L-glutamate import across plasma membrane']), - ('GO:0002046', ['opsin binding']), + ('GO:0002038', + ['positive regulation of L-glutamate import across plasma membrane']), ('GO:0016030', ['opsin binding']), - ('GO:0002163', ['dystroglycan binding']), + ('GO:0002046', ['opsin binding']), ('GO:0002162', ['dystroglycan binding']), ('GO:0002166', ['dystroglycan binding']), + ('GO:0002163', ['dystroglycan binding']), ('GO:0002214', ['defense response to insect']), ('GO:0002213', ['defense response to insect']), - ('GO:0002215', ['defense response to nematode']), ('GO:0002216', ['defense response to nematode']), - ('GO:0002218', ['activation of innate immune response']), + ('GO:0002215', ['defense response to nematode']), ('GO:0002219', ['activation of innate immune response']), + ('GO:0002218', ['activation of innate immune response']), + ('GO:0002367', ['cytokine production involved in immune response']), ('GO:0002374', ['cytokine production involved in immune response']), ('GO:0002375', ['cytokine production involved in immune response']), - ('GO:0002367', ['cytokine production involved in immune response']), - ('GO:0002377', ['immunoglobulin production']), ('GO:0048305', ['immunoglobulin production']), + ('GO:0002377', ['immunoglobulin production']), ('GO:0002378', ['immunoglobulin production']), - ('GO:0002379', - ['immunoglobulin production involved in immunoglobulin-mediated immune response']), ('GO:0002381', ['immunoglobulin production involved in immunoglobulin-mediated immune response']), + ('GO:0002379', + ['immunoglobulin production involved in immunoglobulin-mediated immune response']), ('GO:0002380', ['immunoglobulin production involved in immunoglobulin-mediated immune response']), - ('GO:0002386', ['mucosal immune response']), ('GO:0002422', ['mucosal immune response']), ('GO:0002385', ['mucosal immune response']), - ('GO:0002535', - ['platelet 
activating factor production involved in inflammatory response']), + ('GO:0002386', ['mucosal immune response']), ('GO:0002391', ['platelet activating factor production involved in inflammatory response']), - ('GO:0002392', + ('GO:0002535', ['platelet activating factor production involved in inflammatory response']), ('GO:0002390', ['platelet activating factor production involved in inflammatory response']), - ('GO:0002443', ['leukocyte mediated immunity']), + ('GO:0002392', + ['platelet activating factor production involved in inflammatory response']), ('GO:0042087', ['leukocyte mediated immunity']), ('GO:0019723', ['leukocyte mediated immunity']), - ('GO:0002637', ['regulation of immunoglobulin production']), + ('GO:0002443', ['leukocyte mediated immunity']), ('GO:0002640', ['regulation of immunoglobulin production']), + ('GO:0002637', ['regulation of immunoglobulin production']), ('GO:0051023', ['regulation of immunoglobulin production']), + ('GO:0002641', ['negative regulation of immunoglobulin production']), ('GO:0051025', ['negative regulation of immunoglobulin production']), ('GO:0002638', ['negative regulation of immunoglobulin production']), - ('GO:0002641', ['negative regulation of immunoglobulin production']), - ('GO:0051024', ['positive regulation of immunoglobulin production']), ('GO:0002642', ['positive regulation of immunoglobulin production']), + ('GO:0051024', ['positive regulation of immunoglobulin production']), ('GO:0002639', ['positive regulation of immunoglobulin production']), - ('GO:0045845', ['regulation of natural killer cell mediated immunity']), ('GO:0002715', ['regulation of natural killer cell mediated immunity']), - ('GO:0002716', - ['negative regulation of natural killer cell mediated immunity']), + ('GO:0045845', ['regulation of natural killer cell mediated immunity']), ('GO:0030102', ['negative regulation of natural killer cell mediated immunity']), - ('GO:0002717', - ['positive regulation of natural killer cell mediated 
immunity']), + ('GO:0002716', + ['negative regulation of natural killer cell mediated immunity']), ('GO:0045846', ['positive regulation of natural killer cell mediated immunity']), - ('GO:0002718', + ('GO:0002717', + ['positive regulation of natural killer cell mediated immunity']), + ('GO:0002742', ['regulation of cytokine production involved in immune response']), ('GO:0002739', ['regulation of cytokine production involved in immune response']), - ('GO:0002742', + ('GO:0002718', ['regulation of cytokine production involved in immune response']), - ('GO:0002719', + ('GO:0002740', ['negative regulation of cytokine production involved in immune response']), ('GO:0002743', ['negative regulation of cytokine production involved in immune response']), - ('GO:0002740', + ('GO:0002719', ['negative regulation of cytokine production involved in immune response']), + ('GO:0002720', + ['positive regulation of cytokine production involved in immune response']), ('GO:0002744', ['positive regulation of cytokine production involved in immune response']), ('GO:0002741', ['positive regulation of cytokine production involved in immune response']), - ('GO:0002720', - ['positive regulation of cytokine production involved in immune response']), ('GO:0039528', ['cytoplasmic pattern recognition receptor signaling pathway']), ('GO:0002753', @@ -2783,47 +2783,47 @@Ontology.pct
('GO:0002949', ['tRNA threonylcarbamoyladenosine modification']), ('GO:0010802', ['respiratory system process']), ('GO:0003016', ['respiratory system process']), - ('GO:0003124', ['epinephrine-mediated vasodilation']), ('GO:0003123', ['epinephrine-mediated vasodilation']), ('GO:0003121', ['epinephrine-mediated vasodilation']), + ('GO:0003124', ['epinephrine-mediated vasodilation']), ('GO:0003126', ['norepinephrine-mediated vasodilation']), - ('GO:0003122', ['norepinephrine-mediated vasodilation']), ('GO:0003125', ['norepinephrine-mediated vasodilation']), - ('GO:0003341', ['cilium movement']), + ('GO:0003122', ['norepinephrine-mediated vasodilation']), ('GO:0036142', ['cilium movement']), - ('GO:0003352', ['regulation of cilium movement']), + ('GO:0003341', ['cilium movement']), ('GO:1900172', ['regulation of cilium movement']), - ('GO:1900174', ['positive regulation of cilium movement']), + ('GO:0003352', ['regulation of cilium movement']), ('GO:0003353', ['positive regulation of cilium movement']), + ('GO:1900174', ['positive regulation of cilium movement']), ('GO:0003354', ['negative regulation of cilium movement']), ('GO:1900173', ['negative regulation of cilium movement']), ('GO:0036144', ['regulation of cilium beat frequency']), ('GO:0003356', ['regulation of cilium beat frequency']), - ('GO:0003376', ['sphingosine-1-phosphate receptor signaling pathway']), ('GO:0001789', ['sphingosine-1-phosphate receptor signaling pathway']), + ('GO:0003376', ['sphingosine-1-phosphate receptor signaling pathway']), ('GO:0000496', ['nucleic acid binding']), ('GO:0003676', ['nucleic acid binding']), ('GO:0043566', ['DNA binding']), ('GO:0003677', ['DNA binding']), - ('GO:0004003', ['DNA helicase activity']), ('GO:0003679', ['DNA helicase activity']), + ('GO:0004003', ['DNA helicase activity']), ('GO:0003678', ['DNA helicase activity']), - ('GO:0033170', ['DNA clamp loader activity']), ('GO:0003689', ['DNA clamp loader activity']), + ('GO:0033170', ['DNA clamp loader 
activity']), ('GO:0003697', ['single-stranded DNA binding']), ('GO:0003698', ['single-stranded DNA binding']), ('GO:0003699', ['single-stranded DNA binding']), - ('GO:0001204', ['DNA-binding transcription factor activity']), - ('GO:0001151', ['DNA-binding transcription factor activity']), - ('GO:0001130', ['DNA-binding transcription factor activity']), ('GO:0001199', ['DNA-binding transcription factor activity']), - ('GO:0000130', ['DNA-binding transcription factor activity']), - ('GO:0003700', ['DNA-binding transcription factor activity']), ('GO:0001131', ['DNA-binding transcription factor activity']), ('GO:0001071', ['DNA-binding transcription factor activity']), + ('GO:0001151', ['DNA-binding transcription factor activity']), + ('GO:0003700', ['DNA-binding transcription factor activity']), + ('GO:0001204', ['DNA-binding transcription factor activity']), + ('GO:0000130', ['DNA-binding transcription factor activity']), + ('GO:0001130', ['DNA-binding transcription factor activity']), ('GO:0003712', ['transcription coregulator activity']), - ('GO:0001104', ['transcription coregulator activity']), ('GO:0016455', ['transcription coregulator activity']), + ('GO:0001104', ['transcription coregulator activity']), ('GO:0001105', ['transcription coactivator activity']), ('GO:0003713', ['transcription coactivator activity']), ('GO:0003714', ['transcription corepressor activity']), @@ -2831,80 +2831,80 @@Ontology.pct
('GO:0003723', ['RNA binding']), ('GO:0000498', ['RNA binding']), ('GO:0044822', ['RNA binding']), - ('GO:0004004', ['RNA helicase activity']), ('GO:0003724', ['RNA helicase activity']), - ('GO:0003971', ['double-stranded RNA adenosine deaminase activity']), + ('GO:0004004', ['RNA helicase activity']), ('GO:0003726', ['double-stranded RNA adenosine deaminase activity']), - ('GO:0003727', ['single-stranded RNA binding']), + ('GO:0003971', ['double-stranded RNA adenosine deaminase activity']), ('GO:0003728', ['single-stranded RNA binding']), - ('GO:0003729', ['mRNA binding']), + ('GO:0003727', ['single-stranded RNA binding']), ('GO:0000499', ['mRNA binding']), - ('GO:0003740', ['structural constituent of ribosome']), - ('GO:0003742', ['structural constituent of ribosome']), - ('GO:0003741', ['structural constituent of ribosome']), + ('GO:0003729', ['mRNA binding']), ('GO:0003738', ['structural constituent of ribosome']), - ('GO:0003736', ['structural constituent of ribosome']), ('GO:0003735', ['structural constituent of ribosome']), - ('GO:0003739', ['structural constituent of ribosome']), ('GO:0003737', ['structural constituent of ribosome']), - ('GO:0003745', ['translation initiation factor activity']), + ('GO:0003739', ['structural constituent of ribosome']), + ('GO:0003742', ['structural constituent of ribosome']), + ('GO:0003741', ['structural constituent of ribosome']), + ('GO:0003740', ['structural constituent of ribosome']), + ('GO:0003736', ['structural constituent of ribosome']), ('GO:0003744', ['translation initiation factor activity']), + ('GO:0003745', ['translation initiation factor activity']), ('GO:0003743', ['translation initiation factor activity']), - ('GO:0003746', ['translation elongation factor activity']), ('GO:0008182', ['translation elongation factor activity']), + ('GO:0003746', ['translation elongation factor activity']), ('GO:0008183', ['translation elongation factor activity']), - ('GO:0003747', ['translation release factor activity']), 
('GO:0003749', ['translation release factor activity']), + ('GO:0003747', ['translation release factor activity']), ('GO:0003748', ['translation release factor activity']), - ('GO:0004752', ['peptidyl-prolyl cis-trans isomerase activity']), ('GO:0003755', ['peptidyl-prolyl cis-trans isomerase activity']), + ('GO:0004752', ['peptidyl-prolyl cis-trans isomerase activity']), ('GO:0042028', ['peptidyl-prolyl cis-trans isomerase activity']), - ('GO:0006467', ['protein disulfide isomerase activity']), ('GO:0003756', ['protein disulfide isomerase activity']), + ('GO:0006467', ['protein disulfide isomerase activity']), ('GO:0003777', ['microtubule motor activity']), ('GO:1990939', ['microtubule motor activity']), - ('GO:0003838', ['sterol 24-C-methyltransferase activity']), ('GO:0102101', ['sterol 24-C-methyltransferase activity']), + ('GO:0003838', ['sterol 24-C-methyltransferase activity']), ('GO:0003841', ['1-acylglycerol-3-phosphate O-acyltransferase activity']), ('GO:0004469', ['1-acylglycerol-3-phosphate O-acyltransferase activity']), ('GO:0003843', ['1,3-beta-D-glucan synthase activity']), ('GO:0009981', ['1,3-beta-D-glucan synthase activity']), ('GO:0003853', ['2-methylbutanoyl-CoA dehydrogenase activity']), ('GO:0047119', ['2-methylbutanoyl-CoA dehydrogenase activity']), - ('GO:0003863', - ['3-methyl-2-oxobutanoate dehydrogenase (2-methylpropanoyl-transferring) activity']), ('GO:0003826', ['3-methyl-2-oxobutanoate dehydrogenase (2-methylpropanoyl-transferring) activity']), - ('GO:0046913', ['ATP citrate synthase activity']), + ('GO:0003863', + ['3-methyl-2-oxobutanoate dehydrogenase (2-methylpropanoyl-transferring) activity']), ('GO:0003878', ['ATP citrate synthase activity']), + ('GO:0046913', ['ATP citrate synthase activity']), ('GO:0003886', ['DNA (cytosine-5-)-methyltransferase activity']), ('GO:0008326', ['DNA (cytosine-5-)-methyltransferase activity']), - ('GO:0003894', ['DNA-directed DNA polymerase activity']), - ('GO:0008723', ['DNA-directed DNA polymerase 
activity']), ('GO:0016000', ['DNA-directed DNA polymerase activity']), - ('GO:0003893', ['DNA-directed DNA polymerase activity']), + ('GO:0016452', ['DNA-directed DNA polymerase activity']), + ('GO:0003888', ['DNA-directed DNA polymerase activity']), ('GO:0016448', ['DNA-directed DNA polymerase activity']), - ('GO:0003889', ['DNA-directed DNA polymerase activity']), - ('GO:0003890', ['DNA-directed DNA polymerase activity']), - ('GO:0003891', ['DNA-directed DNA polymerase activity']), + ('GO:0003887', ['DNA-directed DNA polymerase activity']), + ('GO:0019984', ['DNA-directed DNA polymerase activity']), ('GO:0016450', ['DNA-directed DNA polymerase activity']), - ('GO:0016452', ['DNA-directed DNA polymerase activity']), + ('GO:0003893', ['DNA-directed DNA polymerase activity']), ('GO:0016451', ['DNA-directed DNA polymerase activity']), - ('GO:0019984', ['DNA-directed DNA polymerase activity']), - ('GO:0003887', ['DNA-directed DNA polymerase activity']), - ('GO:0003888', ['DNA-directed DNA polymerase activity']), ('GO:0015999', ['DNA-directed DNA polymerase activity']), + ('GO:0003891', ['DNA-directed DNA polymerase activity']), + ('GO:0003894', ['DNA-directed DNA polymerase activity']), + ('GO:0008723', ['DNA-directed DNA polymerase activity']), ('GO:0003895', ['DNA-directed DNA polymerase activity']), + ('GO:0003890', ['DNA-directed DNA polymerase activity']), ('GO:0016449', ['DNA-directed DNA polymerase activity']), - ('GO:0003896', ['DNA primase activity']), + ('GO:0003889', ['DNA-directed DNA polymerase activity']), ('GO:0003897', ['DNA primase activity']), + ('GO:0003896', ['DNA primase activity']), ('GO:0003898', ['DNA primase activity']), - ('GO:0000129', ["DNA-directed 5'-3' RNA polymerase activity"]), ('GO:0003899', ["DNA-directed 5'-3' RNA polymerase activity"]), + ('GO:0000129', ["DNA-directed 5'-3' RNA polymerase activity"]), ('GO:0003905', ['alkylbase DNA N-glycosylase activity']), ('GO:0004036', ['alkylbase DNA N-glycosylase activity']), - ('GO:0009387', 
['DNA topoisomerase activity']), ('GO:0003916', ['DNA topoisomerase activity']), + ('GO:0009387', ['DNA topoisomerase activity']), ('GO:0003918', ['DNA topoisomerase type II (double strand cut, ATP-hydrolyzing) activity']), ('GO:0061505', @@ -2913,17 +2913,17 @@Ontology.pct
('GO:0003924', ['GTPase activity']), ('GO:0003925', ['G protein activity']), ('GO:0003927', ['G protein activity']), - ('GO:0008468', ['NADPH dehydrogenase activity']), ('GO:0016660', ['NADPH dehydrogenase activity']), + ('GO:0008468', ['NADPH dehydrogenase activity']), ('GO:0003959', ['NADPH dehydrogenase activity']), - ('GO:0019282', ['O-acetylhomoserine aminocarboxypropyltransferase activity']), ('GO:0003961', ['O-acetylhomoserine aminocarboxypropyltransferase activity']), + ('GO:0019282', ['O-acetylhomoserine aminocarboxypropyltransferase activity']), ('GO:0003962', ['cystathionine gamma-synthase activity']), ('GO:0000505', ['cystathionine gamma-synthase activity']), ('GO:0052854', ['(S)-2-hydroxy-acid oxidase activity']), + ('GO:0052852', ['(S)-2-hydroxy-acid oxidase activity']), ('GO:0008891', ['(S)-2-hydroxy-acid oxidase activity']), ('GO:0052853', ['(S)-2-hydroxy-acid oxidase activity']), - ('GO:0052852', ['(S)-2-hydroxy-acid oxidase activity']), ('GO:0003973', ['(S)-2-hydroxy-acid oxidase activity']), ('GO:0047318', ['N2-acetyl-L-ornithine:2-oxoglutarate 5-aminotransferase activity']), @@ -2931,39 +2931,39 @@Ontology.pct
['N2-acetyl-L-ornithine:2-oxoglutarate 5-aminotransferase activity']), ('GO:0003994', ['aconitate hydratase activity']), ('GO:0052632', ['aconitate hydratase activity']), - ('GO:0019109', ['acyl-CoA dehydrogenase activity']), ('GO:0003995', ['acyl-CoA dehydrogenase activity']), - ('GO:0050250', ['aldehyde oxidase activity']), + ('GO:0019109', ['acyl-CoA dehydrogenase activity']), ('GO:0004031', ['aldehyde oxidase activity']), + ('GO:0050250', ['aldehyde oxidase activity']), ('GO:0019851', ['aminoacyl-tRNA hydrolase activity']), ('GO:0019850', ['aminoacyl-tRNA hydrolase activity']), ('GO:0004045', ['aminoacyl-tRNA hydrolase activity']), - ('GO:0042172', ['arginyl-tRNA--protein transferase activity']), ('GO:0004057', ['arginyl-tRNA--protein transferase activity']), - ('GO:0004058', ['aromatic-L-amino-acid decarboxylase activity']), + ('GO:0042172', ['arginyl-tRNA--protein transferase activity']), ('GO:0016400', ['aromatic-L-amino-acid decarboxylase activity']), + ('GO:0004058', ['aromatic-L-amino-acid decarboxylase activity']), ('GO:0004093', ['carnitine O-acetyltransferase activity']), ('GO:0004094', ['carnitine O-acetyltransferase activity']), ('GO:0004092', ['carnitine O-acetyltransferase activity']), ('GO:0004096', ['catalase activity']), ('GO:0016953', ['catalase activity']), ('GO:0016952', ['catalase activity']), + ('GO:0102316', ['catechol oxidase activity']), ('GO:0036263', ['catechol oxidase activity']), ('GO:0004097', ['catechol oxidase activity']), ('GO:0036264', ['catechol oxidase activity']), - ('GO:0102316', ['catechol oxidase activity']), ('GO:0004123', ['cystathionine gamma-lyase activity']), ('GO:0016225', ['cystathionine gamma-lyase activity']), - ('GO:0008461', ['delta(3)-delta(2)-enoyl-CoA isomerase activity']), ('GO:0004165', ['delta(3)-delta(2)-enoyl-CoA isomerase activity']), - ('GO:0004101', - ['dolichyl-phosphate alpha-N-acetylglucosaminyltransferase activity']), + ('GO:0008461', ['delta(3)-delta(2)-enoyl-CoA isomerase activity']), 
('GO:0004166', ['dolichyl-phosphate alpha-N-acetylglucosaminyltransferase activity']), + ('GO:0004101', + ['dolichyl-phosphate alpha-N-acetylglucosaminyltransferase activity']), ('GO:0004167', ['dopachrome isomerase activity']), ('GO:0048059', ['dopachrome isomerase activity']), - ('GO:0016809', ['endopeptidase activity']), ('GO:0004175', ['endopeptidase activity']), + ('GO:0016809', ['endopeptidase activity']), ('GO:0004280', ['ATP-dependent peptidase activity']), ('GO:0004176', ['ATP-dependent peptidase activity']), ('GO:0016510', ['enoyl-CoA hydratase activity']), @@ -2984,201 +2984,201 @@Ontology.pct
('GO:0004379', ['glycylpeptide N-tetradecanoyltransferase activity']), ('GO:0004386', ['helicase activity']), ('GO:0008026', ['helicase activity']), + ('GO:0004405', ['histone acetyltransferase activity']), ('GO:0043166', ['histone acetyltransferase activity']), - ('GO:0004406', ['histone acetyltransferase activity']), ('GO:0004404', ['histone acetyltransferase activity']), ('GO:0004403', ['histone acetyltransferase activity']), + ('GO:0004406', ['histone acetyltransferase activity']), ('GO:0046971', ['histone acetyltransferase activity']), ('GO:0004402', ['histone acetyltransferase activity']), - ('GO:0004405', ['histone acetyltransferase activity']), ('GO:0042282', ['hydroxymethylglutaryl-CoA reductase (NADPH) activity']), ('GO:0004420', ['hydroxymethylglutaryl-CoA reductase (NADPH) activity']), ('GO:0016315', ['phosphatidylinositol-3-phosphate phosphatase activity']), ('GO:0004438', ['phosphatidylinositol-3-phosphate phosphatase activity']), - ('GO:0001668', - ['phosphatidylinositol-4,5-bisphosphate 5-phosphatase activity']), ('GO:0004439', ['phosphatidylinositol-4,5-bisphosphate 5-phosphatase activity']), - ('GO:0004467', ['long-chain fatty acid-CoA ligase activity']), + ('GO:0001668', + ['phosphatidylinositol-4,5-bisphosphate 5-phosphatase activity']), ('GO:0003996', ['long-chain fatty acid-CoA ligase activity']), - ('GO:0004472', ['malate dehydrogenase (decarboxylating) (NAD+) activity']), + ('GO:0004467', ['long-chain fatty acid-CoA ligase activity']), ('GO:0004471', ['malate dehydrogenase (decarboxylating) (NAD+) activity']), + ('GO:0004472', ['malate dehydrogenase (decarboxylating) (NAD+) activity']), ('GO:0016619', ['malate dehydrogenase (decarboxylating) (NAD+) activity']), - ('GO:0070128', ['methionyl-tRNA formyltransferase activity']), ('GO:0001718', ['methionyl-tRNA formyltransferase activity']), ('GO:0004479', ['methionyl-tRNA formyltransferase activity']), - ('GO:0008702', ['methylenetetrahydrofolate reductase (NAD(P)H) activity']), + ('GO:0070128', 
['methionyl-tRNA formyltransferase activity']), ('GO:0004489', ['methylenetetrahydrofolate reductase (NAD(P)H) activity']), + ('GO:0008702', ['methylenetetrahydrofolate reductase (NAD(P)H) activity']), ('GO:0004499', ['N,N-dimethylaniline monooxygenase activity']), ('GO:0047076', ['N,N-dimethylaniline monooxygenase activity']), - ('GO:0004523', ['RNA-DNA hybrid ribonuclease activity']), ('GO:0004524', ['RNA-DNA hybrid ribonuclease activity']), + ('GO:0004523', ['RNA-DNA hybrid ribonuclease activity']), ('GO:0004527', ['exonuclease activity']), ('GO:0008857', ['exonuclease activity']), ('GO:0004536', ['DNA nuclease activity']), ('GO:0004537', ['DNA nuclease activity']), ('GO:0016800', ['hydrolase activity, hydrolyzing O-glycosyl compounds']), ('GO:0004553', ['hydrolase activity, hydrolyzing O-glycosyl compounds']), - ('GO:0004556', ['alpha-amylase activity']), ('GO:0103025', ['alpha-amylase activity']), + ('GO:0004556', ['alpha-amylase activity']), + ('GO:0004558', ['alpha-1,4-glucosidase activity']), ('GO:0016982', ['alpha-1,4-glucosidase activity']), ('GO:0004562', ['alpha-1,4-glucosidase activity']), - ('GO:0004558', ['alpha-1,4-glucosidase activity']), + ('GO:0004601', ['peroxidase activity']), ('GO:0016686', ['peroxidase activity']), + ('GO:0016693', ['peroxidase activity']), ('GO:0016687', ['peroxidase activity']), - ('GO:0004601', ['peroxidase activity']), ('GO:0016685', ['peroxidase activity']), - ('GO:0016693', ['peroxidase activity']), ('GO:0004602', ['glutathione peroxidase activity']), ('GO:0016224', ['glutathione peroxidase activity']), - ('GO:0004615', ['phosphomannomutase activity']), ('GO:0008971', ['phosphomannomutase activity']), - ('GO:0004622', ['lysophospholipase activity']), + ('GO:0004615', ['phosphomannomutase activity']), ('GO:0045126', ['lysophospholipase activity']), - ('GO:0102567', ['phospholipase A2 activity']), - ('GO:0102568', ['phospholipase A2 activity']), + ('GO:0004622', ['lysophospholipase activity']), ('GO:0004623', 
['phospholipase A2 activity']), - ('GO:0042298', ['phospholipase C activity']), + ('GO:0102568', ['phospholipase A2 activity']), + ('GO:0102567', ['phospholipase A2 activity']), ('GO:0004629', ['phospholipase C activity']), - ('GO:0004648', - ['O-phospho-L-serine:2-oxoglutarate aminotransferase activity']), + ('GO:0042298', ['phospholipase C activity']), ('GO:0004646', ['O-phospho-L-serine:2-oxoglutarate aminotransferase activity']), + ('GO:0004648', + ['O-phospho-L-serine:2-oxoglutarate aminotransferase activity']), ('GO:0018223', ['protein farnesyltransferase activity']), ('GO:0004660', ['protein farnesyltransferase activity']), ('GO:0018224', ['protein geranylgeranyltransferase activity']), ('GO:0004661', ['protein geranylgeranyltransferase activity']), - ('GO:0004671', - ['protein C-terminal S-isoprenylcysteine carboxyl O-methyltransferase activity']), ('GO:0018225', ['protein C-terminal S-isoprenylcysteine carboxyl O-methyltransferase activity']), + ('GO:0004671', + ['protein C-terminal S-isoprenylcysteine carboxyl O-methyltransferase activity']), ('GO:0050222', ['protein kinase activity']), ('GO:0004672', ['protein kinase activity']), ('GO:0008896', ['protein histidine kinase activity']), ('GO:0004673', ['protein histidine kinase activity']), - ('GO:0004674', ['protein serine/threonine kinase activity']), - ('GO:0004696', ['protein serine/threonine kinase activity']), + ('GO:0004700', ['protein serine/threonine kinase activity']), ('GO:0106311', ['protein serine/threonine kinase activity']), + ('GO:0004674', ['protein serine/threonine kinase activity']), ('GO:0004695', ['protein serine/threonine kinase activity']), - ('GO:0004700', ['protein serine/threonine kinase activity']), + ('GO:0004696', ['protein serine/threonine kinase activity']), ('GO:0004688', ['calmodulin-dependent protein kinase activity']), - ('GO:0004685', ['calmodulin-dependent protein kinase activity']), ('GO:0004684', ['calmodulin-dependent protein kinase activity']), ('GO:0004683', 
['calmodulin-dependent protein kinase activity']), + ('GO:0004685', ['calmodulin-dependent protein kinase activity']), ('GO:0004689', ['phosphorylase kinase activity']), ('GO:0008606', ['phosphorylase kinase activity']), ('GO:0008602', ['cAMP-dependent protein kinase activity']), ('GO:0004691', ['cAMP-dependent protein kinase activity']), - ('GO:0016537', ['cyclin-dependent protein serine/threonine kinase activity']), ('GO:0004693', ['cyclin-dependent protein serine/threonine kinase activity']), - ('GO:0004697', ['diacylglycerol-dependent serine/threonine kinase activity']), + ('GO:0016537', ['cyclin-dependent protein serine/threonine kinase activity']), ('GO:0004701', ['diacylglycerol-dependent serine/threonine kinase activity']), + ('GO:0004697', ['diacylglycerol-dependent serine/threonine kinase activity']), ('GO:0004678', ['G protein-coupled receptor kinase activity']), ('GO:0004703', ['G protein-coupled receptor kinase activity']), ('GO:0016908', ['MAP kinase activity']), + ('GO:0016909', ['MAP kinase activity']), ('GO:0004707', ['MAP kinase activity']), ('GO:0008339', ['MAP kinase activity']), - ('GO:0016909', ['MAP kinase activity']), ('GO:0008338', ['MAP kinase activity']), - ('GO:0004710', ['MAP kinase kinase kinase activity']), ('GO:0004709', ['MAP kinase kinase kinase activity']), - ('GO:0004713', ['protein tyrosine kinase activity']), + ('GO:0004710', ['MAP kinase kinase kinase activity']), ('GO:0004718', ['protein tyrosine kinase activity']), + ('GO:0004713', ['protein tyrosine kinase activity']), ('GO:0018056', ['protein-lysine 6-oxidase activity']), ('GO:0004720', ['protein-lysine 6-oxidase activity']), - ('GO:0008598', ['protein serine/threonine phosphatase activity']), - ('GO:0004722', ['protein serine/threonine phosphatase activity']), - ('GO:0015071', ['protein serine/threonine phosphatase activity']), - ('GO:0030360', ['protein serine/threonine phosphatase activity']), + ('GO:0000158', ['protein serine/threonine phosphatase activity']), 
('GO:0004724', ['protein serine/threonine phosphatase activity']), - ('GO:0000163', ['protein serine/threonine phosphatase activity']), + ('GO:0030360', ['protein serine/threonine phosphatase activity']), ('GO:0008600', ['protein serine/threonine phosphatase activity']), - ('GO:0106307', ['protein serine/threonine phosphatase activity']), ('GO:0030361', ['protein serine/threonine phosphatase activity']), + ('GO:0106307', ['protein serine/threonine phosphatase activity']), + ('GO:0000163', ['protein serine/threonine phosphatase activity']), ('GO:0030357', ['protein serine/threonine phosphatase activity']), - ('GO:0030358', ['protein serine/threonine phosphatase activity']), ('GO:0106306', ['protein serine/threonine phosphatase activity']), - ('GO:0000158', ['protein serine/threonine phosphatase activity']), + ('GO:0008598', ['protein serine/threonine phosphatase activity']), + ('GO:0015071', ['protein serine/threonine phosphatase activity']), + ('GO:0030358', ['protein serine/threonine phosphatase activity']), + ('GO:0004722', ['protein serine/threonine phosphatase activity']), ('GO:0004723', ['calcium-dependent protein serine/threonine phosphatase activity']), ('GO:0008596', ['calcium-dependent protein serine/threonine phosphatase activity']), - ('GO:0004741', - ['[pyruvate dehydrogenase (acetyl-transferring)]-phosphatase activity']), ('GO:0019906', ['[pyruvate dehydrogenase (acetyl-transferring)]-phosphatase activity']), + ('GO:0004741', + ['[pyruvate dehydrogenase (acetyl-transferring)]-phosphatase activity']), ('GO:0030523', ['dihydrolipoyllysine-residue acetyltransferase activity']), ('GO:0004742', ['dihydrolipoyllysine-residue acetyltransferase activity']), ('GO:0016959', ['ribonucleoside-diphosphate reductase activity, thioredoxin disulfide as acceptor']), - ('GO:0016960', + ('GO:0016961', ['ribonucleoside-diphosphate reductase activity, thioredoxin disulfide as acceptor']), ('GO:0004748', ['ribonucleoside-diphosphate reductase activity, thioredoxin disulfide 
as acceptor']), - ('GO:0016961', + ('GO:0016960', ['ribonucleoside-diphosphate reductase activity, thioredoxin disulfide as acceptor']), ('GO:0004763', ['serine-pyruvate transaminase activity']), - ('GO:0004760', ['serine-pyruvate transaminase activity']), ('GO:0004762', ['serine-pyruvate transaminase activity']), ('GO:0004761', ['serine-pyruvate transaminase activity']), - ('GO:0030230', ['sphingomyelin phosphodiesterase activity']), + ('GO:0004760', ['serine-pyruvate transaminase activity']), ('GO:0004767', ['sphingomyelin phosphodiesterase activity']), + ('GO:0030230', ['sphingomyelin phosphodiesterase activity']), ('GO:0030231', ['sphingomyelin phosphodiesterase activity']), - ('GO:0043735', ['stearoyl-CoA 9-desaturase activity']), - ('GO:0004768', ['stearoyl-CoA 9-desaturase activity']), ('GO:0016214', ['stearoyl-CoA 9-desaturase activity']), + ('GO:0004768', ['stearoyl-CoA 9-desaturase activity']), + ('GO:0043735', ['stearoyl-CoA 9-desaturase activity']), ('GO:0004772', ['sterol O-acyltransferase activity']), ('GO:0017066', ['sterol O-acyltransferase activity']), - ('GO:0008952', ['succinate-semialdehyde dehydrogenase (NAD+) activity']), ('GO:0004777', ['succinate-semialdehyde dehydrogenase (NAD+) activity']), + ('GO:0008952', ['succinate-semialdehyde dehydrogenase (NAD+) activity']), ('GO:0016954', ['superoxide dismutase activity']), + ('GO:0004784', ['superoxide dismutase activity']), ('GO:0008383', ['superoxide dismutase activity']), ('GO:0004785', ['superoxide dismutase activity']), - ('GO:0004784', ['superoxide dismutase activity']), ('GO:0008382', ['superoxide dismutase activity']), ('GO:0004796', ['thromboxane-A synthase activity']), ('GO:0008400', ['thromboxane-A synthase activity']), - ('GO:0004804', ['transposase activity']), ('GO:0004803', ['transposase activity']), - ('GO:0004808', - ['tRNA (5-methylaminomethyl-2-thiouridylate)(34)-methyltransferase activity']), + ('GO:0004804', ['transposase activity']), ('GO:0016425', ['tRNA 
(5-methylaminomethyl-2-thiouridylate)(34)-methyltransferase activity']), - ('GO:0004810', ['CCA tRNA nucleotidyltransferase activity']), + ('GO:0004808', + ['tRNA (5-methylaminomethyl-2-thiouridylate)(34)-methyltransferase activity']), ('GO:0016437', ['CCA tRNA nucleotidyltransferase activity']), - ('GO:0004812', ['aminoacyl-tRNA ligase activity']), + ('GO:0004810', ['CCA tRNA nucleotidyltransferase activity']), ('GO:0017100', ['aminoacyl-tRNA ligase activity']), + ('GO:0004812', ['aminoacyl-tRNA ligase activity']), ('GO:0016876', ['aminoacyl-tRNA ligase activity']), ('GO:0004833', ['tryptophan 2,3-dioxygenase activity']), ('GO:0004426', ['tryptophan 2,3-dioxygenase activity']), - ('GO:0004842', ['ubiquitin-protein transferase activity']), - ('GO:0004840', ['ubiquitin-protein transferase activity']), ('GO:0004841', ['ubiquitin-protein transferase activity']), - ('GO:0036459', ['cysteine-type deubiquitinase activity']), + ('GO:0004840', ['ubiquitin-protein transferase activity']), + ('GO:0004842', ['ubiquitin-protein transferase activity']), ('GO:0004843', ['cysteine-type deubiquitinase activity']), - ('GO:0004857', ['enzyme inhibitor activity']), + ('GO:0036459', ['cysteine-type deubiquitinase activity']), ('GO:0048551', ['enzyme inhibitor activity']), - ('GO:0004864', ['protein phosphatase inhibitor activity']), + ('GO:0004857', ['enzyme inhibitor activity']), ('GO:1990681', ['protein phosphatase inhibitor activity']), + ('GO:0004864', ['protein phosphatase inhibitor activity']), ('GO:0004870', ['cysteine-type endopeptidase inhibitor activity']), ('GO:0004869', ['cysteine-type endopeptidase inhibitor activity']), ('GO:0004875', ['complement receptor activity']), ('GO:0004942', ['complement receptor activity']), - ('GO:0004943', ['complement component C3a receptor activity']), ('GO:0004876', ['complement component C3a receptor activity']), + ('GO:0004943', ['complement component C3a receptor activity']), ('GO:0004944', ['complement component C5a receptor 
activity']), ('GO:0004878', ['complement component C5a receptor activity']), ('GO:0003708', ['nuclear receptor activity']), - ('GO:0038051', ['nuclear receptor activity']), ('GO:0004879', ['nuclear receptor activity']), - ('GO:0004882', ['nuclear receptor activity']), - ('GO:0004886', ['nuclear receptor activity']), + ('GO:0004880', ['nuclear receptor activity']), ('GO:0038052', ['nuclear receptor activity']), + ('GO:0004886', ['nuclear receptor activity']), + ('GO:0004887', ['nuclear receptor activity']), + ('GO:0004882', ['nuclear receptor activity']), ('GO:0004884', ['nuclear receptor activity']), ('GO:0008434', ['nuclear receptor activity']), - ('GO:0004887', ['nuclear receptor activity']), ('GO:0038050', ['nuclear receptor activity']), - ('GO:0004880', ['nuclear receptor activity']), + ('GO:0038051', ['nuclear receptor activity']), ('GO:0004888', ['transmembrane signaling receptor activity']), ('GO:0099600', ['transmembrane signaling receptor activity']), ('GO:0004926', ['transmembrane signaling receptor activity']), @@ -3194,165 +3194,165 @@Ontology.pct
('GO:0004909', ['interleukin-1, type I, activating receptor activity']), ('GO:0004910', ['interleukin-1, type II, blocking receptor activity']), ('GO:0019968', ['interleukin-1, type II, blocking receptor activity']), - ('GO:0004899', ['leukemia inhibitory factor receptor activity']), ('GO:0004923', ['leukemia inhibitory factor receptor activity']), + ('GO:0004899', ['leukemia inhibitory factor receptor activity']), + ('GO:0001624', ['G protein-coupled receptor activity']), ('GO:0004930', ['G protein-coupled receptor activity']), - ('GO:0016526', ['G protein-coupled receptor activity']), ('GO:0001622', ['G protein-coupled receptor activity']), ('GO:0001623', ['G protein-coupled receptor activity']), + ('GO:0016526', ['G protein-coupled receptor activity']), ('GO:0001625', ['G protein-coupled receptor activity']), - ('GO:0001624', ['G protein-coupled receptor activity']), - ('GO:0001599', ['endothelin receptor activity']), ('GO:0001600', ['endothelin receptor activity']), ('GO:0004962', ['endothelin receptor activity']), - ('GO:0004976', ['luteinizing hormone receptor activity']), + ('GO:0001599', ['endothelin receptor activity']), ('GO:0004964', ['luteinizing hormone receptor activity']), - ('GO:0004993', ['G protein-coupled serotonin receptor activity']), - ('GO:0016609', ['G protein-coupled serotonin receptor activity']), + ('GO:0004976', ['luteinizing hormone receptor activity']), ('GO:0001585', ['G protein-coupled serotonin receptor activity']), - ('GO:0005000', ['vasopressin receptor activity']), + ('GO:0016609', ['G protein-coupled serotonin receptor activity']), + ('GO:0004993', ['G protein-coupled serotonin receptor activity']), ('GO:0016931', ['vasopressin receptor activity']), + ('GO:0005000', ['vasopressin receptor activity']), ('GO:0005023', ['epidermal growth factor receptor activity']), ('GO:0005006', ['epidermal growth factor receptor activity']), - ('GO:0036326', ['vascular endothelial growth factor receptor activity']), - ('GO:0036329', ['vascular 
endothelial growth factor receptor activity']), ('GO:0036328', ['vascular endothelial growth factor receptor activity']), + ('GO:0036329', ['vascular endothelial growth factor receptor activity']), + ('GO:0005021', ['vascular endothelial growth factor receptor activity']), ('GO:0036327', ['vascular endothelial growth factor receptor activity']), ('GO:0036330', ['vascular endothelial growth factor receptor activity']), - ('GO:0005021', ['vascular endothelial growth factor receptor activity']), + ('GO:0036326', ['vascular endothelial growth factor receptor activity']), + ('GO:0005033', ['tumor necrosis factor receptor activity']), ('GO:0005031', ['tumor necrosis factor receptor activity']), ('GO:0005032', ['tumor necrosis factor receptor activity']), - ('GO:0005033', ['tumor necrosis factor receptor activity']), ('GO:0005041', ['low-density lipoprotein particle receptor activity']), ('GO:0008032', ['low-density lipoprotein particle receptor activity']), - ('GO:0005048', ['signal sequence binding']), ('GO:0008249', ['signal sequence binding']), + ('GO:0005048', ['signal sequence binding']), ('GO:0008262', ['nuclear export signal receptor activity']), ('GO:0005049', ['nuclear export signal receptor activity']), - ('GO:0005069', - ['transmembrane receptor protein tyrosine kinase adaptor activity']), ('GO:0005068', ['transmembrane receptor protein tyrosine kinase adaptor activity']), + ('GO:0005069', + ['transmembrane receptor protein tyrosine kinase adaptor activity']), + ('GO:0097024', ['protein kinase C binding']), + ('GO:0072569', ['protein kinase C binding']), ('GO:0005080', ['protein kinase C binding']), ('GO:0072568', ['protein kinase C binding']), - ('GO:0072569', ['protein kinase C binding']), - ('GO:0097024', ['protein kinase C binding']), - ('GO:0019839', ['guanyl-nucleotide exchange factor activity']), - ('GO:0017112', ['guanyl-nucleotide exchange factor activity']), - ('GO:0005089', ['guanyl-nucleotide exchange factor activity']), ('GO:0008321', 
['guanyl-nucleotide exchange factor activity']), - ('GO:0016220', ['guanyl-nucleotide exchange factor activity']), + ('GO:0017034', ['guanyl-nucleotide exchange factor activity']), ('GO:0005088', ['guanyl-nucleotide exchange factor activity']), + ('GO:0005090', ['guanyl-nucleotide exchange factor activity']), + ('GO:0016220', ['guanyl-nucleotide exchange factor activity']), + ('GO:0017132', ['guanyl-nucleotide exchange factor activity']), ('GO:0016219', ['guanyl-nucleotide exchange factor activity']), + ('GO:0005085', ['guanyl-nucleotide exchange factor activity']), ('GO:0008433', ['guanyl-nucleotide exchange factor activity']), ('GO:0005087', ['guanyl-nucleotide exchange factor activity']), - ('GO:0005086', ['guanyl-nucleotide exchange factor activity']), - ('GO:0017034', ['guanyl-nucleotide exchange factor activity']), - ('GO:0017132', ['guanyl-nucleotide exchange factor activity']), - ('GO:0005085', ['guanyl-nucleotide exchange factor activity']), - ('GO:0005090', ['guanyl-nucleotide exchange factor activity']), ('GO:0030676', ['guanyl-nucleotide exchange factor activity']), - ('GO:0005098', ['GTPase activator activity']), - ('GO:0005101', ['GTPase activator activity']), + ('GO:0005089', ['guanyl-nucleotide exchange factor activity']), + ('GO:0005086', ['guanyl-nucleotide exchange factor activity']), + ('GO:0017112', ['guanyl-nucleotide exchange factor activity']), + ('GO:0019839', ['guanyl-nucleotide exchange factor activity']), ('GO:0005100', ['GTPase activator activity']), - ('GO:0030675', ['GTPase activator activity']), - ('GO:0017123', ['GTPase activator activity']), - ('GO:0046582', ['GTPase activator activity']), - ('GO:0005099', ['GTPase activator activity']), ('GO:0008060', ['GTPase activator activity']), + ('GO:0005101', ['GTPase activator activity']), ('GO:0005096', ['GTPase activator activity']), ('GO:0005097', ['GTPase activator activity']), - ('GO:0005104', ['fibroblast growth factor receptor binding']), + ('GO:0005098', ['GTPase activator 
activity']), + ('GO:0030675', ['GTPase activator activity']), + ('GO:0017123', ['GTPase activator activity']), + ('GO:0005099', ['GTPase activator activity']), + ('GO:0046582', ['GTPase activator activity']), ('GO:0001521', ['fibroblast growth factor receptor binding']), ('GO:0005162', ['fibroblast growth factor receptor binding']), - ('GO:0005109', ['frizzled binding']), + ('GO:0005104', ['fibroblast growth factor receptor binding']), ('GO:0005110', ['frizzled binding']), - ('GO:0008185', ['epidermal growth factor receptor binding']), + ('GO:0005109', ['frizzled binding']), ('GO:0005154', ['epidermal growth factor receptor binding']), - ('GO:0005159', ['insulin-like growth factor receptor binding']), + ('GO:0008185', ['epidermal growth factor receptor binding']), ('GO:0005067', ['insulin-like growth factor receptor binding']), + ('GO:0005159', ['insulin-like growth factor receptor binding']), ('GO:0005478', ['transporter activity']), ('GO:0005215', ['transporter activity']), ('GO:0008095', ['inositol 1,4,5-trisphosphate-gated calcium channel activity']), ('GO:0005220', ['inositol 1,4,5-trisphosphate-gated calcium channel activity']), - ('GO:0015285', ['gap junction channel activity']), ('GO:0005243', ['gap junction channel activity']), ('GO:0015286', ['gap junction channel activity']), + ('GO:0015285', ['gap junction channel activity']), ('GO:0005245', ['voltage-gated calcium channel activity']), ('GO:0015270', ['voltage-gated calcium channel activity']), ('GO:0010173', ['voltage-gated calcium channel activity']), - ('GO:0005224', ['intracellularly ATP-gated chloride channel activity']), ('GO:0005260', ['intracellularly ATP-gated chloride channel activity']), - ('GO:0005261', ['monoatomic cation channel activity']), - ('GO:0015338', ['monoatomic cation channel activity']), + ('GO:0005224', ['intracellularly ATP-gated chloride channel activity']), ('GO:0015281', ['monoatomic cation channel activity']), + ('GO:0015338', ['monoatomic cation channel activity']), + 
('GO:0005261', ['monoatomic cation channel activity']), ('GO:0015206', ['allantoin:proton symporter activity']), ('GO:0005274', ['allantoin:proton symporter activity']), - ('GO:0005275', ['amine transmembrane transporter activity']), ('GO:0005279', ['amine transmembrane transporter activity']), - ('GO:0005285', ['amino acid:sodium symporter activity']), + ('GO:0005275', ['amine transmembrane transporter activity']), ('GO:0005283', ['amino acid:sodium symporter activity']), ('GO:0005284', ['amino acid:sodium symporter activity']), + ('GO:0005285', ['amino acid:sodium symporter activity']), ('GO:0005295', ['neutral L-amino acid:sodium symporter activity']), ('GO:0005282', ['neutral L-amino acid:sodium symporter activity']), - ('GO:0005302', ['L-tyrosine transmembrane transporter activity']), ('GO:0015508', ['L-tyrosine transmembrane transporter activity']), + ('GO:0005302', ['L-tyrosine transmembrane transporter activity']), ('GO:0005312', ['dicarboxylic acid transmembrane transporter activity']), - ('GO:0005310', ['dicarboxylic acid transmembrane transporter activity']), ('GO:0015365', ['dicarboxylic acid transmembrane transporter activity']), + ('GO:0005310', ['dicarboxylic acid transmembrane transporter activity']), ('GO:1901677', ['phosphate transmembrane transporter activity']), - ('GO:0005315', ['phosphate transmembrane transporter activity']), ('GO:0005317', ['phosphate transmembrane transporter activity']), - ('GO:0005324', ['long-chain fatty acid transmembrane transporter activity']), - ('GO:0008562', ['long-chain fatty acid transmembrane transporter activity']), + ('GO:0005315', ['phosphate transmembrane transporter activity']), ('GO:0005325', ['long-chain fatty acid transmembrane transporter activity']), - ('GO:0005329', ['dopamine:sodium symporter activity']), + ('GO:0008562', ['long-chain fatty acid transmembrane transporter activity']), + ('GO:0005324', ['long-chain fatty acid transmembrane transporter activity']), ('GO:0005330', ['dopamine:sodium 
symporter activity']), - ('GO:0005334', ['norepinephrine:sodium symporter activity']), + ('GO:0005329', ['dopamine:sodium symporter activity']), ('GO:0005333', ['norepinephrine:sodium symporter activity']), - ('GO:0005335', ['serotonin:sodium:chloride symporter activity']), + ('GO:0005334', ['norepinephrine:sodium symporter activity']), ('GO:0005336', ['serotonin:sodium:chloride symporter activity']), + ('GO:0005335', ['serotonin:sodium:chloride symporter activity']), ('GO:0015222', ['serotonin:sodium:chloride symporter activity']), - ('GO:0005339', ['nucleotide-sugar transmembrane transporter activity']), ('GO:0005338', ['nucleotide-sugar transmembrane transporter activity']), + ('GO:0005339', ['nucleotide-sugar transmembrane transporter activity']), ('GO:0005341', ['nucleotide-sulfate transmembrane transporter activity']), ('GO:0005340', ['nucleotide-sulfate transmembrane transporter activity']), ('GO:0005344', ['oxygen carrier activity']), ('GO:0015033', ['oxygen carrier activity']), - ('GO:0005347', ['ATP transmembrane transporter activity']), ('GO:0005348', ['ATP transmembrane transporter activity']), - ('GO:0015542', ['carbohydrate:proton symporter activity']), - ('GO:0005351', ['carbohydrate:proton symporter activity']), + ('GO:0005347', ['ATP transmembrane transporter activity']), ('GO:0005403', ['carbohydrate:proton symporter activity']), + ('GO:0005351', ['carbohydrate:proton symporter activity']), + ('GO:0015542', ['carbohydrate:proton symporter activity']), ('GO:0019192', ['fructose transmembrane transporter activity']), - ('GO:0005353', ['fructose transmembrane transporter activity']), ('GO:0015585', ['fructose transmembrane transporter activity']), - ('GO:0005355', ['glucose transmembrane transporter activity']), + ('GO:0005353', ['fructose transmembrane transporter activity']), ('GO:0015579', ['glucose transmembrane transporter activity']), - ('GO:0005356', ['glucose:proton symporter activity']), + ('GO:0005355', ['glucose transmembrane transporter 
activity']), ('GO:0005361', ['glucose:proton symporter activity']), + ('GO:0005356', ['glucose:proton symporter activity']), ('GO:0015581', ['maltose transmembrane transporter activity']), ('GO:0005363', ['maltose transmembrane transporter activity']), ('GO:0005371', ['tricarboxylate secondary active transmembrane transporter activity']), ('GO:0005370', ['tricarboxylate secondary active transmembrane transporter activity']), - ('GO:0005380', ['copper ion transmembrane transporter activity']), - ('GO:0005375', ['copper ion transmembrane transporter activity']), - ('GO:0005378', ['copper ion transmembrane transporter activity']), - ('GO:0015088', ['copper ion transmembrane transporter activity']), ('GO:0005379', ['copper ion transmembrane transporter activity']), - ('GO:0097689', ['iron ion transmembrane transporter activity']), + ('GO:0015088', ['copper ion transmembrane transporter activity']), + ('GO:0005378', ['copper ion transmembrane transporter activity']), + ('GO:0005375', ['copper ion transmembrane transporter activity']), + ('GO:0005380', ['copper ion transmembrane transporter activity']), ('GO:0016033', ['iron ion transmembrane transporter activity']), - ('GO:0005382', ['iron ion transmembrane transporter activity']), + ('GO:0097689', ['iron ion transmembrane transporter activity']), ('GO:0005381', ['iron ion transmembrane transporter activity']), - ('GO:0005415', ['nucleoside:sodium symporter activity']), + ('GO:0005382', ['iron ion transmembrane transporter activity']), ('GO:0008522', ['nucleoside:sodium symporter activity']), - ('GO:0015321', ['sodium:phosphate symporter activity']), + ('GO:0005415', ['nucleoside:sodium symporter activity']), ('GO:0005436', ['sodium:phosphate symporter activity']), - ('GO:0005471', ['ATP:ADP antiporter activity']), + ('GO:0015321', ['sodium:phosphate symporter activity']), + ('GO:0005349', ['ATP:ADP antiporter activity']), ...]
[('GO:0005554', ['molecular_function']), ('GO:0008372', ['cellular_component']), - ('GO:0007582', ['biological_process'])]+ ('GO:0000004', ['biological_process'])]
{'_ID': 'GO:0005554', '_original_ID': 'GO:0005554', - '_container': <picea.ontology.Ontology at 0x7f3eae711150>, + '_container': <picea.ontology.Ontology at 0x7f4598e2cac0>, '_children': ['GO:0003774', 'GO:0003824', 'GO:0005198', 'GO:0005478', 'GO:0005215', 'GO:0005488', - 'GO:0009055', - 'GO:0009053', 'GO:0009054', + 'GO:0009053', + 'GO:0009055', 'GO:0016209', 'GO:0038024', 'GO:0044183', 'GO:0045182', 'GO:0045735', 'GO:0060089', - 'GO:0032947', 'GO:0060090', - 'GO:0090729', + 'GO:0032947', 'GO:0050827', + 'GO:0090729', 'GO:0098772', 'GO:0140104', 'GO:0140110', @@ -3467,10 +3467,10 @@Ontology.pct
'GO:0140691', 'GO:0140776', 'GO:0140777', - 'GO:0140911', - 'GO:0034291', 'GO:0034290', + 'GO:0034291', 'GO:0034292', + 'GO:0140911', 'GO:0140912', 'GO:0141047', 'GO:0180020', @@ -3494,7 +3494,7 @@Ontology.pct
{'_ID': 'GO:0005554', '_original_ID': 'GO:0005554', - '_container': <picea.ontology.Ontology at 0x7f3eae711150>, + '_container': <picea.ontology.Ontology at 0x7f4598e2cac0>, '_children': ['GO:0003774', 'GO:0003824', 'GO:0005198', 'GO:0005478', 'GO:0005215', 'GO:0005488', - 'GO:0009055', - 'GO:0009053', 'GO:0009054', + 'GO:0009053', + 'GO:0009055', 'GO:0016209', 'GO:0038024', 'GO:0044183', 'GO:0045182', 'GO:0045735', 'GO:0060089', - 'GO:0032947', 'GO:0060090', - 'GO:0090729', + 'GO:0032947', 'GO:0050827', + 'GO:0090729', 'GO:0098772', 'GO:0140104', 'GO:0140110', @@ -3560,10 +3560,10 @@Ontology.pct
'GO:0140691', 'GO:0140776', 'GO:0140777', - 'GO:0140911', - 'GO:0034291', 'GO:0034290', + 'GO:0034291', 'GO:0034292', + 'GO:0140911', 'GO:0140912', 'GO:0141047', 'GO:0180020', diff --git a/examples/sequence_annotation/index.html b/examples/sequence_annotation/index.html index b7c3495..065bfb7 100644 --- a/examples/sequence_annotation/index.html +++ b/examples/sequence_annotation/index.html @@ -1150,7 +1150,7 @@Sequence annotation
Out[1]:-'0.0.26'+'0.0.27'
[<SequenceInterval type=gene ID=gene00001 loc=ctg123..1000..9000..+ at 0x7f7c3c64f400>]+
[<SequenceInterval type=gene ID=gene00001 loc=ctg123..1000..9000..+ at 0x7f217c13b3d0>]
[<SequenceInterval type=exon ID=exon00001 loc=ctg123..1300..1500..+ at 0x7f7c3c64f0a0>, - <SequenceInterval type=exon ID=exon00003 loc=ctg123..3000..3902..+ at 0x7f7c3c64ef50>, - <SequenceInterval type=exon ID=exon00004 loc=ctg123..5000..5500..+ at 0x7f7c3c64eef0>, - <SequenceInterval type=exon ID=exon00005 loc=ctg123..7000..9000..+ at 0x7f7c3c64eec0>, - <SequenceInterval type=CDS ID=cds00003.1 loc=ctg123..3301..3902..+ at 0x7f7c3c64e650>, - <SequenceInterval type=CDS ID=cds00003.2 loc=ctg123..5000..5500..+ at 0x7f7c3c64e680>, - <SequenceInterval type=CDS ID=cds00003.3 loc=ctg123..7000..7600..+ at 0x7f7c3c64e8f0>, - <SequenceInterval type=CDS ID=cds00004.1 loc=ctg123..3391..3902..+ at 0x7f7c3c64f6a0>, - <SequenceInterval type=CDS ID=cds00004.2 loc=ctg123..5000..5500..+ at 0x7f7c3c64f100>, - <SequenceInterval type=CDS ID=cds00004.3 loc=ctg123..7000..7600..+ at 0x7f7c3c64ef80>]+
[<SequenceInterval type=exon ID=exon00001 loc=ctg123..1300..1500..+ at 0x7f217c13b070>, + <SequenceInterval type=exon ID=exon00003 loc=ctg123..3000..3902..+ at 0x7f217c13af20>, + <SequenceInterval type=exon ID=exon00004 loc=ctg123..5000..5500..+ at 0x7f217c13aec0>, + <SequenceInterval type=exon ID=exon00005 loc=ctg123..7000..9000..+ at 0x7f217c13ae90>, + <SequenceInterval type=CDS ID=cds00003.1 loc=ctg123..3301..3902..+ at 0x7f217c13a620>, + <SequenceInterval type=CDS ID=cds00003.2 loc=ctg123..5000..5500..+ at 0x7f217c13a650>, + <SequenceInterval type=CDS ID=cds00003.3 loc=ctg123..7000..7600..+ at 0x7f217c13a8c0>, + <SequenceInterval type=CDS ID=cds00004.1 loc=ctg123..3391..3902..+ at 0x7f217c13b670>, + <SequenceInterval type=CDS ID=cds00004.2 loc=ctg123..5000..5500..+ at 0x7f217c13b0d0>, + <SequenceInterval type=CDS ID=cds00004.3 loc=ctg123..7000..7600..+ at 0x7f217c13af50>]
'0.0.26'+
'0.0.27'
<SequenceInterval type=gene ID=gene00001 loc=ctg123..1000..9000..+ at 0x7f864c1400d0>+
<SequenceInterval type=gene ID=gene00001 loc=ctg123..1000..9000..+ at 0x7fbec4082e00>
'0.0.26'+
'0.0.27'
<matplotlib.collections.PathCollection at 0x7ff61fa01900>+
<matplotlib.collections.PathCollection at 0x7f076221da50>
Sprucing up bioinformatics analysis
pip install picea\n
picea has datastructures and methods to perform common bioinformatics tasks. Currently sequences, sequence annotations, trees, and ontologies are supported.
Example:
from picea import Tree, treeplot\nimport matplotlib.pyplot as plt\n\nnewick = '(((a,b),(c,d)),e)'\ntree = Tree.from_newick(newick)\n\nfig, (ax1, ax2) = plt.subplots(ncols = 2, figsize = (10, 4))\n\n#left-to-right layout with direct links\ntreeplot(tree, style='rectangular', ltr=True, ax=ax1)\n\n#right-to-left layout with square links\ntreeplot(tree, style='square', ltr=False, ax=ax2)\n
"},{"location":"CONTRIBUTING/","title":"Contributing","text":"Deploying to pypi:
poetry check\npoetry run coverage run\npoetry run coverage report\npoetry version <major,minor,patch>\npoetry build\npoetry deploy\n
"},{"location":"LICENSE/","title":"License","text":"The MIT License (MIT)
Copyright (c) 2020 Rens Holmer.
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \"Software\"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"},{"location":"API/ontology/","title":"Ontology","text":" Bases: DirectedAcyclicGraph
picea/ontology.py
class Ontology(DirectedAcyclicGraph):\n def __init__(self):\n super().__init__()\n self._header: List[str] = []\n\n def __getitem__(self, ID) -> OntologyTerm:\n term = self._elements[ID]\n if not term._children and not term._parents and term.__dict__.get(\"alt_id\"):\n alt_id = term.__dict__.get(\"alt_id\")[0]\n term = self._elements[alt_id]\n warnings.warn(f\"Accessed GO term by alt ID {ID}, \" f\"returning main GO term with ID {alt_id}\")\n return term\n\n @classmethod\n def from_obo(cls, filename: str = None, string: str = None, skip_obsolete=True) -> \"Ontology\":\n assert filename or string\n assert not (filename and string)\n ontology = cls()\n if filename:\n with open(filename) as filehandle:\n string = filehandle.read()\n\n obo_iter = (el for _, el in groupby(string.strip().split(\"\\n\"), lambda line: line[:1] == \"[\"))\n\n ontology._header = list(next(obo_iter))\n\n for element in obo_iter:\n element = next(element)\n if element != \"[Term]\":\n continue\n attributes = defaultdict(list)\n for attribute in next(obo_iter):\n if not attribute:\n continue\n attr_key, attr_value = attribute.split(\":\", 1)\n attributes[attr_key].append(attr_value.strip())\n if skip_obsolete and attributes.get(\"is_obsolete\"):\n continue\n\n ID = attributes.pop(\"id\")[0].strip()\n parents = [p.split(\"!\")[0].strip() for p in attributes.get(\"is_a\", \"\")]\n\n for relationship in attributes.get(\"relationship\", []):\n relation_type, go_id = relationship.split(\"!\")[0].strip().split(\" \")\n if relation_type == \"part_of\":\n parents.append(go_id)\n\n alt_ids = {*attributes.pop(\"alt_id\", []), ID}\n for alt_id in alt_ids:\n ontology[alt_id] = OntologyTerm(\n ID=alt_id,\n parents=parents,\n container=ontology,\n alt_id=[id for id in alt_ids if id != alt_id],\n **attributes,\n )\n\n for ontology_term in ontology:\n for parent_id in ontology_term._parents:\n parent_term = ontology[parent_id]\n parent_term._children.append(ontology_term.ID)\n\n return ontology\n
"},{"location":"API/sequence/","title":"Sequence","text":" Bases: set
Alphabet of arbitrary biological sequences
Examples:
>>> DNA = Alphabet('DNA', 'ACGT')\n>>> DNA\nAlphabet(name='DNA', members='ACGT')\n
>>> Protein = Alphabet('AminoAcid', '*-?ACDEFGHIKLMNPQRSTVWXY')\n>>> Protein\nAlphabet(name='AminoAcid', members='*-?ACDEFGHIKLMNPQRSTVWXY')\n
Parameters:
Name Type Description Defaultname
str
Alphabet name
requiredmembers
Iterable[str]
Letters of the alphabet
required Source code inpicea/sequence.py
@dataclass(frozen=True)\nclass Alphabet(set):\n \"\"\"Alphabet of arbitrary biological sequences\n\n Examples:\n >>> DNA = Alphabet('DNA', 'ACGT')\n >>> DNA\n Alphabet(name='DNA', members='ACGT')\n\n >>> Protein = Alphabet('AminoAcid', '*-?ACDEFGHIKLMNPQRSTVWXY')\n >>> Protein\n Alphabet(name='AminoAcid', members='*-?ACDEFGHIKLMNPQRSTVWXY')\n\n\n Args:\n name (str): Alphabet name\n members (Iterable[str]): Letters of the alphabet\n \"\"\"\n\n name: str\n members: Iterable[str]\n\n def __post_init__(self) -> None:\n super().__init__(self.members)\n\n def __deepcopy__(self, memo) -> \"Alphabet\":\n return Alphabet(self, self.name)\n\n def score(\n self,\n sequence: str,\n match: float = 1.0,\n mismatch: float = -1.0,\n n_chars: int = 100,\n ) -> float:\n \"\"\"Scores how well a sequence matches an alphabet by summing \\\n (mis)matches of sequence letters that are not in the alphabet \\\n and (mis)matches of alphabet letters that are not in the sequence.\n\n Args:\n sequence (str): Sequence string for which to determine how well \\\n it fits the alphabet\n match (float, optional): match score. Defaults to 1.0.\n mismatch (float, optional): mismatch score. Defaults to -1.0.\n n_chars (int, optional): number of sequence characters to use in \\\n scoring. 
Large numbers incur a significant computational cost.\n\n Returns:\n (float): Score of how well a sequence matches the alphabet\n \"\"\"\n return sum(match if s in self else mismatch for s in sequence[:n_chars]) + sum(\n match if s in sequence[:n_chars] else mismatch for s in self\n )\n\n def validate(self, sequence: str) -> bool:\n \"\"\"Determine whether a sequence strictly fits an alphabet\n\n Args:\n sequence (str): Sequence string\n\n Returns:\n bool: true if all characters in sequence are in the alphabet\n \"\"\"\n return sum(1 if s not in self else 0 for s in sequence) == 0\n\n def complement(self, sequence: str) -> str:\n \"\"\"Returns complementary strand of DNA or RNA sequence strings\n\n Examples:\n >>> DNA = Alphabet('DNA', 'ACGT')\n >>> DNA.complement('AACTACG')\n 'TTGATGC'\n\n Args:\n sequence (str): Sequence string\n\n Returns:\n str: complementary strand sequence string\n \"\"\"\n if self.name == \"DNA\":\n complement = dict(zip(\"acgtnACGTN-?\", \"tgcanTGCAN-?\"))\n elif self.name == \"RNA\":\n complement = dict(zip(\"acgunACGUN-?\", \"ugcanUGCAN-?\"))\n else:\n raise TypeError(\"Cannot complement non-DNA or non-RNA alphabet\")\n return \"\".join(complement[s] for s in sequence)\n\n def translate(self, sequence: str) -> str:\n \"\"\"Translate DNA or RNA sequence string to amino acid string\n\n Examples:\n >>> DNA = Alphabet('DNA', 'ACGT')\n >>> DNA.translate('ATGACGACGTAA')\n 'MTT*'\n\n Args:\n sequence (str): Sequence string (sequence length must be multiple of 3)\n\n Returns:\n str: Amino acid string\n \"\"\"\n if self.name not in (\"DNA\", \"RNA\"):\n raise TypeError(\"Cannot translate non-DNA or non-RNA alphabet\")\n codons = re.findall(\"...\", sequence.upper())\n return \"\".join(TRANSLATION.get(codon, \"X\") for codon in codons)\n
Container for a single biological sequence
Examples:
>>> s1 = Sequence('test_dna', 'ACGATCGACTAGCA')\n>>> s1\nSequence(header='test_dna', alphabet=Alphabet(name='DNA', members='-?acgtnACGNT'))\n>>> s2 = Sequence('test_aa', 'QAPISAIWPOIWQ*')\n>>> s2\nSequence(header='test_aa', alphabet=Alphabet(name='AminoAcid', members='*-?acdefghiklmnpqrstvwxyACDEFGHIKLMNPQRSTVWXY'))\n
Returns:
Type Description Source code inpicea/sequence.py
@dataclass\nclass Sequence:\n \"\"\"Container for a single biological sequence\n\n Examples:\n >>> s1 = Sequence('test_dna', 'ACGATCGACTAGCA')\n >>> s1\n Sequence(header='test_dna', \\\nalphabet=Alphabet(name='DNA', members='-?acgtnACGNT'))\n >>> s2 = Sequence('test_aa', 'QAPISAIWPOIWQ*')\n >>> s2\n Sequence(header='test_aa', \\\nalphabet=Alphabet(name='AminoAcid', \\\nmembers='*-?acdefghiklmnpqrstvwxyACDEFGHIKLMNPQRSTVWXY'))\n\n Returns:\n [type]: [description]\n \"\"\"\n\n header: str = None\n sequence: str = field(repr=False, default=None)\n alphabet: Alphabet = None\n annotation: Optional[SequenceAnnotation] = field(default_factory=SequenceAnnotation, repr=False)\n\n def __post_init__(self):\n if self.alphabet is not None:\n return\n if self.sequence is None:\n self.alphabet = alphabets.DNA\n else:\n self.alphabet = sorted(alphabets, key=lambda alphabet: alphabet.score(self.sequence)).pop()\n\n def __getitem__(self, key):\n return Sequence(self.header, self.sequence[key])\n\n def __len__(self):\n \"\"\"Length of the sequence\n\n Examples:\n >>> s = Sequence('test_dna', 'ACGTA')\n >>> len(s)\n 5\n \"\"\"\n return len(self.sequence)\n\n @property\n def reverse(self):\n return Sequence(self.header, self.sequence[::-1])\n\n @property\n def complement(self):\n return Sequence(self.header, self.alphabet.complement(self.sequence))\n\n @property\n def reverse_complement(self):\n return Sequence(self.header, self.alphabet.complement(self.sequence[::-1]))\n\n @property\n def amino_acids(self):\n if self.alphabet.name == \"AminoAcid\":\n return self\n else:\n return Sequence(self.header, self.alphabet.translate(self.sequence))\n\n def to_dict(self) -> Dict[str, str]:\n \"\"\"Make dictionary with header and sequence elements\n\n Examples:\n >>> s = Sequence('test', 'ACGTA')\n >>> s.to_dict()\n {'header': 'test', 'sequence': 'ACGTA'}\n\n Returns:\n Dict[str, str]: sequence dictionary\n \"\"\"\n return dict(header=self.header, sequence=self.sequence)\n\n @classmethod\n def 
from_fasta(cls, string: str) -> \"Sequence\":\n \"\"\"Create a sequence object from a fasta formatted file. _single sequence only_\n\n Examples:\n >>> fasta_string = '>test\\\\nACGT'\n >>> Sequence.from_fasta(fasta_string)\n Sequence(header='test', \\\nalphabet=Alphabet(name='DNA', members='-?acgtnACGNT'))\n\n Arguments:\n string (str)\n\n Returns:\n Sequence\n \"\"\"\n lines = string.strip().split(\"\\n\")\n header = lines[0][1:]\n sequence = \"\".join(lines[1:])\n return cls(header, sequence)\n\n def to_fasta(self, linewidth: int = 80) -> str:\n \"\"\"Make fasta formatted sequence entry\n\n Returns:\n str: sequence in fasta format\n \"\"\"\n sequence_lines = \"\\n\".join(re.findall(f\".{{1,{linewidth}}}\", self.sequence))\n return f\">{self.header}\\n{sequence_lines}\"\n
Bases: SequenceReader
picea/sequence.py
class BatchSequenceReader(SequenceReader):\n def __init__(\n self,\n string: str = None,\n filename: str = None,\n filetype: str = None,\n batchsize: int = 10,\n ) -> None:\n \"\"\"[summary]\n\n Args:\n string (str, optional): [description]. Defaults to None.\n filename (str, optional): [description]. Defaults to None.\n filetype (str, optional): [description]. Defaults to None.\n batchsize (int, optional): [description]. Defaults to 10.\n\n Returns:\n [type]: [description]\n\n Yields:\n [type]: [description]\n \"\"\"\n super().__init__(string, filename, filetype)\n self.batchsize = batchsize\n self._currentbatch = SequenceCollection()\n\n def __iter__(self) -> Iterable[\"SequenceCollection\"]:\n for s in self._iter():\n self._currentbatch[s.header] = s\n if len(self._currentbatch) == self.batchsize:\n yield self._currentbatch\n self._currentbatch = SequenceCollection()\n\n def __next__(self) -> \"SequenceCollection\":\n currentbatch = self._currentbatch\n self._currentbatch = SequenceCollection()\n if len(currentbatch) == self.batchsize:\n return currentbatch\n for s in self._iter():\n currentbatch[s.header] = s\n if len(currentbatch) == self.batchsize:\n yield currentbatch\n
(Partially) Abstract Base Class for sequence collections. Classes extending from this baseclass should override __setitem__
, __getitem__
, __delitem__
, headers
, and n_seqs
.
If the above methods are implemented, this automatically enables the following methods: from_fasta
, to_fasta
, from_json
, to_json
.
Parameters:
Name Type Description Defaultsequences
Optional[Iterable[Tuple[str, str]]]
Iterable of (header, sequence) tuples. Defaults to None.
None
sequence_annotation
Optional[SequenceAnnotation]
picea SequenceAnnotation object. Defaults to None.\n
None
Raises:
Type DescriptionNotImplementedError
Abstract Base Class can not be initialized and serves as a template only
Source code inpicea/sequence.py
class AbstractSequenceCollection(metaclass=ABCMeta):\n \"\"\"\n (Partially) Abstract Base Class for sequence collections.\n Classes extending from this baseclass should override\n `__setitem__`, `__getitem__`, `__delitem__`, `headers`, and `n_seqs`.\n\n If the above methods are implemented, this automatically enables the\n following methods: `from_fasta`, `to_fasta`, `from_json`, `to_json`.\n\n Args:\n sequences (Optional[Iterable[Tuple[str, str]]], optional):\n Iterable of (header, sequence) tuples. Defaults to None.\n sequence_annotation (Optional[SequenceAnnotation]):\n picea SequenceAnnotation object. Defaults to None.\n\n Raises:\n NotImplementedError: Abstract Base Class can not be initialized\n and serves as a template only\n \"\"\"\n\n @abstractmethod\n def __init__(\n self,\n sequences: Optional[Iterable[Sequence]] = None,\n sequence_annotation: Optional[\"SequenceAnnotation\"] = None,\n ) -> None:\n raise NotImplementedError(\n (\"Classes extending from AbstractSequenceCollection should \" \"implement __init__ method\")\n )\n\n @abstractmethod\n def __setitem__(self, header: str, seq: str) -> None:\n raise NotImplementedError(\n (\"Classes extending from AbstractSequenceCollection should \" \"implement __setitem__ method\")\n )\n\n @abstractmethod\n def __getitem__(self, header: str) -> Sequence:\n raise NotImplementedError(\n (\"Classes extending from AbstractSequenceCollection should \" \"implement __getitem__ method\")\n )\n\n @abstractmethod\n def __delitem__(self, header: str) -> None:\n raise NotImplementedError(\n (\"Classes extending from AbstractSequenceCollection should \" \"implement __delitem__ method\")\n )\n\n def __iter__(self) -> Iterable[Sequence]:\n for header in self.headers:\n yield self[header]\n\n def __len__(self) -> int:\n return len(self.headers)\n\n def __add__(self: SequenceType, other: SequenceType) -> SequenceType:\n new_collection = self.__class__()\n return new_collection\n\n @property\n @abstractmethod\n def headers(self) 
-> List[str]:\n \"\"\"List of sequences headers.\n Overridden in subclasses.\n\n Raises:\n NotImplementedError\n\n Returns:\n List[str]: List of sequence headers\n \"\"\"\n raise NotImplementedError(\n (\"Classes extending from AbstractSequenceCollection should \" \"implement headers property\")\n )\n\n @property\n def iloc(self) -> SequenceIndex:\n \"\"\"[summary]\n\n Returns:\n SequenceIndex: [description]\n \"\"\"\n return SequenceIndex(self)\n\n @property\n def sequences(self) -> List[str]:\n \"\"\"List of sequences without headers\n\n Returns:\n List[str]: list of sequences\n \"\"\"\n return [self[header].sequence for header in self.headers]\n\n @property\n @abstractmethod\n def n_seqs(self) -> int:\n \"\"\"Return the number of sequences in the collection.\n Overridden in subclasses\n\n Raises:\n NotImplementedError\n\n Returns:\n int: number of sequences\n \"\"\"\n raise NotImplementedError(\n (\"Classes extending from AbstractSequenceCollection should \" \"implement n_seqs property\")\n )\n\n @classmethod\n def from_sequence_iter(cls, sequence_iter: Iterable[Sequence]) -> \"SequenceCollection\":\n \"\"\"[summary]\n\n Raises:\n NotImplementedError: [description]\n\n Returns:\n [type]: [description]\n \"\"\"\n sequencecollection = cls()\n for seq in sequence_iter:\n sequencecollection[seq.header] = seq.sequence\n return sequencecollection\n\n @classmethod\n def from_fasta(\n cls,\n filename: str = None,\n string: str = None,\n ) -> \"SequenceCollection\":\n \"\"\"Parse a fasta formatted string into a SequenceCollection object\n\n Keyword Arguments:\n filename {String} -- filename string (default: {None})\n string {String} -- fasta formatted string (default: {None})\n\n Returns:\n SequenceCollection -- SequenceCollection instance\n \"\"\"\n sequencecollection = cls()\n\n for seq in SequenceReader(string=string, filename=filename, filetype=\"fasta\"):\n sequencecollection[seq.header] = seq.sequence\n return sequencecollection\n\n def to_fasta(self, linewidth: 
int = 80) -> str:\n \"\"\"Get a fasta-formatted string of the sequence collection\n\n Returns:\n str: Multi-line fasta-formatted string\n \"\"\"\n return \"\\n\".join([seq.to_fasta(linewidth=linewidth) for seq in self])\n\n @classmethod\n def from_json(cls, filename: Optional[str] = None, string: Optional[str] = None) -> \"SequenceCollection\":\n \"\"\"[summary]\n\n Keyword Arguments:\n string {String} -- JSON formatted string\n\n Returns:\n SequenceCollection -- SequenceCollection instance\n \"\"\"\n sequencecollection = cls()\n\n for seq in SequenceReader(string=string, filename=filename, filetype=\"json\"):\n sequencecollection[seq.header] = seq.sequence\n\n return sequencecollection\n\n def to_json(self, indent: Optional[int] = None) -> str:\n \"\"\"[summary]\n\n Returns:\n str: [description]\n \"\"\"\n gene_dicts = [seq.to_dict() for seq in self]\n return json.dumps(gene_dicts, indent=indent)\n\n @abstractmethod\n def pop(self, header: str) -> Sequence:\n \"\"\"[summary]\n\n Args:\n header (str): [description]\n\n Returns:\n Sequence: [description]\n \"\"\"\n raise NotImplementedError((\"Classes extending from AbstractSequenceCollection should \" \"implement pop method\"))\n\n def batch_rename(self, rename_func: Callable[[str], str]) -> None:\n \"\"\"Rename all headers by calling `rename_func` on each header\n\n Args:\n rename_func (Callable): [description]\n \"\"\"\n for header in self.headers:\n s: Sequence = self.pop(header)\n s.header = rename_func(s.header)\n self[s.header] = s.sequence\n
Bases: AbstractSequenceCollection
A container for multiple (unaligned) DNA or amino acid sequences
Source code inpicea/sequence.py
class SequenceCollection(AbstractSequenceCollection):\n \"\"\"\n A container for multiple (unaligned) DNA or amino acid sequences\n \"\"\"\n\n def __init__(\n self: \"SequenceCollection\",\n sequences: Iterable[Tuple[str, str]] = None,\n sequence_annotation: \"SequenceAnnotation\" = None,\n ):\n self._collection = dict()\n if sequences:\n for header, sequence in sequences:\n self[header] = sequence\n self.sequence_annotation = sequence_annotation\n\n def __setitem__(self, header: str, seq: str) -> None:\n if header in self._collection:\n warn(f'Turning duplicate header \"{header}\" into unique header')\n new_header = header\n modifier = 0\n while new_header in self.headers:\n modifier += 1\n new_header = f\"{header}_{modifier}\"\n header = new_header\n self._collection[header] = seq\n\n def __getitem__(self, header: str) -> Sequence:\n sequence = self._collection[header]\n return Sequence(header, sequence)\n\n def __delitem__(self, header: str) -> None:\n del self._collection[header]\n\n @property\n def headers(self) -> List[str]:\n return list(self._collection.keys())\n\n @property\n def n_seqs(self) -> int:\n return len(self._collection.keys())\n\n def align(\n self, method: Optional[str] = \"mafft\", method_kwargs: Optional[Mapping[str, str]] = None\n ) -> \"MultipleSequenceAlignment\":\n \"\"\"[summary]\n\n Args:\n method (str, optional): [description]. Defaults to 'mafft'.\n method_kwargs (Mapping[str, str], optional): [description]. 
\\\n Defaults to dict().\n\n Returns:\n [type]: [description]\n \"\"\"\n if not method_kwargs:\n method_kwargs = dict()\n fasta = self.to_fasta()\n command = [method, *chain(*method_kwargs.items()), \"-\"]\n process = Popen(command, stdin=PIPE, stdout=PIPE, stderr=PIPE)\n stdout, _ = process.communicate(input=fasta.encode())\n aligned_fasta = stdout.decode().strip()\n return MultipleSequenceAlignment.from_fasta(string=aligned_fasta)\n\n def pop(self, header: str) -> Sequence:\n sequence = self._collection.pop(header)\n return Sequence(header, sequence)\n
Bases: SequenceCollection
A container for multiple aligned DNA or amino acid sequences
Source code inpicea/sequence.py
class MultipleSequenceAlignment(SequenceCollection):\n \"\"\"\n A container for multiple aligned DNA or amino acid sequences\n \"\"\"\n\n def __init__(\n self,\n sequences: Optional[Iterable[Sequence]] = None,\n sequence_annotation: Optional[\"SequenceAnnotation\"] = None,\n ) -> None:\n super(MultipleSequenceAlignment).__init__()\n self._collection = np.empty((0, 0), dtype=\"uint8\")\n self._header_idx = dict()\n if sequences:\n for seq in sequences:\n self[seq.header] = seq.sequence\n # if sequence_annotation:\n # sequence_annotation.sequence_collection = self\n self.sequence_annotation = sequence_annotation\n\n def __setitem__(self, header: str, seq: str) -> None:\n seq = seq.encode()\n if header in self._header_idx:\n warn(f'Turning duplicate header \"{header}\" into unique header')\n new_header = header\n modifier = 0\n while new_header in self._header_idx:\n modifier += 1\n new_header = f\"{header}_{modifier}\"\n header = new_header\n n_seq, n_char = self._collection.shape\n if n_seq == 0:\n self._collection = np.array([[*seq]], dtype=\"uint8\")\n else:\n len_diff = len(seq) - n_char\n\n filler1 = np.array([[*b\"-\"] * len_diff], dtype=\"uint8\")\n arr = np.hstack((self._collection, np.repeat(filler1, n_seq, axis=0)))\n\n filler2 = np.array([*b\"-\"] * -len_diff, dtype=\"uint8\")\n new_row = np.array([[*seq, *filler2]], dtype=\"uint8\")\n\n arr = np.vstack((arr, new_row))\n self._collection = arr\n self._header_idx[header] = n_seq\n\n def __getitem__(self, header: str) -> Sequence:\n idx = self._header_idx[header]\n n_chars = self._collection.shape[1]\n sequence = self._collection[idx].view(f\"S{n_chars}\")[0].decode()\n return Sequence(header, sequence)\n\n @property\n def headers(self) -> List[str]:\n return list(self._header_idx.keys())\n\n @property\n def n_seqs(self) -> int:\n return self._collection.shape[0]\n\n @property\n def n_chars(self) -> int:\n return self._collection.shape[1]\n\n @property\n def shape(self) -> int:\n return 
self._collection.shape\n\n def to_nexus(self) -> str:\n \"\"\" \"\"\"\n sequences = \"\\n\".join([f\"{s.header} {s.sequence}\" for s in self])\n return (\n \"begin data;\"\n f\"\\tdimensions ntax={self.n_seqs} nchar={self.n_chars};\"\n \"\\tformat datatype=dna gap=-;\"\n \"\\tmatrix\"\n f\"\\t{sequences}\"\n \"\\t;\"\n \"end;\"\n )\n\n def pop(self, header: str) -> Sequence:\n pop_idx = self._header_idx[header]\n n_chars = self._collection.shape[1]\n sequence = self._collection[pop_idx].view(f\"S{n_chars}\")[0].decode()\n del self._header_idx[header]\n self._header_idx = {h: (idx if idx < pop_idx else idx - 1) for h, idx in self._header_idx.items()}\n self._collection = np.delete(self._collection, (pop_idx,), axis=0)\n return Sequence(header, sequence)\n\n def pairwise_distances(self, distance_measure: str = \"identity\") -> npt.NDArray[np.float64]:\n pass\n
Bases: DirectedAcyclicGraph
picea/sequence.py
class SequenceAnnotation(DirectedAcyclicGraph):\n def __init__(self, sequence: Optional[\"Sequence\"] = None) -> None:\n \"\"\"[summary]\n\n Args:\n sequence (Optional[Sequence], optional): [description]. Defaults\\\n to None.\n \"\"\"\n super().__init__()\n if sequence:\n sequence.annotation = self\n self.sequence = sequence\n self._gff_headers = list()\n\n @property\n def intervals(self):\n return list(self)\n\n def _link_parents(self) -> None:\n \"\"\"\n Add explicit link from parent to child intervals\n GFF/GTF files only contain links of child to parent\n This modifies elements in place\n \"\"\"\n for interval in self:\n if interval.parent:\n for parent_ID in interval.parent:\n try:\n parent = self[parent_ID]\n except KeyError as exc:\n raise KeyError(\n f\"Interval {interval.ID} is listing {parent_ID} \"\n \"as Parent, but parent could not be found.\"\n ) from exc\n parent._children.append(interval.ID)\n\n @classmethod\n def from_gtf(\n cls,\n filename: Optional[str] = None,\n string: Optional[str] = None,\n sequence: Optional[\"Sequence\"] = None,\n link_parents: Optional[bool] = True,\n ) -> \"SequenceAnnotation\":\n \"\"\"[summary]\n\n Raises:\n IndexError: [description]\n IndexError: [description]\n\n Returns:\n [type]: [description]\n \"\"\"\n assert filename or string\n assert not (filename and string)\n sequence_annotation = cls(sequence=sequence)\n header = True\n\n # start with just reading all intervals\n if filename:\n with open(filename) as filehandle:\n string = filehandle.read()\n for line_number, line in enumerate(string.split(\"\\n\")):\n line = line.strip()\n if not line:\n continue\n if line[0] == \"#\":\n if header:\n sequence_annotation._gff_headers.append(line)\n continue\n else:\n header = False\n interval = SequenceInterval.from_gtf_line(gtf_line=line, line_number=line_number)\n interval._container = sequence_annotation\n sequence_annotation[interval.ID] = interval\n # fix missing gene and transcript intervals\n transcript_child_counter 
= Counter()\n new_intervals = dict()\n for interval in sequence_annotation:\n gene_id = interval.gff_attributes[\"gene_id\"][0]\n transcript_id = interval.gff_attributes[\"transcript_id\"][0]\n interval_type = interval.interval_type\n id_tuple = (gene_id, transcript_id, interval_type)\n child_count = transcript_child_counter[id_tuple]\n transcript_child_counter.update([id_tuple])\n interval._ID = f\"{transcript_id}.{interval_type}_{child_count}\"\n if transcript_id not in new_intervals:\n # new transcript interval\n transcript_interval = deepcopy(interval)\n transcript_interval._container = interval._container\n transcript_interval._ID = transcript_id\n transcript_interval.interval_type = \"mRNA\"\n transcript_interval.parent = [gene_id]\n # new gene interval\n gene_interval = deepcopy(interval)\n gene_interval._container = interval._container\n gene_interval._ID = gene_id\n gene_interval.interval_type = \"gene\"\n gene_interval.parent = None\n\n new_intervals[transcript_id] = transcript_interval\n new_intervals[gene_id] = gene_interval\n\n interval.parent = [transcript_id]\n new_intervals[interval.ID] = interval\n sequence_annotation._intervals = new_intervals\n\n # set children\n if link_parents:\n sequence_annotation._link_parents()\n\n # fix gene and transcript start and stop coordinates\n genes = sequence_annotation.groupby(\"interval_type\")[\"gene\"]\n for gene in genes:\n # fix gene first\n start = 10e9\n end = 0\n for child in gene.children:\n start = min(start, child.start)\n end = max(end, child.end)\n gene.start = start\n gene.end = end\n\n # fix transcripts\n transcripts = gene.children.groupby(\"interval_type\")[\"mRNA\"]\n for transcript in transcripts:\n start = 10e9\n end = 0\n for child in transcript.children:\n start = min(start, child.start)\n end = max(end, child.end)\n transcript.end = end\n transcript.start = start\n\n return sequence_annotation\n\n def to_gtf(self) -> str:\n return \"\\n\".join(interval.to_gtf_line() for interval in 
self)\n\n @classmethod\n def from_gff(\n cls,\n filename: Optional[str] = None,\n string: Optional[str] = None,\n sequence: Optional[\"Sequence\"] = None,\n link_parents: bool = True,\n ) -> \"SequenceAnnotation\":\n \"\"\"[summary]\n\n Args:\n filename ([type], optional): [description]. Defaults to None.\n string ([type], optional): [description]. Defaults to None.\n sequence ([type], optional): [description].\n Defaults to None.\n\n Returns:\n [type]: [description]\n \"\"\"\n assert filename or string\n assert not (filename and string)\n sequence_annotation = cls(sequence=sequence)\n header = True\n if filename:\n with open(filename) as filehandle:\n string = filehandle.read()\n for line_number, line in enumerate(string.split(\"\\n\")):\n line = line.strip()\n if not line:\n continue\n if line == \"##FASTA\":\n break\n if line[0] == \"#\":\n if header:\n sequence_annotation._gff_headers.append(line)\n continue\n else:\n header = False\n\n interval = SequenceInterval.from_gff_line(gff_line=line, line_number=line_number)\n interval._container = sequence_annotation\n sequence_annotation[interval.ID] = interval\n\n if link_parents:\n sequence_annotation._link_parents()\n\n return sequence_annotation\n\n def to_gff(self) -> str:\n \"\"\"[summary]\n\n Returns:\n str: [description]\n \"\"\"\n return \"\".join(interval.to_gff_line(trailing_newline=True) for interval in self)\n\n @classmethod\n def from_json(\n cls,\n filename: Optional[str] = None,\n string: Optional[str] = None,\n sequence: Optional[\"Sequence\"] = None,\n ) -> \"SequenceAnnotation\":\n \"\"\"[summary]\"\"\"\n assert filename or string\n assert not (filename and string)\n if filename:\n with open(filename) as filehandle:\n string = filehandle.read()\n\n sequence_annotation = cls(sequence=sequence)\n\n gene_dicts = json.loads(string)\n assert isinstance(gene_dicts, list)\n\n for top_dict in gene_dicts:\n child_dicts = top_dict.pop(\"children\", list())\n top_interval = 
SequenceInterval.from_dict(interval_dict=top_dict)\n top_interval._container = sequence_annotation\n sequence_annotation[top_interval.ID] = top_interval\n for child_dict in child_dicts:\n child_interval = SequenceInterval.from_dict(interval_dict=child_dict)\n child_interval._container = sequence_annotation\n sequence_annotation[child_interval.ID] = child_interval\n for interval in sequence_annotation:\n if interval.parent:\n for parent_ID in interval.parent:\n try:\n parent = sequence_annotation[parent_ID]\n except IndexError as err:\n raise IndexError(\n \"Interval {interval.ID} is listing {parent_ID} \" \"as Parent, but parent could not be found.\"\n ) from err\n parent._children.append(interval.ID)\n return sequence_annotation\n\n def to_json(self, indent: Optional[int] = None) -> str:\n \"\"\"[summary]\n\n Returns:\n str: [description]\n \"\"\"\n interval_dicts = [interval.to_dict() for interval in self]\n return json.dumps(interval_dicts, indent=indent)\n
Bases: DAGElement
picea/sequence.py
class SequenceInterval(DAGElement):\n _predefined_gff3_attributes = (\n \"ID\",\n \"name\",\n \"alias\",\n \"parent\",\n \"target\",\n \"gap\",\n \"derives_from\",\n \"note\",\n \"dbxref\",\n \"ontology_term\",\n \"is_circular\",\n )\n _fixed_gff3_fields = (\n \"seqid\",\n \"source\",\n \"interval_type\",\n \"start\",\n \"end\",\n \"score\",\n \"strand\",\n \"phase\",\n )\n _gtf_interval_types = dict(mRNA=\"transcript\")\n\n def __init__(\n self,\n ID: Optional[str] = None,\n seqid: Optional[str] = None,\n source: Optional[str] = None,\n interval_type: Optional[str] = None,\n start: Optional[int] = None,\n end: Optional[int] = None,\n score: Optional[float] = None,\n strand: Optional[str] = None,\n phase: Optional[str] = None,\n children: Optional[List[str]] = None,\n container: Optional[SequenceAnnotation] = None,\n **kwargs,\n ):\n \"\"\"[summary]\n\n Args:\n ID (Optional[str], optional): [description]. Defaults to None.\n seqid (Optional[str], optional): [description]. Defaults to None.\n source (Optional[str], optional): [description]. Defaults to None.\n interval_type (Optional[str], optional): [description]. Defaults\n to None.\n start (Optional[int], optional): [description]. Defaults to None.\n end (Optional[int], optional): [description]. Defaults to None.\n score (Optional[float], optional): [description]. Defaults to None.\n strand (Optional[str], optional): [description]. Defaults to None.\n phase (Optional[str], optional): [description]. Defaults to\n None.\n children (Optional[List], optional): [description]. 
Defaults to\n None.\n container (Optional[SequenceAnnotation], optional): [description].\n Defaults to None.\n \"\"\"\n # interval ID is a property (see below) with getter and setter\n # self._ID = ID\n # self._original_ID = ID\n parents = kwargs.pop(\"parent\", None)\n super().__init__(ID=ID, children=children, container=container, parents=parents)\n\n # Standard gff fields\n self.seqid = seqid\n self.source = source\n self.interval_type = interval_type\n self.start = start\n self.end = end\n self.score = score\n self.strand = strand\n self.phase = phase\n\n # Set attributes with predefined meanings in the gff spec to None\n for attr in self._predefined_gff3_attributes:\n # ID and parent are handled separately in DAG superclass\n if attr in {\"ID\", \"parent\"}:\n continue\n self[attr] = kwargs.get(attr, None)\n\n # Any additional attributes\n for key, value in kwargs.items():\n self[key] = value\n\n # Additional fields, used internally\n # self._container = container\n # if children is None:\n # children = []\n # self._children = children\n\n def __repr__(self):\n return (\n f\"<SequenceInterval type={self.interval_type} \"\n f\"ID={self.ID} \"\n f\"loc={self.seqid}..{self.start}..{self.end}..{self.strand} \"\n f\"at {hex(id(self))}>\"\n )\n\n @property\n def parent(self):\n return self._parents\n\n @parent.setter\n def parent(self, parent_ID: Union[List[str], str]):\n if isinstance(parent_ID, str):\n parent_ID = [parent_ID]\n self._parents = parent_ID\n\n @property\n def gff_attributes(self) -> Dict[str, str]:\n gff_attributes = {\n attr: self[attr] # dictionary comprehension\n for attr in self.__dict__\n if attr not in self._fixed_gff3_fields # skip column 1-8 in gff3\n and attr\n not in (\n \"_parents\",\n \"_children\",\n \"_container\",\n \"_ID\",\n \"_original_ID\",\n ) # internal use only\n and self[attr] is not None # no empty attributes\n }\n\n # Add attributes handled by DAG\n gff_attributes[\"ID\"] = [self.ID]\n if self._parents:\n 
gff_attributes[\"Parent\"] = self._parents\n\n return gff_attributes\n\n @property\n def gtf_attributes(self) -> Dict[str, str]:\n def get_gtf_type(gff_interval_type):\n return self._gtf_interval_types.get(gff_interval_type, gff_interval_type)\n\n if self.parents:\n parent_ids = {f\"{get_gtf_type(parent.interval_type)}_id\": parent.ID for parent in self.parents}\n else:\n parent_ids = dict()\n return {**self.gff_attributes, **parent_ids}\n\n @classmethod\n def from_gtf_line(cls, gtf_line: Optional[str] = None, line_number: Optional[int] = None) -> \"SequenceInterval\":\n \"\"\"[summary]\n\n Returns:\n [type]: [description]\n\n Yields:\n [type]: [description]\n \"\"\"\n return cls.from_gff_line(gtf_line, line_number, parse_gtf_attribute_string)\n\n def to_gtf_line(self) -> str:\n \"\"\"[summary]\n\n Returns:\n str: [description]\n \"\"\"\n interval_type = self._gtf_interval_types.get(self.interval_type, self.interval_type)\n return \"\\t\".join(\n [\n self.seqid,\n self.source,\n interval_type,\n str(self.start),\n str(self.end),\n str(self.score),\n self.strand,\n str(self.phase),\n format_gtf_attribute_string(self.gtf_attributes),\n ]\n )\n\n @classmethod\n def from_gff_line(\n cls,\n gff_line: Optional[str] = None,\n line_number: Optional[int] = None,\n attribute_parser: Callable = parse_gff_attribute_string,\n ) -> \"SequenceInterval\":\n \"\"\"[summary]\n\n Args:\n gff_line (Optional[str], optional): [description]. Defaults\n to None.\n line_number (Optional[int], optional): [description]. 
Defaults\n to None.\n\n Returns:\n [type]: [description]\n \"\"\"\n gff_parts = gff_line.split(\"\\t\")\n assert len(gff_parts) == 9, gff_parts\n seqid, source, interval_type, start, end, score, strand, phase = gff_parts[:8]\n try:\n start = int(start)\n end = int(end)\n except ValueError as err:\n error = \"GFF start and end fields must be integer\"\n if line_number:\n error = f\"{error}, gff line {line_number}\"\n raise ValueError(error) from err\n\n if score != \".\":\n try:\n score = float(score)\n except ValueError as err:\n error = \"GFF score field must be a float\"\n if line_number:\n error = f\"{error}, gff line {line_number}\"\n raise ValueError(error) from err\n\n if strand not in (\"+\", \"-\", \".\"):\n error = 'GFF strand must be one of \"+\", \"-\" or \".\"'\n if line_number:\n error = f\"{error}, gff line {line_number}\"\n raise ValueError(error)\n\n if phase not in (\"0\", \"1\", \"2\", \".\"):\n error = 'GFF phase must be one of \"0\", \"1\", \"2\" or \".\"'\n if line_number:\n error = f\"{error}, gff line {line_number}\"\n raise ValueError(error)\n elif phase != \".\":\n phase = int(phase)\n\n # Disable phase checking of CDS for now...\n # if interval_type == 'CDS' and phase not in ('0', '1', '2'):\n # error = 'GFF intervals of type CDS must have phase of\\\n # \"0\", \"1\" or \"2\"'\n # if line_number:\n # error = f'{error}, gff line {line_number}'\n # raise ValueError(error)\n\n attributes = attribute_parser(gff_parts[8])\n\n ID = attributes.pop(\"ID\", [str(uuid.uuid4())])[0]\n\n return cls(\n seqid=seqid,\n source=source,\n interval_type=interval_type,\n start=start,\n end=end,\n score=score,\n strand=strand,\n phase=phase,\n ID=ID,\n **attributes,\n )\n\n def to_gff_line(self, trailing_newline: bool = False) -> str:\n \"\"\"[summary]\n\n Returns:\n str: [description]\n \"\"\"\n # attributes = dict(ID=self.ID, **self.gff_attributes)\n\n gff_line = \"\\t\".join(\n [\n self.seqid,\n self.source,\n self.interval_type,\n str(self.start),\n 
str(self.end),\n str(self.score),\n self.strand,\n str(self.phase),\n format_gff_attribute_string(self.gff_attributes),\n ]\n )\n if trailing_newline:\n gff_line = f\"{gff_line}\\n\"\n return gff_line\n\n @classmethod\n def from_dict(cls, interval_dict: Dict[str, Any]) -> \"SequenceInterval\":\n \"\"\"[summary]\n Args:\n interval_dict\n\n Returns:\n [type]: [description]\n \"\"\"\n attributes = interval_dict.pop(\"attributes\", dict())\n return cls(**interval_dict, **attributes)\n\n def to_dict(self, include_children: bool = False) -> Dict[str, Any]:\n \"\"\"[summary]\n\n Returns:\n Dict[str, Any]: [description]\n \"\"\"\n attributes = dict(**self.gff_attributes)\n attributes.pop(\"ID\")\n interval_dict = dict(\n ID=self.ID,\n seqid=self.seqid,\n source=self.source,\n interval_type=self.interval_type,\n start=self.start,\n end=self.end,\n score=self.score,\n strand=self.strand,\n phase=self.phase,\n attributes=attributes,\n )\n if include_children:\n children = [child.to_dict() for child in self.children[1:]]\n interval_dict[\"children\"] = children\n return interval_dict\n\n def to_json(self, include_children: bool = False, indent: Optional[int] = None) -> str:\n \"\"\"[summary]\n\n Args:\n include_children (bool, optional): [description]. Defaults to \\\n False.\n\n Returns:\n str: [description]\n \"\"\"\n return json.dumps(self.to_dict(include_children=include_children), indent=indent)\n
"},{"location":"API/sequence/#picea.Alphabet.complement","title":"complement(sequence)
","text":"Returns complementary strand of DNA or RNA sequence strings
Examples:
>>> DNA = Alphabet('DNA', 'ACGT')\n>>> DNA.complement('AACTACG')\n'TTGATGC'\n
Parameters:
Name Type Description Defaultsequence
str
Sequence string
requiredReturns:
Name Type Descriptionstr
str
complementary strand sequence string
Source code inpicea/sequence.py
def complement(self, sequence: str) -> str:\n \"\"\"Returns complementary strand of DNA or RNA sequence strings\n\n Examples:\n >>> DNA = Alphabet('DNA', 'ACGT')\n >>> DNA.complement('AACTACG')\n 'TTGATGC'\n\n Args:\n sequence (str): Sequence string\n\n Returns:\n str: complementary strand sequence string\n \"\"\"\n if self.name == \"DNA\":\n complement = dict(zip(\"acgtnACGTN-?\", \"tgcanTGCAN-?\"))\n elif self.name == \"RNA\":\n complement = dict(zip(\"acgunACGUN-?\", \"ugcanUGCAN-?\"))\n else:\n raise TypeError(\"Cannot complement non-DNA or non-RNA alphabet\")\n return \"\".join(complement[s] for s in sequence)\n
"},{"location":"API/sequence/#picea.Alphabet.score","title":"score(sequence, match=1.0, mismatch=-1.0, n_chars=100)
","text":"Scores how well a sequence matches an alphabet by summing (mis)matches of sequence letters that are not in the alphabet and (mis)matches of alphabet letters that are not in the sequence.
Parameters:
Name Type Description Defaultsequence
str
Sequence string for which to determine how well it fits the alphabet
requiredmatch
float
match score. Defaults to 1.0.
1.0
mismatch
float
mismatch score. Defaults to -1.0.
-1.0
n_chars
int
number of sequence characters to use in scoring. Large numbers incur a significant computational cost.
100
Returns:
Type Descriptionfloat
Score of how well a sequence matches the alphabet
Source code inpicea/sequence.py
def score(\n self,\n sequence: str,\n match: float = 1.0,\n mismatch: float = -1.0,\n n_chars: int = 100,\n) -> float:\n \"\"\"Scores how well a sequence matches an alphabet by summing \\\n (mis)matches of sequence letters that are not in the alphabet \\\n and (mis)matches of alphabet letters that are not in the sequence.\n\n Args:\n sequence (str): Sequence string for which to determine how well \\\n it fits the alphabet\n match (float, optional): match score. Defaults to 1.0.\n mismatch (float, optional): mismatch score. Defaults to -1.0.\n n_chars (int, optional): number of sequence characters to use in \\\n scoring. Large numbers incur a significant computational cost.\n\n Returns:\n (float): Score of how well a sequence matches the alphabet\n \"\"\"\n return sum(match if s in self else mismatch for s in sequence[:n_chars]) + sum(\n match if s in sequence[:n_chars] else mismatch for s in self\n )\n
"},{"location":"API/sequence/#picea.Alphabet.translate","title":"translate(sequence)
","text":"Translate DNA or RNA sequence string to amino acid string
Examples:
>>> DNA = Alphabet('DNA', 'ACGT')\n>>> DNA.translate('ATGACGACGTAA')\n'MTT*'\n
Parameters:
Name Type Description Defaultsequence
str
Sequence string (sequence length must be multiple of 3)
requiredReturns:
Name Type Descriptionstr
str
Amino acid string
Source code inpicea/sequence.py
def translate(self, sequence: str) -> str:\n \"\"\"Translate DNA or RNA sequence string to amino acid string\n\n Examples:\n >>> DNA = Alphabet('DNA', 'ACGT')\n >>> DNA.translate('ATGACGACGTAA')\n 'MTT*'\n\n Args:\n sequence (str): Sequence string (sequence length must be multiple of 3)\n\n Returns:\n str: Amino acid string\n \"\"\"\n if self.name not in (\"DNA\", \"RNA\"):\n raise TypeError(\"Cannot translate non-DNA or non-RNA alphabet\")\n codons = re.findall(\"...\", sequence.upper())\n return \"\".join(TRANSLATION.get(codon, \"X\") for codon in codons)\n
"},{"location":"API/sequence/#picea.Alphabet.validate","title":"validate(sequence)
","text":"Determine whether a sequence strictly fits an alphabet
Parameters:
Name Type Description Defaultsequence
str
Sequence string
requiredReturns:
Name Type Descriptionbool
bool
true if all characters in sequence are in the alphabet
Source code inpicea/sequence.py
def validate(self, sequence: str) -> bool:\n \"\"\"Determine whether a sequence strictly fits an alphabet\n\n Args:\n sequence (str): Sequence string\n\n Returns:\n bool: true if all characters in sequence are in the alphabet\n \"\"\"\n return sum(1 if s not in self else 0 for s in sequence) == 0\n
"},{"location":"API/sequence/#picea.Sequence.__len__","title":"__len__()
","text":"Length of the sequence
Examples:
>>> s = Sequence('test_dna', 'ACGTA')\n>>> len(s)\n5\n
Source code in picea/sequence.py
def __len__(self):\n \"\"\"Length of the sequence\n\n Examples:\n >>> s = Sequence('test_dna', 'ACGTA')\n >>> len(s)\n 5\n \"\"\"\n return len(self.sequence)\n
"},{"location":"API/sequence/#picea.Sequence.from_fasta","title":"from_fasta(string)
classmethod
","text":"Create a sequence object from a fasta formatted file. single sequence only
Examples:
>>> fasta_string = '>test\\nACGT'\n>>> Sequence.from_fasta(fasta_string)\nSequence(header='test', alphabet=Alphabet(name='DNA', members='-?acgtnACGNT'))\n
Returns:
Type DescriptionSequence
Sequence
Source code inpicea/sequence.py
@classmethod\n def from_fasta(cls, string: str) -> \"Sequence\":\n \"\"\"Create a sequence object from a fasta formatted file. _single sequence only_\n\n Examples:\n >>> fasta_string = '>test\\\\nACGT'\n >>> Sequence.from_fasta(fasta_string)\n Sequence(header='test', \\\nalphabet=Alphabet(name='DNA', members='-?acgtnACGNT'))\n\n Arguments:\n string (str)\n\n Returns:\n Sequence\n \"\"\"\n lines = string.strip().split(\"\\n\")\n header = lines[0][1:]\n sequence = \"\".join(lines[1:])\n return cls(header, sequence)\n
"},{"location":"API/sequence/#picea.Sequence.to_dict","title":"to_dict()
","text":"Make dictionary with header and sequence elements
Examples:
>>> s = Sequence('test', 'ACGTA')\n>>> s.to_dict()\n{'header': 'test', 'sequence': 'ACGTA'}\n
Returns:
Type DescriptionDict[str, str]
Dict[str, str]: sequence dictionary
Source code inpicea/sequence.py
def to_dict(self) -> Dict[str, str]:\n \"\"\"Make dictionary with header and sequence elements\n\n Examples:\n >>> s = Sequence('test', 'ACGTA')\n >>> s.to_dict()\n {'header': 'test', 'sequence': 'ACGTA'}\n\n Returns:\n Dict[str, str]: sequence dictionary\n \"\"\"\n return dict(header=self.header, sequence=self.sequence)\n
"},{"location":"API/sequence/#picea.Sequence.to_fasta","title":"to_fasta(linewidth=80)
","text":"Make fasta formatted sequence entry
Returns:
Name Type Descriptionstr
str
sequence in fasta format
Source code inpicea/sequence.py
def to_fasta(self, linewidth: int = 80) -> str:\n \"\"\"Make fasta formatted sequence entry\n\n Returns:\n str: sequence in fasta format\n \"\"\"\n sequence_lines = \"\\n\".join(re.findall(f\".{{1,{linewidth}}}\", self.sequence))\n return f\">{self.header}\\n{sequence_lines}\"\n
"},{"location":"API/sequence/#picea.BatchSequenceReader.__init__","title":"__init__(string=None, filename=None, filetype=None, batchsize=10)
","text":"[summary]
Parameters:
Name Type Description Defaultstring
str
[description]. Defaults to None.
None
filename
str
[description]. Defaults to None.
None
filetype
str
[description]. Defaults to None.
None
batchsize
int
[description]. Defaults to 10.
10
Returns:
Type DescriptionNone
Yields:
Type DescriptionNone
Source code in picea/sequence.py
def __init__(\n self,\n string: str = None,\n filename: str = None,\n filetype: str = None,\n batchsize: int = 10,\n) -> None:\n \"\"\"[summary]\n\n Args:\n string (str, optional): [description]. Defaults to None.\n filename (str, optional): [description]. Defaults to None.\n filetype (str, optional): [description]. Defaults to None.\n batchsize (int, optional): [description]. Defaults to 10.\n\n Returns:\n [type]: [description]\n\n Yields:\n [type]: [description]\n \"\"\"\n super().__init__(string, filename, filetype)\n self.batchsize = batchsize\n self._currentbatch = SequenceCollection()\n
"},{"location":"API/sequence/#picea.AbstractSequenceCollection.headers","title":"headers: List[str]
abstractmethod
property
","text":"List of sequences headers. Overridden in subclasses.
Returns:
Type DescriptionList[str]
List[str]: List of sequence headers
"},{"location":"API/sequence/#picea.AbstractSequenceCollection.iloc","title":"iloc: SequenceIndex
property
","text":"[summary]
Returns:
Name Type DescriptionSequenceIndex
SequenceIndex
[description]
"},{"location":"API/sequence/#picea.AbstractSequenceCollection.n_seqs","title":"n_seqs: int
abstractmethod
property
","text":"Return the number of sequences in the collection. Overridden in subclasses
Returns:
Name Type Descriptionint
int
number of sequences
"},{"location":"API/sequence/#picea.AbstractSequenceCollection.sequences","title":"sequences: List[str]
property
","text":"List of sequences without headers
Returns:
Type DescriptionList[str]
List[str]: list of sequences
"},{"location":"API/sequence/#picea.AbstractSequenceCollection.batch_rename","title":"batch_rename(rename_func)
","text":"Rename all headers by calling rename_func
on each header
Parameters:
Name Type Description Defaultrename_func
Callable
[description]
required Source code inpicea/sequence.py
def batch_rename(self, rename_func: Callable[[str], str]) -> None:\n \"\"\"Rename all headers by calling `rename_func` on each header\n\n Args:\n rename_func (Callable): [description]\n \"\"\"\n for header in self.headers:\n s: Sequence = self.pop(header)\n s.header = rename_func(s.header)\n self[s.header] = s.sequence\n
"},{"location":"API/sequence/#picea.AbstractSequenceCollection.from_fasta","title":"from_fasta(filename=None, string=None)
classmethod
","text":"Parse a fasta formatted string into a SequenceCollection object
Other Parameters:
Name Type Descriptionfilename
{String} -- filename string (default
{None})
string
{String} -- fasta formatted string (default
{None})
Returns:
Type DescriptionSequenceCollection
SequenceCollection -- SequenceCollection instance
Source code inpicea/sequence.py
@classmethod\ndef from_fasta(\n cls,\n filename: str = None,\n string: str = None,\n) -> \"SequenceCollection\":\n \"\"\"Parse a fasta formatted string into a SequenceCollection object\n\n Keyword Arguments:\n filename {String} -- filename string (default: {None})\n string {String} -- fasta formatted string (default: {None})\n\n Returns:\n SequenceCollection -- SequenceCollection instance\n \"\"\"\n sequencecollection = cls()\n\n for seq in SequenceReader(string=string, filename=filename, filetype=\"fasta\"):\n sequencecollection[seq.header] = seq.sequence\n return sequencecollection\n
"},{"location":"API/sequence/#picea.AbstractSequenceCollection.from_json","title":"from_json(filename=None, string=None)
classmethod
","text":"[summary]
Returns:
Type DescriptionSequenceCollection
SequenceCollection -- SequenceCollection instance
Source code inpicea/sequence.py
@classmethod\ndef from_json(cls, filename: Optional[str] = None, string: Optional[str] = None) -> \"SequenceCollection\":\n \"\"\"[summary]\n\n Keyword Arguments:\n string {String} -- JSON formatted string\n\n Returns:\n SequenceCollection -- SequenceCollection instance\n \"\"\"\n sequencecollection = cls()\n\n for seq in SequenceReader(string=string, filename=filename, filetype=\"json\"):\n sequencecollection[seq.header] = seq.sequence\n\n return sequencecollection\n
"},{"location":"API/sequence/#picea.AbstractSequenceCollection.from_sequence_iter","title":"from_sequence_iter(sequence_iter)
classmethod
","text":"[summary]
Raises:
Type DescriptionNotImplementedError
[description]
Returns:
Type DescriptionSequenceCollection
Source code in picea/sequence.py
@classmethod\ndef from_sequence_iter(cls, sequence_iter: Iterable[Sequence]) -> \"SequenceCollection\":\n \"\"\"[summary]\n\n Raises:\n NotImplementedError: [description]\n\n Returns:\n [type]: [description]\n \"\"\"\n sequencecollection = cls()\n for seq in sequence_iter:\n sequencecollection[seq.header] = seq.sequence\n return sequencecollection\n
"},{"location":"API/sequence/#picea.AbstractSequenceCollection.pop","title":"pop(header)
abstractmethod
","text":"[summary]
Parameters:
Name Type Description Defaultheader
str
[description]
requiredReturns:
Name Type DescriptionSequence
Sequence
[description]
Source code inpicea/sequence.py
@abstractmethod\ndef pop(self, header: str) -> Sequence:\n \"\"\"[summary]\n\n Args:\n header (str): [description]\n\n Returns:\n Sequence: [description]\n \"\"\"\n raise NotImplementedError((\"Classes extending from AbstractSequenceCollection should \" \"implement pop method\"))\n
"},{"location":"API/sequence/#picea.AbstractSequenceCollection.to_fasta","title":"to_fasta(linewidth=80)
","text":"Get a fasta-formatted string of the sequence collection
Returns:
Name Type Descriptionstr
str
Multi-line fasta-formatted string
Source code inpicea/sequence.py
def to_fasta(self, linewidth: int = 80) -> str:\n \"\"\"Get a fasta-formatted string of the sequence collection\n\n Returns:\n str: Multi-line fasta-formatted string\n \"\"\"\n return \"\\n\".join([seq.to_fasta(linewidth=linewidth) for seq in self])\n
"},{"location":"API/sequence/#picea.AbstractSequenceCollection.to_json","title":"to_json(indent=None)
","text":"[summary]
Returns:
Name Type Descriptionstr
str
[description]
Source code inpicea/sequence.py
def to_json(self, indent: Optional[int] = None) -> str:\n \"\"\"[summary]\n\n Returns:\n str: [description]\n \"\"\"\n gene_dicts = [seq.to_dict() for seq in self]\n return json.dumps(gene_dicts, indent=indent)\n
"},{"location":"API/sequence/#picea.SequenceCollection.align","title":"align(method='mafft', method_kwargs=None)
","text":"[summary]
Parameters:
Name Type Description Defaultmethod
str
[description]. Defaults to 'mafft'.
'mafft'
method_kwargs
Mapping[str, str]
[description]. Defaults to dict().
None
Returns:
Type DescriptionMultipleSequenceAlignment
Source code in picea/sequence.py
def align(\n self, method: Optional[str] = \"mafft\", method_kwargs: Optional[Mapping[str, str]] = None\n) -> \"MultipleSequenceAlignment\":\n \"\"\"[summary]\n\n Args:\n method (str, optional): [description]. Defaults to 'mafft'.\n method_kwargs (Mapping[str, str], optional): [description]. \\\n Defaults to dict().\n\n Returns:\n [type]: [description]\n \"\"\"\n if not method_kwargs:\n method_kwargs = dict()\n fasta = self.to_fasta()\n command = [method, *chain(*method_kwargs.items()), \"-\"]\n process = Popen(command, stdin=PIPE, stdout=PIPE, stderr=PIPE)\n stdout, _ = process.communicate(input=fasta.encode())\n aligned_fasta = stdout.decode().strip()\n return MultipleSequenceAlignment.from_fasta(string=aligned_fasta)\n
"},{"location":"API/sequence/#picea.MultipleSequenceAlignment.to_nexus","title":"to_nexus()
","text":"Source code in picea/sequence.py
def to_nexus(self) -> str:\n \"\"\" \"\"\"\n sequences = \"\\n\".join([f\"{s.header} {s.sequence}\" for s in self])\n return (\n \"begin data;\"\n f\"\\tdimensions ntax={self.n_seqs} nchar={self.n_chars};\"\n \"\\tformat datatype=dna gap=-;\"\n \"\\tmatrix\"\n f\"\\t{sequences}\"\n \"\\t;\"\n \"end;\"\n )\n
"},{"location":"API/sequence/#picea.SequenceAnnotation.__init__","title":"__init__(sequence=None)
","text":"[summary]
Parameters:
Name Type Description Defaultsequence
Optional[Sequence]
[description]. Defaults to None.
None
Source code in picea/sequence.py
def __init__(self, sequence: Optional[\"Sequence\"] = None) -> None:\n \"\"\"[summary]\n\n Args:\n sequence (Optional[Sequence], optional): [description]. Defaults\\\n to None.\n \"\"\"\n super().__init__()\n if sequence:\n sequence.annotation = self\n self.sequence = sequence\n self._gff_headers = list()\n
"},{"location":"API/sequence/#picea.SequenceAnnotation.from_gff","title":"from_gff(filename=None, string=None, sequence=None, link_parents=True)
classmethod
","text":"[summary]
Parameters:
Name Type Description Defaultfilename
[type]
[description]. Defaults to None.
None
string
[type]
[description]. Defaults to None.
None
sequence
[type]
[description]. Defaults to None.
None
Returns:
Type DescriptionSequenceAnnotation
Source code in picea/sequence.py
@classmethod\ndef from_gff(\n cls,\n filename: Optional[str] = None,\n string: Optional[str] = None,\n sequence: Optional[\"Sequence\"] = None,\n link_parents: bool = True,\n) -> \"SequenceAnnotation\":\n \"\"\"[summary]\n\n Args:\n filename ([type], optional): [description]. Defaults to None.\n string ([type], optional): [description]. Defaults to None.\n sequence ([type], optional): [description].\n Defaults to None.\n\n Returns:\n [type]: [description]\n \"\"\"\n assert filename or string\n assert not (filename and string)\n sequence_annotation = cls(sequence=sequence)\n header = True\n if filename:\n with open(filename) as filehandle:\n string = filehandle.read()\n for line_number, line in enumerate(string.split(\"\\n\")):\n line = line.strip()\n if not line:\n continue\n if line == \"##FASTA\":\n break\n if line[0] == \"#\":\n if header:\n sequence_annotation._gff_headers.append(line)\n continue\n else:\n header = False\n\n interval = SequenceInterval.from_gff_line(gff_line=line, line_number=line_number)\n interval._container = sequence_annotation\n sequence_annotation[interval.ID] = interval\n\n if link_parents:\n sequence_annotation._link_parents()\n\n return sequence_annotation\n
"},{"location":"API/sequence/#picea.SequenceAnnotation.from_gtf","title":"from_gtf(filename=None, string=None, sequence=None, link_parents=True)
classmethod
","text":"[summary]
Raises:
Type DescriptionIndexError
[description]
IndexError
[description]
Returns:
Type DescriptionSequenceAnnotation
Source code in picea/sequence.py
@classmethod\ndef from_gtf(\n cls,\n filename: Optional[str] = None,\n string: Optional[str] = None,\n sequence: Optional[\"Sequence\"] = None,\n link_parents: Optional[bool] = True,\n) -> \"SequenceAnnotation\":\n \"\"\"[summary]\n\n Raises:\n IndexError: [description]\n IndexError: [description]\n\n Returns:\n [type]: [description]\n \"\"\"\n assert filename or string\n assert not (filename and string)\n sequence_annotation = cls(sequence=sequence)\n header = True\n\n # start with just reading all intervals\n if filename:\n with open(filename) as filehandle:\n string = filehandle.read()\n for line_number, line in enumerate(string.split(\"\\n\")):\n line = line.strip()\n if not line:\n continue\n if line[0] == \"#\":\n if header:\n sequence_annotation._gff_headers.append(line)\n continue\n else:\n header = False\n interval = SequenceInterval.from_gtf_line(gtf_line=line, line_number=line_number)\n interval._container = sequence_annotation\n sequence_annotation[interval.ID] = interval\n # fix missing gene and transcript intervals\n transcript_child_counter = Counter()\n new_intervals = dict()\n for interval in sequence_annotation:\n gene_id = interval.gff_attributes[\"gene_id\"][0]\n transcript_id = interval.gff_attributes[\"transcript_id\"][0]\n interval_type = interval.interval_type\n id_tuple = (gene_id, transcript_id, interval_type)\n child_count = transcript_child_counter[id_tuple]\n transcript_child_counter.update([id_tuple])\n interval._ID = f\"{transcript_id}.{interval_type}_{child_count}\"\n if transcript_id not in new_intervals:\n # new transcript interval\n transcript_interval = deepcopy(interval)\n transcript_interval._container = interval._container\n transcript_interval._ID = transcript_id\n transcript_interval.interval_type = \"mRNA\"\n transcript_interval.parent = [gene_id]\n # new gene interval\n gene_interval = deepcopy(interval)\n gene_interval._container = interval._container\n gene_interval._ID = gene_id\n gene_interval.interval_type = 
\"gene\"\n gene_interval.parent = None\n\n new_intervals[transcript_id] = transcript_interval\n new_intervals[gene_id] = gene_interval\n\n interval.parent = [transcript_id]\n new_intervals[interval.ID] = interval\n sequence_annotation._intervals = new_intervals\n\n # set children\n if link_parents:\n sequence_annotation._link_parents()\n\n # fix gene and transcript start and stop coordinates\n genes = sequence_annotation.groupby(\"interval_type\")[\"gene\"]\n for gene in genes:\n # fix gene first\n start = 10e9\n end = 0\n for child in gene.children:\n start = min(start, child.start)\n end = max(end, child.end)\n gene.start = start\n gene.end = end\n\n # fix transcripts\n transcripts = gene.children.groupby(\"interval_type\")[\"mRNA\"]\n for transcript in transcripts:\n start = 10e9\n end = 0\n for child in transcript.children:\n start = min(start, child.start)\n end = max(end, child.end)\n transcript.end = end\n transcript.start = start\n\n return sequence_annotation\n
"},{"location":"API/sequence/#picea.SequenceAnnotation.from_json","title":"from_json(filename=None, string=None, sequence=None)
classmethod
","text":"[summary]
Source code inpicea/sequence.py
@classmethod\ndef from_json(\n cls,\n filename: Optional[str] = None,\n string: Optional[str] = None,\n sequence: Optional[\"Sequence\"] = None,\n) -> \"SequenceAnnotation\":\n \"\"\"[summary]\"\"\"\n assert filename or string\n assert not (filename and string)\n if filename:\n with open(filename) as filehandle:\n string = filehandle.read()\n\n sequence_annotation = cls(sequence=sequence)\n\n gene_dicts = json.loads(string)\n assert isinstance(gene_dicts, list)\n\n for top_dict in gene_dicts:\n child_dicts = top_dict.pop(\"children\", list())\n top_interval = SequenceInterval.from_dict(interval_dict=top_dict)\n top_interval._container = sequence_annotation\n sequence_annotation[top_interval.ID] = top_interval\n for child_dict in child_dicts:\n child_interval = SequenceInterval.from_dict(interval_dict=child_dict)\n child_interval._container = sequence_annotation\n sequence_annotation[child_interval.ID] = child_interval\n for interval in sequence_annotation:\n if interval.parent:\n for parent_ID in interval.parent:\n try:\n parent = sequence_annotation[parent_ID]\n except IndexError as err:\n raise IndexError(\n \"Interval {interval.ID} is listing {parent_ID} \" \"as Parent, but parent could not be found.\"\n ) from err\n parent._children.append(interval.ID)\n return sequence_annotation\n
"},{"location":"API/sequence/#picea.SequenceAnnotation.to_gff","title":"to_gff()
","text":"[summary]
Returns:
Name Type Descriptionstr
str
[description]
Source code inpicea/sequence.py
def to_gff(self) -> str:\n \"\"\"[summary]\n\n Returns:\n str: [description]\n \"\"\"\n return \"\".join(interval.to_gff_line(trailing_newline=True) for interval in self)\n
"},{"location":"API/sequence/#picea.SequenceAnnotation.to_json","title":"to_json(indent=None)
","text":"[summary]
Returns:
Name Type Descriptionstr
str
[description]
Source code inpicea/sequence.py
def to_json(self, indent: Optional[int] = None) -> str:\n \"\"\"[summary]\n\n Returns:\n str: [description]\n \"\"\"\n interval_dicts = [interval.to_dict() for interval in self]\n return json.dumps(interval_dicts, indent=indent)\n
"},{"location":"API/sequence/#picea.SequenceInterval.__init__","title":"__init__(ID=None, seqid=None, source=None, interval_type=None, start=None, end=None, score=None, strand=None, phase=None, children=None, container=None, **kwargs)
","text":"[summary]
Parameters:
Name Type Description DefaultID
Optional[str]
[description]. Defaults to None.
None
seqid
Optional[str]
[description]. Defaults to None.
None
source
Optional[str]
[description]. Defaults to None.
None
interval_type
Optional[str]
[description]. Defaults to None.
None
start
Optional[int]
[description]. Defaults to None.
None
end
Optional[int]
[description]. Defaults to None.
None
score
Optional[float]
[description]. Defaults to None.
None
strand
Optional[str]
[description]. Defaults to None.
None
phase
Optional[str]
[description]. Defaults to None.
None
children
Optional[List]
[description]. Defaults to None.
None
container
Optional[SequenceAnnotation]
[description]. Defaults to None.
None
Source code in picea/sequence.py
def __init__(\n self,\n ID: Optional[str] = None,\n seqid: Optional[str] = None,\n source: Optional[str] = None,\n interval_type: Optional[str] = None,\n start: Optional[int] = None,\n end: Optional[int] = None,\n score: Optional[float] = None,\n strand: Optional[str] = None,\n phase: Optional[str] = None,\n children: Optional[List[str]] = None,\n container: Optional[SequenceAnnotation] = None,\n **kwargs,\n):\n \"\"\"[summary]\n\n Args:\n ID (Optional[str], optional): [description]. Defaults to None.\n seqid (Optional[str], optional): [description]. Defaults to None.\n source (Optional[str], optional): [description]. Defaults to None.\n interval_type (Optional[str], optional): [description]. Defaults\n to None.\n start (Optional[int], optional): [description]. Defaults to None.\n end (Optional[int], optional): [description]. Defaults to None.\n score (Optional[float], optional): [description]. Defaults to None.\n strand (Optional[str], optional): [description]. Defaults to None.\n phase (Optional[str], optional): [description]. Defaults to\n None.\n children (Optional[List], optional): [description]. 
Defaults to\n None.\n container (Optional[SequenceAnnotation], optional): [description].\n Defaults to None.\n \"\"\"\n # interval ID is a property (see below) with getter and setter\n # self._ID = ID\n # self._original_ID = ID\n parents = kwargs.pop(\"parent\", None)\n super().__init__(ID=ID, children=children, container=container, parents=parents)\n\n # Standard gff fields\n self.seqid = seqid\n self.source = source\n self.interval_type = interval_type\n self.start = start\n self.end = end\n self.score = score\n self.strand = strand\n self.phase = phase\n\n # Set attributes with predefined meanings in the gff spec to None\n for attr in self._predefined_gff3_attributes:\n # ID and parent are handled separately in DAG superclass\n if attr in {\"ID\", \"parent\"}:\n continue\n self[attr] = kwargs.get(attr, None)\n\n # Any additional attributes\n for key, value in kwargs.items():\n self[key] = value\n
"},{"location":"API/sequence/#picea.SequenceInterval.from_dict","title":"from_dict(interval_dict)
classmethod
","text":"[summary] Args: interval_dict
Returns:
Type DescriptionSequenceInterval
Source code in picea/sequence.py
@classmethod\ndef from_dict(cls, interval_dict: Dict[str, Any]) -> \"SequenceInterval\":\n \"\"\"[summary]\n Args:\n interval_dict\n\n Returns:\n [type]: [description]\n \"\"\"\n attributes = interval_dict.pop(\"attributes\", dict())\n return cls(**interval_dict, **attributes)\n
"},{"location":"API/sequence/#picea.SequenceInterval.from_gff_line","title":"from_gff_line(gff_line=None, line_number=None, attribute_parser=parse_gff_attribute_string)
classmethod
","text":"[summary]
Parameters:
Name Type Description Defaultgff_line
Optional[str]
[description]. Defaults to None.
None
line_number
Optional[int]
[description]. Defaults to None.
None
Returns:
Type DescriptionSequenceInterval
Source code in picea/sequence.py
@classmethod\ndef from_gff_line(\n cls,\n gff_line: Optional[str] = None,\n line_number: Optional[int] = None,\n attribute_parser: Callable = parse_gff_attribute_string,\n) -> \"SequenceInterval\":\n \"\"\"[summary]\n\n Args:\n gff_line (Optional[str], optional): [description]. Defaults\n to None.\n line_number (Optional[int], optional): [description]. Defaults\n to None.\n\n Returns:\n [type]: [description]\n \"\"\"\n gff_parts = gff_line.split(\"\\t\")\n assert len(gff_parts) == 9, gff_parts\n seqid, source, interval_type, start, end, score, strand, phase = gff_parts[:8]\n try:\n start = int(start)\n end = int(end)\n except ValueError as err:\n error = \"GFF start and end fields must be integer\"\n if line_number:\n error = f\"{error}, gff line {line_number}\"\n raise ValueError(error) from err\n\n if score != \".\":\n try:\n score = float(score)\n except ValueError as err:\n error = \"GFF score field must be a float\"\n if line_number:\n error = f\"{error}, gff line {line_number}\"\n raise ValueError(error) from err\n\n if strand not in (\"+\", \"-\", \".\"):\n error = 'GFF strand must be one of \"+\", \"-\" or \".\"'\n if line_number:\n error = f\"{error}, gff line {line_number}\"\n raise ValueError(error)\n\n if phase not in (\"0\", \"1\", \"2\", \".\"):\n error = 'GFF phase must be one of \"0\", \"1\", \"2\" or \".\"'\n if line_number:\n error = f\"{error}, gff line {line_number}\"\n raise ValueError(error)\n elif phase != \".\":\n phase = int(phase)\n\n # Disable phase checking of CDS for now...\n # if interval_type == 'CDS' and phase not in ('0', '1', '2'):\n # error = 'GFF intervals of type CDS must have phase of\\\n # \"0\", \"1\" or \"2\"'\n # if line_number:\n # error = f'{error}, gff line {line_number}'\n # raise ValueError(error)\n\n attributes = attribute_parser(gff_parts[8])\n\n ID = attributes.pop(\"ID\", [str(uuid.uuid4())])[0]\n\n return cls(\n seqid=seqid,\n source=source,\n interval_type=interval_type,\n start=start,\n end=end,\n score=score,\n 
strand=strand,\n phase=phase,\n ID=ID,\n **attributes,\n )\n
"},{"location":"API/sequence/#picea.SequenceInterval.from_gtf_line","title":"from_gtf_line(gtf_line=None, line_number=None)
classmethod
","text":"[summary]
Returns:
Type DescriptionSequenceInterval
Yields:
Type DescriptionSequenceInterval
Source code in picea/sequence.py
@classmethod\ndef from_gtf_line(cls, gtf_line: Optional[str] = None, line_number: Optional[int] = None) -> \"SequenceInterval\":\n \"\"\"[summary]\n\n Returns:\n [type]: [description]\n\n Yields:\n [type]: [description]\n \"\"\"\n return cls.from_gff_line(gtf_line, line_number, parse_gtf_attribute_string)\n
"},{"location":"API/sequence/#picea.SequenceInterval.to_dict","title":"to_dict(include_children=False)
","text":"[summary]
Returns:
Type DescriptionDict[str, Any]
Dict[str, Any]: [description]
Source code inpicea/sequence.py
def to_dict(self, include_children: bool = False) -> Dict[str, Any]:\n \"\"\"[summary]\n\n Returns:\n Dict[str, Any]: [description]\n \"\"\"\n attributes = dict(**self.gff_attributes)\n attributes.pop(\"ID\")\n interval_dict = dict(\n ID=self.ID,\n seqid=self.seqid,\n source=self.source,\n interval_type=self.interval_type,\n start=self.start,\n end=self.end,\n score=self.score,\n strand=self.strand,\n phase=self.phase,\n attributes=attributes,\n )\n if include_children:\n children = [child.to_dict() for child in self.children[1:]]\n interval_dict[\"children\"] = children\n return interval_dict\n
"},{"location":"API/sequence/#picea.SequenceInterval.to_gff_line","title":"to_gff_line(trailing_newline=False)
","text":"[summary]
Returns:
Name Type Descriptionstr
str
[description]
Source code inpicea/sequence.py
def to_gff_line(self, trailing_newline: bool = False) -> str:\n \"\"\"[summary]\n\n Returns:\n str: [description]\n \"\"\"\n # attributes = dict(ID=self.ID, **self.gff_attributes)\n\n gff_line = \"\\t\".join(\n [\n self.seqid,\n self.source,\n self.interval_type,\n str(self.start),\n str(self.end),\n str(self.score),\n self.strand,\n str(self.phase),\n format_gff_attribute_string(self.gff_attributes),\n ]\n )\n if trailing_newline:\n gff_line = f\"{gff_line}\\n\"\n return gff_line\n
"},{"location":"API/sequence/#picea.SequenceInterval.to_gtf_line","title":"to_gtf_line()
","text":"[summary]
Returns:
Name Type Descriptionstr
str
[description]
Source code inpicea/sequence.py
def to_gtf_line(self) -> str:\n \"\"\"[summary]\n\n Returns:\n str: [description]\n \"\"\"\n interval_type = self._gtf_interval_types.get(self.interval_type, self.interval_type)\n return \"\\t\".join(\n [\n self.seqid,\n self.source,\n interval_type,\n str(self.start),\n str(self.end),\n str(self.score),\n self.strand,\n str(self.phase),\n format_gtf_attribute_string(self.gtf_attributes),\n ]\n )\n
"},{"location":"API/sequence/#picea.SequenceInterval.to_json","title":"to_json(include_children=False, indent=None)
","text":"[summary]
Parameters:
Name Type Description Defaultinclude_children
bool
[description]. Defaults to False.
False
Returns:
Name Type Descriptionstr
str
[description]
Source code inpicea/sequence.py
def to_json(self, include_children: bool = False, indent: Optional[int] = None) -> str:\n \"\"\"[summary]\n\n Args:\n include_children (bool, optional): [description]. Defaults to \\\n False.\n\n Returns:\n str: [description]\n \"\"\"\n return json.dumps(self.to_dict(include_children=include_children), indent=indent)\n
"},{"location":"API/tree/","title":"Tree","text":"Recursive Tree object
Source code inpicea/tree.py
@dataclass\nclass Tree:\n \"\"\"Recursive Tree object\"\"\"\n\n name: Optional[str] = None\n length: Optional[float] = None\n children: Optional[List[\"Tree\"]] = field(default_factory=list)\n\n ID: InitVar[Optional[int]] = None\n depth: InitVar[Optional[int]] = None\n parent: InitVar[Optional[\"Tree\"]] = None\n cumulative_length: InitVar[Optional[float]] = None\n\n def __post_init__(self, ID, *args, **kwargs):\n \"\"\"[summary]\n\n Args:\n ID ([type]): [description]\n \"\"\"\n self.ID = ID\n\n @property\n def loc(self) -> \"Tree\":\n \"\"\"Name based index\n\n Example:\n >>> from picea import Tree\n >>> newick = '(((a,b),(c,d)),e);'\n >>> tree = Tree.from_newick(newick)\n >>> tree.loc['a']\n Tree(name='a', length=None, children=[])\n\n Returns:\n Tree: tree node matching name\n\n Raises:\n IndexError\n \"\"\"\n return TreeIndex(iterator=self.depth_first, eq_func=lambda node, name: node.name == name)\n\n @property\n def iloc(self) -> \"Tree\":\n \"\"\"Index based index\n\n Example:\n >>> from picea import Tree\n >>> newick = '(((a,b),(c,d)),e);'\n >>> tree = Tree.from_newick(newick)\n >>> tree.iloc[2]\n Tree(name='', length=None, children=[Tree(name='a', length=None, \\\nchildren=[]), Tree(name='b', length=None, children=[])])\n\n Returns:\n Tree: tree node matching index\n \"\"\"\n return TreeIndex(iterator=self.depth_first, eq_func=lambda node, index: node.ID == index)\n\n @property\n def root(self) -> \"Tree\":\n \"\"\"Root node of the (sub)tree\n\n Returns:\n Tree: Root node\n \"\"\"\n root = self\n while root.parent:\n root = root.parent\n return root\n\n @property\n def nodes(self) -> List[\"Tree\"]:\n \"\"\"A list of all tree nodes in breadth-first order\n\n Returns:\n list: A list of all tree nodes\n \"\"\"\n return list(self.breadth_first())\n\n @property\n def leaves(self) -> List[\"Tree\"]:\n \"\"\"A list of leaf nodes only\n\n Returns:\n list: A list of leaf nodes only\n \"\"\"\n return [n for n in self.nodes if not n.children]\n\n @property\n def 
links(self) -> List[Tuple[\"Tree\", \"Tree\"]]:\n \"\"\"A list of all (parent, child) combinations\n\n Returns:\n list: All (parent,child) combinations\n \"\"\"\n _links = []\n for node in self.nodes:\n if node.children:\n for child in node.children:\n _links.append((node, child))\n return _links\n\n @classmethod\n def from_newick(cls, string: Optional[str] = None, filename: Optional[str] = None) -> \"Tree\":\n \"\"\"Parse a newick formatted string into a Tree object\n\n Arguments:\n newick_string (string): Newick formatted tree string\n\n Returns:\n Tree: Tree object\n \"\"\"\n assert filename or string\n assert not (filename and string)\n if filename:\n with open(filename) as filehandle:\n string = filehandle.read()\n tokens: list[str] = re.split(r\"\\s*(;|\\(|\\)|,|:)\\s*\", string)\n ID = 0\n tree = cls(ID=ID)\n ancestors: list[Tree] = list()\n found_branchlengths = False\n for i, token in enumerate(tokens):\n if token == \"(\":\n ID += 1\n subtree = cls(ID=ID)\n tree.children = [subtree]\n ancestors.append(tree)\n tree = subtree\n elif token == \",\":\n ID += 1\n subtree = cls(ID=ID)\n ancestors[-1].children.append(subtree)\n tree = subtree\n elif token == \")\":\n tree = ancestors.pop()\n else:\n previous_token = tokens[i - 1]\n if previous_token in (\"(\", \")\", \",\"):\n tree.name = token\n elif previous_token == \":\":\n found_branchlengths = True\n tree.length = float(token)\n tree.cumulative_length = 0.0\n tree.depth = 0\n queue: list[Tree] = [tree]\n while queue:\n node = queue.pop(0)\n if found_branchlengths:\n if node.length is None:\n warn(\n \"Found branchlengths on some parts of the tree, but node \"\n f\"{node.ID} has no branchlength specified, setting to \"\n \"branchlength 0.0\"\n )\n node.length = 0.0\n node.cumulative_length = 0.0\n for child in node.children:\n child.parent = node\n child.depth = node.depth + 1\n if child.length:\n child.cumulative_length = node.cumulative_length + abs(child.length)\n queue += node.children\n\n return 
tree\n\n def to_newick(self, branch_lengths: bool = False) -> str:\n \"\"\"Make a Newick formatted string\n\n Args:\n branch_lengths (bool, optional): Whether to include branch lengths\\\n in the Newick string. Defaults to True.\n\n Returns:\n String: Newick formatted tree string\n \"\"\"\n if self.name:\n name = str(self.name)\n else:\n name = \"\"\n\n if self.children:\n subtree_string = \",\".join([c.to_newick(branch_lengths=branch_lengths) for c in self.children])\n newick = f\"({subtree_string}){name}\"\n else:\n newick = name\n\n if branch_lengths and self.ID != 0:\n length = self.length\n if length is None:\n warn(\n \"Trying to write branch length for node that has no branch length \\\n set, defaulting to zero length branch.\"\n )\n length = 0\n if length == 0:\n length = int(0)\n newick += f\":{length}\"\n\n if self == self.root:\n newick += \";\"\n\n return newick\n\n @classmethod\n def from_sklearn(cls, clustering) -> \"Tree\":\n \"\"\"Read a tree from sklearn agglomerative clustering\n\n Args:\n clustering (sklearn object): sklearn agglomerative clustering\\\n object.\n\n Returns:\n Tree: Tree object\n \"\"\"\n nodes = clustering.children_\n n_leaves = nodes.shape[0] + 1\n tree = cls(ID=nodes.shape[0] * 2)\n\n queue = [tree]\n while queue:\n node = queue.pop(0)\n if node.ID < n_leaves:\n node.name = str(node.ID)\n continue\n for child_ID in nodes[node.ID - n_leaves]:\n child = cls(ID=child_ID)\n child.parent = node\n node.children.append(child)\n queue += node.children\n\n return tree\n\n def to_sklearn(self):\n # TODO\n raise NotImplementedError()\n\n @classmethod\n def from_json(cls):\n # TODO\n raise NotImplementedError()\n\n def to_json(self, indent: Optional[int] = None) -> str:\n return json.dumps(self.to_dict(), indent=indent)\n\n @classmethod\n def from_dict(cls, tree_dict):\n # TODO\n raise NotImplementedError()\n # tree = cls()\n # return tree\n\n def to_dict(self) -> TreeDict:\n \"\"\"[summary]\n\n Returns:\n TreeDict: [description]\n 
\"\"\"\n return asdict(self)\n\n def breadth_first(self) -> Generator[\"Tree\", None, None]:\n \"\"\"Generator implementing breadth first search starting at root node\"\"\"\n queue = [self]\n while queue:\n node = queue.pop(0)\n queue += node.children\n yield node\n\n def depth_first(self, post_order: bool = True) -> Generator[\"Tree\", None, None]:\n \"\"\"Generator implementing depth first search in either post- or\n pre-order traversel\n\n Keyword Arguments:\n post_order (bool, optional): Depth first search in post-order\n traversal or not. Defaults to True\n \"\"\"\n if not post_order:\n yield self\n for child in self.children:\n yield from child.depth_first(post_order=post_order)\n if post_order:\n yield self\n\n def rename_leaves(self, rename_func: Callable, inplace: bool = True) -> Optional[\"Tree\"]:\n \"\"\"[summary]\"\"\"\n tree = self if inplace else deepcopy(self)\n for leaf in tree.leaves:\n leaf.name = rename_func(leaf.name)\n
"},{"location":"API/tree/#picea.Tree.iloc","title":"iloc: Tree
property
","text":"Index based index
Examplefrom picea import Tree newick = '(((a,b),(c,d)),e);' tree = Tree.from_newick(newick) tree.iloc[2] Tree(name='', length=None, children=[Tree(name='a', length=None, children=[]), Tree(name='b', length=None, children=[])])
Returns:
Name Type DescriptionTree
Tree
tree node matching index
"},{"location":"API/tree/#picea.Tree.leaves","title":"leaves: List[Tree]
property
","text":"A list of leaf nodes only
Returns:
Name Type Descriptionlist
List[Tree]
A list of leaf nodes only
"},{"location":"API/tree/#picea.Tree.links","title":"links: List[Tuple[Tree, Tree]]
property
","text":"A list of all (parent, child) combinations
Returns:
Name Type Descriptionlist
List[Tuple[Tree, Tree]]
All (parent,child) combinations
"},{"location":"API/tree/#picea.Tree.loc","title":"loc: Tree
property
","text":"Name based index
Examplefrom picea import Tree newick = '(((a,b),(c,d)),e);' tree = Tree.from_newick(newick) tree.loc['a'] Tree(name='a', length=None, children=[])
Returns:
Name Type DescriptionTree
Tree
tree node matching name
"},{"location":"API/tree/#picea.Tree.nodes","title":"nodes: List[Tree]
property
","text":"A list of all tree nodes in breadth-first order
Returns:
Name Type Descriptionlist
List[Tree]
A list of all tree nodes
"},{"location":"API/tree/#picea.Tree.root","title":"root: Tree
property
","text":"Root node of the (sub)tree
Returns:
Name Type DescriptionTree
Tree
Root node
"},{"location":"API/tree/#picea.Tree.__post_init__","title":"__post_init__(ID, *args, **kwargs)
","text":"[summary]
Parameters:
Name Type Description DefaultID
[type]
[description]
required Source code inpicea/tree.py
def __post_init__(self, ID, *args, **kwargs):\n \"\"\"[summary]\n\n Args:\n ID ([type]): [description]\n \"\"\"\n self.ID = ID\n
"},{"location":"API/tree/#picea.Tree.breadth_first","title":"breadth_first()
","text":"Generator implementing breadth first search starting at root node
Source code inpicea/tree.py
def breadth_first(self) -> Generator[\"Tree\", None, None]:\n \"\"\"Generator implementing breadth first search starting at root node\"\"\"\n queue = [self]\n while queue:\n node = queue.pop(0)\n queue += node.children\n yield node\n
"},{"location":"API/tree/#picea.Tree.depth_first","title":"depth_first(post_order=True)
","text":"Generator implementing depth first search in either post- or pre-order traversal
Other Parameters:
Name Type Descriptionpost_order
bool
Depth first search in post-order traversal or not. Defaults to True.
Source code inpicea/tree.py
def depth_first(self, post_order: bool = True) -> Generator[\"Tree\", None, None]:\n \"\"\"Generator implementing depth first search in either post- or\n pre-order traversel\n\n Keyword Arguments:\n post_order (bool, optional): Depth first search in post-order\n traversal or not. Defaults to True\n \"\"\"\n if not post_order:\n yield self\n for child in self.children:\n yield from child.depth_first(post_order=post_order)\n if post_order:\n yield self\n
"},{"location":"API/tree/#picea.Tree.from_newick","title":"from_newick(string=None, filename=None)
classmethod
","text":"Parse a newick formatted string into a Tree object
Parameters:
Name Type Description Defaultnewick_string
string
Newick formatted tree string
requiredReturns:
Name Type DescriptionTree
Tree
Tree object
Source code inpicea/tree.py
@classmethod\ndef from_newick(cls, string: Optional[str] = None, filename: Optional[str] = None) -> \"Tree\":\n \"\"\"Parse a newick formatted string into a Tree object\n\n Arguments:\n newick_string (string): Newick formatted tree string\n\n Returns:\n Tree: Tree object\n \"\"\"\n assert filename or string\n assert not (filename and string)\n if filename:\n with open(filename) as filehandle:\n string = filehandle.read()\n tokens: list[str] = re.split(r\"\\s*(;|\\(|\\)|,|:)\\s*\", string)\n ID = 0\n tree = cls(ID=ID)\n ancestors: list[Tree] = list()\n found_branchlengths = False\n for i, token in enumerate(tokens):\n if token == \"(\":\n ID += 1\n subtree = cls(ID=ID)\n tree.children = [subtree]\n ancestors.append(tree)\n tree = subtree\n elif token == \",\":\n ID += 1\n subtree = cls(ID=ID)\n ancestors[-1].children.append(subtree)\n tree = subtree\n elif token == \")\":\n tree = ancestors.pop()\n else:\n previous_token = tokens[i - 1]\n if previous_token in (\"(\", \")\", \",\"):\n tree.name = token\n elif previous_token == \":\":\n found_branchlengths = True\n tree.length = float(token)\n tree.cumulative_length = 0.0\n tree.depth = 0\n queue: list[Tree] = [tree]\n while queue:\n node = queue.pop(0)\n if found_branchlengths:\n if node.length is None:\n warn(\n \"Found branchlengths on some parts of the tree, but node \"\n f\"{node.ID} has no branchlength specified, setting to \"\n \"branchlength 0.0\"\n )\n node.length = 0.0\n node.cumulative_length = 0.0\n for child in node.children:\n child.parent = node\n child.depth = node.depth + 1\n if child.length:\n child.cumulative_length = node.cumulative_length + abs(child.length)\n queue += node.children\n\n return tree\n
"},{"location":"API/tree/#picea.Tree.from_sklearn","title":"from_sklearn(clustering)
classmethod
","text":"Read a tree from sklearn agglomerative clustering
Parameters:
Name Type Description Defaultclustering
sklearn object
sklearn agglomerative clustering object.
requiredReturns:
Name Type DescriptionTree
Tree
Tree object
Source code inpicea/tree.py
@classmethod\ndef from_sklearn(cls, clustering) -> \"Tree\":\n \"\"\"Read a tree from sklearn agglomerative clustering\n\n Args:\n clustering (sklearn object): sklearn agglomerative clustering\\\n object.\n\n Returns:\n Tree: Tree object\n \"\"\"\n nodes = clustering.children_\n n_leaves = nodes.shape[0] + 1\n tree = cls(ID=nodes.shape[0] * 2)\n\n queue = [tree]\n while queue:\n node = queue.pop(0)\n if node.ID < n_leaves:\n node.name = str(node.ID)\n continue\n for child_ID in nodes[node.ID - n_leaves]:\n child = cls(ID=child_ID)\n child.parent = node\n node.children.append(child)\n queue += node.children\n\n return tree\n
"},{"location":"API/tree/#picea.Tree.rename_leaves","title":"rename_leaves(rename_func, inplace=True)
","text":"[summary]
Source code inpicea/tree.py
def rename_leaves(self, rename_func: Callable, inplace: bool = True) -> Optional[\"Tree\"]:\n \"\"\"[summary]\"\"\"\n tree = self if inplace else deepcopy(self)\n for leaf in tree.leaves:\n leaf.name = rename_func(leaf.name)\n
"},{"location":"API/tree/#picea.Tree.to_dict","title":"to_dict()
","text":"[summary]
Returns:
Name Type DescriptionTreeDict
TreeDict
[description]
Source code inpicea/tree.py
def to_dict(self) -> TreeDict:\n \"\"\"[summary]\n\n Returns:\n TreeDict: [description]\n \"\"\"\n return asdict(self)\n
"},{"location":"API/tree/#picea.Tree.to_newick","title":"to_newick(branch_lengths=False)
","text":"Make a Newick formatted string
Parameters:
Name Type Description Defaultbranch_lengths
bool
Whether to include branch lengths in the Newick string. Defaults to False.
False
Returns:
Name Type DescriptionString
str
Newick formatted tree string
Source code inpicea/tree.py
def to_newick(self, branch_lengths: bool = False) -> str:\n \"\"\"Make a Newick formatted string\n\n Args:\n branch_lengths (bool, optional): Whether to include branch lengths\\\n in the Newick string. Defaults to True.\n\n Returns:\n String: Newick formatted tree string\n \"\"\"\n if self.name:\n name = str(self.name)\n else:\n name = \"\"\n\n if self.children:\n subtree_string = \",\".join([c.to_newick(branch_lengths=branch_lengths) for c in self.children])\n newick = f\"({subtree_string}){name}\"\n else:\n newick = name\n\n if branch_lengths and self.ID != 0:\n length = self.length\n if length is None:\n warn(\n \"Trying to write branch length for node that has no branch length \\\n set, defaulting to zero length branch.\"\n )\n length = 0\n if length == 0:\n length = int(0)\n newick += f\":{length}\"\n\n if self == self.root:\n newick += \";\"\n\n return newick\n
"},{"location":"examples/ontology.pct/","title":"Ontology.pct","text":"This notebook shows how to work with biological ontologies such as the sequence ontology or the gene ontology.
In\u00a0[1]: Copied!import requests\nimport sys\nsys.path.insert(0, '../../')\nimport picea\npicea.__version__\nimport requests import sys sys.path.insert(0, '../../') import picea picea.__version__ Out[1]:
'0.0.26'In\u00a0[2]: Copied!
obo_url = (\n 'https://raw.githubusercontent.com/The-Sequence-Ontology/'\n 'SO-Ontologies/master/Ontology_Files/so.obo'\n)\nr = requests.get(obo_url)\nr\nobo_url = ( 'https://raw.githubusercontent.com/The-Sequence-Ontology/' 'SO-Ontologies/master/Ontology_Files/so.obo' ) r = requests.get(obo_url) r Out[2]:
<Response [200]>In\u00a0[3]: Copied!
r.text.split('\\n')[:100]\nr.text.split('\\n')[:100] Out[3]:
['format-version: 1.2',\n 'data-version: 2024-04-10',\n 'date: 10:04:2024 18:16',\n 'saved-by: Evan Christensen',\n 'subsetdef: Alliance_of_Genome_Resources \"Alliance of Genome Resources Gene Biotype Slim\"',\n 'subsetdef: biosapiens \"biosapiens protein feature ontology\"',\n 'subsetdef: DBVAR \"database of genomic structural variation\"',\n 'subsetdef: SOFA \"SO feature annotation\"',\n 'synonymtypedef: aa1 \"amino acid 1 letter code\"',\n 'synonymtypedef: aa3 \"amino acid 3 letter code\"',\n 'synonymtypedef: AAMOD \"amino acid modification\"',\n 'synonymtypedef: AGR \"Alliance of Genome Resources\"',\n 'synonymtypedef: BS \"biosapiens\"',\n 'synonymtypedef: dbsnp \"dbsnp variant terms\"',\n 'synonymtypedef: dbvar \"DBVAR\"',\n 'synonymtypedef: ebi_variants \"ensembl variant terms\"',\n 'synonymtypedef: RNAMOD \"RNA modification\" EXACT',\n 'synonymtypedef: VAR \"variant annotation term\"',\n 'default-namespace: sequence',\n 'ontology: so',\n 'property_value: IAO:0000700 SO:0000110',\n 'property_value: IAO:0000700 SO:0000400',\n 'property_value: IAO:0000700 SO:0001060',\n 'property_value: IAO:0000700 SO:0001260',\n '',\n '[Term]',\n 'id: SO:0000000',\n 'name: Sequence_Ontology',\n 'subset: SOFA',\n 'is_obsolete: true',\n '',\n '[Term]',\n 'id: SO:00000000002382',\n 'name: 5_prime_UTR_uORF_variant',\n 'def: \"A 5\\' UTR variant within an upstream open reading frame.\" [PMID:32461616, PMID:32926138]',\n 'comment: Added 10 Apr 2024 at the request of Sarah Hunt (EBI). See GitHub Issue #647.',\n 'is_a: SO:0001623 ! 5_prime_UTR_variant',\n 'created_by: evan',\n 'creation_date: 2024-04-10T17:49:03Z',\n '',\n '[Term]',\n 'id: SO:0000001',\n 'name: region',\n 'def: \"A sequence_feature with an extent greater than zero. A nucleotide region is composed of bases and a polypeptide region is composed of amino acids.\" [SO:ke]',\n 'subset: SOFA',\n 'synonym: \"sequence\" EXACT []',\n 'is_a: SO:0000110 ! 
sequence_feature',\n '',\n '[Term]',\n 'id: SO:00000010002382',\n 'name: 5_prime_UTR_uORF_stop_codon_variant',\n 'def: \"A 5\\' UTR variant where a stop codon in an upstream open reading frame is introduced, moved or lost.\" [PMID:32461616, PMID:32926138]',\n 'comment: Added 10 Apr 2024 at the request of Sarah Hunt (EBI). See GitHub Issue #622.',\n 'is_a: SO:00000000002382 ! 5_prime_UTR_uORF_variant',\n 'created_by: evan',\n 'creation_date: 2024-04-10T17:56:17Z',\n '',\n '[Term]',\n 'id: SO:0000002',\n 'name: sequence_secondary_structure',\n 'def: \"A folded sequence.\" [SO:ke]',\n 'synonym: \"INSDC_feature:misc_structure\" EXACT []',\n 'synonym: \"sequence secondary structure\" EXACT []',\n 'is_a: SO:0001411 ! biological_region',\n '',\n '[Term]',\n 'id: SO:00000020002382',\n 'name: 5_prime_UTR_uORF_frameshift_variant',\n 'def: \"A 5\\' UTR variant which disrupts the translation of an upstream open reading frame because the number of nucleotides inserted or deleted is not a multiple of three.\" [PMID:32461616, PMID:32926138]',\n 'comment: Added 10 Apr 2024 at the request of Sarah Hunt (EBI). See GitHub Issue #621.',\n 'synonym: \"uFrameshift (UTRannotator)\" EXACT []',\n 'is_a: SO:00000000002382 ! 5_prime_UTR_uORF_variant',\n 'created_by: evan',\n 'creation_date: 2024-04-10T17:58:40Z',\n '',\n '[Term]',\n 'id: SO:0000003',\n 'name: G_quartet',\n 'def: \"G-quartets are unusual nucleic acid structures consisting of a planar arrangement where each guanine is hydrogen bonded by hoogsteen pairing to another guanine in the quartet.\" [http://www.ncbi.nlm.nih.gov/pubmed/7919797?dopt=Abstract]',\n 'synonym: \"G quartet\" EXACT []',\n 'synonym: \"G tetrad\" EXACT []',\n 'synonym: \"G-quadruplex\" EXACT []',\n 'synonym: \"G-quartet\" EXACT []',\n 'synonym: \"G-tetrad\" EXACT []',\n 'synonym: \"G_quadruplex\" EXACT []',\n 'synonym: \"guanine tetrad\" EXACT []',\n 'xref: http://en.wikipedia.org/wiki/G-quadruplex \"wiki\"',\n 'is_a: SO:0000002 ! 
sequence_secondary_structure',\n '',\n '[Term]',\n 'id: SO:00000030002382',\n 'name: 5_prime_UTR_uORF_stop_codon_gain_variant',\n 'def: \"A 5\\' UTR variant where a premature stop codon is gained in an upstream open reading frame.\" [PMID:32461616, PMID:32926138]',\n 'comment: Added 10 Apr 2024 at the request of Sarah Hunt (EBI). See GitHub Issue #624.',\n 'synonym: \"uSTOP_gained\" EXACT [] {comment=\"UTRannotator\"}',\n 'is_a: SO:00000010002382 ! 5_prime_UTR_uORF_stop_codon_variant',\n 'created_by: evan',\n 'creation_date: 2024-04-10T18:01:42Z',\n '',\n '[Term]']In\u00a0[4]: Copied!
so = picea.Ontology.from_obo(string=r.text)\nso = picea.Ontology.from_obo(string=r.text) In\u00a0[5]: Copied!
ids = [el.ID for el in so['SO:0000866'].parents.elements]\nids = [el.ID for el in so['SO:0000866'].parents.elements] In\u00a0[6]: Copied!
'SO:0000866' in {el.ID for so_id in ids for el in so[so_id].children.elements}\n'SO:0000866' in {el.ID for so_id in ids for el in so[so_id].children.elements} Out[6]:
TrueIn\u00a0[7]: Copied!
len(so)\nlen(so) Out[7]:
2513In\u00a0[8]: Copied!
url = 'http://purl.obolibrary.org/obo/go.obo'\n# url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'\nr = requests.get(url)\ngo = picea.Ontology.from_obo(string=r.text)\nlen(go.elements)\nurl = 'http://purl.obolibrary.org/obo/go.obo' # url = 'http://purl.obolibrary.org/obo/go/go-basic.obo' r = requests.get(url) go = picea.Ontology.from_obo(string=r.text) len(go.elements)
/home/runner/work/picea/picea/docs/examples/../../picea/ontology.py:32: UserWarning: Accessed GO term by alt ID GO:0008150, returning main GO term with ID GO:0044699\n warnings.warn(f\"Accessed GO term by alt ID {ID}, \" f\"returning main GO term with ID {alt_id}\")\n/home/runner/work/picea/picea/docs/examples/../../picea/ontology.py:32: UserWarning: Accessed GO term by alt ID GO:0003674, returning main GO term with ID GO:0005554\n warnings.warn(f\"Accessed GO term by alt ID {ID}, \" f\"returning main GO term with ID {alt_id}\")\n/home/runner/work/picea/picea/docs/examples/../../picea/ontology.py:32: UserWarning: Accessed GO term by alt ID GO:0005575, returning main GO term with ID GO:0008372\n warnings.warn(f\"Accessed GO term by alt ID {ID}, \" f\"returning main GO term with ID {alt_id}\")\nOut[8]:
45667In\u00a0[9]: Copied!
[(term.ID, term.name, len(term.parents)) for term in go['GO:0048316'].parents]\n[(term.ID, term.name, len(term.parents)) for term in go['GO:0048316'].parents] Out[9]:
[('GO:0009791', ['post-embryonic development'], 5),\n ('GO:0032501', ['multicellular organismal process'], 1),\n ('GO:0044699', ['biological_process'], 0),\n ('GO:0007275', ['multicellular organism development'], 4),\n ('GO:0048856', ['anatomical structure development'], 2),\n ('GO:0032502', ['developmental process'], 1),\n ('GO:0048608', ['reproductive structure development'], 9),\n ('GO:0003006', ['developmental process involved in reproduction'], 3),\n ('GO:0022414', ['reproductive process'], 1),\n ('GO:0061458', ['reproductive system development'], 6),\n ('GO:0048731', ['system development'], 5),\n ('GO:0010154', ['fruit development'], 10)]In\u00a0[10]: Copied!
go['GO:0048316'].children\ngo['GO:0048316'].children Out[10]:
<picea.ontology.Ontology at 0x7f139fd26020>In\u00a0[11]: Copied!
import networkx as nx\nnx.__version__\nimport networkx as nx nx.__version__
\n---------------------------------------------------------------------------\nModuleNotFoundError Traceback (most recent call last)\nCell In[11], line 1\n----> 1 import networkx as nx\n 2 nx.__version__\n\nModuleNotFoundError: No module named 'networkx'In\u00a0[12]: Copied!
graph = nx.DiGraph()\nfor term in [go['GO:0048316'], *go['GO:0048316'].children]:\n graph.add_node(term.ID, name=term.name)\n for child_ID in term._children:\n graph.add_edge(term.ID, child_ID)\nlayout = nx.planar_layout(graph)\nnx.draw(graph, pos=layout, node_shape='s')\ngraph = nx.DiGraph() for term in [go['GO:0048316'], *go['GO:0048316'].children]: graph.add_node(term.ID, name=term.name) for child_ID in term._children: graph.add_edge(term.ID, child_ID) layout = nx.planar_layout(graph) nx.draw(graph, pos=layout, node_shape='s')
\n---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\nCell In[12], line 1\n----> 1 graph = nx.DiGraph()\n 2 for term in [go['GO:0048316'], *go['GO:0048316'].children]:\n 3 graph.add_node(term.ID, name=term.name)\n\nNameError: name 'nx' is not definedIn\u00a0[13]: Copied!
import sys\n!{sys.executable} -m pip install pygraphviz\nnx.nx_agraph.to_agraph(graph)\nimport sys !{sys.executable} -m pip install pygraphviz nx.nx_agraph.to_agraph(graph)
Collecting pygraphviz\r\n
Downloading pygraphviz-1.13.tar.gz (104 kB)\r\n \u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501 0.0/104.6 kB ? eta -:--:--\r \u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501 104.6/104.6 kB 4.5 MB/s eta 0:00:00\r\n
Installing build dependencies ... -
\b \b\\
\b \b|
\b \b/
\b \bdone\r\n
Getting requirements to build wheel ... -\b \bdone\r\n
Installing backend dependencies ... -
\b \b\\
\b \bdone\r\n
Preparing metadata (pyproject.toml) ... -
\b \bdone\r\nBuilding wheels for collected packages: pygraphviz\r\n
Building wheel for pygraphviz (pyproject.toml) ... -
\b \b\\
\b \berror\r\n error: subprocess-exited-with-error\r\n \r\n \u00d7 Building wheel for pygraphviz (pyproject.toml) did not run successfully.\r\n \u2502 exit code: 1\r\n \u2570\u2500> [61 lines of output]\r\n running bdist_wheel\r\n running build\r\n running build_py\r\n creating build\r\n creating build/lib.linux-x86_64-cpython-310\r\n creating build/lib.linux-x86_64-cpython-310/pygraphviz\r\n copying pygraphviz/agraph.py -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n copying pygraphviz/testing.py -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n copying pygraphviz/scraper.py -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n copying pygraphviz/__init__.py -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n copying pygraphviz/graphviz.py -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n creating build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_node_attributes.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_graph.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/__init__.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_repr_mimebundle.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_unicode.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_readwrite.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_edge_attributes.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_layout.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_scraper.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_close.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_clear.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying 
pygraphviz/tests/test_attribute_defaults.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_drawing.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_html.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_subgraph.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_string.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n running egg_info\r\n writing pygraphviz.egg-info/PKG-INFO\r\n writing dependency_links to pygraphviz.egg-info/dependency_links.txt\r\n writing top-level names to pygraphviz.egg-info/top_level.txt\r\n reading manifest file 'pygraphviz.egg-info/SOURCES.txt'\r\n reading manifest template 'MANIFEST.in'\r\n warning: no files found matching '*.swg'\r\n warning: no files found matching '*.png' under directory 'doc'\r\n warning: no files found matching '*.html' under directory 'doc'\r\n warning: no files found matching '*.txt' under directory 'doc'\r\n warning: no files found matching '*.css' under directory 'doc'\r\n warning: no previously-included files matching '*~' found anywhere in distribution\r\n warning: no previously-included files matching '*.pyc' found anywhere in distribution\r\n warning: no previously-included files matching '.svn' found anywhere in distribution\r\n no previously-included directories found matching 'doc/build'\r\n adding license file 'LICENSE'\r\n writing manifest file 'pygraphviz.egg-info/SOURCES.txt'\r\n copying pygraphviz/graphviz.i -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n copying pygraphviz/graphviz_wrap.c -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n running build_ext\r\n building 'pygraphviz._graphviz' extension\r\n creating build/temp.linux-x86_64-cpython-310\r\n creating build/temp.linux-x86_64-cpython-310/pygraphviz\r\n x86_64-linux-gnu-gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g 
-fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -fPIC -DSWIG_PYTHON_STRICT_BYTE_CHAR -I/home/runner/.cache/pypoetry/virtualenvs/picea-ox5U8VzY-py3.10/include -I/usr/include/python3.10 -c pygraphviz/graphviz_wrap.c -o build/temp.linux-x86_64-cpython-310/pygraphviz/graphviz_wrap.o\r\n pygraphviz/graphviz_wrap.c:9: warning: \"SWIG_PYTHON_STRICT_BYTE_CHAR\" redefined\r\n 9 | #define SWIG_PYTHON_STRICT_BYTE_CHAR\r\n |\r\n <command-line>: note: this is the location of the previous definition\r\n pygraphviz/graphviz_wrap.c:3023:10: fatal error: graphviz/cgraph.h: No such file or directory\r\n 3023 | #include \"graphviz/cgraph.h\"\r\n | ^~~~~~~~~~~~~~~~~~~\r\n compilation terminated.\r\n error: command '/usr/bin/x86_64-linux-gnu-gcc' failed with exit code 1\r\n [end of output]\r\n \r\n note: This error originates from a subprocess, and is likely not a problem with pip.\r\n ERROR: Failed building wheel for pygraphviz\r\nFailed to build pygraphviz\r\nERROR: Could not build wheels for pygraphviz, which is required to install pyproject.toml-based projects\r\n
\n---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\nCell In[13], line 3\n 1 import sys\n 2 get_ipython().system('{sys.executable} -m pip install pygraphviz')\n----> 3 nx.nx_agraph.to_agraph(graph)\n\nNameError: name 'nx' is not definedIn\u00a0[14]: Copied!
[(term.ID, term.name) for term in go['GO:0048316'].children]\n[(term.ID, term.name) for term in go['GO:0048316'].children] Out[14]:
[('GO:0009793', ['embryo development ending in seed dormancy']),\n ('GO:0009942', ['longitudinal axis specification']),\n ('GO:0010069', ['zygote asymmetric cytokinesis in embryo sac']),\n ('GO:0010262', ['somatic embryogenesis']),\n ('GO:0010654', ['apical cell fate commitment']),\n ('GO:0048508', ['embryonic meristem development']),\n ('GO:0010065', ['primary meristem tissue development']),\n ('GO:0010066', ['ground meristem histogenesis']),\n ('GO:0010067', ['procambium histogenesis']),\n ('GO:0010068', ['protoderm histogenesis']),\n ('GO:0010071', ['root meristem specification']),\n ('GO:0010072', ['primary shoot apical meristem specification']),\n ('GO:0048825', ['cotyledon development']),\n ('GO:0048826', ['cotyledon morphogenesis']),\n ('GO:0010588', ['cotyledon vascular tissue pattern formation']),\n ('GO:0009960', ['endosperm development']),\n ('GO:0010214', ['seed coat development']),\n ('GO:0048359',\n ['mucilage metabolic process involved in seed coat development']),\n ('GO:0048354',\n ['mucilage biosynthetic process involved in seed coat development']),\n ('GO:0010344', ['seed oilbody biogenesis']),\n ('GO:0010431', ['seed maturation']),\n ('GO:0010162', ['seed dormancy process']),\n ('GO:0010231', ['maintenance of seed dormancy']),\n ('GO:0098755', ['maintenance of seed dormancy by absisic acid']),\n ('GO:0048700', ['acquisition of desiccation tolerance in seed']),\n ('GO:0048838', ['release of seed from dormancy']),\n ('GO:1990068', ['seed dehydration']),\n ('GO:0048317', ['seed morphogenesis']),\n ('GO:0080001', ['mucilage extrusion from seed coat']),\n ('GO:0080112', ['seed growth']),\n ('GO:0090376', ['seed trichome differentiation']),\n ('GO:0090377', ['seed trichome initiation']),\n ('GO:0090378', ['seed trichome elongation']),\n ('GO:0090379',\n ['secondary cell wall biogenesis involved in seed trichome differentiation']),\n ('GO:0090380', ['seed trichome maturation']),\n ('GO:0140547', ['acquisition of seed longevity'])]In\u00a0[15]: Copied!
go['GO:0010431'].__dict__\ngo['GO:0010431'].__dict__ Out[15]:
{'_ID': 'GO:0010431',\n '_original_ID': 'GO:0010431',\n '_container': <picea.ontology.Ontology at 0x7f1376f978b0>,\n '_children': ['GO:0010162', 'GO:1990068'],\n '_parents': ['GO:0003006', 'GO:0021700', 'GO:0048609', 'GO:0048316'],\n 'name': ['seed maturation'],\n 'def': ['\"A process in seed development that occurs after embryogenesis by which a quiescent state is established in a seed. Seed maturation is characterized by storage compound accumulation, acquisition of desiccation tolerance, growth arrest and the entry into a dormancy period of variable length that is broken upon germination.\" [PMID:16096971]'],\n 'alt_id': [],\n 'namespace': ['biological_process'],\n 'is_a': ['GO:0003006 ! developmental process involved in reproduction',\n 'GO:0021700 ! developmental maturation',\n 'GO:0048609 ! multicellular organismal reproductive process'],\n 'relationship': ['part_of GO:0048316 ! seed development']}In\u00a0[16]: Copied!
go['GO:0048316'].__dict__\ngo['GO:0048316'].__dict__ Out[16]:
{'_ID': 'GO:0048316',\n '_original_ID': 'GO:0048316',\n '_container': <picea.ontology.Ontology at 0x7f1376f978b0>,\n '_children': ['GO:0009793',\n 'GO:0009960',\n 'GO:0010214',\n 'GO:0010344',\n 'GO:0010431',\n 'GO:0048317',\n 'GO:0080001',\n 'GO:0080112',\n 'GO:0090376',\n 'GO:0140547'],\n '_parents': ['GO:0009791', 'GO:0048608', 'GO:0010154'],\n 'name': ['seed development'],\n 'def': ['\"The process whose specific outcome is the progression of the seed over time, from its formation to the mature structure. A seed is a propagating organ formed in the sexual reproductive cycle of gymnosperms and angiosperms, consisting of a protective coat enclosing an embryo and food reserves.\" [GOC:jid, PO:0009010]'],\n 'alt_id': [],\n 'namespace': ['biological_process'],\n 'is_a': ['GO:0009791 ! post-embryonic development',\n 'GO:0048608 ! reproductive structure development'],\n 'relationship': ['part_of GO:0010154 ! fruit development']}In\u00a0[17]: Copied!
go['GO:0048316'].children._elements.keys()\ngo['GO:0048316'].children._elements.keys() Out[17]:
dict_keys(['GO:0009793', 'GO:0009942', 'GO:0010069', 'GO:0010262', 'GO:0010654', 'GO:0048508', 'GO:0010065', 'GO:0010066', 'GO:0010067', 'GO:0010068', 'GO:0010071', 'GO:0010072', 'GO:0048825', 'GO:0048826', 'GO:0010588', 'GO:0009960', 'GO:0010214', 'GO:0048359', 'GO:0048354', 'GO:0010344', 'GO:0010431', 'GO:0010162', 'GO:0010231', 'GO:0098755', 'GO:0048700', 'GO:0048838', 'GO:1990068', 'GO:0048317', 'GO:0080001', 'GO:0080112', 'GO:0090376', 'GO:0090377', 'GO:0090378', 'GO:0090379', 'GO:0090380', 'GO:0140547'])In\u00a0[18]: Copied!
[(term.ID,term.name) for term in go if term.__dict__.get('alt_id') and term._parents]\n[(term.ID,term.name) for term in go if term.__dict__.get('alt_id') and term._parents] Out[18]:
[('GO:0036422', ['heptaprenyl diphosphate synthase activity']),\n ('GO:0000010', ['heptaprenyl diphosphate synthase activity']),\n ('GO:0000022', ['mitotic spindle elongation']),\n ('GO:1905121', ['mitotic spindle elongation']),\n ('GO:0000946', ['tRNA binding']),\n ('GO:0000049', ['tRNA binding']),\n ('GO:0006871', ['urea cycle']),\n ('GO:0006594', ['urea cycle']),\n ('GO:0000050', ['urea cycle']),\n ('GO:0000055', ['ribosomal large subunit export from nucleus']),\n ('GO:0000057', ['ribosomal large subunit export from nucleus']),\n ('GO:0000058', ['ribosomal small subunit export from nucleus']),\n ('GO:0000056', ['ribosomal small subunit export from nucleus']),\n ('GO:0016359', ['mitotic sister chromatid segregation']),\n ('GO:0000070', ['mitotic sister chromatid segregation']),\n ('GO:0030475', ['initial mitotic spindle pole body separation']),\n ('GO:0000073', ['initial mitotic spindle pole body separation']),\n ('GO:0072395', ['cell cycle checkpoint signaling']),\n ('GO:0072407', ['cell cycle checkpoint signaling']),\n ('GO:0071779', ['cell cycle checkpoint signaling']),\n ('GO:0000075', ['cell cycle checkpoint signaling']),\n ('GO:0031576', ['cell cycle checkpoint signaling']),\n ('GO:0072404', ['cell cycle checkpoint signaling']),\n ('GO:0000076', ['DNA replication checkpoint signaling']),\n ('GO:0072437', ['DNA replication checkpoint signaling']),\n ('GO:0072422', ['DNA damage checkpoint signaling']),\n ('GO:0000077', ['DNA damage checkpoint signaling']),\n ('GO:0015177',\n ['S-adenosyl-L-methionine transmembrane transporter activity']),\n ('GO:0000095',\n ['S-adenosyl-L-methionine transmembrane transporter activity']),\n ('GO:0000100', ['S-methylmethionine transmembrane transporter activity']),\n ('GO:0015178', ['S-methylmethionine transmembrane transporter activity']),\n ('GO:0019378', ['sulfate assimilation']),\n ('GO:0000103', ['sulfate assimilation']),\n ('GO:0019739', ['succinate dehydrogenase activity']),\n ('GO:0000104', ['succinate dehydrogenase 
activity']),\n ('GO:0045816', ['negative regulation of transcription by RNA polymerase II']),\n ('GO:0010553', ['negative regulation of transcription by RNA polymerase II']),\n ('GO:0000122', ['negative regulation of transcription by RNA polymerase II']),\n ('GO:0000125', ['SAGA complex']),\n ('GO:0000124', ['SAGA complex']),\n ('GO:0030914', ['SAGA complex']),\n ('GO:0000128', ['flocculation']),\n ('GO:0036281', ['flocculation']),\n ('GO:0000501', ['flocculation']),\n ('GO:0032128', ['flocculation']),\n ('GO:0043690', ['flocculation']),\n ('GO:0043689', ['flocculation']),\n ('GO:0036282', ['flocculation']),\n ('GO:0030607', ['establishment of mitotic spindle orientation']),\n ('GO:0030609', ['establishment of mitotic spindle orientation']),\n ('GO:0000132', ['establishment of mitotic spindle orientation']),\n ('GO:0000146', ['microfilament motor activity']),\n ('GO:0030898', ['microfilament motor activity']),\n ('GO:0016548', ['rRNA modification']),\n ('GO:0000154', ['rRNA modification']),\n ('GO:0009096', ['tryptophan biosynthetic process']),\n ('GO:0000162', ['tryptophan biosynthetic process']),\n ('GO:0000165', ['MAPK cascade']),\n ('GO:0007255', ['MAPK cascade']),\n ('GO:0043790', ['rRNA (adenine-N6,N6-)-dimethyltransferase activity']),\n ('GO:0000179', ['rRNA (adenine-N6,N6-)-dimethyltransferase activity']),\n ('GO:0043147', ['meiotic spindle organization']),\n ('GO:0000212', ['meiotic spindle organization']),\n ('GO:0000215', [\"tRNA 2'-phosphotransferase activity\"]),\n ('GO:0008665', [\"tRNA 2'-phosphotransferase activity\"]),\n ('GO:0000351', ['spliceosomal tri-snRNP complex assembly']),\n ('GO:0000244', ['spliceosomal tri-snRNP complex assembly']),\n ('GO:0000355', ['spliceosomal tri-snRNP complex assembly']),\n ('GO:0050576', ['3-keto sterol reductase activity']),\n ('GO:0000253', ['3-keto sterol reductase activity']),\n ('GO:0005051', ['peroxisome targeting sequence binding']),\n ('GO:0000268', ['peroxisome targeting sequence binding']),\n 
('GO:0009284', ['peptidoglycan metabolic process']),\n ('GO:0000270', ['peptidoglycan metabolic process']),\n ('GO:0044244', ['polysaccharide catabolic process']),\n ('GO:0000272', ['polysaccharide catabolic process']),\n ('GO:0007067', ['mitotic cell cycle']),\n ('GO:0000278', ['mitotic cell cycle']),\n ('GO:0000292', ['RNA fragment catabolic process']),\n ('GO:0030452', ['RNA fragment catabolic process']),\n ('GO:0000310', ['xanthine phosphoribosyltransferase activity']),\n ('GO:0009043', ['xanthine phosphoribosyltransferase activity']),\n ('GO:0010388', ['protein deneddylation']),\n ('GO:0000338', ['protein deneddylation']),\n ('GO:0000370', ['mRNA branch site recognition']),\n ('GO:0000348', ['mRNA branch site recognition']),\n ('GO:0000371', ['mRNA branch site recognition']),\n ('GO:0000357',\n ['generation of catalytic spliceosome for first transesterification step']),\n ('GO:0000356',\n ['generation of catalytic spliceosome for first transesterification step']),\n ('GO:0000349',\n ['generation of catalytic spliceosome for first transesterification step']),\n ('GO:0000359',\n ['generation of catalytic spliceosome for second transesterification step']),\n ('GO:0000358',\n ['generation of catalytic spliceosome for second transesterification step']),\n ('GO:0000350',\n ['generation of catalytic spliceosome for second transesterification step']),\n ('GO:0000360', ['cis assembly of pre-catalytic spliceosome']),\n ('GO:0000354', ['cis assembly of pre-catalytic spliceosome']),\n ('GO:0000361', ['cis assembly of pre-catalytic spliceosome']),\n ('GO:0000385', ['RNA splicing, via transesterification reactions']),\n ('GO:0031202', ['RNA splicing, via transesterification reactions']),\n ('GO:0000375', ['RNA splicing, via transesterification reactions']),\n ('GO:0000388',\n ['spliceosome conformational change to release U4 (or U4atac) and U1 (or U11)']),\n ('GO:0000396',\n ['spliceosome conformational change to release U4 (or U4atac) and U1 (or U11)']),\n ('GO:0000397',\n 
['spliceosome conformational change to release U4 (or U4atac) and U1 (or U11)']),\n ('GO:0000382', [\"mRNA 3'-splice site recognition\"]),\n ('GO:0000383', [\"mRNA 3'-splice site recognition\"]),\n ('GO:0000389', [\"mRNA 3'-splice site recognition\"]),\n ('GO:0000392', ['spliceosomal complex disassembly']),\n ('GO:0000391', ['spliceosomal complex disassembly']),\n ('GO:0000390', ['spliceosomal complex disassembly']),\n ('GO:0000395', [\"mRNA 5'-splice site recognition\"]),\n ('GO:0000369', [\"mRNA 5'-splice site recognition\"]),\n ('GO:0000368', [\"mRNA 5'-splice site recognition\"]),\n ('GO:0000398', ['mRNA splicing, via spliceosome']),\n ('GO:0006374', ['mRNA splicing, via spliceosome']),\n ('GO:0006375', ['mRNA splicing, via spliceosome']),\n ('GO:0000420', ['RNA polymerase IV complex']),\n ('GO:0000418', ['RNA polymerase IV complex']),\n ('GO:0000419', ['RNA polymerase V complex']),\n ('GO:0080137', ['RNA polymerase V complex']),\n ('GO:0000443', ['core TFIIH complex portion of holo TFIIH complex']),\n ('GO:0000438', ['core TFIIH complex portion of holo TFIIH complex']),\n ('GO:0000441', ['transcription factor TFIIH core complex']),\n ('GO:0000439', ['transcription factor TFIIH core complex']),\n ('GO:0000440', ['core TFIIH complex portion of NEF3 complex']),\n ('GO:0000442', ['core TFIIH complex portion of NEF3 complex']),\n ('GO:0000444', ['MIS12/MIND type complex']),\n ('GO:0000818', ['MIS12/MIND type complex']),\n ('GO:0000448',\n ['cleavage in ITS2 between 5.8S rRNA and LSU-rRNA of tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)']),\n ('GO:0000490',\n ['cleavage in ITS2 between 5.8S rRNA and LSU-rRNA of tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)']),\n ('GO:0000462',\n ['maturation of SSU-rRNA from tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)']),\n ('GO:1990041',\n ['maturation of SSU-rRNA from tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)']),\n ('GO:0019004',\n ['oxidized pyrimidine nucleobase 
lesion DNA N-glycosylase activity']),\n ('GO:0000703',\n ['oxidized pyrimidine nucleobase lesion DNA N-glycosylase activity']),\n ('GO:0016924', ['double-strand break repair via homologous recombination']),\n ('GO:0000724', ['double-strand break repair via homologous recombination']),\n ('GO:0000741', ['karyogamy']),\n ('GO:0007335', ['karyogamy']),\n ('GO:0006946',\n ['nuclear migration involved in conjugation with cellular fusion']),\n ('GO:0000743',\n ['nuclear migration involved in conjugation with cellular fusion']),\n ('GO:0030477', ['conjugation with cellular fusion']),\n ('GO:0000747', ['conjugation with cellular fusion']),\n ('GO:0007322', ['conjugation with cellular fusion']),\n ('GO:0007333', ['conjugation with cellular fusion']),\n ('GO:0030461', ['conjugation with cellular fusion']),\n ('GO:0030434',\n ['response to pheromone triggering conjugation with cellular fusion']),\n ('GO:0007328',\n ['response to pheromone triggering conjugation with cellular fusion']),\n ('GO:0000749',\n ['response to pheromone triggering conjugation with cellular fusion']),\n ('GO:0007330',\n ['pheromone-dependent signal transduction involved in conjugation with cellular fusion']),\n ('GO:0000750',\n ['pheromone-dependent signal transduction involved in conjugation with cellular fusion']),\n ('GO:0030454',\n ['pheromone-dependent signal transduction involved in conjugation with cellular fusion']),\n ('GO:0030571', ['mitotic cell cycle G1 arrest in response to pheromone']),\n ('GO:0000751', ['mitotic cell cycle G1 arrest in response to pheromone']),\n ('GO:0000752',\n ['agglutination involved in conjugation with cellular fusion']),\n ('GO:0007334',\n ['agglutination involved in conjugation with cellular fusion']),\n ('GO:0000753',\n ['cell morphogenesis involved in conjugation with cellular fusion']),\n ('GO:0007332',\n ['cell morphogenesis involved in conjugation with cellular fusion']),\n ('GO:0007331',\n ['adaptation of signaling pathway by response to pheromone involved 
in conjugation with cellular fusion']),\n ('GO:0030453',\n ['adaptation of signaling pathway by response to pheromone involved in conjugation with cellular fusion']),\n ('GO:0000754',\n ['adaptation of signaling pathway by response to pheromone involved in conjugation with cellular fusion']),\n ('GO:0000218', ['cytogamy']),\n ('GO:0030462', ['cytogamy']),\n ('GO:0000755', ['cytogamy']),\n ('GO:0000770', ['peptide pheromone export']),\n ('GO:0007325', ['peptide pheromone export']),\n ('GO:0097521', ['chromosome, centromeric region']),\n ('GO:0000775', ['chromosome, centromeric region']),\n ('GO:0000776', ['kinetochore']),\n ('GO:0031617', ['kinetochore']),\n ('GO:0000777', ['kinetochore']),\n ('GO:0005699', ['kinetochore']),\n ('GO:0000778', ['kinetochore']),\n ('GO:0000780', ['condensed chromosome, centromeric region']),\n ('GO:0000779', ['condensed chromosome, centromeric region']),\n ('GO:0000781', ['chromosome, telomeric region']),\n ('GO:0000784', ['chromosome, telomeric region']),\n ('GO:0000785', ['chromatin']),\n ('GO:0000789', ['chromatin']),\n ('GO:0000790', ['chromatin']),\n ('GO:0005717', ['chromatin']),\n ('GO:0005718', ['nucleosome']),\n ('GO:0000787', ['nucleosome']),\n ('GO:0000788', ['nucleosome']),\n ('GO:0000786', ['nucleosome']),\n ('GO:0005719', ['euchromatin']),\n ('GO:0035327', ['euchromatin']),\n ('GO:0000791', ['euchromatin']),\n ('GO:0035328', ['heterochromatin']),\n ('GO:0000792', ['heterochromatin']),\n ('GO:0005720', ['heterochromatin']),\n ('GO:0005716', ['synaptonemal complex']),\n ('GO:0000795', ['synaptonemal complex']),\n ('GO:0008620', ['condensin complex']),\n ('GO:0061814', ['condensin complex']),\n ('GO:0000796', ['condensin complex']),\n ('GO:0000799', ['condensin complex']),\n ('GO:0000797', ['condensin complex']),\n ('GO:0008621', ['condensin complex']),\n ('GO:0005676', ['condensin complex']),\n ('GO:0007148', ['cell morphogenesis']),\n ('GO:0000902', ['cell morphogenesis']),\n ('GO:0045791', ['cell morphogenesis']),\n 
('GO:0045790', ['cell morphogenesis']),\n ('GO:0016288', ['cytokinesis']),\n ('GO:0033205', ['cytokinesis']),\n ('GO:0007104', ['cytokinesis']),\n ('GO:0000910', ['cytokinesis']),\n ('GO:0000915', ['actomyosin contractile ring assembly']),\n ('GO:0045573', ['actomyosin contractile ring assembly']),\n ('GO:2000708', ['actomyosin contractile ring assembly']),\n ('GO:1902411', ['division septum assembly']),\n ('GO:0000917', ['division septum assembly']),\n ('GO:0071937', ['division septum assembly']),\n ('GO:1902409', ['septum digestion after cytokinesis']),\n ('GO:2000695', ['septum digestion after cytokinesis']),\n ('GO:0000920', ['septum digestion after cytokinesis']),\n ('GO:0000922', ['spindle pole']),\n ('GO:0030615', ['spindle pole']),\n ('GO:0000929', ['gamma-tubulin ring complex']),\n ('GO:0000925', ['gamma-tubulin ring complex']),\n ('GO:0055032', ['gamma-tubulin ring complex']),\n ('GO:0055031', ['gamma-tubulin ring complex']),\n ('GO:0000926', ['gamma-tubulin ring complex']),\n ('GO:0000931', ['gamma-tubulin ring complex']),\n ('GO:0055033', ['gamma-tubulin ring complex']),\n ('GO:0061494', ['gamma-tubulin ring complex']),\n ('GO:0008274', ['gamma-tubulin ring complex']),\n ('GO:0000924', ['gamma-tubulin ring complex']),\n ('GO:0000935', ['division septum']),\n ('GO:0043187', ['division septum']),\n ('GO:0000939', ['inner kinetochore']),\n ('GO:0000941', ['inner kinetochore']),\n ('GO:0000942', ['outer kinetochore']),\n ('GO:0000940', ['outer kinetochore']),\n ('GO:0001017', ['transcription cis-regulatory region binding']),\n ('GO:0000976', ['transcription cis-regulatory region binding']),\n ('GO:0000984', ['transcription cis-regulatory region binding']),\n ('GO:0044212', ['transcription cis-regulatory region binding']),\n ('GO:0000975', ['transcription cis-regulatory region binding']),\n ('GO:0000977',\n ['RNA polymerase II transcription regulatory region sequence-specific DNA binding']),\n ('GO:0001012',\n ['RNA polymerase II transcription regulatory 
region sequence-specific DNA binding']),\n ('GO:0000978',\n ['RNA polymerase II cis-regulatory region sequence-specific DNA binding']),\n ('GO:0000980',\n ['RNA polymerase II cis-regulatory region sequence-specific DNA binding']),\n ('GO:0001201',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0000982',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0000981',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0001200',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0001133',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0001203',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0003705',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0001202',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0000986', ['cis-regulatory region sequence-specific DNA binding']),\n ('GO:0001158', ['cis-regulatory region sequence-specific DNA binding']),\n ('GO:0035326', ['cis-regulatory region sequence-specific DNA binding']),\n ('GO:0001159', ['cis-regulatory region sequence-specific DNA binding']),\n ('GO:0001150', ['cis-regulatory region sequence-specific DNA binding']),\n ('GO:0000987', ['cis-regulatory region sequence-specific DNA binding']),\n ('GO:0000995',\n ['RNA polymerase III general transcription initiation factor activity']),\n ('GO:0001034',\n ['RNA polymerase III general transcription initiation factor activity']),\n ('GO:0001002',\n ['RNA polymerase III type 1 promoter sequence-specific DNA binding']),\n ('GO:0001030',\n ['RNA polymerase III type 1 promoter sequence-specific DNA binding']),\n ('GO:0001031',\n ['RNA polymerase III type 2 promoter sequence-specific DNA binding']),\n ('GO:0001003',\n ['RNA polymerase III type 2 promoter sequence-specific DNA binding']),\n 
('GO:0001032',\n ['RNA polymerase III type 3 promoter sequence-specific DNA binding']),\n ('GO:0001006',\n ['RNA polymerase III type 3 promoter sequence-specific DNA binding']),\n ('GO:0001045', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0070362', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0070364', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0070363', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0001018', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0000997', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0001044', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0070361', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0001039',\n ['RNA polymerase III hybrid type promoter sequence-specific DNA binding']),\n ('GO:0001037',\n ['RNA polymerase III hybrid type promoter sequence-specific DNA binding']),\n ('GO:0001046', ['core promoter sequence-specific DNA binding']),\n ('GO:0001047', ['core promoter sequence-specific DNA binding']),\n ('GO:0000985', ['core promoter sequence-specific DNA binding']),\n ('GO:0001109', ['promoter clearance during DNA-templated transcription']),\n ('GO:0001122', ['promoter clearance during DNA-templated transcription']),\n ('GO:0001112', ['DNA-templated transcription open complex formation']),\n ('GO:0001127', ['DNA-templated transcription open complex formation']),\n ('GO:0001146',\n ['transcription termination site sequence-specific DNA binding']),\n ('GO:0001145',\n ['transcription termination site sequence-specific DNA binding']),\n ('GO:0001160',\n ['transcription termination site sequence-specific DNA binding']),\n ('GO:0001147',\n ['transcription termination site sequence-specific DNA binding']),\n ('GO:0001148',\n ['transcription termination site sequence-specific DNA binding']),\n ('GO:0044213',\n ['intronic transcription regulatory region sequence-specific DNA 
binding']),\n ('GO:0001161',\n ['intronic transcription regulatory region sequence-specific DNA binding']),\n ('GO:0001013',\n ['RNA polymerase I transcription regulatory region sequence-specific DNA binding']),\n ('GO:0001163',\n ['RNA polymerase I transcription regulatory region sequence-specific DNA binding']),\n ('GO:0001187',\n ['RNA polymerase I core promoter sequence-specific DNA binding']),\n ('GO:0001164',\n ['RNA polymerase I core promoter sequence-specific DNA binding']),\n ('GO:0001166',\n ['RNA polymerase I cis-regulatory region sequence-specific DNA binding']),\n ('GO:0001165',\n ['RNA polymerase I cis-regulatory region sequence-specific DNA binding']),\n ('GO:0001173', ['DNA-templated transcriptional start site selection']),\n ('GO:0001176', ['DNA-templated transcriptional start site selection']),\n ('GO:0001184', ['RNA polymerase I promoter clearance']),\n ('GO:0001182', ['RNA polymerase I promoter clearance']),\n ('GO:0001188', ['RNA polymerase I preinitiation complex assembly']),\n ('GO:0001189', ['RNA polymerase I preinitiation complex assembly']),\n ('GO:0001192',\n ['maintenance of transcriptional fidelity during transcription elongation']),\n ('GO:0001194',\n ['maintenance of transcriptional fidelity during transcription elongation']),\n ('GO:0001140', ['DNA-binding transcription activator activity']),\n ('GO:0001216', ['DNA-binding transcription activator activity']),\n ('GO:0001215', ['DNA-binding transcription activator activity']),\n ('GO:0001219', ['DNA-binding transcription repressor activity']),\n ('GO:0001217', ['DNA-binding transcription repressor activity']),\n ('GO:0001220', ['DNA-binding transcription repressor activity']),\n ('GO:0001141', ['DNA-binding transcription repressor activity']),\n ('GO:0001218', ['DNA-binding transcription repressor activity']),\n ('GO:0001221', ['transcription coregulator binding']),\n ('GO:0001224', ['transcription coregulator binding']),\n ('GO:0001222', ['transcription corepressor binding']),\n 
('GO:0001226', ['transcription corepressor binding']),\n ('GO:0001223', ['transcription coactivator binding']),\n ('GO:0001225', ['transcription coactivator binding']),\n ('GO:0001206',\n ['DNA-binding transcription repressor activity, RNA polymerase II-specific']),\n ('GO:0001078',\n ['DNA-binding transcription repressor activity, RNA polymerase II-specific']),\n ('GO:0001227',\n ['DNA-binding transcription repressor activity, RNA polymerase II-specific']),\n ('GO:0001210',\n ['DNA-binding transcription repressor activity, RNA polymerase II-specific']),\n ('GO:0001214',\n ['DNA-binding transcription repressor activity, RNA polymerase II-specific']),\n ('GO:0001228',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0001211',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0001212',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0001209',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0001077',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0001213',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0001205',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0055027', ['chlamydospore formation']),\n ('GO:0001410', ['chlamydospore formation']),\n ('GO:0001562', ['response to protozoan']),\n ('GO:0042833', ['response to protozoan']),\n ('GO:0001588',\n ['dopamine neurotransmitter receptor activity, coupled via Gs']),\n ('GO:0001589',\n ['dopamine neurotransmitter receptor activity, coupled via Gs']),\n ('GO:0001590',\n ['dopamine neurotransmitter receptor activity, coupled via Gs']),\n ('GO:0001593',\n ['dopamine neurotransmitter receptor activity, coupled via Gi/Go']),\n ('GO:0001670',\n ['dopamine neurotransmitter receptor activity, coupled via Gi/Go']),\n ('GO:0001591',\n ['dopamine 
neurotransmitter receptor activity, coupled via Gi/Go']),\n ('GO:0001592',\n ['dopamine neurotransmitter receptor activity, coupled via Gi/Go']),\n ('GO:0001611', ['G protein-coupled adenosine receptor activity']),\n ('GO:0001612', ['G protein-coupled adenosine receptor activity']),\n ('GO:0001613', ['G protein-coupled adenosine receptor activity']),\n ('GO:0008501', ['G protein-coupled adenosine receptor activity']),\n ('GO:0001609', ['G protein-coupled adenosine receptor activity']),\n ('GO:0001610', ['G protein-coupled adenosine receptor activity']),\n ('GO:0001614', ['purinergic nucleotide receptor activity']),\n ('GO:0035586', ['purinergic nucleotide receptor activity']),\n ('GO:0001621', ['G protein-coupled ADP receptor activity']),\n ('GO:0045032', ['G protein-coupled ADP receptor activity']),\n ('GO:0001634',\n ['pituitary adenylate cyclase-activating polypeptide receptor activity']),\n ('GO:0016522',\n ['pituitary adenylate cyclase-activating polypeptide receptor activity']),\n ('GO:0001646', ['cAMP receptor activity']),\n ('GO:0001644', ['cAMP receptor activity']),\n ('GO:0001654', ['eye development']),\n ('GO:0042460', ['eye development']),\n ('GO:0043081', ['male germ cell nucleus']),\n ('GO:0001673', ['male germ cell nucleus']),\n ('GO:0043080', ['female germ cell nucleus']),\n ('GO:0001674', ['female germ cell nucleus']),\n ('GO:0001693', ['histamine biosynthetic process']),\n ('GO:0001694', ['histamine biosynthetic process']),\n ('GO:0001702', ['gastrulation with mouth forming second']),\n ('GO:0048276', ['gastrulation with mouth forming second']),\n ('GO:0010003', ['gastrulation with mouth forming second']),\n ('GO:0007457', ['insect visual primordium formation']),\n ('GO:0001744', ['insect visual primordium formation']),\n ('GO:0001748', ['insect visual primordium development']),\n ('GO:0048049', ['insect visual primordium development']),\n ('GO:0001752', ['compound eye photoreceptor fate commitment']),\n ('GO:0007459', ['compound eye photoreceptor 
fate commitment']),\n ('GO:0016069', ['type IV hypersensitivity']),\n ('GO:0001806', ['type IV hypersensitivity']),\n ('GO:0001816', ['cytokine production']),\n ('GO:0042107', ['cytokine production']),\n ('GO:0042089', ['cytokine production']),\n ('GO:0050663', ['cytokine production']),\n ('GO:0042032', ['cytokine production']),\n ('GO:0042035', ['regulation of cytokine production']),\n ('GO:0001817', ['regulation of cytokine production']),\n ('GO:0050707', ['regulation of cytokine production']),\n ('GO:0050710', ['negative regulation of cytokine production']),\n ('GO:0042036', ['negative regulation of cytokine production']),\n ('GO:0001818', ['negative regulation of cytokine production']),\n ('GO:0001819', ['positive regulation of cytokine production']),\n ('GO:0042108', ['positive regulation of cytokine production']),\n ('GO:0050715', ['positive regulation of cytokine production']),\n ('GO:0001679', ['neural tube formation']),\n ('GO:0001841', ['neural tube formation']),\n ('GO:0080087', ['(1->3)-beta-D-glucan binding']),\n ('GO:0001872', ['(1->3)-beta-D-glucan binding']),\n ('GO:0001942', ['hair follicle development']),\n ('GO:0001943', ['hair follicle development']),\n ('GO:0002003', ['angiotensin maturation']),\n ('GO:0002005', ['angiotensin maturation']),\n ('GO:1900920', ['regulation of L-glutamate import across plasma membrane']),\n ('GO:0002036', ['regulation of L-glutamate import across plasma membrane']),\n ('GO:0002037',\n ['negative regulation of L-glutamate import across plasma membrane']),\n ('GO:1900921',\n ['negative regulation of L-glutamate import across plasma membrane']),\n ('GO:1900922',\n ['positive regulation of L-glutamate import across plasma membrane']),\n ('GO:0002038',\n ['positive regulation of L-glutamate import across plasma membrane']),\n ('GO:0002046', ['opsin binding']),\n ('GO:0016030', ['opsin binding']),\n ('GO:0002162', ['dystroglycan binding']),\n ('GO:0002163', ['dystroglycan binding']),\n ('GO:0002166', ['dystroglycan 
binding']),\n ('GO:0002214', ['defense response to insect']),\n ('GO:0002213', ['defense response to insect']),\n ('GO:0002216', ['defense response to nematode']),\n ('GO:0002215', ['defense response to nematode']),\n ('GO:0002219', ['activation of innate immune response']),\n ('GO:0002218', ['activation of innate immune response']),\n ('GO:0002367', ['cytokine production involved in immune response']),\n ('GO:0002375', ['cytokine production involved in immune response']),\n ('GO:0002374', ['cytokine production involved in immune response']),\n ('GO:0002377', ['immunoglobulin production']),\n ('GO:0002378', ['immunoglobulin production']),\n ('GO:0048305', ['immunoglobulin production']),\n ('GO:0002381',\n ['immunoglobulin production involved in immunoglobulin-mediated immune response']),\n ('GO:0002379',\n ['immunoglobulin production involved in immunoglobulin-mediated immune response']),\n ('GO:0002380',\n ['immunoglobulin production involved in immunoglobulin-mediated immune response']),\n ('GO:0002422', ['mucosal immune response']),\n ('GO:0002386', ['mucosal immune response']),\n ('GO:0002385', ['mucosal immune response']),\n ('GO:0002535',\n ['platelet activating factor production involved in inflammatory response']),\n ('GO:0002392',\n ['platelet activating factor production involved in inflammatory response']),\n ('GO:0002390',\n ['platelet activating factor production involved in inflammatory response']),\n ('GO:0002391',\n ['platelet activating factor production involved in inflammatory response']),\n ('GO:0002443', ['leukocyte mediated immunity']),\n ('GO:0042087', ['leukocyte mediated immunity']),\n ('GO:0019723', ['leukocyte mediated immunity']),\n ('GO:0002637', ['regulation of immunoglobulin production']),\n ('GO:0002640', ['regulation of immunoglobulin production']),\n ('GO:0051023', ['regulation of immunoglobulin production']),\n ('GO:0002641', ['negative regulation of immunoglobulin production']),\n ('GO:0002638', ['negative regulation of 
immunoglobulin production']),\n ('GO:0051025', ['negative regulation of immunoglobulin production']),\n ('GO:0051024', ['positive regulation of immunoglobulin production']),\n ('GO:0002639', ['positive regulation of immunoglobulin production']),\n ('GO:0002642', ['positive regulation of immunoglobulin production']),\n ('GO:0002715', ['regulation of natural killer cell mediated immunity']),\n ('GO:0045845', ['regulation of natural killer cell mediated immunity']),\n ('GO:0030102',\n ['negative regulation of natural killer cell mediated immunity']),\n ('GO:0002716',\n ['negative regulation of natural killer cell mediated immunity']),\n ('GO:0002717',\n ['positive regulation of natural killer cell mediated immunity']),\n ('GO:0045846',\n ['positive regulation of natural killer cell mediated immunity']),\n ('GO:0002739',\n ['regulation of cytokine production involved in immune response']),\n ('GO:0002718',\n ['regulation of cytokine production involved in immune response']),\n ('GO:0002742',\n ['regulation of cytokine production involved in immune response']),\n ('GO:0002740',\n ['negative regulation of cytokine production involved in immune response']),\n ('GO:0002719',\n ['negative regulation of cytokine production involved in immune response']),\n ('GO:0002743',\n ['negative regulation of cytokine production involved in immune response']),\n ('GO:0002741',\n ['positive regulation of cytokine production involved in immune response']),\n ('GO:0002720',\n ['positive regulation of cytokine production involved in immune response']),\n ('GO:0002744',\n ['positive regulation of cytokine production involved in immune response']),\n ('GO:0002753',\n ['cytoplasmic pattern recognition receptor signaling pathway']),\n ('GO:0039528',\n ['cytoplasmic pattern recognition receptor signaling pathway']),\n ('GO:0010204', ['innate immune response-activating signaling pathway']),\n ('GO:0009870', ['innate immune response-activating signaling pathway']),\n ('GO:0002758', ['innate immune 
response-activating signaling pathway']),\n ('GO:0002949', ['tRNA threonylcarbamoyladenosine modification']),\n ('GO:0070526', ['tRNA threonylcarbamoyladenosine modification']),\n ('GO:0010802', ['respiratory system process']),\n ('GO:0003016', ['respiratory system process']),\n ('GO:0003124', ['epinephrine-mediated vasodilation']),\n ('GO:0003121', ['epinephrine-mediated vasodilation']),\n ('GO:0003123', ['epinephrine-mediated vasodilation']),\n ('GO:0003126', ['norepinephrine-mediated vasodilation']),\n ('GO:0003122', ['norepinephrine-mediated vasodilation']),\n ('GO:0003125', ['norepinephrine-mediated vasodilation']),\n ('GO:0036142', ['cilium movement']),\n ('GO:0003341', ['cilium movement']),\n ('GO:0003352', ['regulation of cilium movement']),\n ('GO:1900172', ['regulation of cilium movement']),\n ('GO:1900174', ['positive regulation of cilium movement']),\n ('GO:0003353', ['positive regulation of cilium movement']),\n ('GO:0003354', ['negative regulation of cilium movement']),\n ('GO:1900173', ['negative regulation of cilium movement']),\n ('GO:0036144', ['regulation of cilium beat frequency']),\n ('GO:0003356', ['regulation of cilium beat frequency']),\n ('GO:0003376', ['sphingosine-1-phosphate receptor signaling pathway']),\n ('GO:0001789', ['sphingosine-1-phosphate receptor signaling pathway']),\n ('GO:0003676', ['nucleic acid binding']),\n ('GO:0000496', ['nucleic acid binding']),\n ('GO:0003677', ['DNA binding']),\n ('GO:0043566', ['DNA binding']),\n ('GO:0003678', ['DNA helicase activity']),\n ('GO:0003679', ['DNA helicase activity']),\n ('GO:0004003', ['DNA helicase activity']),\n ('GO:0003689', ['DNA clamp loader activity']),\n ('GO:0033170', ['DNA clamp loader activity']),\n ('GO:0003697', ['single-stranded DNA binding']),\n ('GO:0003699', ['single-stranded DNA binding']),\n ('GO:0003698', ['single-stranded DNA binding']),\n ('GO:0001071', ['DNA-binding transcription factor activity']),\n ('GO:0001199', ['DNA-binding transcription factor 
activity']),\n ('GO:0001151', ['DNA-binding transcription factor activity']),\n ('GO:0001130', ['DNA-binding transcription factor activity']),\n ('GO:0001204', ['DNA-binding transcription factor activity']),\n ('GO:0003700', ['DNA-binding transcription factor activity']),\n ('GO:0001131', ['DNA-binding transcription factor activity']),\n ('GO:0000130', ['DNA-binding transcription factor activity']),\n ('GO:0016455', ['transcription coregulator activity']),\n ('GO:0001104', ['transcription coregulator activity']),\n ('GO:0003712', ['transcription coregulator activity']),\n ('GO:0003713', ['transcription coactivator activity']),\n ('GO:0001105', ['transcription coactivator activity']),\n ('GO:0001106', ['transcription corepressor activity']),\n ('GO:0003714', ['transcription corepressor activity']),\n ('GO:0000498', ['RNA binding']),\n ('GO:0003723', ['RNA binding']),\n ('GO:0044822', ['RNA binding']),\n ('GO:0004004', ['RNA helicase activity']),\n ('GO:0003724', ['RNA helicase activity']),\n ('GO:0003971', ['double-stranded RNA adenosine deaminase activity']),\n ('GO:0003726', ['double-stranded RNA adenosine deaminase activity']),\n ('GO:0003728', ['single-stranded RNA binding']),\n ('GO:0003727', ['single-stranded RNA binding']),\n ('GO:0003729', ['mRNA binding']),\n ('GO:0000499', ['mRNA binding']),\n ('GO:0003740', ['structural constituent of ribosome']),\n ('GO:0003736', ['structural constituent of ribosome']),\n ('GO:0003742', ['structural constituent of ribosome']),\n ('GO:0003739', ['structural constituent of ribosome']),\n ('GO:0003741', ['structural constituent of ribosome']),\n ('GO:0003735', ['structural constituent of ribosome']),\n ('GO:0003737', ['structural constituent of ribosome']),\n ('GO:0003738', ['structural constituent of ribosome']),\n ('GO:0003745', ['translation initiation factor activity']),\n ('GO:0003744', ['translation initiation factor activity']),\n ('GO:0003743', ['translation initiation factor activity']),\n ('GO:0003746', 
['translation elongation factor activity']),\n ('GO:0008183', ['translation elongation factor activity']),\n ('GO:0008182', ['translation elongation factor activity']),\n ('GO:0003747', ['translation release factor activity']),\n ('GO:0003748', ['translation release factor activity']),\n ('GO:0003749', ['translation release factor activity']),\n ('GO:0003755', ['peptidyl-prolyl cis-trans isomerase activity']),\n ('GO:0004752', ['peptidyl-prolyl cis-trans isomerase activity']),\n ('GO:0042028', ['peptidyl-prolyl cis-trans isomerase activity']),\n ('GO:0003756', ['protein disulfide isomerase activity']),\n ('GO:0006467', ['protein disulfide isomerase activity']),\n ('GO:0003777', ['microtubule motor activity']),\n ('GO:1990939', ['microtubule motor activity']),\n ('GO:0003838', ['sterol 24-C-methyltransferase activity']),\n ('GO:0102101', ['sterol 24-C-methyltransferase activity']),\n ('GO:0003841', ['1-acylglycerol-3-phosphate O-acyltransferase activity']),\n ('GO:0004469', ['1-acylglycerol-3-phosphate O-acyltransferase activity']),\n ('GO:0009981', ['1,3-beta-D-glucan synthase activity']),\n ('GO:0003843', ['1,3-beta-D-glucan synthase activity']),\n ('GO:0047119', ['2-methylbutanoyl-CoA dehydrogenase activity']),\n ('GO:0003853', ['2-methylbutanoyl-CoA dehydrogenase activity']),\n ('GO:0003863',\n ['3-methyl-2-oxobutanoate dehydrogenase (2-methylpropanoyl-transferring) activity']),\n ('GO:0003826',\n ['3-methyl-2-oxobutanoate dehydrogenase (2-methylpropanoyl-transferring) activity']),\n ('GO:0046913', ['ATP citrate synthase activity']),\n ('GO:0003878', ['ATP citrate synthase activity']),\n ('GO:0003886', ['DNA (cytosine-5-)-methyltransferase activity']),\n ('GO:0008326', ['DNA (cytosine-5-)-methyltransferase activity']),\n ('GO:0016448', ['DNA-directed DNA polymerase activity']),\n ('GO:0016452', ['DNA-directed DNA polymerase activity']),\n ('GO:0016449', ['DNA-directed DNA polymerase activity']),\n ('GO:0003893', ['DNA-directed DNA polymerase activity']),\n 
('GO:0019984', ['DNA-directed DNA polymerase activity']),\n ('GO:0003889', ['DNA-directed DNA polymerase activity']),\n ('GO:0003894', ['DNA-directed DNA polymerase activity']),\n ('GO:0016451', ['DNA-directed DNA polymerase activity']),\n ('GO:0003895', ['DNA-directed DNA polymerase activity']),\n ('GO:0016000', ['DNA-directed DNA polymerase activity']),\n ('GO:0016450', ['DNA-directed DNA polymerase activity']),\n ('GO:0008723', ['DNA-directed DNA polymerase activity']),\n ('GO:0003887', ['DNA-directed DNA polymerase activity']),\n ('GO:0003890', ['DNA-directed DNA polymerase activity']),\n ('GO:0015999', ['DNA-directed DNA polymerase activity']),\n ('GO:0003891', ['DNA-directed DNA polymerase activity']),\n ('GO:0003888', ['DNA-directed DNA polymerase activity']),\n ('GO:0003897', ['DNA primase activity']),\n ('GO:0003896', ['DNA primase activity']),\n ('GO:0003898', ['DNA primase activity']),\n ('GO:0003899', [\"DNA-directed 5'-3' RNA polymerase activity\"]),\n ('GO:0000129', [\"DNA-directed 5'-3' RNA polymerase activity\"]),\n ('GO:0003905', ['alkylbase DNA N-glycosylase activity']),\n ('GO:0004036', ['alkylbase DNA N-glycosylase activity']),\n ('GO:0009387', ['DNA topoisomerase activity']),\n ('GO:0003916', ['DNA topoisomerase activity']),\n ('GO:0003918',\n ['DNA topoisomerase type II (double strand cut, ATP-hydrolyzing) activity']),\n ('GO:0061505',\n ['DNA topoisomerase type II (double strand cut, ATP-hydrolyzing) activity']),\n ('GO:0003924', ['GTPase activity']),\n ('GO:0061745', ['GTPase activity']),\n ('GO:0003927', ['G protein activity']),\n ('GO:0003925', ['G protein activity']),\n ('GO:0016660', ['NADPH dehydrogenase activity']),\n ('GO:0008468', ['NADPH dehydrogenase activity']),\n ('GO:0003959', ['NADPH dehydrogenase activity']),\n ('GO:0019282', ['O-acetylhomoserine aminocarboxypropyltransferase activity']),\n ('GO:0003961', ['O-acetylhomoserine aminocarboxypropyltransferase activity']),\n ('GO:0000505', ['cystathionine gamma-synthase 
activity']),\n ('GO:0003962', ['cystathionine gamma-synthase activity']),\n ('GO:0003973', ['(S)-2-hydroxy-acid oxidase activity']),\n ('GO:0052853', ['(S)-2-hydroxy-acid oxidase activity']),\n ('GO:0052854', ['(S)-2-hydroxy-acid oxidase activity']),\n ('GO:0008891', ['(S)-2-hydroxy-acid oxidase activity']),\n ('GO:0052852', ['(S)-2-hydroxy-acid oxidase activity']),\n ('GO:0003992',\n ['N2-acetyl-L-ornithine:2-oxoglutarate 5-aminotransferase activity']),\n ('GO:0047318',\n ['N2-acetyl-L-ornithine:2-oxoglutarate 5-aminotransferase activity']),\n ('GO:0003994', ['aconitate hydratase activity']),\n ('GO:0052632', ['aconitate hydratase activity']),\n ('GO:0019109', ['acyl-CoA dehydrogenase activity']),\n ('GO:0003995', ['acyl-CoA dehydrogenase activity']),\n ('GO:0004031', ['aldehyde oxidase activity']),\n ('GO:0050250', ['aldehyde oxidase activity']),\n ('GO:0004045', ['aminoacyl-tRNA hydrolase activity']),\n ('GO:0019851', ['aminoacyl-tRNA hydrolase activity']),\n ('GO:0019850', ['aminoacyl-tRNA hydrolase activity']),\n ('GO:0004057', ['arginyl-tRNA--protein transferase activity']),\n ('GO:0042172', ['arginyl-tRNA--protein transferase activity']),\n ('GO:0016400', ['aromatic-L-amino-acid decarboxylase activity']),\n ('GO:0004058', ['aromatic-L-amino-acid decarboxylase activity']),\n ('GO:0004094', ['carnitine O-acetyltransferase activity']),\n ('GO:0004092', ['carnitine O-acetyltransferase activity']),\n ('GO:0004093', ['carnitine O-acetyltransferase activity']),\n ('GO:0016952', ['catalase activity']),\n ('GO:0004096', ['catalase activity']),\n ('GO:0016953', ['catalase activity']),\n ('GO:0102316', ['catechol oxidase activity']),\n ('GO:0036264', ['catechol oxidase activity']),\n ('GO:0004097', ['catechol oxidase activity']),\n ('GO:0036263', ['catechol oxidase activity']),\n ('GO:0016225', ['cystathionine gamma-lyase activity']),\n ('GO:0004123', ['cystathionine gamma-lyase activity']),\n ('GO:0004165', ['delta(3)-delta(2)-enoyl-CoA isomerase activity']),\n 
('GO:0008461', ['delta(3)-delta(2)-enoyl-CoA isomerase activity']),\n ('GO:0004101',\n ['dolichyl-phosphate alpha-N-acetylglucosaminyltransferase activity']),\n ('GO:0004166',\n ['dolichyl-phosphate alpha-N-acetylglucosaminyltransferase activity']),\n ('GO:0048059', ['dopachrome isomerase activity']),\n ('GO:0004167', ['dopachrome isomerase activity']),\n ('GO:0004175', ['endopeptidase activity']),\n ('GO:0016809', ['endopeptidase activity']),\n ('GO:0004176', ['ATP-dependent peptidase activity']),\n ('GO:0004280', ['ATP-dependent peptidase activity']),\n ('GO:0016510', ['enoyl-CoA hydratase activity']),\n ('GO:0004300', ['enoyl-CoA hydratase activity']),\n ('GO:0033817', ['3-oxoacyl-[acyl-carrier-protein] synthase activity']),\n ('GO:0004315', ['3-oxoacyl-[acyl-carrier-protein] synthase activity']),\n ('GO:0004328', ['formamidase activity']),\n ('GO:0034566', ['formamidase activity']),\n ('GO:0004330', ['fructose-2,6-bisphosphate 2-phosphatase activity']),\n ('GO:0004331', ['fructose-2,6-bisphosphate 2-phosphatase activity']),\n ('GO:0008708', ['glucose dehydrogenase activity']),\n ('GO:0004344', ['glucose dehydrogenase activity']),\n ('GO:0004350', ['glutamate-5-semialdehyde dehydrogenase activity']),\n ('GO:0001513', ['glutamate-5-semialdehyde dehydrogenase activity']),\n ('GO:0004580', ['glycolipid mannosyltransferase activity']),\n ('GO:0004376', ['glycolipid mannosyltransferase activity']),\n ('GO:0004379', ['glycylpeptide N-tetradecanoyltransferase activity']),\n ('GO:0019106', ['glycylpeptide N-tetradecanoyltransferase activity']),\n ('GO:0008026', ['helicase activity']),\n ('GO:0004386', ['helicase activity']),\n ('GO:0043166', ['histone acetyltransferase activity']),\n ('GO:0004403', ['histone acetyltransferase activity']),\n ('GO:0046971', ['histone acetyltransferase activity']),\n ('GO:0004405', ['histone acetyltransferase activity']),\n ('GO:0004406', ['histone acetyltransferase activity']),\n ('GO:0004404', ['histone acetyltransferase activity']),\n 
('GO:0004402', ['histone acetyltransferase activity']),\n ('GO:0004420', ['hydroxymethylglutaryl-CoA reductase (NADPH) activity']),\n ('GO:0042282', ['hydroxymethylglutaryl-CoA reductase (NADPH) activity']),\n ('GO:0004438', ['phosphatidylinositol-3-phosphate phosphatase activity']),\n ('GO:0016315', ['phosphatidylinositol-3-phosphate phosphatase activity']),\n ('GO:0004439',\n ['phosphatidylinositol-4,5-bisphosphate 5-phosphatase activity']),\n ('GO:0001668',\n ['phosphatidylinositol-4,5-bisphosphate 5-phosphatase activity']),\n ('GO:0004467', ['long-chain fatty acid-CoA ligase activity']),\n ('GO:0003996', ['long-chain fatty acid-CoA ligase activity']),\n ('GO:0016619', ['malate dehydrogenase (decarboxylating) (NAD+) activity']),\n ('GO:0004472', ['malate dehydrogenase (decarboxylating) (NAD+) activity']),\n ('GO:0004471', ['malate dehydrogenase (decarboxylating) (NAD+) activity']),\n ('GO:0001718', ['methionyl-tRNA formyltransferase activity']),\n ('GO:0004479', ['methionyl-tRNA formyltransferase activity']),\n ('GO:0070128', ['methionyl-tRNA formyltransferase activity']),\n ('GO:0008702', ['methylenetetrahydrofolate reductase (NAD(P)H) activity']),\n ('GO:0004489', ['methylenetetrahydrofolate reductase (NAD(P)H) activity']),\n ('GO:0047076', ['N,N-dimethylaniline monooxygenase activity']),\n ('GO:0004499', ['N,N-dimethylaniline monooxygenase activity']),\n ('GO:0004524', ['RNA-DNA hybrid ribonuclease activity']),\n ('GO:0004523', ['RNA-DNA hybrid ribonuclease activity']),\n ('GO:0004527', ['exonuclease activity']),\n ('GO:0008857', ['exonuclease activity']),\n ('GO:0004536', ['DNA nuclease activity']),\n ('GO:0004537', ['DNA nuclease activity']),\n ('GO:0004553', ['hydrolase activity, hydrolyzing O-glycosyl compounds']),\n ('GO:0016800', ['hydrolase activity, hydrolyzing O-glycosyl compounds']),\n ('GO:0004556', ['alpha-amylase activity']),\n ('GO:0103025', ['alpha-amylase activity']),\n ('GO:0016982', ['alpha-1,4-glucosidase activity']),\n ('GO:0004562', 
['alpha-1,4-glucosidase activity']),\n ('GO:0004558', ['alpha-1,4-glucosidase activity']),\n ('GO:0016693', ['peroxidase activity']),\n ('GO:0016687', ['peroxidase activity']),\n ('GO:0016685', ['peroxidase activity']),\n ('GO:0016686', ['peroxidase activity']),\n ('GO:0004601', ['peroxidase activity']),\n ('GO:0016224', ['glutathione peroxidase activity']),\n ('GO:0004602', ['glutathione peroxidase activity']),\n ('GO:0004615', ['phosphomannomutase activity']),\n ('GO:0008971', ['phosphomannomutase activity']),\n ('GO:0004622', ['lysophospholipase activity']),\n ('GO:0045126', ['lysophospholipase activity']),\n ('GO:0004623', ['phospholipase A2 activity']),\n ('GO:0102568', ['phospholipase A2 activity']),\n ('GO:0102567', ['phospholipase A2 activity']),\n ('GO:0004629', ['phospholipase C activity']),\n ('GO:0042298', ['phospholipase C activity']),\n ('GO:0004646',\n ['O-phospho-L-serine:2-oxoglutarate aminotransferase activity']),\n ('GO:0004648',\n ['O-phospho-L-serine:2-oxoglutarate aminotransferase activity']),\n ('GO:0018223', ['protein farnesyltransferase activity']),\n ('GO:0004660', ['protein farnesyltransferase activity']),\n ('GO:0004661', ['protein geranylgeranyltransferase activity']),\n ('GO:0018224', ['protein geranylgeranyltransferase activity']),\n ('GO:0018225',\n ['protein C-terminal S-isoprenylcysteine carboxyl O-methyltransferase activity']),\n ('GO:0004671',\n ['protein C-terminal S-isoprenylcysteine carboxyl O-methyltransferase activity']),\n ('GO:0004672', ['protein kinase activity']),\n ('GO:0050222', ['protein kinase activity']),\n ('GO:0008896', ['protein histidine kinase activity']),\n ('GO:0004673', ['protein histidine kinase activity']),\n ('GO:0004700', ['protein serine/threonine kinase activity']),\n ('GO:0004695', ['protein serine/threonine kinase activity']),\n ('GO:0106311', ['protein serine/threonine kinase activity']),\n ('GO:0004696', ['protein serine/threonine kinase activity']),\n ('GO:0004674', ['protein serine/threonine 
kinase activity']),\n ('GO:0004685', ['calmodulin-dependent protein kinase activity']),\n ('GO:0004684', ['calmodulin-dependent protein kinase activity']),\n ('GO:0004688', ['calmodulin-dependent protein kinase activity']),\n ('GO:0004683', ['calmodulin-dependent protein kinase activity']),\n ('GO:0008606', ['phosphorylase kinase activity']),\n ('GO:0004689', ['phosphorylase kinase activity']),\n ('GO:0004691', ['cAMP-dependent protein kinase activity']),\n ('GO:0008602', ['cAMP-dependent protein kinase activity']),\n ('GO:0016537', ['cyclin-dependent protein serine/threonine kinase activity']),\n ('GO:0004693', ['cyclin-dependent protein serine/threonine kinase activity']),\n ('GO:0004697', ['diacylglycerol-dependent serine/threonine kinase activity']),\n ('GO:0004701', ['diacylglycerol-dependent serine/threonine kinase activity']),\n ('GO:0004678', ['G protein-coupled receptor kinase activity']),\n ('GO:0004703', ['G protein-coupled receptor kinase activity']),\n ('GO:0004707', ['MAP kinase activity']),\n ('GO:0016908', ['MAP kinase activity']),\n ('GO:0008338', ['MAP kinase activity']),\n ('GO:0016909', ['MAP kinase activity']),\n ('GO:0008339', ['MAP kinase activity']),\n ('GO:0004709', ['MAP kinase kinase kinase activity']),\n ('GO:0004710', ['MAP kinase kinase kinase activity']),\n ('GO:0004713', ['protein tyrosine kinase activity']),\n ('GO:0004718', ['protein tyrosine kinase activity']),\n ('GO:0018056', ['protein-lysine 6-oxidase activity']),\n ('GO:0004720', ['protein-lysine 6-oxidase activity']),\n ('GO:0106306', ['protein serine/threonine phosphatase activity']),\n ('GO:0004722', ['protein serine/threonine phosphatase activity']),\n ('GO:0004724', ['protein serine/threonine phosphatase activity']),\n ('GO:0000158', ['protein serine/threonine phosphatase activity']),\n ('GO:0015071', ['protein serine/threonine phosphatase activity']),\n ('GO:0030358', ['protein serine/threonine phosphatase activity']),\n ('GO:0030361', ['protein serine/threonine 
phosphatase activity']),\n ('GO:0106307', ['protein serine/threonine phosphatase activity']),\n ('GO:0030357', ['protein serine/threonine phosphatase activity']),\n ('GO:0000163', ['protein serine/threonine phosphatase activity']),\n ('GO:0008598', ['protein serine/threonine phosphatase activity']),\n ('GO:0008600', ['protein serine/threonine phosphatase activity']),\n ('GO:0030360', ['protein serine/threonine phosphatase activity']),\n ('GO:0004723',\n ['calcium-dependent protein serine/threonine phosphatase activity']),\n ('GO:0008596',\n ['calcium-dependent protein serine/threonine phosphatase activity']),\n ('GO:0004741',\n ['[pyruvate dehydrogenase (acetyl-transferring)]-phosphatase activity']),\n ('GO:0019906',\n ['[pyruvate dehydrogenase (acetyl-transferring)]-phosphatase activity']),\n ('GO:0030523', ['dihydrolipoyllysine-residue acetyltransferase activity']),\n ('GO:0004742', ['dihydrolipoyllysine-residue acetyltransferase activity']),\n ('GO:0016960',\n ['ribonucleoside-diphosphate reductase activity, thioredoxin disulfide as acceptor']),\n ('GO:0016961',\n ['ribonucleoside-diphosphate reductase activity, thioredoxin disulfide as acceptor']),\n ('GO:0004748',\n ['ribonucleoside-diphosphate reductase activity, thioredoxin disulfide as acceptor']),\n ('GO:0016959',\n ['ribonucleoside-diphosphate reductase activity, thioredoxin disulfide as acceptor']),\n ('GO:0004760', ['serine-pyruvate transaminase activity']),\n ('GO:0004763', ['serine-pyruvate transaminase activity']),\n ('GO:0004761', ['serine-pyruvate transaminase activity']),\n ('GO:0004762', ['serine-pyruvate transaminase activity']),\n ('GO:0004767', ['sphingomyelin phosphodiesterase activity']),\n ('GO:0030231', ['sphingomyelin phosphodiesterase activity']),\n ('GO:0030230', ['sphingomyelin phosphodiesterase activity']),\n ('GO:0043735', ['stearoyl-CoA 9-desaturase activity']),\n ('GO:0004768', ['stearoyl-CoA 9-desaturase activity']),\n ('GO:0016214', ['stearoyl-CoA 9-desaturase activity']),\n 
('GO:0004772', ['sterol O-acyltransferase activity']),\n ('GO:0017066', ['sterol O-acyltransferase activity']),\n ('GO:0004777', ['succinate-semialdehyde dehydrogenase (NAD+) activity']),\n ('GO:0008952', ['succinate-semialdehyde dehydrogenase (NAD+) activity']),\n ('GO:0008383', ['superoxide dismutase activity']),\n ('GO:0008382', ['superoxide dismutase activity']),\n ('GO:0004784', ['superoxide dismutase activity']),\n ('GO:0004785', ['superoxide dismutase activity']),\n ('GO:0016954', ['superoxide dismutase activity']),\n ('GO:0004796', ['thromboxane-A synthase activity']),\n ('GO:0008400', ['thromboxane-A synthase activity']),\n ('GO:0004803', ['transposase activity']),\n ('GO:0004804', ['transposase activity']),\n ('GO:0016425',\n ['tRNA (5-methylaminomethyl-2-thiouridylate)(34)-methyltransferase activity']),\n ('GO:0004808',\n ['tRNA (5-methylaminomethyl-2-thiouridylate)(34)-methyltransferase activity']),\n ('GO:0004810', ['CCA tRNA nucleotidyltransferase activity']),\n ('GO:0016437', ['CCA tRNA nucleotidyltransferase activity']),\n ('GO:0017100', ['aminoacyl-tRNA ligase activity']),\n ('GO:0004812', ['aminoacyl-tRNA ligase activity']),\n ('GO:0016876', ['aminoacyl-tRNA ligase activity']),\n ('GO:0004833', ['tryptophan 2,3-dioxygenase activity']),\n ('GO:0004426', ['tryptophan 2,3-dioxygenase activity']),\n ('GO:0004842', ['ubiquitin-protein transferase activity']),\n ('GO:0004841', ['ubiquitin-protein transferase activity']),\n ('GO:0004840', ['ubiquitin-protein transferase activity']),\n ('GO:0036459', ['cysteine-type deubiquitinase activity']),\n ('GO:0004843', ['cysteine-type deubiquitinase activity']),\n ('GO:0004857', ['enzyme inhibitor activity']),\n ('GO:0048551', ['enzyme inhibitor activity']),\n ('GO:0004864', ['protein phosphatase inhibitor activity']),\n ('GO:1990681', ['protein phosphatase inhibitor activity']),\n ('GO:0004869', ['cysteine-type endopeptidase inhibitor activity']),\n ('GO:0004870', ['cysteine-type endopeptidase inhibitor 
activity']),\n ('GO:0004875', ['complement receptor activity']),\n ('GO:0004942', ['complement receptor activity']),\n ('GO:0004943', ['complement component C3a receptor activity']),\n ('GO:0004876', ['complement component C3a receptor activity']),\n ('GO:0004944', ['complement component C5a receptor activity']),\n ('GO:0004878', ['complement component C5a receptor activity']),\n ('GO:0004880', ['nuclear receptor activity']),\n ('GO:0004884', ['nuclear receptor activity']),\n ('GO:0004886', ['nuclear receptor activity']),\n ('GO:0038050', ['nuclear receptor activity']),\n ('GO:0004882', ['nuclear receptor activity']),\n ('GO:0004887', ['nuclear receptor activity']),\n ('GO:0038051', ['nuclear receptor activity']),\n ('GO:0003708', ['nuclear receptor activity']),\n ('GO:0038052', ['nuclear receptor activity']),\n ('GO:0004879', ['nuclear receptor activity']),\n ('GO:0008434', ['nuclear receptor activity']),\n ('GO:0004888', ['transmembrane signaling receptor activity']),\n ('GO:0004926', ['transmembrane signaling receptor activity']),\n ('GO:0099600', ['transmembrane signaling receptor activity']),\n ('GO:0004907', ['cytokine receptor activity']),\n ('GO:0004896', ['cytokine receptor activity']),\n ('GO:0030525',\n ['granulocyte macrophage colony-stimulating factor receptor activity']),\n ('GO:0004901',\n ['granulocyte macrophage colony-stimulating factor receptor activity']),\n ('GO:0030524', ['granulocyte colony-stimulating factor receptor activity']),\n ('GO:0004902', ['granulocyte colony-stimulating factor receptor activity']),\n ('GO:0004909', ['interleukin-1, type I, activating receptor activity']),\n ('GO:0019967', ['interleukin-1, type I, activating receptor activity']),\n ('GO:0004910', ['interleukin-1, type II, blocking receptor activity']),\n ('GO:0019968', ['interleukin-1, type II, blocking receptor activity']),\n ('GO:0004923', ['leukemia inhibitory factor receptor activity']),\n ('GO:0004899', ['leukemia inhibitory factor receptor activity']),\n 
('GO:0001623', ['G protein-coupled receptor activity']),\n ('GO:0001622', ['G protein-coupled receptor activity']),\n ('GO:0016526', ['G protein-coupled receptor activity']),\n ('GO:0004930', ['G protein-coupled receptor activity']),\n ('GO:0001625', ['G protein-coupled receptor activity']),\n ('GO:0001624', ['G protein-coupled receptor activity']),\n ('GO:0001599', ['endothelin receptor activity']),\n ('GO:0004962', ['endothelin receptor activity']),\n ('GO:0001600', ['endothelin receptor activity']),\n ('GO:0004976', ['luteinizing hormone receptor activity']),\n ('GO:0004964', ['luteinizing hormone receptor activity']),\n ('GO:0016609', ['G protein-coupled serotonin receptor activity']),\n ('GO:0004993', ['G protein-coupled serotonin receptor activity']),\n ('GO:0001585', ['G protein-coupled serotonin receptor activity']),\n ('GO:0016931', ['vasopressin receptor activity']),\n ('GO:0005000', ['vasopressin receptor activity']),\n ('GO:0005023', ['epidermal growth factor receptor activity']),\n ('GO:0005006', ['epidermal growth factor receptor activity']),\n ('GO:0036326', ['vascular endothelial growth factor receptor activity']),\n ('GO:0036328', ['vascular endothelial growth factor receptor activity']),\n ('GO:0036327', ['vascular endothelial growth factor receptor activity']),\n ('GO:0005021', ['vascular endothelial growth factor receptor activity']),\n ('GO:0036329', ['vascular endothelial growth factor receptor activity']),\n ('GO:0036330', ['vascular endothelial growth factor receptor activity']),\n ('GO:0005031', ['tumor necrosis factor receptor activity']),\n ('GO:0005033', ['tumor necrosis factor receptor activity']),\n ('GO:0005032', ['tumor necrosis factor receptor activity']),\n ('GO:0005041', ['low-density lipoprotein particle receptor activity']),\n ('GO:0008032', ['low-density lipoprotein particle receptor activity']),\n ('GO:0005048', ['signal sequence binding']),\n ('GO:0008249', ['signal sequence binding']),\n ('GO:0005049', ['nuclear export 
signal receptor activity']),\n ('GO:0008262', ['nuclear export signal receptor activity']),\n ('GO:0005068',\n ['transmembrane receptor protein tyrosine kinase adaptor activity']),\n ('GO:0005069',\n ['transmembrane receptor protein tyrosine kinase adaptor activity']),\n ('GO:0097024', ['protein kinase C binding']),\n ('GO:0072569', ['protein kinase C binding']),\n ('GO:0005080', ['protein kinase C binding']),\n ('GO:0072568', ['protein kinase C binding']),\n ('GO:0005090', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0016220', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0016219', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0017132', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0017112', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0005086', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0005088', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0005087', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0008321', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0005089', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0008433', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0019839', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0030676', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0005085', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0017034', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0005096', ['GTPase activator activity']),\n ('GO:0008060', ['GTPase activator activity']),\n ('GO:0005099', ['GTPase activator activity']),\n ('GO:0005097', ['GTPase activator activity']),\n ('GO:0005098', ['GTPase activator activity']),\n ('GO:0030675', ['GTPase activator activity']),\n ('GO:0017123', ['GTPase activator activity']),\n ('GO:0005101', ['GTPase activator activity']),\n ('GO:0005100', ['GTPase activator activity']),\n ('GO:0046582', ['GTPase activator activity']),\n ('GO:0005104', ['fibroblast growth factor receptor 
binding']),\n ('GO:0005162', ['fibroblast growth factor receptor binding']),\n ('GO:0001521', ['fibroblast growth factor receptor binding']),\n ('GO:0005109', ['frizzled binding']),\n ('GO:0005110', ['frizzled binding']),\n ('GO:0005154', ['epidermal growth factor receptor binding']),\n ('GO:0008185', ['epidermal growth factor receptor binding']),\n ('GO:0005159', ['insulin-like growth factor receptor binding']),\n ('GO:0005067', ['insulin-like growth factor receptor binding']),\n ('GO:0005478', ['transporter activity']),\n ('GO:0005215', ['transporter activity']),\n ('GO:0008095',\n ['inositol 1,4,5-trisphosphate-gated calcium channel activity']),\n ('GO:0005220',\n ['inositol 1,4,5-trisphosphate-gated calcium channel activity']),\n ('GO:0005243', ['gap junction channel activity']),\n ('GO:0015285', ['gap junction channel activity']),\n ('GO:0015286', ['gap junction channel activity']),\n ('GO:0015270', ['voltage-gated calcium channel activity']),\n ('GO:0010173', ['voltage-gated calcium channel activity']),\n ('GO:0005245', ['voltage-gated calcium channel activity']),\n ('GO:0005224', ['intracellularly ATP-gated chloride channel activity']),\n ('GO:0005260', ['intracellularly ATP-gated chloride channel activity']),\n ('GO:0005261', ['monoatomic cation channel activity']),\n ('GO:0015338', ['monoatomic cation channel activity']),\n ('GO:0015281', ['monoatomic cation channel activity']),\n ('GO:0015206', ['allantoin:proton symporter activity']),\n ('GO:0005274', ['allantoin:proton symporter activity']),\n ('GO:0005279', ['amine transmembrane transporter activity']),\n ('GO:0005275', ['amine transmembrane transporter activity']),\n ('GO:0005283', ['amino acid:sodium symporter activity']),\n ('GO:0005284', ['amino acid:sodium symporter activity']),\n ('GO:0005285', ['amino acid:sodium symporter activity']),\n ('GO:0005295', ['neutral L-amino acid:sodium symporter activity']),\n ('GO:0005282', ['neutral L-amino acid:sodium symporter activity']),\n ('GO:0015508', 
['L-tyrosine transmembrane transporter activity']),\n ('GO:0005302', ['L-tyrosine transmembrane transporter activity']),\n ('GO:0005312', ['dicarboxylic acid transmembrane transporter activity']),\n ('GO:0015365', ['dicarboxylic acid transmembrane transporter activity']),\n ('GO:0005310', ['dicarboxylic acid transmembrane transporter activity']),\n ('GO:0005317', ['phosphate transmembrane transporter activity']),\n ('GO:1901677', ['phosphate transmembrane transporter activity']),\n ('GO:0005315', ['phosphate transmembrane transporter activity']),\n ('GO:0008562', ['long-chain fatty acid transmembrane transporter activity']),\n ('GO:0005324', ['long-chain fatty acid transmembrane transporter activity']),\n ('GO:0005325', ['long-chain fatty acid transmembrane transporter activity']),\n ('GO:0005329', ['dopamine:sodium symporter activity']),\n ('GO:0005330', ['dopamine:sodium symporter activity']),\n ('GO:0005333', ['norepinephrine:sodium symporter activity']),\n ('GO:0005334', ['norepinephrine:sodium symporter activity']),\n ('GO:0005336', ['serotonin:sodium:chloride symporter activity']),\n ('GO:0005335', ['serotonin:sodium:chloride symporter activity']),\n ('GO:0015222', ['serotonin:sodium:chloride symporter activity']),\n ('GO:0005338', ['nucleotide-sugar transmembrane transporter activity']),\n ('GO:0005339', ['nucleotide-sugar transmembrane transporter activity']),\n ('GO:0005340', ['nucleotide-sulfate transmembrane transporter activity']),\n ('GO:0005341', ['nucleotide-sulfate transmembrane transporter activity']),\n ('GO:0015033', ['oxygen carrier activity']),\n ('GO:0005344', ['oxygen carrier activity']),\n ('GO:0005348', ['ATP transmembrane transporter activity']),\n ('GO:0005347', ['ATP transmembrane transporter activity']),\n ('GO:0005403', ['carbohydrate:proton symporter activity']),\n ('GO:0015542', ['carbohydrate:proton symporter activity']),\n ('GO:0005351', ['carbohydrate:proton symporter activity']),\n ('GO:0005353', ['fructose transmembrane 
transporter activity']),\n ('GO:0019192', ['fructose transmembrane transporter activity']),\n ('GO:0015585', ['fructose transmembrane transporter activity']),\n ('GO:0005355', ['glucose transmembrane transporter activity']),\n ('GO:0015579', ['glucose transmembrane transporter activity']),\n ('GO:0005356', ['glucose:proton symporter activity']),\n ('GO:0005361', ['glucose:proton symporter activity']),\n ('GO:0005363', ['maltose transmembrane transporter activity']),\n ('GO:0015581', ['maltose transmembrane transporter activity']),\n ('GO:0005371',\n ['tricarboxylate secondary active transmembrane transporter activity']),\n ('GO:0005370',\n ['tricarboxylate secondary active transmembrane transporter activity']),\n ('GO:0005379', ['copper ion transmembrane transporter activity']),\n ('GO:0015088', ['copper ion transmembrane transporter activity']),\n ('GO:0005378', ['copper ion transmembrane transporter activity']),\n ('GO:0005375', ['copper ion transmembrane transporter activity']),\n ('GO:0005380', ['copper ion transmembrane transporter activity']),\n ('GO:0016033', ['iron ion transmembrane transporter activity']),\n ('GO:0005382', ['iron ion transmembrane transporter activity']),\n ('GO:0097689', ['iron ion transmembrane transporter activity']),\n ('GO:0005381', ['iron ion transmembrane transporter activity']),\n ('GO:0008522', ['nucleoside:sodium symporter activity']),\n ('GO:0005415', ['nucleoside:sodium symporter activity']),\n ('GO:0005436', ['sodium:phosphate symporter activity']),\n ('GO:0015321', ['sodium:phosphate symporter activity']),\n ('GO:0005349', ['ATP:ADP antiporter activity']),\n ...]In\u00a0[19]: Copied!
[(term.ID,term.name) for term in go if not term.parents and term.children]\n[(term.ID,term.name) for term in go if not term.parents and term.children] Out[19]:
[('GO:0005554', ['molecular_function']),\n ('GO:0008372', ['cellular_component']),\n ('GO:0044699', ['biological_process'])]In\u00a0[20]: Copied!
go['GO:0005554'].__dict__\ngo['GO:0005554'].__dict__ Out[20]:
{'_ID': 'GO:0005554',\n '_original_ID': 'GO:0005554',\n '_container': <picea.ontology.Ontology at 0x7f1376f978b0>,\n '_children': ['GO:0003774',\n 'GO:0003824',\n 'GO:0005198',\n 'GO:0005478',\n 'GO:0005215',\n 'GO:0005488',\n 'GO:0009054',\n 'GO:0009053',\n 'GO:0009055',\n 'GO:0016209',\n 'GO:0038024',\n 'GO:0044183',\n 'GO:0045182',\n 'GO:0045735',\n 'GO:0060089',\n 'GO:0060090',\n 'GO:0032947',\n 'GO:0090729',\n 'GO:0050827',\n 'GO:0098772',\n 'GO:0140104',\n 'GO:0140110',\n 'GO:0140223',\n 'GO:0140299',\n 'GO:0140313',\n 'GO:0140489',\n 'GO:0140522',\n 'GO:0140657',\n 'GO:0140691',\n 'GO:0140776',\n 'GO:0140777',\n 'GO:0034290',\n 'GO:0140911',\n 'GO:0034292',\n 'GO:0034291',\n 'GO:0140912',\n 'GO:0141047',\n 'GO:0180020',\n 'GO:0180024'],\n '_parents': [],\n 'name': ['molecular_function'],\n 'def': ['\"A molecular process that can be carried out by the action of a single macromolecular machine, usually via direct physical interactions with other molecular entities. Function in this sense denotes an action, or activity, that a gene product (or a complex) performs.\" [GOC:pdt]'],\n 'alt_id': ['GO:0003674'],\n 'namespace': ['molecular_function'],\n 'comment': [\"Note that, in addition to forming the root of the molecular function ontology, this term is recommended for the annotation of gene products whose molecular function is unknown. When this term is used for annotation, it indicates that no information was available about the molecular function of the gene product annotated as of the date the annotation was made; the evidence code 'no data' (ND), is used to indicate this. Despite its name, this is not a type of 'function' in the sense typically defined by upper ontologies such as Basic Formal Ontology (BFO). 
It is instead a BFO:process carried out by a single gene product or complex.\"],\n 'subset': ['goslim_candida',\n 'goslim_chembl',\n 'goslim_metagenomics',\n 'goslim_pir',\n 'goslim_plant',\n 'goslim_yeast'],\n 'synonym': ['\"molecular function\" EXACT []']}In\u00a0[21]: Copied!
go['GO:0003674'].__dict__\ngo['GO:0003674'].__dict__ Out[21]:
{'_ID': 'GO:0005554',\n '_original_ID': 'GO:0005554',\n '_container': <picea.ontology.Ontology at 0x7f1376f978b0>,\n '_children': ['GO:0003774',\n 'GO:0003824',\n 'GO:0005198',\n 'GO:0005478',\n 'GO:0005215',\n 'GO:0005488',\n 'GO:0009054',\n 'GO:0009053',\n 'GO:0009055',\n 'GO:0016209',\n 'GO:0038024',\n 'GO:0044183',\n 'GO:0045182',\n 'GO:0045735',\n 'GO:0060089',\n 'GO:0060090',\n 'GO:0032947',\n 'GO:0090729',\n 'GO:0050827',\n 'GO:0098772',\n 'GO:0140104',\n 'GO:0140110',\n 'GO:0140223',\n 'GO:0140299',\n 'GO:0140313',\n 'GO:0140489',\n 'GO:0140522',\n 'GO:0140657',\n 'GO:0140691',\n 'GO:0140776',\n 'GO:0140777',\n 'GO:0034290',\n 'GO:0140911',\n 'GO:0034292',\n 'GO:0034291',\n 'GO:0140912',\n 'GO:0141047',\n 'GO:0180020',\n 'GO:0180024'],\n '_parents': [],\n 'name': ['molecular_function'],\n 'def': ['\"A molecular process that can be carried out by the action of a single macromolecular machine, usually via direct physical interactions with other molecular entities. Function in this sense denotes an action, or activity, that a gene product (or a complex) performs.\" [GOC:pdt]'],\n 'alt_id': ['GO:0003674'],\n 'namespace': ['molecular_function'],\n 'comment': [\"Note that, in addition to forming the root of the molecular function ontology, this term is recommended for the annotation of gene products whose molecular function is unknown. When this term is used for annotation, it indicates that no information was available about the molecular function of the gene product annotated as of the date the annotation was made; the evidence code 'no data' (ND), is used to indicate this. Despite its name, this is not a type of 'function' in the sense typically defined by upper ontologies such as Basic Formal Ontology (BFO). 
It is instead a BFO:process carried out by a single gene product or complex.\"],\n 'subset': ['goslim_candida',\n 'goslim_chembl',\n 'goslim_metagenomics',\n 'goslim_pir',\n 'goslim_plant',\n 'goslim_yeast'],\n 'synonym': ['\"molecular function\" EXACT []']}"},{"location":"examples/ontology.pct/","title":"Ontology.pct","text":"
This notebook shows how to work with biological ontologies such as the sequence ontology or the gene ontology.
In\u00a0[1]: Copied!import sys\n\nimport requests\n\nsys.path.insert(0, '../../')\nimport picea\n\npicea.__version__\nimport sys import requests sys.path.insert(0, '../../') import picea picea.__version__ Out[1]:
'0.0.26'In\u00a0[2]: Copied!
obo_url = (\n 'https://raw.githubusercontent.com/The-Sequence-Ontology/'\n 'SO-Ontologies/master/Ontology_Files/so.obo'\n)\nr = requests.get(obo_url)\nr\nobo_url = ( 'https://raw.githubusercontent.com/The-Sequence-Ontology/' 'SO-Ontologies/master/Ontology_Files/so.obo' ) r = requests.get(obo_url) r Out[2]:
<Response [200]>In\u00a0[3]: Copied!
r.text.split('\\n')[:100]\nr.text.split('\\n')[:100] Out[3]:
['format-version: 1.2',\n 'data-version: 2024-04-10',\n 'date: 10:04:2024 18:16',\n 'saved-by: Evan Christensen',\n 'subsetdef: Alliance_of_Genome_Resources \"Alliance of Genome Resources Gene Biotype Slim\"',\n 'subsetdef: biosapiens \"biosapiens protein feature ontology\"',\n 'subsetdef: DBVAR \"database of genomic structural variation\"',\n 'subsetdef: SOFA \"SO feature annotation\"',\n 'synonymtypedef: aa1 \"amino acid 1 letter code\"',\n 'synonymtypedef: aa3 \"amino acid 3 letter code\"',\n 'synonymtypedef: AAMOD \"amino acid modification\"',\n 'synonymtypedef: AGR \"Alliance of Genome Resources\"',\n 'synonymtypedef: BS \"biosapiens\"',\n 'synonymtypedef: dbsnp \"dbsnp variant terms\"',\n 'synonymtypedef: dbvar \"DBVAR\"',\n 'synonymtypedef: ebi_variants \"ensembl variant terms\"',\n 'synonymtypedef: RNAMOD \"RNA modification\" EXACT',\n 'synonymtypedef: VAR \"variant annotation term\"',\n 'default-namespace: sequence',\n 'ontology: so',\n 'property_value: IAO:0000700 SO:0000110',\n 'property_value: IAO:0000700 SO:0000400',\n 'property_value: IAO:0000700 SO:0001060',\n 'property_value: IAO:0000700 SO:0001260',\n '',\n '[Term]',\n 'id: SO:0000000',\n 'name: Sequence_Ontology',\n 'subset: SOFA',\n 'is_obsolete: true',\n '',\n '[Term]',\n 'id: SO:00000000002382',\n 'name: 5_prime_UTR_uORF_variant',\n 'def: \"A 5\\' UTR variant within an upstream open reading frame.\" [PMID:32461616, PMID:32926138]',\n 'comment: Added 10 Apr 2024 at the request of Sarah Hunt (EBI). See GitHub Issue #647.',\n 'is_a: SO:0001623 ! 5_prime_UTR_variant',\n 'created_by: evan',\n 'creation_date: 2024-04-10T17:49:03Z',\n '',\n '[Term]',\n 'id: SO:0000001',\n 'name: region',\n 'def: \"A sequence_feature with an extent greater than zero. A nucleotide region is composed of bases and a polypeptide region is composed of amino acids.\" [SO:ke]',\n 'subset: SOFA',\n 'synonym: \"sequence\" EXACT []',\n 'is_a: SO:0000110 ! 
sequence_feature',\n '',\n '[Term]',\n 'id: SO:00000010002382',\n 'name: 5_prime_UTR_uORF_stop_codon_variant',\n 'def: \"A 5\\' UTR variant where a stop codon in an upstream open reading frame is introduced, moved or lost.\" [PMID:32461616, PMID:32926138]',\n 'comment: Added 10 Apr 2024 at the request of Sarah Hunt (EBI). See GitHub Issue #622.',\n 'is_a: SO:00000000002382 ! 5_prime_UTR_uORF_variant',\n 'created_by: evan',\n 'creation_date: 2024-04-10T17:56:17Z',\n '',\n '[Term]',\n 'id: SO:0000002',\n 'name: sequence_secondary_structure',\n 'def: \"A folded sequence.\" [SO:ke]',\n 'synonym: \"INSDC_feature:misc_structure\" EXACT []',\n 'synonym: \"sequence secondary structure\" EXACT []',\n 'is_a: SO:0001411 ! biological_region',\n '',\n '[Term]',\n 'id: SO:00000020002382',\n 'name: 5_prime_UTR_uORF_frameshift_variant',\n 'def: \"A 5\\' UTR variant which disrupts the translation of an upstream open reading frame because the number of nucleotides inserted or deleted is not a multiple of three.\" [PMID:32461616, PMID:32926138]',\n 'comment: Added 10 Apr 2024 at the request of Sarah Hunt (EBI). See GitHub Issue #621.',\n 'synonym: \"uFrameshift (UTRannotator)\" EXACT []',\n 'is_a: SO:00000000002382 ! 5_prime_UTR_uORF_variant',\n 'created_by: evan',\n 'creation_date: 2024-04-10T17:58:40Z',\n '',\n '[Term]',\n 'id: SO:0000003',\n 'name: G_quartet',\n 'def: \"G-quartets are unusual nucleic acid structures consisting of a planar arrangement where each guanine is hydrogen bonded by hoogsteen pairing to another guanine in the quartet.\" [http://www.ncbi.nlm.nih.gov/pubmed/7919797?dopt=Abstract]',\n 'synonym: \"G quartet\" EXACT []',\n 'synonym: \"G tetrad\" EXACT []',\n 'synonym: \"G-quadruplex\" EXACT []',\n 'synonym: \"G-quartet\" EXACT []',\n 'synonym: \"G-tetrad\" EXACT []',\n 'synonym: \"G_quadruplex\" EXACT []',\n 'synonym: \"guanine tetrad\" EXACT []',\n 'xref: http://en.wikipedia.org/wiki/G-quadruplex \"wiki\"',\n 'is_a: SO:0000002 ! 
sequence_secondary_structure',\n '',\n '[Term]',\n 'id: SO:00000030002382',\n 'name: 5_prime_UTR_uORF_stop_codon_gain_variant',\n 'def: \"A 5\\' UTR variant where a premature stop codon is gained in an upstream open reading frame.\" [PMID:32461616, PMID:32926138]',\n 'comment: Added 10 Apr 2024 at the request of Sarah Hunt (EBI). See GitHub Issue #624.',\n 'synonym: \"uSTOP_gained\" EXACT [] {comment=\"UTRannotator\"}',\n 'is_a: SO:00000010002382 ! 5_prime_UTR_uORF_stop_codon_variant',\n 'created_by: evan',\n 'creation_date: 2024-04-10T18:01:42Z',\n '',\n '[Term]']In\u00a0[4]: Copied!
so = picea.Ontology.from_obo(string=r.text)\nso = picea.Ontology.from_obo(string=r.text) In\u00a0[5]: Copied!
ids = [el.ID for el in so['SO:0000866'].parents.elements]\nids = [el.ID for el in so['SO:0000866'].parents.elements] In\u00a0[6]: Copied!
'SO:0000866' in {el.ID for so_id in ids for el in so[so_id].children.elements}\n'SO:0000866' in {el.ID for so_id in ids for el in so[so_id].children.elements} Out[6]:
TrueIn\u00a0[7]: Copied!
len(so)\nlen(so) Out[7]:
2513In\u00a0[8]: Copied!
url = 'http://purl.obolibrary.org/obo/go.obo'\n# url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'\nr = requests.get(url)\ngo = picea.Ontology.from_obo(string=r.text)\nlen(go.elements)\nurl = 'http://purl.obolibrary.org/obo/go.obo' # url = 'http://purl.obolibrary.org/obo/go/go-basic.obo' r = requests.get(url) go = picea.Ontology.from_obo(string=r.text) len(go.elements)
/home/runner/work/picea/picea/picea/ontology.py:32: UserWarning: Accessed GO term by alt ID GO:0008150, returning main GO term with ID GO:0007582\n warnings.warn(f\"Accessed GO term by alt ID {ID}, \" f\"returning main GO term with ID {alt_id}\")\n/home/runner/work/picea/picea/picea/ontology.py:32: UserWarning: Accessed GO term by alt ID GO:0003674, returning main GO term with ID GO:0005554\n warnings.warn(f\"Accessed GO term by alt ID {ID}, \" f\"returning main GO term with ID {alt_id}\")\n/home/runner/work/picea/picea/picea/ontology.py:32: UserWarning: Accessed GO term by alt ID GO:0005575, returning main GO term with ID GO:0008372\n warnings.warn(f\"Accessed GO term by alt ID {ID}, \" f\"returning main GO term with ID {alt_id}\")\nOut[8]:
45667In\u00a0[9]: Copied!
[(term.ID, term.name, len(term.parents)) for term in go['GO:0048316'].parents]\n[(term.ID, term.name, len(term.parents)) for term in go['GO:0048316'].parents] Out[9]:
[('GO:0009791', ['post-embryonic development'], 5),\n ('GO:0032501', ['multicellular organismal process'], 1),\n ('GO:0007582', ['biological_process'], 0),\n ('GO:0007275', ['multicellular organism development'], 4),\n ('GO:0048856', ['anatomical structure development'], 2),\n ('GO:0032502', ['developmental process'], 1),\n ('GO:0048608', ['reproductive structure development'], 9),\n ('GO:0003006', ['developmental process involved in reproduction'], 3),\n ('GO:0022414', ['reproductive process'], 1),\n ('GO:0061458', ['reproductive system development'], 6),\n ('GO:0048731', ['system development'], 5),\n ('GO:0010154', ['fruit development'], 10)]In\u00a0[10]: Copied!
go['GO:0048316'].children\ngo['GO:0048316'].children Out[10]:
<picea.ontology.Ontology at 0x7f3ebc0e9c90>In\u00a0[11]: Copied!
import networkx as nx\n\nnx.__version__\nimport networkx as nx nx.__version__
\n---------------------------------------------------------------------------\nModuleNotFoundError Traceback (most recent call last)\nCell In[11], line 1\n----> 1 import networkx as nx\n 3 nx.__version__\n\nModuleNotFoundError: No module named 'networkx'In\u00a0[12]: Copied!
graph = nx.DiGraph()\nfor term in [go['GO:0048316'], *go['GO:0048316'].children]:\n graph.add_node(term.ID, name=term.name)\n for child_ID in term._children:\n graph.add_edge(term.ID, child_ID)\nlayout = nx.planar_layout(graph)\nnx.draw(graph, pos=layout, node_shape='s')\ngraph = nx.DiGraph() for term in [go['GO:0048316'], *go['GO:0048316'].children]: graph.add_node(term.ID, name=term.name) for child_ID in term._children: graph.add_edge(term.ID, child_ID) layout = nx.planar_layout(graph) nx.draw(graph, pos=layout, node_shape='s')
\n---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\nCell In[12], line 1\n----> 1 graph = nx.DiGraph()\n 2 for term in [go['GO:0048316'], *go['GO:0048316'].children]:\n 3 graph.add_node(term.ID, name=term.name)\n\nNameError: name 'nx' is not definedIn\u00a0[13]: Copied!
import sys\n\n!{sys.executable} -m pip install pygraphviz\nnx.nx_agraph.to_agraph(graph)\nimport sys !{sys.executable} -m pip install pygraphviz nx.nx_agraph.to_agraph(graph)
Collecting pygraphviz\r\n Using cached pygraphviz-1.13.tar.gz (104 kB)\r\n
Installing build dependencies ... -
\b \b\\
\b \b|
\b \bdone\r\n
Getting requirements to build wheel ... -
\b \bdone\r\n
Installing backend dependencies ... -
\b \b\\
\b \bdone\r\n
Preparing metadata (pyproject.toml) ... -
\b \bdone\r\nBuilding wheels for collected packages: pygraphviz\r\n
Building wheel for pygraphviz (pyproject.toml) ... -
\b \berror\r\n error: subprocess-exited-with-error\r\n \r\n \u00d7 Building wheel for pygraphviz (pyproject.toml) did not run successfully.\r\n \u2502 exit code: 1\r\n \u2570\u2500> [61 lines of output]\r\n running bdist_wheel\r\n running build\r\n running build_py\r\n creating build\r\n creating build/lib.linux-x86_64-cpython-310\r\n creating build/lib.linux-x86_64-cpython-310/pygraphviz\r\n copying pygraphviz/agraph.py -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n copying pygraphviz/testing.py -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n copying pygraphviz/scraper.py -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n copying pygraphviz/__init__.py -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n copying pygraphviz/graphviz.py -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n creating build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_node_attributes.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_graph.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/__init__.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_repr_mimebundle.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_unicode.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_readwrite.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_edge_attributes.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_layout.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_scraper.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_close.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_clear.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying 
pygraphviz/tests/test_attribute_defaults.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_drawing.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_html.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_subgraph.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_string.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n running egg_info\r\n writing pygraphviz.egg-info/PKG-INFO\r\n writing dependency_links to pygraphviz.egg-info/dependency_links.txt\r\n writing top-level names to pygraphviz.egg-info/top_level.txt\r\n reading manifest file 'pygraphviz.egg-info/SOURCES.txt'\r\n reading manifest template 'MANIFEST.in'\r\n warning: no files found matching '*.swg'\r\n warning: no files found matching '*.png' under directory 'doc'\r\n warning: no files found matching '*.html' under directory 'doc'\r\n warning: no files found matching '*.txt' under directory 'doc'\r\n warning: no files found matching '*.css' under directory 'doc'\r\n warning: no previously-included files matching '*~' found anywhere in distribution\r\n warning: no previously-included files matching '*.pyc' found anywhere in distribution\r\n warning: no previously-included files matching '.svn' found anywhere in distribution\r\n no previously-included directories found matching 'doc/build'\r\n adding license file 'LICENSE'\r\n writing manifest file 'pygraphviz.egg-info/SOURCES.txt'\r\n copying pygraphviz/graphviz.i -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n copying pygraphviz/graphviz_wrap.c -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n running build_ext\r\n building 'pygraphviz._graphviz' extension\r\n creating build/temp.linux-x86_64-cpython-310\r\n creating build/temp.linux-x86_64-cpython-310/pygraphviz\r\n x86_64-linux-gnu-gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g 
-fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -fPIC -DSWIG_PYTHON_STRICT_BYTE_CHAR -I/home/runner/.cache/pypoetry/virtualenvs/picea-ox5U8VzY-py3.10/include -I/usr/include/python3.10 -c pygraphviz/graphviz_wrap.c -o build/temp.linux-x86_64-cpython-310/pygraphviz/graphviz_wrap.o\r\n pygraphviz/graphviz_wrap.c:9: warning: \"SWIG_PYTHON_STRICT_BYTE_CHAR\" redefined\r\n 9 | #define SWIG_PYTHON_STRICT_BYTE_CHAR\r\n |\r\n <command-line>: note: this is the location of the previous definition\r\n pygraphviz/graphviz_wrap.c:3023:10: fatal error: graphviz/cgraph.h: No such file or directory\r\n 3023 | #include \"graphviz/cgraph.h\"\r\n | ^~~~~~~~~~~~~~~~~~~\r\n compilation terminated.\r\n error: command '/usr/bin/x86_64-linux-gnu-gcc' failed with exit code 1\r\n [end of output]\r\n \r\n note: This error originates from a subprocess, and is likely not a problem with pip.\r\n ERROR: Failed building wheel for pygraphviz\r\nFailed to build pygraphviz\r\nERROR: Could not build wheels for pygraphviz, which is required to install pyproject.toml-based projects\r\n
\n---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\nCell In[13], line 4\n 1 import sys\n 3 get_ipython().system('{sys.executable} -m pip install pygraphviz')\n----> 4 nx.nx_agraph.to_agraph(graph)\n\nNameError: name 'nx' is not definedIn\u00a0[14]: Copied!
[(term.ID, term.name) for term in go['GO:0048316'].children]\n[(term.ID, term.name) for term in go['GO:0048316'].children] Out[14]:
[('GO:0009793', ['embryo development ending in seed dormancy']),\n ('GO:0009942', ['longitudinal axis specification']),\n ('GO:0010069', ['zygote asymmetric cytokinesis in embryo sac']),\n ('GO:0010262', ['somatic embryogenesis']),\n ('GO:0010654', ['apical cell fate commitment']),\n ('GO:0048508', ['embryonic meristem development']),\n ('GO:0010065', ['primary meristem tissue development']),\n ('GO:0010066', ['ground meristem histogenesis']),\n ('GO:0010067', ['procambium histogenesis']),\n ('GO:0010068', ['protoderm histogenesis']),\n ('GO:0010071', ['root meristem specification']),\n ('GO:0010072', ['primary shoot apical meristem specification']),\n ('GO:0048825', ['cotyledon development']),\n ('GO:0048826', ['cotyledon morphogenesis']),\n ('GO:0010588', ['cotyledon vascular tissue pattern formation']),\n ('GO:0009960', ['endosperm development']),\n ('GO:0010214', ['seed coat development']),\n ('GO:0048359',\n ['mucilage metabolic process involved in seed coat development']),\n ('GO:0048354',\n ['mucilage biosynthetic process involved in seed coat development']),\n ('GO:0010344', ['seed oilbody biogenesis']),\n ('GO:0010431', ['seed maturation']),\n ('GO:0010162', ['seed dormancy process']),\n ('GO:0010231', ['maintenance of seed dormancy']),\n ('GO:0098755', ['maintenance of seed dormancy by absisic acid']),\n ('GO:0048700', ['acquisition of desiccation tolerance in seed']),\n ('GO:0048838', ['release of seed from dormancy']),\n ('GO:1990068', ['seed dehydration']),\n ('GO:0048317', ['seed morphogenesis']),\n ('GO:0080001', ['mucilage extrusion from seed coat']),\n ('GO:0080112', ['seed growth']),\n ('GO:0090376', ['seed trichome differentiation']),\n ('GO:0090377', ['seed trichome initiation']),\n ('GO:0090378', ['seed trichome elongation']),\n ('GO:0090379',\n ['secondary cell wall biogenesis involved in seed trichome differentiation']),\n ('GO:0090380', ['seed trichome maturation']),\n ('GO:0140547', ['acquisition of seed longevity'])]In\u00a0[15]: Copied!
go['GO:0010431'].__dict__\ngo['GO:0010431'].__dict__ Out[15]:
{'_ID': 'GO:0010431',\n '_original_ID': 'GO:0010431',\n '_container': <picea.ontology.Ontology at 0x7f3eae711150>,\n '_children': ['GO:0010162', 'GO:1990068'],\n '_parents': ['GO:0003006', 'GO:0021700', 'GO:0048609', 'GO:0048316'],\n 'name': ['seed maturation'],\n 'def': ['\"A process in seed development that occurs after embryogenesis by which a quiescent state is established in a seed. Seed maturation is characterized by storage compound accumulation, acquisition of desiccation tolerance, growth arrest and the entry into a dormancy period of variable length that is broken upon germination.\" [PMID:16096971]'],\n 'alt_id': [],\n 'namespace': ['biological_process'],\n 'is_a': ['GO:0003006 ! developmental process involved in reproduction',\n 'GO:0021700 ! developmental maturation',\n 'GO:0048609 ! multicellular organismal reproductive process'],\n 'relationship': ['part_of GO:0048316 ! seed development']}In\u00a0[16]: Copied!
go['GO:0048316'].__dict__\ngo['GO:0048316'].__dict__ Out[16]:
{'_ID': 'GO:0048316',\n '_original_ID': 'GO:0048316',\n '_container': <picea.ontology.Ontology at 0x7f3eae711150>,\n '_children': ['GO:0009793',\n 'GO:0009960',\n 'GO:0010214',\n 'GO:0010344',\n 'GO:0010431',\n 'GO:0048317',\n 'GO:0080001',\n 'GO:0080112',\n 'GO:0090376',\n 'GO:0140547'],\n '_parents': ['GO:0009791', 'GO:0048608', 'GO:0010154'],\n 'name': ['seed development'],\n 'def': ['\"The process whose specific outcome is the progression of the seed over time, from its formation to the mature structure. A seed is a propagating organ formed in the sexual reproductive cycle of gymnosperms and angiosperms, consisting of a protective coat enclosing an embryo and food reserves.\" [GOC:jid, PO:0009010]'],\n 'alt_id': [],\n 'namespace': ['biological_process'],\n 'is_a': ['GO:0009791 ! post-embryonic development',\n 'GO:0048608 ! reproductive structure development'],\n 'relationship': ['part_of GO:0010154 ! fruit development']}In\u00a0[17]: Copied!
go['GO:0048316'].children._elements.keys()\ngo['GO:0048316'].children._elements.keys() Out[17]:
dict_keys(['GO:0009793', 'GO:0009942', 'GO:0010069', 'GO:0010262', 'GO:0010654', 'GO:0048508', 'GO:0010065', 'GO:0010066', 'GO:0010067', 'GO:0010068', 'GO:0010071', 'GO:0010072', 'GO:0048825', 'GO:0048826', 'GO:0010588', 'GO:0009960', 'GO:0010214', 'GO:0048359', 'GO:0048354', 'GO:0010344', 'GO:0010431', 'GO:0010162', 'GO:0010231', 'GO:0098755', 'GO:0048700', 'GO:0048838', 'GO:1990068', 'GO:0048317', 'GO:0080001', 'GO:0080112', 'GO:0090376', 'GO:0090377', 'GO:0090378', 'GO:0090379', 'GO:0090380', 'GO:0140547'])In\u00a0[18]: Copied!
[(term.ID,term.name) for term in go if term.__dict__.get('alt_id') and term._parents]\n[(term.ID,term.name) for term in go if term.__dict__.get('alt_id') and term._parents] Out[18]:
[('GO:0036422', ['heptaprenyl diphosphate synthase activity']),\n ('GO:0000010', ['heptaprenyl diphosphate synthase activity']),\n ('GO:0000022', ['mitotic spindle elongation']),\n ('GO:1905121', ['mitotic spindle elongation']),\n ('GO:0000049', ['tRNA binding']),\n ('GO:0000946', ['tRNA binding']),\n ('GO:0006871', ['urea cycle']),\n ('GO:0000050', ['urea cycle']),\n ('GO:0006594', ['urea cycle']),\n ('GO:0000057', ['ribosomal large subunit export from nucleus']),\n ('GO:0000055', ['ribosomal large subunit export from nucleus']),\n ('GO:0000056', ['ribosomal small subunit export from nucleus']),\n ('GO:0000058', ['ribosomal small subunit export from nucleus']),\n ('GO:0000070', ['mitotic sister chromatid segregation']),\n ('GO:0016359', ['mitotic sister chromatid segregation']),\n ('GO:0030475', ['initial mitotic spindle pole body separation']),\n ('GO:0000073', ['initial mitotic spindle pole body separation']),\n ('GO:0072395', ['cell cycle checkpoint signaling']),\n ('GO:0071779', ['cell cycle checkpoint signaling']),\n ('GO:0000075', ['cell cycle checkpoint signaling']),\n ('GO:0072404', ['cell cycle checkpoint signaling']),\n ('GO:0031576', ['cell cycle checkpoint signaling']),\n ('GO:0072407', ['cell cycle checkpoint signaling']),\n ('GO:0000076', ['DNA replication checkpoint signaling']),\n ('GO:0072437', ['DNA replication checkpoint signaling']),\n ('GO:0072422', ['DNA damage checkpoint signaling']),\n ('GO:0000077', ['DNA damage checkpoint signaling']),\n ('GO:0015177',\n ['S-adenosyl-L-methionine transmembrane transporter activity']),\n ('GO:0000095',\n ['S-adenosyl-L-methionine transmembrane transporter activity']),\n ('GO:0000100', ['S-methylmethionine transmembrane transporter activity']),\n ('GO:0015178', ['S-methylmethionine transmembrane transporter activity']),\n ('GO:0000103', ['sulfate assimilation']),\n ('GO:0019378', ['sulfate assimilation']),\n ('GO:0019739', ['succinate dehydrogenase activity']),\n ('GO:0000104', ['succinate dehydrogenase 
activity']),\n ('GO:0000122', ['negative regulation of transcription by RNA polymerase II']),\n ('GO:0010553', ['negative regulation of transcription by RNA polymerase II']),\n ('GO:0045816', ['negative regulation of transcription by RNA polymerase II']),\n ('GO:0000124', ['SAGA complex']),\n ('GO:0030914', ['SAGA complex']),\n ('GO:0000125', ['SAGA complex']),\n ('GO:0036281', ['flocculation']),\n ('GO:0032128', ['flocculation']),\n ('GO:0043689', ['flocculation']),\n ('GO:0000501', ['flocculation']),\n ('GO:0043690', ['flocculation']),\n ('GO:0000128', ['flocculation']),\n ('GO:0036282', ['flocculation']),\n ('GO:0030607', ['establishment of mitotic spindle orientation']),\n ('GO:0030609', ['establishment of mitotic spindle orientation']),\n ('GO:0000132', ['establishment of mitotic spindle orientation']),\n ('GO:0030898', ['microfilament motor activity']),\n ('GO:0000146', ['microfilament motor activity']),\n ('GO:0016548', ['rRNA modification']),\n ('GO:0000154', ['rRNA modification']),\n ('GO:0009096', ['tryptophan biosynthetic process']),\n ('GO:0000162', ['tryptophan biosynthetic process']),\n ('GO:0000165', ['MAPK cascade']),\n ('GO:0007255', ['MAPK cascade']),\n ('GO:0000179', ['rRNA (adenine-N6,N6-)-dimethyltransferase activity']),\n ('GO:0043790', ['rRNA (adenine-N6,N6-)-dimethyltransferase activity']),\n ('GO:0000212', ['meiotic spindle organization']),\n ('GO:0043147', ['meiotic spindle organization']),\n ('GO:0000215', [\"tRNA 2'-phosphotransferase activity\"]),\n ('GO:0008665', [\"tRNA 2'-phosphotransferase activity\"]),\n ('GO:0000355', ['spliceosomal tri-snRNP complex assembly']),\n ('GO:0000244', ['spliceosomal tri-snRNP complex assembly']),\n ('GO:0000351', ['spliceosomal tri-snRNP complex assembly']),\n ('GO:0050576', ['3-keto sterol reductase activity']),\n ('GO:0000253', ['3-keto sterol reductase activity']),\n ('GO:0005051', ['peroxisome targeting sequence binding']),\n ('GO:0000268', ['peroxisome targeting sequence binding']),\n 
('GO:0000270', ['peptidoglycan metabolic process']),\n ('GO:0009284', ['peptidoglycan metabolic process']),\n ('GO:0044244', ['polysaccharide catabolic process']),\n ('GO:0000272', ['polysaccharide catabolic process']),\n ('GO:0007067', ['mitotic cell cycle']),\n ('GO:0000278', ['mitotic cell cycle']),\n ('GO:0000292', ['RNA fragment catabolic process']),\n ('GO:0030452', ['RNA fragment catabolic process']),\n ('GO:0000310', ['xanthine phosphoribosyltransferase activity']),\n ('GO:0009043', ['xanthine phosphoribosyltransferase activity']),\n ('GO:0010388', ['protein deneddylation']),\n ('GO:0000338', ['protein deneddylation']),\n ('GO:0000371', ['mRNA branch site recognition']),\n ('GO:0000348', ['mRNA branch site recognition']),\n ('GO:0000370', ['mRNA branch site recognition']),\n ('GO:0000349',\n ['generation of catalytic spliceosome for first transesterification step']),\n ('GO:0000357',\n ['generation of catalytic spliceosome for first transesterification step']),\n ('GO:0000356',\n ['generation of catalytic spliceosome for first transesterification step']),\n ('GO:0000350',\n ['generation of catalytic spliceosome for second transesterification step']),\n ('GO:0000358',\n ['generation of catalytic spliceosome for second transesterification step']),\n ('GO:0000359',\n ['generation of catalytic spliceosome for second transesterification step']),\n ('GO:0000361', ['cis assembly of pre-catalytic spliceosome']),\n ('GO:0000360', ['cis assembly of pre-catalytic spliceosome']),\n ('GO:0000354', ['cis assembly of pre-catalytic spliceosome']),\n ('GO:0000375', ['RNA splicing, via transesterification reactions']),\n ('GO:0031202', ['RNA splicing, via transesterification reactions']),\n ('GO:0000385', ['RNA splicing, via transesterification reactions']),\n ('GO:0000396',\n ['spliceosome conformational change to release U4 (or U4atac) and U1 (or U11)']),\n ('GO:0000397',\n ['spliceosome conformational change to release U4 (or U4atac) and U1 (or U11)']),\n ('GO:0000388',\n 
['spliceosome conformational change to release U4 (or U4atac) and U1 (or U11)']),\n ('GO:0000382', [\"mRNA 3'-splice site recognition\"]),\n ('GO:0000389', [\"mRNA 3'-splice site recognition\"]),\n ('GO:0000383', [\"mRNA 3'-splice site recognition\"]),\n ('GO:0000390', ['spliceosomal complex disassembly']),\n ('GO:0000391', ['spliceosomal complex disassembly']),\n ('GO:0000392', ['spliceosomal complex disassembly']),\n ('GO:0000395', [\"mRNA 5'-splice site recognition\"]),\n ('GO:0000369', [\"mRNA 5'-splice site recognition\"]),\n ('GO:0000368', [\"mRNA 5'-splice site recognition\"]),\n ('GO:0006374', ['mRNA splicing, via spliceosome']),\n ('GO:0006375', ['mRNA splicing, via spliceosome']),\n ('GO:0000398', ['mRNA splicing, via spliceosome']),\n ('GO:0000418', ['RNA polymerase IV complex']),\n ('GO:0000420', ['RNA polymerase IV complex']),\n ('GO:0000419', ['RNA polymerase V complex']),\n ('GO:0080137', ['RNA polymerase V complex']),\n ('GO:0000438', ['core TFIIH complex portion of holo TFIIH complex']),\n ('GO:0000443', ['core TFIIH complex portion of holo TFIIH complex']),\n ('GO:0000439', ['transcription factor TFIIH core complex']),\n ('GO:0000441', ['transcription factor TFIIH core complex']),\n ('GO:0000440', ['core TFIIH complex portion of NEF3 complex']),\n ('GO:0000442', ['core TFIIH complex portion of NEF3 complex']),\n ('GO:0000444', ['MIS12/MIND type complex']),\n ('GO:0000818', ['MIS12/MIND type complex']),\n ('GO:0000490',\n ['cleavage in ITS2 between 5.8S rRNA and LSU-rRNA of tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)']),\n ('GO:0000448',\n ['cleavage in ITS2 between 5.8S rRNA and LSU-rRNA of tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)']),\n ('GO:1990041',\n ['maturation of SSU-rRNA from tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)']),\n ('GO:0000462',\n ['maturation of SSU-rRNA from tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)']),\n ('GO:0000703',\n ['oxidized pyrimidine nucleobase 
lesion DNA N-glycosylase activity']),\n ('GO:0019004',\n ['oxidized pyrimidine nucleobase lesion DNA N-glycosylase activity']),\n ('GO:0000724', ['double-strand break repair via homologous recombination']),\n ('GO:0016924', ['double-strand break repair via homologous recombination']),\n ('GO:0000741', ['karyogamy']),\n ('GO:0007335', ['karyogamy']),\n ('GO:0000743',\n ['nuclear migration involved in conjugation with cellular fusion']),\n ('GO:0006946',\n ['nuclear migration involved in conjugation with cellular fusion']),\n ('GO:0000747', ['conjugation with cellular fusion']),\n ('GO:0030477', ['conjugation with cellular fusion']),\n ('GO:0030461', ['conjugation with cellular fusion']),\n ('GO:0007322', ['conjugation with cellular fusion']),\n ('GO:0007333', ['conjugation with cellular fusion']),\n ('GO:0007328',\n ['response to pheromone triggering conjugation with cellular fusion']),\n ('GO:0030434',\n ['response to pheromone triggering conjugation with cellular fusion']),\n ('GO:0000749',\n ['response to pheromone triggering conjugation with cellular fusion']),\n ('GO:0000750',\n ['pheromone-dependent signal transduction involved in conjugation with cellular fusion']),\n ('GO:0007330',\n ['pheromone-dependent signal transduction involved in conjugation with cellular fusion']),\n ('GO:0030454',\n ['pheromone-dependent signal transduction involved in conjugation with cellular fusion']),\n ('GO:0000751', ['mitotic cell cycle G1 arrest in response to pheromone']),\n ('GO:0030571', ['mitotic cell cycle G1 arrest in response to pheromone']),\n ('GO:0007334',\n ['agglutination involved in conjugation with cellular fusion']),\n ('GO:0000752',\n ['agglutination involved in conjugation with cellular fusion']),\n ('GO:0000753',\n ['cell morphogenesis involved in conjugation with cellular fusion']),\n ('GO:0007332',\n ['cell morphogenesis involved in conjugation with cellular fusion']),\n ('GO:0030453',\n ['adaptation of signaling pathway by response to pheromone involved 
in conjugation with cellular fusion']),\n ('GO:0007331',\n ['adaptation of signaling pathway by response to pheromone involved in conjugation with cellular fusion']),\n ('GO:0000754',\n ['adaptation of signaling pathway by response to pheromone involved in conjugation with cellular fusion']),\n ('GO:0000218', ['cytogamy']),\n ('GO:0000755', ['cytogamy']),\n ('GO:0030462', ['cytogamy']),\n ('GO:0007325', ['peptide pheromone export']),\n ('GO:0000770', ['peptide pheromone export']),\n ('GO:0097521', ['chromosome, centromeric region']),\n ('GO:0000775', ['chromosome, centromeric region']),\n ('GO:0000777', ['kinetochore']),\n ('GO:0031617', ['kinetochore']),\n ('GO:0005699', ['kinetochore']),\n ('GO:0000778', ['kinetochore']),\n ('GO:0000776', ['kinetochore']),\n ('GO:0000780', ['condensed chromosome, centromeric region']),\n ('GO:0000779', ['condensed chromosome, centromeric region']),\n ('GO:0000784', ['chromosome, telomeric region']),\n ('GO:0000781', ['chromosome, telomeric region']),\n ('GO:0000785', ['chromatin']),\n ('GO:0000790', ['chromatin']),\n ('GO:0000789', ['chromatin']),\n ('GO:0005717', ['chromatin']),\n ('GO:0000786', ['nucleosome']),\n ('GO:0000787', ['nucleosome']),\n ('GO:0000788', ['nucleosome']),\n ('GO:0005718', ['nucleosome']),\n ('GO:0000791', ['euchromatin']),\n ('GO:0005719', ['euchromatin']),\n ('GO:0035327', ['euchromatin']),\n ('GO:0000792', ['heterochromatin']),\n ('GO:0035328', ['heterochromatin']),\n ('GO:0005720', ['heterochromatin']),\n ('GO:0005716', ['synaptonemal complex']),\n ('GO:0000795', ['synaptonemal complex']),\n ('GO:0000797', ['condensin complex']),\n ('GO:0008620', ['condensin complex']),\n ('GO:0005676', ['condensin complex']),\n ('GO:0061814', ['condensin complex']),\n ('GO:0000799', ['condensin complex']),\n ('GO:0000796', ['condensin complex']),\n ('GO:0008621', ['condensin complex']),\n ('GO:0045791', ['cell morphogenesis']),\n ('GO:0007148', ['cell morphogenesis']),\n ('GO:0000902', ['cell morphogenesis']),\n 
('GO:0045790', ['cell morphogenesis']),\n ('GO:0007104', ['cytokinesis']),\n ('GO:0033205', ['cytokinesis']),\n ('GO:0000910', ['cytokinesis']),\n ('GO:0016288', ['cytokinesis']),\n ('GO:0045573', ['actomyosin contractile ring assembly']),\n ('GO:2000708', ['actomyosin contractile ring assembly']),\n ('GO:0000915', ['actomyosin contractile ring assembly']),\n ('GO:0071937', ['division septum assembly']),\n ('GO:0000917', ['division septum assembly']),\n ('GO:1902411', ['division septum assembly']),\n ('GO:2000695', ['septum digestion after cytokinesis']),\n ('GO:1902409', ['septum digestion after cytokinesis']),\n ('GO:0000920', ['septum digestion after cytokinesis']),\n ('GO:0000922', ['spindle pole']),\n ('GO:0030615', ['spindle pole']),\n ('GO:0000929', ['gamma-tubulin ring complex']),\n ('GO:0000925', ['gamma-tubulin ring complex']),\n ('GO:0055031', ['gamma-tubulin ring complex']),\n ('GO:0055033', ['gamma-tubulin ring complex']),\n ('GO:0061494', ['gamma-tubulin ring complex']),\n ('GO:0055032', ['gamma-tubulin ring complex']),\n ('GO:0000924', ['gamma-tubulin ring complex']),\n ('GO:0000926', ['gamma-tubulin ring complex']),\n ('GO:0000931', ['gamma-tubulin ring complex']),\n ('GO:0008274', ['gamma-tubulin ring complex']),\n ('GO:0043187', ['division septum']),\n ('GO:0000935', ['division septum']),\n ('GO:0000939', ['inner kinetochore']),\n ('GO:0000941', ['inner kinetochore']),\n ('GO:0000940', ['outer kinetochore']),\n ('GO:0000942', ['outer kinetochore']),\n ('GO:0000976', ['transcription cis-regulatory region binding']),\n ('GO:0000984', ['transcription cis-regulatory region binding']),\n ('GO:0044212', ['transcription cis-regulatory region binding']),\n ('GO:0000975', ['transcription cis-regulatory region binding']),\n ('GO:0001017', ['transcription cis-regulatory region binding']),\n ('GO:0000977',\n ['RNA polymerase II transcription regulatory region sequence-specific DNA binding']),\n ('GO:0001012',\n ['RNA polymerase II transcription regulatory 
region sequence-specific DNA binding']),\n ('GO:0000978',\n ['RNA polymerase II cis-regulatory region sequence-specific DNA binding']),\n ('GO:0000980',\n ['RNA polymerase II cis-regulatory region sequence-specific DNA binding']),\n ('GO:0000981',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0001201',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0001133',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0001203',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0001202',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0003705',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0001200',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0000982',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0035326', ['cis-regulatory region sequence-specific DNA binding']),\n ('GO:0000987', ['cis-regulatory region sequence-specific DNA binding']),\n ('GO:0000986', ['cis-regulatory region sequence-specific DNA binding']),\n ('GO:0001158', ['cis-regulatory region sequence-specific DNA binding']),\n ('GO:0001159', ['cis-regulatory region sequence-specific DNA binding']),\n ('GO:0001150', ['cis-regulatory region sequence-specific DNA binding']),\n ('GO:0001034',\n ['RNA polymerase III general transcription initiation factor activity']),\n ('GO:0000995',\n ['RNA polymerase III general transcription initiation factor activity']),\n ('GO:0001002',\n ['RNA polymerase III type 1 promoter sequence-specific DNA binding']),\n ('GO:0001030',\n ['RNA polymerase III type 1 promoter sequence-specific DNA binding']),\n ('GO:0001031',\n ['RNA polymerase III type 2 promoter sequence-specific DNA binding']),\n ('GO:0001003',\n ['RNA polymerase III type 2 promoter sequence-specific DNA binding']),\n 
('GO:0001006',\n ['RNA polymerase III type 3 promoter sequence-specific DNA binding']),\n ('GO:0001032',\n ['RNA polymerase III type 3 promoter sequence-specific DNA binding']),\n ('GO:0001045', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0070364', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0001018', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0070361', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0000997', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0070363', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0001044', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0070362', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0001039',\n ['RNA polymerase III hybrid type promoter sequence-specific DNA binding']),\n ('GO:0001037',\n ['RNA polymerase III hybrid type promoter sequence-specific DNA binding']),\n ('GO:0000985', ['core promoter sequence-specific DNA binding']),\n ('GO:0001047', ['core promoter sequence-specific DNA binding']),\n ('GO:0001046', ['core promoter sequence-specific DNA binding']),\n ('GO:0001109', ['promoter clearance during DNA-templated transcription']),\n ('GO:0001122', ['promoter clearance during DNA-templated transcription']),\n ('GO:0001112', ['DNA-templated transcription open complex formation']),\n ('GO:0001127', ['DNA-templated transcription open complex formation']),\n ('GO:0001146',\n ['transcription termination site sequence-specific DNA binding']),\n ('GO:0001147',\n ['transcription termination site sequence-specific DNA binding']),\n ('GO:0001160',\n ['transcription termination site sequence-specific DNA binding']),\n ('GO:0001145',\n ['transcription termination site sequence-specific DNA binding']),\n ('GO:0001148',\n ['transcription termination site sequence-specific DNA binding']),\n ('GO:0044213',\n ['intronic transcription regulatory region sequence-specific DNA 
binding']),\n ('GO:0001161',\n ['intronic transcription regulatory region sequence-specific DNA binding']),\n ('GO:0001013',\n ['RNA polymerase I transcription regulatory region sequence-specific DNA binding']),\n ('GO:0001163',\n ['RNA polymerase I transcription regulatory region sequence-specific DNA binding']),\n ('GO:0001164',\n ['RNA polymerase I core promoter sequence-specific DNA binding']),\n ('GO:0001187',\n ['RNA polymerase I core promoter sequence-specific DNA binding']),\n ('GO:0001166',\n ['RNA polymerase I cis-regulatory region sequence-specific DNA binding']),\n ('GO:0001165',\n ['RNA polymerase I cis-regulatory region sequence-specific DNA binding']),\n ('GO:0001176', ['DNA-templated transcriptional start site selection']),\n ('GO:0001173', ['DNA-templated transcriptional start site selection']),\n ('GO:0001182', ['RNA polymerase I promoter clearance']),\n ('GO:0001184', ['RNA polymerase I promoter clearance']),\n ('GO:0001189', ['RNA polymerase I preinitiation complex assembly']),\n ('GO:0001188', ['RNA polymerase I preinitiation complex assembly']),\n ('GO:0001194',\n ['maintenance of transcriptional fidelity during transcription elongation']),\n ('GO:0001192',\n ['maintenance of transcriptional fidelity during transcription elongation']),\n ('GO:0001216', ['DNA-binding transcription activator activity']),\n ('GO:0001215', ['DNA-binding transcription activator activity']),\n ('GO:0001140', ['DNA-binding transcription activator activity']),\n ('GO:0001218', ['DNA-binding transcription repressor activity']),\n ('GO:0001219', ['DNA-binding transcription repressor activity']),\n ('GO:0001141', ['DNA-binding transcription repressor activity']),\n ('GO:0001220', ['DNA-binding transcription repressor activity']),\n ('GO:0001217', ['DNA-binding transcription repressor activity']),\n ('GO:0001224', ['transcription coregulator binding']),\n ('GO:0001221', ['transcription coregulator binding']),\n ('GO:0001226', ['transcription corepressor binding']),\n 
('GO:0001222', ['transcription corepressor binding']),\n ('GO:0001225', ['transcription coactivator binding']),\n ('GO:0001223', ['transcription coactivator binding']),\n ('GO:0001206',\n ['DNA-binding transcription repressor activity, RNA polymerase II-specific']),\n ('GO:0001227',\n ['DNA-binding transcription repressor activity, RNA polymerase II-specific']),\n ('GO:0001210',\n ['DNA-binding transcription repressor activity, RNA polymerase II-specific']),\n ('GO:0001078',\n ['DNA-binding transcription repressor activity, RNA polymerase II-specific']),\n ('GO:0001214',\n ['DNA-binding transcription repressor activity, RNA polymerase II-specific']),\n ('GO:0001211',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0001212',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0001077',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0001205',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0001209',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0001213',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0001228',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0055027', ['chlamydospore formation']),\n ('GO:0001410', ['chlamydospore formation']),\n ('GO:0042833', ['response to protozoan']),\n ('GO:0001562', ['response to protozoan']),\n ('GO:0001589',\n ['dopamine neurotransmitter receptor activity, coupled via Gs']),\n ('GO:0001588',\n ['dopamine neurotransmitter receptor activity, coupled via Gs']),\n ('GO:0001590',\n ['dopamine neurotransmitter receptor activity, coupled via Gs']),\n ('GO:0001593',\n ['dopamine neurotransmitter receptor activity, coupled via Gi/Go']),\n ('GO:0001670',\n ['dopamine neurotransmitter receptor activity, coupled via Gi/Go']),\n ('GO:0001591',\n ['dopamine 
neurotransmitter receptor activity, coupled via Gi/Go']),\n ('GO:0001592',\n ['dopamine neurotransmitter receptor activity, coupled via Gi/Go']),\n ('GO:0008501', ['G protein-coupled adenosine receptor activity']),\n ('GO:0001610', ['G protein-coupled adenosine receptor activity']),\n ('GO:0001611', ['G protein-coupled adenosine receptor activity']),\n ('GO:0001609', ['G protein-coupled adenosine receptor activity']),\n ('GO:0001612', ['G protein-coupled adenosine receptor activity']),\n ('GO:0001613', ['G protein-coupled adenosine receptor activity']),\n ('GO:0035586', ['purinergic nucleotide receptor activity']),\n ('GO:0001614', ['purinergic nucleotide receptor activity']),\n ('GO:0045032', ['G protein-coupled ADP receptor activity']),\n ('GO:0001621', ['G protein-coupled ADP receptor activity']),\n ('GO:0016522',\n ['pituitary adenylate cyclase-activating polypeptide receptor activity']),\n ('GO:0001634',\n ['pituitary adenylate cyclase-activating polypeptide receptor activity']),\n ('GO:0001644', ['cAMP receptor activity']),\n ('GO:0001646', ['cAMP receptor activity']),\n ('GO:0001654', ['eye development']),\n ('GO:0042460', ['eye development']),\n ('GO:0043081', ['male germ cell nucleus']),\n ('GO:0001673', ['male germ cell nucleus']),\n ('GO:0001674', ['female germ cell nucleus']),\n ('GO:0043080', ['female germ cell nucleus']),\n ('GO:0001694', ['histamine biosynthetic process']),\n ('GO:0001693', ['histamine biosynthetic process']),\n ('GO:0048276', ['gastrulation with mouth forming second']),\n ('GO:0001702', ['gastrulation with mouth forming second']),\n ('GO:0010003', ['gastrulation with mouth forming second']),\n ('GO:0001744', ['insect visual primordium formation']),\n ('GO:0007457', ['insect visual primordium formation']),\n ('GO:0048049', ['insect visual primordium development']),\n ('GO:0001748', ['insect visual primordium development']),\n ('GO:0007459', ['compound eye photoreceptor fate commitment']),\n ('GO:0001752', ['compound eye photoreceptor 
fate commitment']),\n ('GO:0001806', ['type IV hypersensitivity']),\n ('GO:0016069', ['type IV hypersensitivity']),\n ('GO:0042089', ['cytokine production']),\n ('GO:0042032', ['cytokine production']),\n ('GO:0042107', ['cytokine production']),\n ('GO:0001816', ['cytokine production']),\n ('GO:0050663', ['cytokine production']),\n ('GO:0001817', ['regulation of cytokine production']),\n ('GO:0042035', ['regulation of cytokine production']),\n ('GO:0050707', ['regulation of cytokine production']),\n ('GO:0050710', ['negative regulation of cytokine production']),\n ('GO:0042036', ['negative regulation of cytokine production']),\n ('GO:0001818', ['negative regulation of cytokine production']),\n ('GO:0042108', ['positive regulation of cytokine production']),\n ('GO:0001819', ['positive regulation of cytokine production']),\n ('GO:0050715', ['positive regulation of cytokine production']),\n ('GO:0001679', ['neural tube formation']),\n ('GO:0001841', ['neural tube formation']),\n ('GO:0080087', ['(1->3)-beta-D-glucan binding']),\n ('GO:0001872', ['(1->3)-beta-D-glucan binding']),\n ('GO:0001942', ['hair follicle development']),\n ('GO:0001943', ['hair follicle development']),\n ('GO:0002003', ['angiotensin maturation']),\n ('GO:0002005', ['angiotensin maturation']),\n ('GO:0002036', ['regulation of L-glutamate import across plasma membrane']),\n ('GO:1900920', ['regulation of L-glutamate import across plasma membrane']),\n ('GO:1900921',\n ['negative regulation of L-glutamate import across plasma membrane']),\n ('GO:0002037',\n ['negative regulation of L-glutamate import across plasma membrane']),\n ('GO:0002038',\n ['positive regulation of L-glutamate import across plasma membrane']),\n ('GO:1900922',\n ['positive regulation of L-glutamate import across plasma membrane']),\n ('GO:0002046', ['opsin binding']),\n ('GO:0016030', ['opsin binding']),\n ('GO:0002163', ['dystroglycan binding']),\n ('GO:0002162', ['dystroglycan binding']),\n ('GO:0002166', ['dystroglycan 
binding']),\n ('GO:0002214', ['defense response to insect']),\n ('GO:0002213', ['defense response to insect']),\n ('GO:0002215', ['defense response to nematode']),\n ('GO:0002216', ['defense response to nematode']),\n ('GO:0002218', ['activation of innate immune response']),\n ('GO:0002219', ['activation of innate immune response']),\n ('GO:0002374', ['cytokine production involved in immune response']),\n ('GO:0002375', ['cytokine production involved in immune response']),\n ('GO:0002367', ['cytokine production involved in immune response']),\n ('GO:0002377', ['immunoglobulin production']),\n ('GO:0048305', ['immunoglobulin production']),\n ('GO:0002378', ['immunoglobulin production']),\n ('GO:0002379',\n ['immunoglobulin production involved in immunoglobulin-mediated immune response']),\n ('GO:0002381',\n ['immunoglobulin production involved in immunoglobulin-mediated immune response']),\n ('GO:0002380',\n ['immunoglobulin production involved in immunoglobulin-mediated immune response']),\n ('GO:0002386', ['mucosal immune response']),\n ('GO:0002422', ['mucosal immune response']),\n ('GO:0002385', ['mucosal immune response']),\n ('GO:0002535',\n ['platelet activating factor production involved in inflammatory response']),\n ('GO:0002391',\n ['platelet activating factor production involved in inflammatory response']),\n ('GO:0002392',\n ['platelet activating factor production involved in inflammatory response']),\n ('GO:0002390',\n ['platelet activating factor production involved in inflammatory response']),\n ('GO:0002443', ['leukocyte mediated immunity']),\n ('GO:0042087', ['leukocyte mediated immunity']),\n ('GO:0019723', ['leukocyte mediated immunity']),\n ('GO:0002637', ['regulation of immunoglobulin production']),\n ('GO:0002640', ['regulation of immunoglobulin production']),\n ('GO:0051023', ['regulation of immunoglobulin production']),\n ('GO:0051025', ['negative regulation of immunoglobulin production']),\n ('GO:0002638', ['negative regulation of 
immunoglobulin production']),\n ('GO:0002641', ['negative regulation of immunoglobulin production']),\n ('GO:0051024', ['positive regulation of immunoglobulin production']),\n ('GO:0002642', ['positive regulation of immunoglobulin production']),\n ('GO:0002639', ['positive regulation of immunoglobulin production']),\n ('GO:0045845', ['regulation of natural killer cell mediated immunity']),\n ('GO:0002715', ['regulation of natural killer cell mediated immunity']),\n ('GO:0002716',\n ['negative regulation of natural killer cell mediated immunity']),\n ('GO:0030102',\n ['negative regulation of natural killer cell mediated immunity']),\n ('GO:0002717',\n ['positive regulation of natural killer cell mediated immunity']),\n ('GO:0045846',\n ['positive regulation of natural killer cell mediated immunity']),\n ('GO:0002718',\n ['regulation of cytokine production involved in immune response']),\n ('GO:0002739',\n ['regulation of cytokine production involved in immune response']),\n ('GO:0002742',\n ['regulation of cytokine production involved in immune response']),\n ('GO:0002719',\n ['negative regulation of cytokine production involved in immune response']),\n ('GO:0002743',\n ['negative regulation of cytokine production involved in immune response']),\n ('GO:0002740',\n ['negative regulation of cytokine production involved in immune response']),\n ('GO:0002744',\n ['positive regulation of cytokine production involved in immune response']),\n ('GO:0002741',\n ['positive regulation of cytokine production involved in immune response']),\n ('GO:0002720',\n ['positive regulation of cytokine production involved in immune response']),\n ('GO:0039528',\n ['cytoplasmic pattern recognition receptor signaling pathway']),\n ('GO:0002753',\n ['cytoplasmic pattern recognition receptor signaling pathway']),\n ('GO:0002758', ['innate immune response-activating signaling pathway']),\n ('GO:0009870', ['innate immune response-activating signaling pathway']),\n ('GO:0010204', ['innate immune 
response-activating signaling pathway']),\n ('GO:0070526', ['tRNA threonylcarbamoyladenosine modification']),\n ('GO:0002949', ['tRNA threonylcarbamoyladenosine modification']),\n ('GO:0010802', ['respiratory system process']),\n ('GO:0003016', ['respiratory system process']),\n ('GO:0003124', ['epinephrine-mediated vasodilation']),\n ('GO:0003123', ['epinephrine-mediated vasodilation']),\n ('GO:0003121', ['epinephrine-mediated vasodilation']),\n ('GO:0003126', ['norepinephrine-mediated vasodilation']),\n ('GO:0003122', ['norepinephrine-mediated vasodilation']),\n ('GO:0003125', ['norepinephrine-mediated vasodilation']),\n ('GO:0003341', ['cilium movement']),\n ('GO:0036142', ['cilium movement']),\n ('GO:0003352', ['regulation of cilium movement']),\n ('GO:1900172', ['regulation of cilium movement']),\n ('GO:1900174', ['positive regulation of cilium movement']),\n ('GO:0003353', ['positive regulation of cilium movement']),\n ('GO:0003354', ['negative regulation of cilium movement']),\n ('GO:1900173', ['negative regulation of cilium movement']),\n ('GO:0036144', ['regulation of cilium beat frequency']),\n ('GO:0003356', ['regulation of cilium beat frequency']),\n ('GO:0003376', ['sphingosine-1-phosphate receptor signaling pathway']),\n ('GO:0001789', ['sphingosine-1-phosphate receptor signaling pathway']),\n ('GO:0000496', ['nucleic acid binding']),\n ('GO:0003676', ['nucleic acid binding']),\n ('GO:0043566', ['DNA binding']),\n ('GO:0003677', ['DNA binding']),\n ('GO:0004003', ['DNA helicase activity']),\n ('GO:0003679', ['DNA helicase activity']),\n ('GO:0003678', ['DNA helicase activity']),\n ('GO:0033170', ['DNA clamp loader activity']),\n ('GO:0003689', ['DNA clamp loader activity']),\n ('GO:0003697', ['single-stranded DNA binding']),\n ('GO:0003698', ['single-stranded DNA binding']),\n ('GO:0003699', ['single-stranded DNA binding']),\n ('GO:0001204', ['DNA-binding transcription factor activity']),\n ('GO:0001151', ['DNA-binding transcription factor 
activity']),\n ('GO:0001130', ['DNA-binding transcription factor activity']),\n ('GO:0001199', ['DNA-binding transcription factor activity']),\n ('GO:0000130', ['DNA-binding transcription factor activity']),\n ('GO:0003700', ['DNA-binding transcription factor activity']),\n ('GO:0001131', ['DNA-binding transcription factor activity']),\n ('GO:0001071', ['DNA-binding transcription factor activity']),\n ('GO:0003712', ['transcription coregulator activity']),\n ('GO:0001104', ['transcription coregulator activity']),\n ('GO:0016455', ['transcription coregulator activity']),\n ('GO:0001105', ['transcription coactivator activity']),\n ('GO:0003713', ['transcription coactivator activity']),\n ('GO:0003714', ['transcription corepressor activity']),\n ('GO:0001106', ['transcription corepressor activity']),\n ('GO:0003723', ['RNA binding']),\n ('GO:0000498', ['RNA binding']),\n ('GO:0044822', ['RNA binding']),\n ('GO:0004004', ['RNA helicase activity']),\n ('GO:0003724', ['RNA helicase activity']),\n ('GO:0003971', ['double-stranded RNA adenosine deaminase activity']),\n ('GO:0003726', ['double-stranded RNA adenosine deaminase activity']),\n ('GO:0003727', ['single-stranded RNA binding']),\n ('GO:0003728', ['single-stranded RNA binding']),\n ('GO:0003729', ['mRNA binding']),\n ('GO:0000499', ['mRNA binding']),\n ('GO:0003740', ['structural constituent of ribosome']),\n ('GO:0003742', ['structural constituent of ribosome']),\n ('GO:0003741', ['structural constituent of ribosome']),\n ('GO:0003738', ['structural constituent of ribosome']),\n ('GO:0003736', ['structural constituent of ribosome']),\n ('GO:0003735', ['structural constituent of ribosome']),\n ('GO:0003739', ['structural constituent of ribosome']),\n ('GO:0003737', ['structural constituent of ribosome']),\n ('GO:0003745', ['translation initiation factor activity']),\n ('GO:0003744', ['translation initiation factor activity']),\n ('GO:0003743', ['translation initiation factor activity']),\n ('GO:0003746', 
['translation elongation factor activity']),\n ('GO:0008182', ['translation elongation factor activity']),\n ('GO:0008183', ['translation elongation factor activity']),\n ('GO:0003747', ['translation release factor activity']),\n ('GO:0003749', ['translation release factor activity']),\n ('GO:0003748', ['translation release factor activity']),\n ('GO:0004752', ['peptidyl-prolyl cis-trans isomerase activity']),\n ('GO:0003755', ['peptidyl-prolyl cis-trans isomerase activity']),\n ('GO:0042028', ['peptidyl-prolyl cis-trans isomerase activity']),\n ('GO:0006467', ['protein disulfide isomerase activity']),\n ('GO:0003756', ['protein disulfide isomerase activity']),\n ('GO:0003777', ['microtubule motor activity']),\n ('GO:1990939', ['microtubule motor activity']),\n ('GO:0003838', ['sterol 24-C-methyltransferase activity']),\n ('GO:0102101', ['sterol 24-C-methyltransferase activity']),\n ('GO:0003841', ['1-acylglycerol-3-phosphate O-acyltransferase activity']),\n ('GO:0004469', ['1-acylglycerol-3-phosphate O-acyltransferase activity']),\n ('GO:0003843', ['1,3-beta-D-glucan synthase activity']),\n ('GO:0009981', ['1,3-beta-D-glucan synthase activity']),\n ('GO:0003853', ['2-methylbutanoyl-CoA dehydrogenase activity']),\n ('GO:0047119', ['2-methylbutanoyl-CoA dehydrogenase activity']),\n ('GO:0003863',\n ['3-methyl-2-oxobutanoate dehydrogenase (2-methylpropanoyl-transferring) activity']),\n ('GO:0003826',\n ['3-methyl-2-oxobutanoate dehydrogenase (2-methylpropanoyl-transferring) activity']),\n ('GO:0046913', ['ATP citrate synthase activity']),\n ('GO:0003878', ['ATP citrate synthase activity']),\n ('GO:0003886', ['DNA (cytosine-5-)-methyltransferase activity']),\n ('GO:0008326', ['DNA (cytosine-5-)-methyltransferase activity']),\n ('GO:0003894', ['DNA-directed DNA polymerase activity']),\n ('GO:0008723', ['DNA-directed DNA polymerase activity']),\n ('GO:0016000', ['DNA-directed DNA polymerase activity']),\n ('GO:0003893', ['DNA-directed DNA polymerase activity']),\n 
('GO:0016448', ['DNA-directed DNA polymerase activity']),\n ('GO:0003889', ['DNA-directed DNA polymerase activity']),\n ('GO:0003890', ['DNA-directed DNA polymerase activity']),\n ('GO:0003891', ['DNA-directed DNA polymerase activity']),\n ('GO:0016450', ['DNA-directed DNA polymerase activity']),\n ('GO:0016452', ['DNA-directed DNA polymerase activity']),\n ('GO:0016451', ['DNA-directed DNA polymerase activity']),\n ('GO:0019984', ['DNA-directed DNA polymerase activity']),\n ('GO:0003887', ['DNA-directed DNA polymerase activity']),\n ('GO:0003888', ['DNA-directed DNA polymerase activity']),\n ('GO:0015999', ['DNA-directed DNA polymerase activity']),\n ('GO:0003895', ['DNA-directed DNA polymerase activity']),\n ('GO:0016449', ['DNA-directed DNA polymerase activity']),\n ('GO:0003896', ['DNA primase activity']),\n ('GO:0003897', ['DNA primase activity']),\n ('GO:0003898', ['DNA primase activity']),\n ('GO:0000129', [\"DNA-directed 5'-3' RNA polymerase activity\"]),\n ('GO:0003899', [\"DNA-directed 5'-3' RNA polymerase activity\"]),\n ('GO:0003905', ['alkylbase DNA N-glycosylase activity']),\n ('GO:0004036', ['alkylbase DNA N-glycosylase activity']),\n ('GO:0009387', ['DNA topoisomerase activity']),\n ('GO:0003916', ['DNA topoisomerase activity']),\n ('GO:0003918',\n ['DNA topoisomerase type II (double strand cut, ATP-hydrolyzing) activity']),\n ('GO:0061505',\n ['DNA topoisomerase type II (double strand cut, ATP-hydrolyzing) activity']),\n ('GO:0061745', ['GTPase activity']),\n ('GO:0003924', ['GTPase activity']),\n ('GO:0003925', ['G protein activity']),\n ('GO:0003927', ['G protein activity']),\n ('GO:0008468', ['NADPH dehydrogenase activity']),\n ('GO:0016660', ['NADPH dehydrogenase activity']),\n ('GO:0003959', ['NADPH dehydrogenase activity']),\n ('GO:0019282', ['O-acetylhomoserine aminocarboxypropyltransferase activity']),\n ('GO:0003961', ['O-acetylhomoserine aminocarboxypropyltransferase activity']),\n ('GO:0003962', ['cystathionine gamma-synthase 
activity']),\n ('GO:0000505', ['cystathionine gamma-synthase activity']),\n ('GO:0052854', ['(S)-2-hydroxy-acid oxidase activity']),\n ('GO:0008891', ['(S)-2-hydroxy-acid oxidase activity']),\n ('GO:0052853', ['(S)-2-hydroxy-acid oxidase activity']),\n ('GO:0052852', ['(S)-2-hydroxy-acid oxidase activity']),\n ('GO:0003973', ['(S)-2-hydroxy-acid oxidase activity']),\n ('GO:0047318',\n ['N2-acetyl-L-ornithine:2-oxoglutarate 5-aminotransferase activity']),\n ('GO:0003992',\n ['N2-acetyl-L-ornithine:2-oxoglutarate 5-aminotransferase activity']),\n ('GO:0003994', ['aconitate hydratase activity']),\n ('GO:0052632', ['aconitate hydratase activity']),\n ('GO:0019109', ['acyl-CoA dehydrogenase activity']),\n ('GO:0003995', ['acyl-CoA dehydrogenase activity']),\n ('GO:0050250', ['aldehyde oxidase activity']),\n ('GO:0004031', ['aldehyde oxidase activity']),\n ('GO:0019851', ['aminoacyl-tRNA hydrolase activity']),\n ('GO:0019850', ['aminoacyl-tRNA hydrolase activity']),\n ('GO:0004045', ['aminoacyl-tRNA hydrolase activity']),\n ('GO:0042172', ['arginyl-tRNA--protein transferase activity']),\n ('GO:0004057', ['arginyl-tRNA--protein transferase activity']),\n ('GO:0004058', ['aromatic-L-amino-acid decarboxylase activity']),\n ('GO:0016400', ['aromatic-L-amino-acid decarboxylase activity']),\n ('GO:0004093', ['carnitine O-acetyltransferase activity']),\n ('GO:0004094', ['carnitine O-acetyltransferase activity']),\n ('GO:0004092', ['carnitine O-acetyltransferase activity']),\n ('GO:0004096', ['catalase activity']),\n ('GO:0016953', ['catalase activity']),\n ('GO:0016952', ['catalase activity']),\n ('GO:0036263', ['catechol oxidase activity']),\n ('GO:0004097', ['catechol oxidase activity']),\n ('GO:0036264', ['catechol oxidase activity']),\n ('GO:0102316', ['catechol oxidase activity']),\n ('GO:0004123', ['cystathionine gamma-lyase activity']),\n ('GO:0016225', ['cystathionine gamma-lyase activity']),\n ('GO:0008461', ['delta(3)-delta(2)-enoyl-CoA isomerase activity']),\n 
('GO:0004165', ['delta(3)-delta(2)-enoyl-CoA isomerase activity']),\n ('GO:0004101',\n ['dolichyl-phosphate alpha-N-acetylglucosaminyltransferase activity']),\n ('GO:0004166',\n ['dolichyl-phosphate alpha-N-acetylglucosaminyltransferase activity']),\n ('GO:0004167', ['dopachrome isomerase activity']),\n ('GO:0048059', ['dopachrome isomerase activity']),\n ('GO:0016809', ['endopeptidase activity']),\n ('GO:0004175', ['endopeptidase activity']),\n ('GO:0004280', ['ATP-dependent peptidase activity']),\n ('GO:0004176', ['ATP-dependent peptidase activity']),\n ('GO:0016510', ['enoyl-CoA hydratase activity']),\n ('GO:0004300', ['enoyl-CoA hydratase activity']),\n ('GO:0004315', ['3-oxoacyl-[acyl-carrier-protein] synthase activity']),\n ('GO:0033817', ['3-oxoacyl-[acyl-carrier-protein] synthase activity']),\n ('GO:0034566', ['formamidase activity']),\n ('GO:0004328', ['formamidase activity']),\n ('GO:0004330', ['fructose-2,6-bisphosphate 2-phosphatase activity']),\n ('GO:0004331', ['fructose-2,6-bisphosphate 2-phosphatase activity']),\n ('GO:0004344', ['glucose dehydrogenase activity']),\n ('GO:0008708', ['glucose dehydrogenase activity']),\n ('GO:0001513', ['glutamate-5-semialdehyde dehydrogenase activity']),\n ('GO:0004350', ['glutamate-5-semialdehyde dehydrogenase activity']),\n ('GO:0004376', ['glycolipid mannosyltransferase activity']),\n ('GO:0004580', ['glycolipid mannosyltransferase activity']),\n ('GO:0019106', ['glycylpeptide N-tetradecanoyltransferase activity']),\n ('GO:0004379', ['glycylpeptide N-tetradecanoyltransferase activity']),\n ('GO:0004386', ['helicase activity']),\n ('GO:0008026', ['helicase activity']),\n ('GO:0043166', ['histone acetyltransferase activity']),\n ('GO:0004406', ['histone acetyltransferase activity']),\n ('GO:0004404', ['histone acetyltransferase activity']),\n ('GO:0004403', ['histone acetyltransferase activity']),\n ('GO:0046971', ['histone acetyltransferase activity']),\n ('GO:0004402', ['histone acetyltransferase activity']),\n 
('GO:0004405', ['histone acetyltransferase activity']),\n ('GO:0042282', ['hydroxymethylglutaryl-CoA reductase (NADPH) activity']),\n ('GO:0004420', ['hydroxymethylglutaryl-CoA reductase (NADPH) activity']),\n ('GO:0016315', ['phosphatidylinositol-3-phosphate phosphatase activity']),\n ('GO:0004438', ['phosphatidylinositol-3-phosphate phosphatase activity']),\n ('GO:0001668',\n ['phosphatidylinositol-4,5-bisphosphate 5-phosphatase activity']),\n ('GO:0004439',\n ['phosphatidylinositol-4,5-bisphosphate 5-phosphatase activity']),\n ('GO:0004467', ['long-chain fatty acid-CoA ligase activity']),\n ('GO:0003996', ['long-chain fatty acid-CoA ligase activity']),\n ('GO:0004472', ['malate dehydrogenase (decarboxylating) (NAD+) activity']),\n ('GO:0004471', ['malate dehydrogenase (decarboxylating) (NAD+) activity']),\n ('GO:0016619', ['malate dehydrogenase (decarboxylating) (NAD+) activity']),\n ('GO:0070128', ['methionyl-tRNA formyltransferase activity']),\n ('GO:0001718', ['methionyl-tRNA formyltransferase activity']),\n ('GO:0004479', ['methionyl-tRNA formyltransferase activity']),\n ('GO:0008702', ['methylenetetrahydrofolate reductase (NAD(P)H) activity']),\n ('GO:0004489', ['methylenetetrahydrofolate reductase (NAD(P)H) activity']),\n ('GO:0004499', ['N,N-dimethylaniline monooxygenase activity']),\n ('GO:0047076', ['N,N-dimethylaniline monooxygenase activity']),\n ('GO:0004523', ['RNA-DNA hybrid ribonuclease activity']),\n ('GO:0004524', ['RNA-DNA hybrid ribonuclease activity']),\n ('GO:0004527', ['exonuclease activity']),\n ('GO:0008857', ['exonuclease activity']),\n ('GO:0004536', ['DNA nuclease activity']),\n ('GO:0004537', ['DNA nuclease activity']),\n ('GO:0016800', ['hydrolase activity, hydrolyzing O-glycosyl compounds']),\n ('GO:0004553', ['hydrolase activity, hydrolyzing O-glycosyl compounds']),\n ('GO:0004556', ['alpha-amylase activity']),\n ('GO:0103025', ['alpha-amylase activity']),\n ('GO:0016982', ['alpha-1,4-glucosidase activity']),\n ('GO:0004562', 
['alpha-1,4-glucosidase activity']),\n ('GO:0004558', ['alpha-1,4-glucosidase activity']),\n ('GO:0016686', ['peroxidase activity']),\n ('GO:0016687', ['peroxidase activity']),\n ('GO:0004601', ['peroxidase activity']),\n ('GO:0016685', ['peroxidase activity']),\n ('GO:0016693', ['peroxidase activity']),\n ('GO:0004602', ['glutathione peroxidase activity']),\n ('GO:0016224', ['glutathione peroxidase activity']),\n ('GO:0004615', ['phosphomannomutase activity']),\n ('GO:0008971', ['phosphomannomutase activity']),\n ('GO:0004622', ['lysophospholipase activity']),\n ('GO:0045126', ['lysophospholipase activity']),\n ('GO:0102567', ['phospholipase A2 activity']),\n ('GO:0102568', ['phospholipase A2 activity']),\n ('GO:0004623', ['phospholipase A2 activity']),\n ('GO:0042298', ['phospholipase C activity']),\n ('GO:0004629', ['phospholipase C activity']),\n ('GO:0004648',\n ['O-phospho-L-serine:2-oxoglutarate aminotransferase activity']),\n ('GO:0004646',\n ['O-phospho-L-serine:2-oxoglutarate aminotransferase activity']),\n ('GO:0018223', ['protein farnesyltransferase activity']),\n ('GO:0004660', ['protein farnesyltransferase activity']),\n ('GO:0018224', ['protein geranylgeranyltransferase activity']),\n ('GO:0004661', ['protein geranylgeranyltransferase activity']),\n ('GO:0004671',\n ['protein C-terminal S-isoprenylcysteine carboxyl O-methyltransferase activity']),\n ('GO:0018225',\n ['protein C-terminal S-isoprenylcysteine carboxyl O-methyltransferase activity']),\n ('GO:0050222', ['protein kinase activity']),\n ('GO:0004672', ['protein kinase activity']),\n ('GO:0008896', ['protein histidine kinase activity']),\n ('GO:0004673', ['protein histidine kinase activity']),\n ('GO:0004674', ['protein serine/threonine kinase activity']),\n ('GO:0004696', ['protein serine/threonine kinase activity']),\n ('GO:0106311', ['protein serine/threonine kinase activity']),\n ('GO:0004695', ['protein serine/threonine kinase activity']),\n ('GO:0004700', ['protein serine/threonine 
kinase activity']),\n ('GO:0004688', ['calmodulin-dependent protein kinase activity']),\n ('GO:0004685', ['calmodulin-dependent protein kinase activity']),\n ('GO:0004684', ['calmodulin-dependent protein kinase activity']),\n ('GO:0004683', ['calmodulin-dependent protein kinase activity']),\n ('GO:0004689', ['phosphorylase kinase activity']),\n ('GO:0008606', ['phosphorylase kinase activity']),\n ('GO:0008602', ['cAMP-dependent protein kinase activity']),\n ('GO:0004691', ['cAMP-dependent protein kinase activity']),\n ('GO:0016537', ['cyclin-dependent protein serine/threonine kinase activity']),\n ('GO:0004693', ['cyclin-dependent protein serine/threonine kinase activity']),\n ('GO:0004697', ['diacylglycerol-dependent serine/threonine kinase activity']),\n ('GO:0004701', ['diacylglycerol-dependent serine/threonine kinase activity']),\n ('GO:0004678', ['G protein-coupled receptor kinase activity']),\n ('GO:0004703', ['G protein-coupled receptor kinase activity']),\n ('GO:0016908', ['MAP kinase activity']),\n ('GO:0004707', ['MAP kinase activity']),\n ('GO:0008339', ['MAP kinase activity']),\n ('GO:0016909', ['MAP kinase activity']),\n ('GO:0008338', ['MAP kinase activity']),\n ('GO:0004710', ['MAP kinase kinase kinase activity']),\n ('GO:0004709', ['MAP kinase kinase kinase activity']),\n ('GO:0004713', ['protein tyrosine kinase activity']),\n ('GO:0004718', ['protein tyrosine kinase activity']),\n ('GO:0018056', ['protein-lysine 6-oxidase activity']),\n ('GO:0004720', ['protein-lysine 6-oxidase activity']),\n ('GO:0008598', ['protein serine/threonine phosphatase activity']),\n ('GO:0004722', ['protein serine/threonine phosphatase activity']),\n ('GO:0015071', ['protein serine/threonine phosphatase activity']),\n ('GO:0030360', ['protein serine/threonine phosphatase activity']),\n ('GO:0004724', ['protein serine/threonine phosphatase activity']),\n ('GO:0000163', ['protein serine/threonine phosphatase activity']),\n ('GO:0008600', ['protein serine/threonine 
phosphatase activity']),\n ('GO:0106307', ['protein serine/threonine phosphatase activity']),\n ('GO:0030361', ['protein serine/threonine phosphatase activity']),\n ('GO:0030357', ['protein serine/threonine phosphatase activity']),\n ('GO:0030358', ['protein serine/threonine phosphatase activity']),\n ('GO:0106306', ['protein serine/threonine phosphatase activity']),\n ('GO:0000158', ['protein serine/threonine phosphatase activity']),\n ('GO:0004723',\n ['calcium-dependent protein serine/threonine phosphatase activity']),\n ('GO:0008596',\n ['calcium-dependent protein serine/threonine phosphatase activity']),\n ('GO:0004741',\n ['[pyruvate dehydrogenase (acetyl-transferring)]-phosphatase activity']),\n ('GO:0019906',\n ['[pyruvate dehydrogenase (acetyl-transferring)]-phosphatase activity']),\n ('GO:0030523', ['dihydrolipoyllysine-residue acetyltransferase activity']),\n ('GO:0004742', ['dihydrolipoyllysine-residue acetyltransferase activity']),\n ('GO:0016959',\n ['ribonucleoside-diphosphate reductase activity, thioredoxin disulfide as acceptor']),\n ('GO:0016960',\n ['ribonucleoside-diphosphate reductase activity, thioredoxin disulfide as acceptor']),\n ('GO:0004748',\n ['ribonucleoside-diphosphate reductase activity, thioredoxin disulfide as acceptor']),\n ('GO:0016961',\n ['ribonucleoside-diphosphate reductase activity, thioredoxin disulfide as acceptor']),\n ('GO:0004763', ['serine-pyruvate transaminase activity']),\n ('GO:0004760', ['serine-pyruvate transaminase activity']),\n ('GO:0004762', ['serine-pyruvate transaminase activity']),\n ('GO:0004761', ['serine-pyruvate transaminase activity']),\n ('GO:0030230', ['sphingomyelin phosphodiesterase activity']),\n ('GO:0004767', ['sphingomyelin phosphodiesterase activity']),\n ('GO:0030231', ['sphingomyelin phosphodiesterase activity']),\n ('GO:0043735', ['stearoyl-CoA 9-desaturase activity']),\n ('GO:0004768', ['stearoyl-CoA 9-desaturase activity']),\n ('GO:0016214', ['stearoyl-CoA 9-desaturase activity']),\n 
('GO:0004772', ['sterol O-acyltransferase activity']),\n ('GO:0017066', ['sterol O-acyltransferase activity']),\n ('GO:0008952', ['succinate-semialdehyde dehydrogenase (NAD+) activity']),\n ('GO:0004777', ['succinate-semialdehyde dehydrogenase (NAD+) activity']),\n ('GO:0016954', ['superoxide dismutase activity']),\n ('GO:0008383', ['superoxide dismutase activity']),\n ('GO:0004785', ['superoxide dismutase activity']),\n ('GO:0004784', ['superoxide dismutase activity']),\n ('GO:0008382', ['superoxide dismutase activity']),\n ('GO:0004796', ['thromboxane-A synthase activity']),\n ('GO:0008400', ['thromboxane-A synthase activity']),\n ('GO:0004804', ['transposase activity']),\n ('GO:0004803', ['transposase activity']),\n ('GO:0004808',\n ['tRNA (5-methylaminomethyl-2-thiouridylate)(34)-methyltransferase activity']),\n ('GO:0016425',\n ['tRNA (5-methylaminomethyl-2-thiouridylate)(34)-methyltransferase activity']),\n ('GO:0004810', ['CCA tRNA nucleotidyltransferase activity']),\n ('GO:0016437', ['CCA tRNA nucleotidyltransferase activity']),\n ('GO:0004812', ['aminoacyl-tRNA ligase activity']),\n ('GO:0017100', ['aminoacyl-tRNA ligase activity']),\n ('GO:0016876', ['aminoacyl-tRNA ligase activity']),\n ('GO:0004833', ['tryptophan 2,3-dioxygenase activity']),\n ('GO:0004426', ['tryptophan 2,3-dioxygenase activity']),\n ('GO:0004842', ['ubiquitin-protein transferase activity']),\n ('GO:0004840', ['ubiquitin-protein transferase activity']),\n ('GO:0004841', ['ubiquitin-protein transferase activity']),\n ('GO:0036459', ['cysteine-type deubiquitinase activity']),\n ('GO:0004843', ['cysteine-type deubiquitinase activity']),\n ('GO:0004857', ['enzyme inhibitor activity']),\n ('GO:0048551', ['enzyme inhibitor activity']),\n ('GO:0004864', ['protein phosphatase inhibitor activity']),\n ('GO:1990681', ['protein phosphatase inhibitor activity']),\n ('GO:0004870', ['cysteine-type endopeptidase inhibitor activity']),\n ('GO:0004869', ['cysteine-type endopeptidase inhibitor 
activity']),\n ('GO:0004875', ['complement receptor activity']),\n ('GO:0004942', ['complement receptor activity']),\n ('GO:0004943', ['complement component C3a receptor activity']),\n ('GO:0004876', ['complement component C3a receptor activity']),\n ('GO:0004944', ['complement component C5a receptor activity']),\n ('GO:0004878', ['complement component C5a receptor activity']),\n ('GO:0003708', ['nuclear receptor activity']),\n ('GO:0038051', ['nuclear receptor activity']),\n ('GO:0004879', ['nuclear receptor activity']),\n ('GO:0004882', ['nuclear receptor activity']),\n ('GO:0004886', ['nuclear receptor activity']),\n ('GO:0038052', ['nuclear receptor activity']),\n ('GO:0004884', ['nuclear receptor activity']),\n ('GO:0008434', ['nuclear receptor activity']),\n ('GO:0004887', ['nuclear receptor activity']),\n ('GO:0038050', ['nuclear receptor activity']),\n ('GO:0004880', ['nuclear receptor activity']),\n ('GO:0004888', ['transmembrane signaling receptor activity']),\n ('GO:0099600', ['transmembrane signaling receptor activity']),\n ('GO:0004926', ['transmembrane signaling receptor activity']),\n ('GO:0004896', ['cytokine receptor activity']),\n ('GO:0004907', ['cytokine receptor activity']),\n ('GO:0030525',\n ['granulocyte macrophage colony-stimulating factor receptor activity']),\n ('GO:0004901',\n ['granulocyte macrophage colony-stimulating factor receptor activity']),\n ('GO:0030524', ['granulocyte colony-stimulating factor receptor activity']),\n ('GO:0004902', ['granulocyte colony-stimulating factor receptor activity']),\n ('GO:0019967', ['interleukin-1, type I, activating receptor activity']),\n ('GO:0004909', ['interleukin-1, type I, activating receptor activity']),\n ('GO:0004910', ['interleukin-1, type II, blocking receptor activity']),\n ('GO:0019968', ['interleukin-1, type II, blocking receptor activity']),\n ('GO:0004899', ['leukemia inhibitory factor receptor activity']),\n ('GO:0004923', ['leukemia inhibitory factor receptor activity']),\n 
('GO:0004930', ['G protein-coupled receptor activity']),\n ('GO:0016526', ['G protein-coupled receptor activity']),\n ('GO:0001622', ['G protein-coupled receptor activity']),\n ('GO:0001623', ['G protein-coupled receptor activity']),\n ('GO:0001625', ['G protein-coupled receptor activity']),\n ('GO:0001624', ['G protein-coupled receptor activity']),\n ('GO:0001599', ['endothelin receptor activity']),\n ('GO:0001600', ['endothelin receptor activity']),\n ('GO:0004962', ['endothelin receptor activity']),\n ('GO:0004976', ['luteinizing hormone receptor activity']),\n ('GO:0004964', ['luteinizing hormone receptor activity']),\n ('GO:0004993', ['G protein-coupled serotonin receptor activity']),\n ('GO:0016609', ['G protein-coupled serotonin receptor activity']),\n ('GO:0001585', ['G protein-coupled serotonin receptor activity']),\n ('GO:0005000', ['vasopressin receptor activity']),\n ('GO:0016931', ['vasopressin receptor activity']),\n ('GO:0005023', ['epidermal growth factor receptor activity']),\n ('GO:0005006', ['epidermal growth factor receptor activity']),\n ('GO:0036326', ['vascular endothelial growth factor receptor activity']),\n ('GO:0036329', ['vascular endothelial growth factor receptor activity']),\n ('GO:0036328', ['vascular endothelial growth factor receptor activity']),\n ('GO:0036327', ['vascular endothelial growth factor receptor activity']),\n ('GO:0036330', ['vascular endothelial growth factor receptor activity']),\n ('GO:0005021', ['vascular endothelial growth factor receptor activity']),\n ('GO:0005031', ['tumor necrosis factor receptor activity']),\n ('GO:0005032', ['tumor necrosis factor receptor activity']),\n ('GO:0005033', ['tumor necrosis factor receptor activity']),\n ('GO:0005041', ['low-density lipoprotein particle receptor activity']),\n ('GO:0008032', ['low-density lipoprotein particle receptor activity']),\n ('GO:0005048', ['signal sequence binding']),\n ('GO:0008249', ['signal sequence binding']),\n ('GO:0008262', ['nuclear export 
signal receptor activity']),\n ('GO:0005049', ['nuclear export signal receptor activity']),\n ('GO:0005069',\n ['transmembrane receptor protein tyrosine kinase adaptor activity']),\n ('GO:0005068',\n ['transmembrane receptor protein tyrosine kinase adaptor activity']),\n ('GO:0005080', ['protein kinase C binding']),\n ('GO:0072568', ['protein kinase C binding']),\n ('GO:0072569', ['protein kinase C binding']),\n ('GO:0097024', ['protein kinase C binding']),\n ('GO:0019839', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0017112', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0005089', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0008321', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0016220', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0005088', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0016219', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0008433', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0005087', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0005086', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0017034', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0017132', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0005085', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0005090', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0030676', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0005098', ['GTPase activator activity']),\n ('GO:0005101', ['GTPase activator activity']),\n ('GO:0005100', ['GTPase activator activity']),\n ('GO:0030675', ['GTPase activator activity']),\n ('GO:0017123', ['GTPase activator activity']),\n ('GO:0046582', ['GTPase activator activity']),\n ('GO:0005099', ['GTPase activator activity']),\n ('GO:0008060', ['GTPase activator activity']),\n ('GO:0005096', ['GTPase activator activity']),\n ('GO:0005097', ['GTPase activator activity']),\n ('GO:0005104', ['fibroblast growth factor receptor 
binding']),\n ('GO:0001521', ['fibroblast growth factor receptor binding']),\n ('GO:0005162', ['fibroblast growth factor receptor binding']),\n ('GO:0005109', ['frizzled binding']),\n ('GO:0005110', ['frizzled binding']),\n ('GO:0008185', ['epidermal growth factor receptor binding']),\n ('GO:0005154', ['epidermal growth factor receptor binding']),\n ('GO:0005159', ['insulin-like growth factor receptor binding']),\n ('GO:0005067', ['insulin-like growth factor receptor binding']),\n ('GO:0005478', ['transporter activity']),\n ('GO:0005215', ['transporter activity']),\n ('GO:0008095',\n ['inositol 1,4,5-trisphosphate-gated calcium channel activity']),\n ('GO:0005220',\n ['inositol 1,4,5-trisphosphate-gated calcium channel activity']),\n ('GO:0015285', ['gap junction channel activity']),\n ('GO:0005243', ['gap junction channel activity']),\n ('GO:0015286', ['gap junction channel activity']),\n ('GO:0005245', ['voltage-gated calcium channel activity']),\n ('GO:0015270', ['voltage-gated calcium channel activity']),\n ('GO:0010173', ['voltage-gated calcium channel activity']),\n ('GO:0005224', ['intracellularly ATP-gated chloride channel activity']),\n ('GO:0005260', ['intracellularly ATP-gated chloride channel activity']),\n ('GO:0005261', ['monoatomic cation channel activity']),\n ('GO:0015338', ['monoatomic cation channel activity']),\n ('GO:0015281', ['monoatomic cation channel activity']),\n ('GO:0015206', ['allantoin:proton symporter activity']),\n ('GO:0005274', ['allantoin:proton symporter activity']),\n ('GO:0005275', ['amine transmembrane transporter activity']),\n ('GO:0005279', ['amine transmembrane transporter activity']),\n ('GO:0005285', ['amino acid:sodium symporter activity']),\n ('GO:0005283', ['amino acid:sodium symporter activity']),\n ('GO:0005284', ['amino acid:sodium symporter activity']),\n ('GO:0005295', ['neutral L-amino acid:sodium symporter activity']),\n ('GO:0005282', ['neutral L-amino acid:sodium symporter activity']),\n ('GO:0005302', 
['L-tyrosine transmembrane transporter activity']),\n ('GO:0015508', ['L-tyrosine transmembrane transporter activity']),\n ('GO:0005312', ['dicarboxylic acid transmembrane transporter activity']),\n ('GO:0005310', ['dicarboxylic acid transmembrane transporter activity']),\n ('GO:0015365', ['dicarboxylic acid transmembrane transporter activity']),\n ('GO:1901677', ['phosphate transmembrane transporter activity']),\n ('GO:0005315', ['phosphate transmembrane transporter activity']),\n ('GO:0005317', ['phosphate transmembrane transporter activity']),\n ('GO:0005324', ['long-chain fatty acid transmembrane transporter activity']),\n ('GO:0008562', ['long-chain fatty acid transmembrane transporter activity']),\n ('GO:0005325', ['long-chain fatty acid transmembrane transporter activity']),\n ('GO:0005329', ['dopamine:sodium symporter activity']),\n ('GO:0005330', ['dopamine:sodium symporter activity']),\n ('GO:0005334', ['norepinephrine:sodium symporter activity']),\n ('GO:0005333', ['norepinephrine:sodium symporter activity']),\n ('GO:0005335', ['serotonin:sodium:chloride symporter activity']),\n ('GO:0005336', ['serotonin:sodium:chloride symporter activity']),\n ('GO:0015222', ['serotonin:sodium:chloride symporter activity']),\n ('GO:0005339', ['nucleotide-sugar transmembrane transporter activity']),\n ('GO:0005338', ['nucleotide-sugar transmembrane transporter activity']),\n ('GO:0005341', ['nucleotide-sulfate transmembrane transporter activity']),\n ('GO:0005340', ['nucleotide-sulfate transmembrane transporter activity']),\n ('GO:0005344', ['oxygen carrier activity']),\n ('GO:0015033', ['oxygen carrier activity']),\n ('GO:0005347', ['ATP transmembrane transporter activity']),\n ('GO:0005348', ['ATP transmembrane transporter activity']),\n ('GO:0015542', ['carbohydrate:proton symporter activity']),\n ('GO:0005351', ['carbohydrate:proton symporter activity']),\n ('GO:0005403', ['carbohydrate:proton symporter activity']),\n ('GO:0019192', ['fructose transmembrane 
transporter activity']),\n ('GO:0005353', ['fructose transmembrane transporter activity']),\n ('GO:0015585', ['fructose transmembrane transporter activity']),\n ('GO:0005355', ['glucose transmembrane transporter activity']),\n ('GO:0015579', ['glucose transmembrane transporter activity']),\n ('GO:0005356', ['glucose:proton symporter activity']),\n ('GO:0005361', ['glucose:proton symporter activity']),\n ('GO:0015581', ['maltose transmembrane transporter activity']),\n ('GO:0005363', ['maltose transmembrane transporter activity']),\n ('GO:0005371',\n ['tricarboxylate secondary active transmembrane transporter activity']),\n ('GO:0005370',\n ['tricarboxylate secondary active transmembrane transporter activity']),\n ('GO:0005380', ['copper ion transmembrane transporter activity']),\n ('GO:0005375', ['copper ion transmembrane transporter activity']),\n ('GO:0005378', ['copper ion transmembrane transporter activity']),\n ('GO:0015088', ['copper ion transmembrane transporter activity']),\n ('GO:0005379', ['copper ion transmembrane transporter activity']),\n ('GO:0097689', ['iron ion transmembrane transporter activity']),\n ('GO:0016033', ['iron ion transmembrane transporter activity']),\n ('GO:0005382', ['iron ion transmembrane transporter activity']),\n ('GO:0005381', ['iron ion transmembrane transporter activity']),\n ('GO:0005415', ['nucleoside:sodium symporter activity']),\n ('GO:0008522', ['nucleoside:sodium symporter activity']),\n ('GO:0015321', ['sodium:phosphate symporter activity']),\n ('GO:0005436', ['sodium:phosphate symporter activity']),\n ('GO:0005471', ['ATP:ADP antiporter activity']),\n ...]In\u00a0[19]: Copied!
[(term.ID,term.name) for term in go if not term.parents and term.children]\n[(term.ID,term.name) for term in go if not term.parents and term.children] Out[19]:
[('GO:0005554', ['molecular_function']),\n ('GO:0008372', ['cellular_component']),\n ('GO:0007582', ['biological_process'])]In\u00a0[20]: Copied!
go['GO:0005554'].__dict__\ngo['GO:0005554'].__dict__ Out[20]:
{'_ID': 'GO:0005554',\n '_original_ID': 'GO:0005554',\n '_container': <picea.ontology.Ontology at 0x7f3eae711150>,\n '_children': ['GO:0003774',\n 'GO:0003824',\n 'GO:0005198',\n 'GO:0005478',\n 'GO:0005215',\n 'GO:0005488',\n 'GO:0009055',\n 'GO:0009053',\n 'GO:0009054',\n 'GO:0016209',\n 'GO:0038024',\n 'GO:0044183',\n 'GO:0045182',\n 'GO:0045735',\n 'GO:0060089',\n 'GO:0032947',\n 'GO:0060090',\n 'GO:0090729',\n 'GO:0050827',\n 'GO:0098772',\n 'GO:0140104',\n 'GO:0140110',\n 'GO:0140223',\n 'GO:0140299',\n 'GO:0140313',\n 'GO:0140489',\n 'GO:0140522',\n 'GO:0140657',\n 'GO:0140691',\n 'GO:0140776',\n 'GO:0140777',\n 'GO:0140911',\n 'GO:0034291',\n 'GO:0034290',\n 'GO:0034292',\n 'GO:0140912',\n 'GO:0141047',\n 'GO:0180020',\n 'GO:0180024'],\n '_parents': [],\n 'name': ['molecular_function'],\n 'def': ['\"A molecular process that can be carried out by the action of a single macromolecular machine, usually via direct physical interactions with other molecular entities. Function in this sense denotes an action, or activity, that a gene product (or a complex) performs.\" [GOC:pdt]'],\n 'alt_id': ['GO:0003674'],\n 'namespace': ['molecular_function'],\n 'comment': [\"Note that, in addition to forming the root of the molecular function ontology, this term is recommended for the annotation of gene products whose molecular function is unknown. When this term is used for annotation, it indicates that no information was available about the molecular function of the gene product annotated as of the date the annotation was made; the evidence code 'no data' (ND), is used to indicate this. Despite its name, this is not a type of 'function' in the sense typically defined by upper ontologies such as Basic Formal Ontology (BFO). 
It is instead a BFO:process carried out by a single gene product or complex.\"],\n 'subset': ['goslim_candida',\n 'goslim_chembl',\n 'goslim_metagenomics',\n 'goslim_pir',\n 'goslim_plant',\n 'goslim_yeast'],\n 'synonym': ['\"molecular function\" EXACT []']}In\u00a0[21]: Copied!
go['GO:0003674'].__dict__\ngo['GO:0003674'].__dict__ Out[21]:
{'_ID': 'GO:0005554',\n '_original_ID': 'GO:0005554',\n '_container': <picea.ontology.Ontology at 0x7f3eae711150>,\n '_children': ['GO:0003774',\n 'GO:0003824',\n 'GO:0005198',\n 'GO:0005478',\n 'GO:0005215',\n 'GO:0005488',\n 'GO:0009055',\n 'GO:0009053',\n 'GO:0009054',\n 'GO:0016209',\n 'GO:0038024',\n 'GO:0044183',\n 'GO:0045182',\n 'GO:0045735',\n 'GO:0060089',\n 'GO:0032947',\n 'GO:0060090',\n 'GO:0090729',\n 'GO:0050827',\n 'GO:0098772',\n 'GO:0140104',\n 'GO:0140110',\n 'GO:0140223',\n 'GO:0140299',\n 'GO:0140313',\n 'GO:0140489',\n 'GO:0140522',\n 'GO:0140657',\n 'GO:0140691',\n 'GO:0140776',\n 'GO:0140777',\n 'GO:0140911',\n 'GO:0034291',\n 'GO:0034290',\n 'GO:0034292',\n 'GO:0140912',\n 'GO:0141047',\n 'GO:0180020',\n 'GO:0180024'],\n '_parents': [],\n 'name': ['molecular_function'],\n 'def': ['\"A molecular process that can be carried out by the action of a single macromolecular machine, usually via direct physical interactions with other molecular entities. Function in this sense denotes an action, or activity, that a gene product (or a complex) performs.\" [GOC:pdt]'],\n 'alt_id': ['GO:0003674'],\n 'namespace': ['molecular_function'],\n 'comment': [\"Note that, in addition to forming the root of the molecular function ontology, this term is recommended for the annotation of gene products whose molecular function is unknown. When this term is used for annotation, it indicates that no information was available about the molecular function of the gene product annotated as of the date the annotation was made; the evidence code 'no data' (ND), is used to indicate this. Despite its name, this is not a type of 'function' in the sense typically defined by upper ontologies such as Basic Formal Ontology (BFO). 
It is instead a BFO:process carried out by a single gene product or complex.\"],\n 'subset': ['goslim_candida',\n 'goslim_chembl',\n 'goslim_metagenomics',\n 'goslim_pir',\n 'goslim_plant',\n 'goslim_yeast'],\n 'synonym': ['\"molecular function\" EXACT []']}"},{"location":"examples/sequence_annotation/","title":"Sequence annotation","text":"In\u00a0[1]: Copied!
import sys\nsys.path.insert(0, '../../')\nimport picea\nfrom picea import SequenceAnnotation\npicea.__version__\nimport sys sys.path.insert(0, '../../') import picea from picea import SequenceAnnotation picea.__version__ Out[1]:
'0.0.26'In\u00a0[2]: Copied!
gff3 = (\n # '##gff-version 3.1.26\\n'\n # '##sequence-region ctg123 1 1497228\\n'\n 'ctg123\\t.\\tgene\\t1000\\t9000\\t.\\t+\\t.\\tID=gene00001;Name=EDEN\\n'\n 'ctg123\\t.\\tTF_binding_site\\t1000\\t1012\\t.\\t+\\t.\\tID=tfbs00001;Parent=gene00001\\n' # noqa\n 'ctg123\\t.\\tmRNA\\t1050\\t9000\\t.\\t+\\t.\\tID=mRNA00001;Parent=gene00001;Name=EDEN.1\\n' # noqa\n 'ctg123\\t.\\tmRNA\\t1050\\t9000\\t.\\t+\\t.\\tID=mRNA00002;Parent=gene00001;Name=EDEN.2\\n' # noqa\n 'ctg123\\t.\\tmRNA\\t1300\\t9000\\t.\\t+\\t.\\tID=mRNA00003;Parent=gene00001;Name=EDEN.3\\n' # noqa\n 'ctg123\\t.\\texon\\t1300\\t1500\\t.\\t+\\t.\\tID=exon00001;Parent=mRNA00003\\n'\n 'ctg123\\t.\\texon\\t1050\\t1500\\t.\\t+\\t.\\tID=exon00002;Parent=mRNA00001,mRNA00002\\n' # noqa\n 'ctg123\\t.\\texon\\t3000\\t3902\\t.\\t+\\t.\\tID=exon00003;Parent=mRNA00001,mRNA00003\\n' # noqa\n 'ctg123\\t.\\texon\\t5000\\t5500\\t.\\t+\\t.\\tID=exon00004;Parent=mRNA00001,mRNA00002,mRNA00003\\n' # noqa\n 'ctg123\\t.\\texon\\t7000\\t9000\\t.\\t+\\t.\\tID=exon00005;Parent=mRNA00001,mRNA00002,mRNA00003\\n' # noqa\n 'ctg123\\t.\\tCDS\\t1201\\t1500\\t.\\t+\\t0\\tID=cds00001.1;Parent=mRNA00001;Name=edenprotein.1\\n' # noqa\n 'ctg123\\t.\\tCDS\\t3000\\t3902\\t.\\t+\\t0\\tID=cds00001.2;Parent=mRNA00001;Name=edenprotein.1\\n' # noqa\n 'ctg123\\t.\\tCDS\\t5000\\t5500\\t.\\t+\\t0\\tID=cds00001.3;Parent=mRNA00001;Name=edenprotein.1\\n' # noqa\n 'ctg123\\t.\\tCDS\\t7000\\t7600\\t.\\t+\\t0\\tID=cds00001.4;Parent=mRNA00001;Name=edenprotein.1\\n' # noqa\n 'ctg123\\t.\\tCDS\\t1201\\t1500\\t.\\t+\\t0\\tID=cds00002.1;Parent=mRNA00002;Name=edenprotein.2\\n' # noqa\n 'ctg123\\t.\\tCDS\\t5000\\t5500\\t.\\t+\\t0\\tID=cds00002.2;Parent=mRNA00002;Name=edenprotein.2\\n' # noqa\n 'ctg123\\t.\\tCDS\\t7000\\t7600\\t.\\t+\\t0\\tID=cds00002.3;Parent=mRNA00002;Name=edenprotein.2\\n' # noqa\n 'ctg123\\t.\\tCDS\\t3301\\t3902\\t.\\t+\\t0\\tID=cds00003.1;Parent=mRNA00003;Name=edenprotein.3\\n' # noqa\n 
'ctg123\\t.\\tCDS\\t5000\\t5500\\t.\\t+\\t1\\tID=cds00003.2;Parent=mRNA00003;Name=edenprotein.3\\n' # noqa\n 'ctg123\\t.\\tCDS\\t7000\\t7600\\t.\\t+\\t1\\tID=cds00003.3;Parent=mRNA00003;Name=edenprotein.3\\n' # noqa\n 'ctg123\\t.\\tCDS\\t3391\\t3902\\t.\\t+\\t0\\tID=cds00004.1;Parent=mRNA00003;Name=edenprotein.4\\n' # noqa\n 'ctg123\\t.\\tCDS\\t5000\\t5500\\t.\\t+\\t1\\tID=cds00004.2;Parent=mRNA00003;Name=edenprotein.4\\n' # noqa\n 'ctg123\\t.\\tCDS\\t7000\\t7600\\t.\\t+\\t1\\tID=cds00004.3;Parent=mRNA00003;Name=edenprotein.4\\n' # noqa\n)\n\nann = SequenceAnnotation.from_gff(string=gff3)\nann['mRNA00003'].parents.elements\ngff3 = ( # '##gff-version 3.1.26\\n' # '##sequence-region ctg123 1 1497228\\n' 'ctg123\\t.\\tgene\\t1000\\t9000\\t.\\t+\\t.\\tID=gene00001;Name=EDEN\\n' 'ctg123\\t.\\tTF_binding_site\\t1000\\t1012\\t.\\t+\\t.\\tID=tfbs00001;Parent=gene00001\\n' # noqa 'ctg123\\t.\\tmRNA\\t1050\\t9000\\t.\\t+\\t.\\tID=mRNA00001;Parent=gene00001;Name=EDEN.1\\n' # noqa 'ctg123\\t.\\tmRNA\\t1050\\t9000\\t.\\t+\\t.\\tID=mRNA00002;Parent=gene00001;Name=EDEN.2\\n' # noqa 'ctg123\\t.\\tmRNA\\t1300\\t9000\\t.\\t+\\t.\\tID=mRNA00003;Parent=gene00001;Name=EDEN.3\\n' # noqa 'ctg123\\t.\\texon\\t1300\\t1500\\t.\\t+\\t.\\tID=exon00001;Parent=mRNA00003\\n' 'ctg123\\t.\\texon\\t1050\\t1500\\t.\\t+\\t.\\tID=exon00002;Parent=mRNA00001,mRNA00002\\n' # noqa 'ctg123\\t.\\texon\\t3000\\t3902\\t.\\t+\\t.\\tID=exon00003;Parent=mRNA00001,mRNA00003\\n' # noqa 'ctg123\\t.\\texon\\t5000\\t5500\\t.\\t+\\t.\\tID=exon00004;Parent=mRNA00001,mRNA00002,mRNA00003\\n' # noqa 'ctg123\\t.\\texon\\t7000\\t9000\\t.\\t+\\t.\\tID=exon00005;Parent=mRNA00001,mRNA00002,mRNA00003\\n' # noqa 'ctg123\\t.\\tCDS\\t1201\\t1500\\t.\\t+\\t0\\tID=cds00001.1;Parent=mRNA00001;Name=edenprotein.1\\n' # noqa 'ctg123\\t.\\tCDS\\t3000\\t3902\\t.\\t+\\t0\\tID=cds00001.2;Parent=mRNA00001;Name=edenprotein.1\\n' # noqa 'ctg123\\t.\\tCDS\\t5000\\t5500\\t.\\t+\\t0\\tID=cds00001.3;Parent=mRNA00001;Name=edenprotein.1\\n' # noqa 
'ctg123\\t.\\tCDS\\t7000\\t7600\\t.\\t+\\t0\\tID=cds00001.4;Parent=mRNA00001;Name=edenprotein.1\\n' # noqa 'ctg123\\t.\\tCDS\\t1201\\t1500\\t.\\t+\\t0\\tID=cds00002.1;Parent=mRNA00002;Name=edenprotein.2\\n' # noqa 'ctg123\\t.\\tCDS\\t5000\\t5500\\t.\\t+\\t0\\tID=cds00002.2;Parent=mRNA00002;Name=edenprotein.2\\n' # noqa 'ctg123\\t.\\tCDS\\t7000\\t7600\\t.\\t+\\t0\\tID=cds00002.3;Parent=mRNA00002;Name=edenprotein.2\\n' # noqa 'ctg123\\t.\\tCDS\\t3301\\t3902\\t.\\t+\\t0\\tID=cds00003.1;Parent=mRNA00003;Name=edenprotein.3\\n' # noqa 'ctg123\\t.\\tCDS\\t5000\\t5500\\t.\\t+\\t1\\tID=cds00003.2;Parent=mRNA00003;Name=edenprotein.3\\n' # noqa 'ctg123\\t.\\tCDS\\t7000\\t7600\\t.\\t+\\t1\\tID=cds00003.3;Parent=mRNA00003;Name=edenprotein.3\\n' # noqa 'ctg123\\t.\\tCDS\\t3391\\t3902\\t.\\t+\\t0\\tID=cds00004.1;Parent=mRNA00003;Name=edenprotein.4\\n' # noqa 'ctg123\\t.\\tCDS\\t5000\\t5500\\t.\\t+\\t1\\tID=cds00004.2;Parent=mRNA00003;Name=edenprotein.4\\n' # noqa 'ctg123\\t.\\tCDS\\t7000\\t7600\\t.\\t+\\t1\\tID=cds00004.3;Parent=mRNA00003;Name=edenprotein.4\\n' # noqa ) ann = SequenceAnnotation.from_gff(string=gff3) ann['mRNA00003'].parents.elements Out[2]:
[<SequenceInterval type=gene ID=gene00001 loc=ctg123..1000..9000..+ at 0x7f7c3c64f400>]In\u00a0[3]: Copied!
ann['mRNA00003'].children.elements\nann['mRNA00003'].children.elements Out[3]:
[<SequenceInterval type=exon ID=exon00001 loc=ctg123..1300..1500..+ at 0x7f7c3c64f0a0>,\n <SequenceInterval type=exon ID=exon00003 loc=ctg123..3000..3902..+ at 0x7f7c3c64ef50>,\n <SequenceInterval type=exon ID=exon00004 loc=ctg123..5000..5500..+ at 0x7f7c3c64eef0>,\n <SequenceInterval type=exon ID=exon00005 loc=ctg123..7000..9000..+ at 0x7f7c3c64eec0>,\n <SequenceInterval type=CDS ID=cds00003.1 loc=ctg123..3301..3902..+ at 0x7f7c3c64e650>,\n <SequenceInterval type=CDS ID=cds00003.2 loc=ctg123..5000..5500..+ at 0x7f7c3c64e680>,\n <SequenceInterval type=CDS ID=cds00003.3 loc=ctg123..7000..7600..+ at 0x7f7c3c64e8f0>,\n <SequenceInterval type=CDS ID=cds00004.1 loc=ctg123..3391..3902..+ at 0x7f7c3c64f6a0>,\n <SequenceInterval type=CDS ID=cds00004.2 loc=ctg123..5000..5500..+ at 0x7f7c3c64f100>,\n <SequenceInterval type=CDS ID=cds00004.3 loc=ctg123..7000..7600..+ at 0x7f7c3c64ef80>]In\u00a0[4]: Copied!
ann['cds00004.3'].gff_attributes\nann['cds00004.3'].gff_attributes Out[4]:
{'name': ['edenprotein.4'], 'ID': ['cds00004.3'], 'Parent': ['mRNA00003']}In\u00a0[5]: Copied!
ann['cds00004.3'].to_gff_line()\nann['cds00004.3'].to_gff_line() Out[5]:
'ctg123\\t.\\tCDS\\t7000\\t7600\\t.\\t+\\t1\\tID=cds00004.3;Parent=mRNA00003;Name=edenprotein.4'"},{"location":"examples/sequence_interval/","title":"Sequence interval","text":"In\u00a0[1]: Copied!
import sys\nsys.path.insert(0, '../../')\nimport picea\nfrom picea import SequenceInterval\npicea.__version__\nimport sys sys.path.insert(0, '../../') import picea from picea import SequenceInterval picea.__version__ Out[1]:
'0.0.26'In\u00a0[2]: Copied!
interval = SequenceInterval.from_gff_line('ctg123\\t.\\tgene\\t1000\\t9000\\t.\\t+\\t.\\tID=gene00001;Name=EDEN')\ninterval\ninterval = SequenceInterval.from_gff_line('ctg123\\t.\\tgene\\t1000\\t9000\\t.\\t+\\t.\\tID=gene00001;Name=EDEN') interval Out[2]:
<SequenceInterval type=gene ID=gene00001 loc=ctg123..1000..9000..+ at 0x7f864c1400d0>In\u00a0[3]: Copied!
interval.to_gff_line()\ninterval.to_gff_line() Out[3]:
'ctg123\\t.\\tgene\\t1000\\t9000\\t.\\t+\\t.\\tID=gene00001;Name=EDEN'In\u00a0[4]: Copied!
interval['parent']\ninterval['parent']
\n---------------------------------------------------------------------------\nKeyError Traceback (most recent call last)\nCell In[4], line 1\n----> 1 interval['parent']\n\nFile ~/work/picea/picea/docs/examples/../../picea/dag.py:36, in DAGElement.__getitem__(self, key)\n 35 def __getitem__(self, key):\n---> 36 return self.__dict__[key]\n\nKeyError: 'parent'"},{"location":"examples/tree/","title":"Tree","text":"In\u00a0[1]: Copied!
import sys\nsys.path.insert(0, '../../')\nimport picea\nfrom picea import Tree, treeplot\nfrom matplotlib import pyplot as plt\npicea.__version__\nimport sys sys.path.insert(0, '../../') import picea from picea import Tree, treeplot from matplotlib import pyplot as plt picea.__version__ Out[1]:
'0.0.26'In\u00a0[2]: Copied!
!pwd\n!pwd
/home/runner/work/picea/picea/docs/examples\r\nIn\u00a0[3]: Copied!
tree = Tree.from_newick(filename='./data/tree.newick')\n\nfig, [[ax1,ax2],[ax3,ax4]] = plt.subplots(ncols=2, nrows=2, figsize=(20,20))\n\ntreeplot(tree, style='square', ax=ax1)\ntreeplot(tree, style='triangular', ltr=False, ax=ax2)\ntreeplot(tree, style='square', branchlengths=False, ax=ax3)\ntreeplot(tree, style='radial', ax=ax4)\ntree = Tree.from_newick(filename='./data/tree.newick') fig, [[ax1,ax2],[ax3,ax4]] = plt.subplots(ncols=2, nrows=2, figsize=(20,20)) treeplot(tree, style='square', ax=ax1) treeplot(tree, style='triangular', ltr=False, ax=ax2) treeplot(tree, style='square', branchlengths=False, ax=ax3) treeplot(tree, style='radial', ax=ax4)
/home/runner/work/picea/picea/docs/examples/../../picea/tree.py:177: UserWarning: Found branchlengths on some parts of the tree, but node 0 has no branchlength specified, setting to branchlength 0.0\n warn(\nOut[3]:
<Axes: >In\u00a0[4]: Copied!
0.4 / 25\n0.4 / 25 Out[4]:
0.016In\u00a0[5]: Copied!
x_min,x_max = ax3.get_xlim()\nx_max - x_min, .1 * (x_max - x_min), (x_min,x_max)\nx_min,x_max = ax3.get_xlim() x_max - x_min, .1 * (x_max - x_min), (x_min,x_max) Out[5]:
(25.09, 2.5090000000000003, (-0.52, 24.57))In\u00a0[6]: Copied!
from sklearn.cluster import AgglomerativeClustering\nimport numpy as np\nX = np.array([[1, 2], [1, 4], [1, 0],\n [4, 2], [4, 4], [4, 0]])\nclustering = AgglomerativeClustering().fit(X)\nclustering.labels_\nfrom sklearn.cluster import AgglomerativeClustering import numpy as np X = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [4, 4], [4, 0]]) clustering = AgglomerativeClustering().fit(X) clustering.labels_ Out[6]:
array([1, 1, 1, 0, 0, 0])In\u00a0[7]: Copied!
tree = Tree(children=[Tree(),Tree()])\nfor t in tree.depth_first():\n print(t.ID,t.name)\ntree = Tree(children=[Tree(),Tree()]) for t in tree.depth_first(): print(t.ID,t.name)
None None\nNone None\nNone None\nIn\u00a0[8]: Copied!
t.iloc[None]\nt.iloc[None] Out[8]:
Tree(name=None, length=None, children=[])In\u00a0[9]: Copied!
tree = Tree.from_sklearn(clustering)\ntree.to_newick(branch_lengths=False)\ntree = Tree.from_sklearn(clustering) tree.to_newick(branch_lengths=False) Out[9]:
'((2,(0,1)),(4,(3,5)));'In\u00a0[10]: Copied!
Tree(**tree.to_dict())\nTree(**tree.to_dict()) Out[10]:
Tree(name=None, length=None, children=[{'name': None, 'length': None, 'children': [{'name': '2', 'length': None, 'children': []}, {'name': None, 'length': None, 'children': [{'name': '0', 'length': None, 'children': []}, {'name': '1', 'length': None, 'children': []}]}]}, {'name': None, 'length': None, 'children': [{'name': '4', 'length': None, 'children': []}, {'name': None, 'length': None, 'children': [{'name': '3', 'length': None, 'children': []}, {'name': '5', 'length': None, 'children': []}]}]}])In\u00a0[11]: Copied!
tree.iloc[1].name = 'long name'\ntree.iloc[1].name = 'long name' In\u00a0[12]: Copied!
print(tree.to_json(indent=2))\nprint(tree.to_json(indent=2))
{\n \"name\": null,\n \"length\": null,\n \"children\": [\n {\n \"name\": null,\n \"length\": null,\n \"children\": [\n {\n \"name\": \"2\",\n \"length\": null,\n \"children\": []\n },\n {\n \"name\": null,\n \"length\": null,\n \"children\": [\n {\n \"name\": \"0\",\n \"length\": null,\n \"children\": []\n },\n {\n \"name\": \"long name\",\n \"length\": null,\n \"children\": []\n }\n ]\n }\n ]\n },\n {\n \"name\": null,\n \"length\": null,\n \"children\": [\n {\n \"name\": \"4\",\n \"length\": null,\n \"children\": []\n },\n {\n \"name\": null,\n \"length\": null,\n \"children\": [\n {\n \"name\": \"3\",\n \"length\": null,\n \"children\": []\n },\n {\n \"name\": \"5\",\n \"length\": null,\n \"children\": []\n }\n ]\n }\n ]\n }\n ]\n}\nIn\u00a0[13]: Copied!
from matplotlib import pyplot as plt\nfig, [ax1, ax2, ax3] = plt.subplots(ncols=3,figsize=(15, 5))\n\npicea.treeplot(tree, style='radial', ltr=False, ax=ax1)\npicea.treeplot(tree, style='square', ltr=True, ax=ax2)\npicea.treeplot(tree, style='triangular', ltr=False, ax=ax3)\n\nfor ax in (ax1,ax2,ax3):\n ax.scatter((0,0),(0,0),c='red')\nfrom matplotlib import pyplot as plt fig, [ax1, ax2, ax3] = plt.subplots(ncols=3,figsize=(15, 5)) picea.treeplot(tree, style='radial', ltr=False, ax=ax1) picea.treeplot(tree, style='square', ltr=True, ax=ax2) picea.treeplot(tree, style='triangular', ltr=False, ax=ax3) for ax in (ax1,ax2,ax3): ax.scatter((0,0),(0,0),c='red')
\n---------------------------------------------------------------------------\nTypeError Traceback (most recent call last)\nCell In[13], line 4\n 1 from matplotlib import pyplot as plt\n 2 fig, [ax1, ax2, ax3] = plt.subplots(ncols=3,figsize=(15, 5))\n----> 4 picea.treeplot(tree, style='radial', ltr=False, ax=ax1)\n 5 picea.treeplot(tree, style='square', ltr=True, ax=ax2)\n 6 picea.treeplot(tree, style='triangular', ltr=False, ax=ax3)\n\nFile ~/work/picea/picea/docs/examples/../../picea/tree.py:475, in treeplot(tree, style, branchlengths, ltr, node_labels, leaf_labels, leaf_marker, leaf_marker_fill, leaf_marker_edge, branch_linestyle, ax, return_layout)\n 437 def treeplot(\n 438 tree: Tree,\n 439 style: TreeStyle = TreeStyle.square,\n (...)\n 449 return_layout: bool = False,\n 450 ) -> Union[Ax, Tuple[Ax, LayoutDict]]:\n 451 \"\"\"[summary]\n 452 \n 453 Args:\n (...)\n 473 Union[Ax, Tuple[Ax, LayoutDict]]: [description]\n 474 \"\"\"\n--> 475 layout = calculate_tree_layout(tree=tree, style=style, ltr=ltr, branchlengths=branchlengths)\n 477 if not ax:\n 478 _, ax = plt.subplots(figsize=(6, 6))\n\nFile ~/work/picea/picea/docs/examples/../../picea/tree.py:417, in calculate_tree_layout(tree, style, ltr, branchlengths)\n 415 node_coords.x = increment + max(child_x_coords)\n 416 else:\n--> 417 node_coords.x = min(child_x_coords) - increment\n 418 else:\n 419 if previous_node:\n\nTypeError: '<' not supported between instances of 'NoneType' and 'NoneType'In\u00a0[14]: Copied!
fig, ax = plt.subplots(figsize=(10, 10))\npicea.treeplot(tree, style='radial', ax=ax)\n\nax.scatter((0,0),(0,0),c='red')\nfig, ax = plt.subplots(figsize=(10, 10)) picea.treeplot(tree, style='radial', ax=ax) ax.scatter((0,0),(0,0),c='red')
\n---------------------------------------------------------------------------\nTypeError Traceback (most recent call last)\nCell In[14], line 2\n 1 fig, ax = plt.subplots(figsize=(10, 10))\n----> 2 picea.treeplot(tree, style='radial', ax=ax)\n 4 ax.scatter((0,0),(0,0),c='red')\n\nFile ~/work/picea/picea/docs/examples/../../picea/tree.py:475, in treeplot(tree, style, branchlengths, ltr, node_labels, leaf_labels, leaf_marker, leaf_marker_fill, leaf_marker_edge, branch_linestyle, ax, return_layout)\n 437 def treeplot(\n 438 tree: Tree,\n 439 style: TreeStyle = TreeStyle.square,\n (...)\n 449 return_layout: bool = False,\n 450 ) -> Union[Ax, Tuple[Ax, LayoutDict]]:\n 451 \"\"\"[summary]\n 452 \n 453 Args:\n (...)\n 473 Union[Ax, Tuple[Ax, LayoutDict]]: [description]\n 474 \"\"\"\n--> 475 layout = calculate_tree_layout(tree=tree, style=style, ltr=ltr, branchlengths=branchlengths)\n 477 if not ax:\n 478 _, ax = plt.subplots(figsize=(6, 6))\n\nFile ~/work/picea/picea/docs/examples/../../picea/tree.py:415, in calculate_tree_layout(tree, style, ltr, branchlengths)\n 413 increment = node.length if branchlengths else 1.0\n 414 if ltr:\n--> 415 node_coords.x = increment + max(child_x_coords)\n 416 else:\n 417 node_coords.x = min(child_x_coords) - increment\n\nTypeError: '>' not supported between instances of 'NoneType' and 'NoneType'In\u00a0[15]: Copied!
import numpy as np\nfrom dataclasses import dataclass\n\n@dataclass\nclass TwoDCoordinate():\n x: float = 0.0\n y: float = 0.0\n \n def __iter__(self):\n yield from (self.x, self.y)\n \n def to_polar(self):\n return TwoDCoordinate(\n x = self.x * np.cos(self.y), \n y = self.x * np.sin(self.y)\n )\n \n def to_cartesian(self):\n return TwoDCoordinate(\n x = np.sqrt(self.x ** 2 + self.y ** 2),\n y = np.arctan2(self.y, self.x)\n )\n\nc = TwoDCoordinate(x=1, y=1)\n\nc\nimport numpy as np from dataclasses import dataclass @dataclass class TwoDCoordinate(): x: float = 0.0 y: float = 0.0 def __iter__(self): yield from (self.x, self.y) def to_polar(self): return TwoDCoordinate( x = self.x * np.cos(self.y), y = self.x * np.sin(self.y) ) def to_cartesian(self): return TwoDCoordinate( x = np.sqrt(self.x ** 2 + self.y ** 2), y = np.arctan2(self.y, self.x) ) c = TwoDCoordinate(x=1, y=1) c Out[15]:
TwoDCoordinate(x=1, y=1)In\u00a0[16]: Copied!
c.to_cartesian().to_polar()\nc.to_cartesian().to_polar() Out[16]:
TwoDCoordinate(x=1.0000000000000002, y=1.0)In\u00a0[17]: Copied!
c.to_polar().to_cartesian()\nc.to_polar().to_cartesian() Out[17]:
TwoDCoordinate(x=1.0, y=1.0)In\u00a0[18]: Copied!
grid = np.array([\n [TwoDCoordinate(x,y) for x in np.arange(0, 1.2, .2)] \n for y in np.arange(0, np.pi, .1)\n]).flatten()\n\nfig,[ax1,ax2] = plt.subplots(ncols=2, figsize=(20,5))\n\n\nax1.scatter(*zip(*[[*p] for p in grid]))\nax2.scatter(*zip(*[[*p.to_polar()] for p in grid]))\n\npoints = np.array([TwoDCoordinate(x, x*2) for x in np.arange(0., 1.05, .05)])\n\nax1.scatter(*zip(*[[*p] for p in points]))\nax2.scatter(*zip(*[[*p.to_polar()] for p in points]))\ngrid = np.array([ [TwoDCoordinate(x,y) for x in np.arange(0, 1.2, .2)] for y in np.arange(0, np.pi, .1) ]).flatten() fig,[ax1,ax2] = plt.subplots(ncols=2, figsize=(20,5)) ax1.scatter(*zip(*[[*p] for p in grid])) ax2.scatter(*zip(*[[*p.to_polar()] for p in grid])) points = np.array([TwoDCoordinate(x, x*2) for x in np.arange(0., 1.05, .05)]) ax1.scatter(*zip(*[[*p] for p in points])) ax2.scatter(*zip(*[[*p.to_polar()] for p in points])) Out[18]:
<matplotlib.collections.PathCollection at 0x7ff61fa01900>In\u00a0[19]: Copied!
seq = picea.SequenceCollection.from_fasta(filename='./data/HCT.fasta')\n_msa = seq.align()\nmsa = _msa._collection\nmsa.shape\nseq = picea.SequenceCollection.from_fasta(filename='./data/HCT.fasta') _msa = seq.align() msa = _msa._collection msa.shape
\n---------------------------------------------------------------------------\nFileNotFoundError Traceback (most recent call last)\nCell In[19], line 2\n 1 seq = picea.SequenceCollection.from_fasta(filename='./data/HCT.fasta')\n----> 2 _msa = seq.align()\n 3 msa = _msa._collection\n 4 msa.shape\n\nFile ~/work/picea/picea/docs/examples/../../picea/sequence.py:1518, in SequenceCollection.align(self, method, method_kwargs)\n 1516 fasta = self.to_fasta()\n 1517 command = [method, *chain(*method_kwargs.items()), \"-\"]\n-> 1518 process = Popen(command, stdin=PIPE, stdout=PIPE, stderr=PIPE)\n 1519 stdout, _ = process.communicate(input=fasta.encode())\n 1520 aligned_fasta = stdout.decode().strip()\n\nFile /usr/lib/python3.10/subprocess.py:971, in Popen.__init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, user, group, extra_groups, encoding, errors, text, umask, pipesize)\n 967 if self.text_mode:\n 968 self.stderr = io.TextIOWrapper(self.stderr,\n 969 encoding=encoding, errors=errors)\n--> 971 self._execute_child(args, executable, preexec_fn, close_fds,\n 972 pass_fds, cwd, env,\n 973 startupinfo, creationflags, shell,\n 974 p2cread, p2cwrite,\n 975 c2pread, c2pwrite,\n 976 errread, errwrite,\n 977 restore_signals,\n 978 gid, gids, uid, umask,\n 979 start_new_session)\n 980 except:\n 981 # Cleanup if the child failed starting.\n 982 for f in filter(None, (self.stdin, self.stdout, self.stderr)):\n\nFile /usr/lib/python3.10/subprocess.py:1863, in Popen._execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, restore_signals, gid, gids, uid, umask, start_new_session)\n 1861 if errno_num != 0:\n 1862 err_msg = os.strerror(errno_num)\n-> 1863 raise child_exception_type(errno_num, err_msg, err_filename)\n 1864 raise 
child_exception_type(err_msg)\n\nFileNotFoundError: [Errno 2] No such file or directory: 'mafft'In\u00a0[20]: Copied!
import numpy as np\nnp.sum(v_equals(msa[...,None], msa.T[None,...]),axis=1)\nimport numpy as np np.sum(v_equals(msa[...,None], msa.T[None,...]),axis=1)
\n---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\nCell In[20], line 2\n 1 import numpy as np\n----> 2 np.sum(v_equals(msa[...,None], msa.T[None,...]),axis=1)\n\nNameError: name 'v_equals' is not definedIn\u00a0[21]: Copied!
np.sum(np.equal(msa[...,np.newaxis], msa.T[np.newaxis,...]),axis=1)\nnp.sum(np.equal(msa[...,np.newaxis], msa.T[np.newaxis,...]),axis=1)
\n---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\nCell In[21], line 1\n----> 1 np.sum(np.equal(msa[...,np.newaxis], msa.T[np.newaxis,...]),axis=1)\n\nNameError: name 'msa' is not definedIn\u00a0[22]: Copied!
np.equal(msa[...,np.newaxis], msa.T[np.newaxis,...]).shape\nnp.equal(msa[...,np.newaxis], msa.T[np.newaxis,...]).shape
\n---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\nCell In[22], line 1\n----> 1 np.equal(msa[...,np.newaxis], msa.T[np.newaxis,...]).shape\n\nNameError: name 'msa' is not definedIn\u00a0[23]: Copied!
msa[...,np.newaxis].shape, msa.T[np.newaxis,...].shape\nmsa[...,np.newaxis].shape, msa.T[np.newaxis,...].shape
\n---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\nCell In[23], line 1\n----> 1 msa[...,np.newaxis].shape, msa.T[np.newaxis,...].shape\n\nNameError: name 'msa' is not definedIn\u00a0[24]: Copied!
#a = np.random.rand(3,4)\n#b = a.T\n\na = msa\nb = msa.T\n\ndef equals(x,y):\n return x == y\n\ndef lt(x,y):\n return x<y\n\ndef subst(x,y):\n #print(x,y)\n return substitution_scores[x][y]\n\nv_equals = np.vectorize(equals)\nv_lt = np.vectorize(lt)\nv_subst = np.vectorize(subst)\n\nnp.sum(v_subst(a[...,None], b[None,...]), axis=1)\n#a = np.random.rand(3,4) #b = a.T a = msa b = msa.T def equals(x,y): return x == y def lt(x,y): return x
\n---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\nCell In[24], line 4\n 1 #a = np.random.rand(3,4)\n 2 #b = a.T\n----> 4 a = msa\n 5 b = msa.T\n 7 def equals(x,y):\n\nNameError: name 'msa' is not definedIn\u00a0[25]: Copied!
blosum62_str = \"\"\"\n# Matrix made by matblas from blosum62.iij\n# * column uses minimum score\n# BLOSUM Clustered Scoring Matrix in 1/2 Bit Units\n# Blocks Database = /data/blocks_5.0/blocks.dat\n# Cluster Percentage: >= 62\n# Entropy = 0.6979, Expected = -0.5209\n A R N D C Q E G H I L K M F P S T W Y V B Z X *\nA 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4 \nR -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4 \nN -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4 \nD -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4 \nC 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4 \nQ -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4 \nE -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 \nG 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4 \nH -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4 \nI -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4 \nL -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4 \nK -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4 \nM -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4 \nF -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4 \nP -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4 \nS 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4 \nT 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4 \nW -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4 \nY -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4 \nV 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4 \nB -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4 \nZ -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 \nX 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4 \n* -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 
1\n\"\"\"\nblosum62_str = \"\"\" # Matrix made by matblas from blosum62.iij # * column uses minimum score # BLOSUM Clustered Scoring Matrix in 1/2 Bit Units # Blocks Database = /data/blocks_5.0/blocks.dat # Cluster Percentage: >= 62 # Entropy = 0.6979, Expected = -0.5209 A R N D C Q E G H I L K M F P S T W Y V B Z X * A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4 R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4 N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4 D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4 C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4 Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4 E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4 H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4 I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4 L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4 K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4 M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4 F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4 P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4 S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4 T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4 W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4 Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4 V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4 B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4 Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4 * -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1 \"\"\" In\u00a0[26]: Copied!
lines = (line for line in blosum62_str.split('\\n') if line)\nmatrix_lines = (line.strip().split() for line in lines if line[0] != '#')\n\nsubstitution_scores = defaultdict(lambda: defaultdict(int))\nfor idx,matrix_line in enumerate(matrix_lines):\n if idx == 0:\n aas = matrix_line\n encoded_aas = np.array([*''.join(aas).encode()], dtype=np.uint8)\n aa_encoding = dict(zip(aas, encoded_aas))\n continue\n [aa,*scores] = matrix_line\n substitution_scores[aa_encoding[aa]].update(dict(zip(encoded_aas, scores)))\nsubstitution_scores.keys()\nlines = (line for line in blosum62_str.split('\\n') if line) matrix_lines = (line.strip().split() for line in lines if line[0] != '#') substitution_scores = defaultdict(lambda: defaultdict(int)) for idx,matrix_line in enumerate(matrix_lines): if idx == 0: aas = matrix_line encoded_aas = np.array([*''.join(aas).encode()], dtype=np.uint8) aa_encoding = dict(zip(aas, encoded_aas)) continue [aa,*scores] = matrix_line substitution_scores[aa_encoding[aa]].update(dict(zip(encoded_aas, scores))) substitution_scores.keys()
\n---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\nCell In[26], line 4\n 1 lines = (line for line in blosum62_str.split('\\n') if line)\n 2 matrix_lines = (line.strip().split() for line in lines if line[0] != '#')\n----> 4 substitution_scores = defaultdict(lambda: defaultdict(int))\n 5 for idx,matrix_line in enumerate(matrix_lines):\n 6 if idx == 0:\n\nNameError: name 'defaultdict' is not definedIn\u00a0[27]: Copied!
d = defaultdict(lambda: defaultdict(int))\nd[0].update(dict(a=1))\nd\nd = defaultdict(lambda: defaultdict(int)) d[0].update(dict(a=1)) d
\n---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\nCell In[27], line 1\n----> 1 d = defaultdict(lambda: defaultdict(int))\n 2 d[0].update(dict(a=1))\n 3 d\n\nNameError: name 'defaultdict' is not definedIn\u00a0[28]: Copied!
aa_encoding\naa_encoding
\n---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\nCell In[28], line 1\n----> 1 aa_encoding\n\nNameError: name 'aa_encoding' is not definedIn\u00a0[29]: Copied!
np.array([45],dtype=np.uint8).view('S1')[0].decode()\nnp.array([45],dtype=np.uint8).view('S1')[0].decode() Out[29]:
'-'"}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"picea","text":"
Sprucing up bioinformatics analysis
pip install picea\n
picea has datastructures and methods to perform common bioinformatics tasks. Currently sequences, sequence annotations, trees, and ontologies are supported.
Example:
from picea import Tree, treeplot\nimport matplotlib.pyplot as plt\n\nnewick = '(((a,b),(c,d)),e)'\ntree = Tree.from_newick(newick)\n\nfig, (ax1, ax2) = plt.subplots(ncols = 2, figsize = (10, 4))\n\n#left-to-right layout with direct links\ntreeplot(tree, style='rectangular', ltr=True, ax=ax1)\n\n#right-to-left layout with square links\ntreeplot(tree, style='square', ltr=False, ax=ax2)\n
"},{"location":"CONTRIBUTING/","title":"Contributing","text":"Deploying to pypi:
poetry check\npoetry run coverage run\npoetry run coverage report\npoetry version <major,minor,patch>\npoetry build\npoetry deploy\n
"},{"location":"LICENSE/","title":"License","text":"The MIT License (MIT)
Copyright (c) 2020 Rens Holmer.
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \"Software\"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"},{"location":"API/ontology/","title":"Ontology","text":" Bases: DirectedAcyclicGraph
picea/ontology.py
class Ontology(DirectedAcyclicGraph):\n def __init__(self):\n super().__init__()\n self._header: List[str] = []\n\n def __getitem__(self, ID) -> OntologyTerm:\n term = self._elements[ID]\n if not term._children and not term._parents and term.__dict__.get(\"alt_id\"):\n alt_id = term.__dict__.get(\"alt_id\")[0]\n term = self._elements[alt_id]\n warnings.warn(f\"Accessed GO term by alt ID {ID}, \" f\"returning main GO term with ID {alt_id}\")\n return term\n\n @classmethod\n def from_obo(cls, filename: str = None, string: str = None, skip_obsolete=True) -> \"Ontology\":\n assert filename or string\n assert not (filename and string)\n ontology = cls()\n if filename:\n with open(filename) as filehandle:\n string = filehandle.read()\n\n obo_iter = (el for _, el in groupby(string.strip().split(\"\\n\"), lambda line: line[:1] == \"[\"))\n\n ontology._header = list(next(obo_iter))\n\n for element in obo_iter:\n element = next(element)\n if element != \"[Term]\":\n continue\n attributes = defaultdict(list)\n for attribute in next(obo_iter):\n if not attribute:\n continue\n attr_key, attr_value = attribute.split(\":\", 1)\n attributes[attr_key].append(attr_value.strip())\n if skip_obsolete and attributes.get(\"is_obsolete\"):\n continue\n\n ID = attributes.pop(\"id\")[0].strip()\n parents = [p.split(\"!\")[0].strip() for p in attributes.get(\"is_a\", \"\")]\n\n for relationship in attributes.get(\"relationship\", []):\n relation_type, go_id = relationship.split(\"!\")[0].strip().split(\" \")\n if relation_type == \"part_of\":\n parents.append(go_id)\n\n alt_ids = {*attributes.pop(\"alt_id\", []), ID}\n for alt_id in alt_ids:\n ontology[alt_id] = OntologyTerm(\n ID=alt_id,\n parents=parents,\n container=ontology,\n alt_id=[id for id in alt_ids if id != alt_id],\n **attributes,\n )\n\n for ontology_term in ontology:\n for parent_id in ontology_term._parents:\n parent_term = ontology[parent_id]\n parent_term._children.append(ontology_term.ID)\n\n return ontology\n
"},{"location":"API/sequence/","title":"Sequence","text":" Bases: set
Alphabet of arbitrary biological sequences
Examples:
>>> DNA = Alphabet('DNA', 'ACGT')\n>>> DNA\nAlphabet(name='DNA', members='ACGT')\n
>>> Protein = Alphabet('AminoAcid', '*-?ACDEFGHIKLMNPQRSTVWXY')\n>>> Protein\nAlphabet(name='AminoAcid', members='*-?ACDEFGHIKLMNPQRSTVWXY')\n
Parameters:
Name Type Description Defaultname
str
Alphabet name
requiredmembers
Iterable[str]
Letters of the alphabet
required Source code inpicea/sequence.py
@dataclass(frozen=True)\nclass Alphabet(set):\n \"\"\"Alphabet of arbitrary biological sequences\n\n Examples:\n >>> DNA = Alphabet('DNA', 'ACGT')\n >>> DNA\n Alphabet(name='DNA', members='ACGT')\n\n >>> Protein = Alphabet('AminoAcid', '*-?ACDEFGHIKLMNPQRSTVWXY')\n >>> Protein\n Alphabet(name='AminoAcid', members='*-?ACDEFGHIKLMNPQRSTVWXY')\n\n\n Args:\n name (str): Alphabet name\n members (Iterable[str]): Letters of the alphabet\n \"\"\"\n\n name: str\n members: Iterable[str]\n\n def __post_init__(self) -> None:\n super().__init__(self.members)\n\n def __deepcopy__(self, memo) -> \"Alphabet\":\n return Alphabet(self, self.name)\n\n def score(\n self,\n sequence: str,\n match: float = 1.0,\n mismatch: float = -1.0,\n n_chars: int = 100,\n ) -> float:\n \"\"\"Scores how well a sequence matches an alphabet by summing \\\n (mis)matches of sequence letters that are not in the alphabet \\\n and (mis)matches of alphabet letters that are not in the sequence.\n\n Args:\n sequence (str): Sequence string for which to determine how well \\\n it fits the alphabet\n match (float, optional): match score. Defaults to 1.0.\n mismatch (float, optional): mismatch score. Defaults to -1.0.\n n_chars (int, optional): number of sequence characters to use in \\\n scoring. 
Large numbers incur a significant computational cost.\n\n Returns:\n (float): Score of how well a sequence matches the alphabet\n \"\"\"\n return sum(match if s in self else mismatch for s in sequence[:n_chars]) + sum(\n match if s in sequence[:n_chars] else mismatch for s in self\n )\n\n def validate(self, sequence: str) -> bool:\n \"\"\"Determine whether a sequence strictly fits an alphabet\n\n Args:\n sequence (str): Sequence string\n\n Returns:\n bool: true if all characters in sequence are in the alphabet\n \"\"\"\n return sum(1 if s not in self else 0 for s in sequence) == 0\n\n def complement(self, sequence: str) -> str:\n \"\"\"Returns complementary strand of DNA or RNA sequence strings\n\n Examples:\n >>> DNA = Alphabet('DNA', 'ACGT')\n >>> DNA.complement('AACTACG')\n 'TTGATGC'\n\n Args:\n sequence (str): Sequence string\n\n Returns:\n str: complementary strand sequence string\n \"\"\"\n if self.name == \"DNA\":\n complement = dict(zip(\"acgtnACGTN-?\", \"tgcanTGCAN-?\"))\n elif self.name == \"RNA\":\n complement = dict(zip(\"acgunACGUN-?\", \"ugcanUGCAN-?\"))\n else:\n raise TypeError(\"Cannot complement non-DNA or non-RNA alphabet\")\n return \"\".join(complement[s] for s in sequence)\n\n def translate(self, sequence: str) -> str:\n \"\"\"Translate DNA or RNA sequence string to amino acid string\n\n Examples:\n >>> DNA = Alphabet('DNA', 'ACGT')\n >>> DNA.translate('ATGACGACGTAA')\n 'MTT*'\n\n Args:\n sequence (str): Sequence string (sequence length must be multiple of 3)\n\n Returns:\n str: Amino acid string\n \"\"\"\n if self.name not in (\"DNA\", \"RNA\"):\n raise TypeError(\"Cannot translate non-DNA or non-RNA alphabet\")\n codons = re.findall(\"...\", sequence.upper())\n return \"\".join(TRANSLATION.get(codon, \"X\") for codon in codons)\n
Container for a single biological sequence
Examples:
>>> s1 = Sequence('test_dna', 'ACGATCGACTAGCA')\n>>> s1\nSequence(header='test_dna', alphabet=Alphabet(name='DNA', members='-?acgtnACGNT'))\n>>> s2 = Sequence('test_aa', 'QAPISAIWPOIWQ*')\n>>> s2\nSequence(header='test_aa', alphabet=Alphabet(name='AminoAcid', members='*-?acdefghiklmnpqrstvwxyACDEFGHIKLMNPQRSTVWXY'))\n
Returns:
Type Description Source code inpicea/sequence.py
@dataclass\nclass Sequence:\n \"\"\"Container for a single biological sequence\n\n Examples:\n >>> s1 = Sequence('test_dna', 'ACGATCGACTAGCA')\n >>> s1\n Sequence(header='test_dna', \\\nalphabet=Alphabet(name='DNA', members='-?acgtnACGNT'))\n >>> s2 = Sequence('test_aa', 'QAPISAIWPOIWQ*')\n >>> s2\n Sequence(header='test_aa', \\\nalphabet=Alphabet(name='AminoAcid', \\\nmembers='*-?acdefghiklmnpqrstvwxyACDEFGHIKLMNPQRSTVWXY'))\n\n Returns:\n [type]: [description]\n \"\"\"\n\n header: str = None\n sequence: str = field(repr=False, default=None)\n alphabet: Alphabet = None\n annotation: Optional[SequenceAnnotation] = field(default_factory=SequenceAnnotation, repr=False)\n\n def __post_init__(self):\n if self.alphabet is not None:\n return\n if self.sequence is None:\n self.alphabet = alphabets.DNA\n else:\n self.alphabet = sorted(alphabets, key=lambda alphabet: alphabet.score(self.sequence)).pop()\n\n def __getitem__(self, key):\n return Sequence(self.header, self.sequence[key])\n\n def __len__(self):\n \"\"\"Length of the sequence\n\n Examples:\n >>> s = Sequence('test_dna', 'ACGTA')\n >>> len(s)\n 5\n \"\"\"\n return len(self.sequence)\n\n @property\n def reverse(self):\n return Sequence(self.header, self.sequence[::-1])\n\n @property\n def complement(self):\n return Sequence(self.header, self.alphabet.complement(self.sequence))\n\n @property\n def reverse_complement(self):\n return Sequence(self.header, self.alphabet.complement(self.sequence[::-1]))\n\n @property\n def amino_acids(self):\n if self.alphabet.name == \"AminoAcid\":\n return self\n else:\n return Sequence(self.header, self.alphabet.translate(self.sequence))\n\n def to_dict(self) -> Dict[str, str]:\n \"\"\"Make dictionary with header and sequence elements\n\n Examples:\n >>> s = Sequence('test', 'ACGTA')\n >>> s.to_dict()\n {'header': 'test', 'sequence': 'ACGTA'}\n\n Returns:\n Dict[str, str]: sequence dictionary\n \"\"\"\n return dict(header=self.header, sequence=self.sequence)\n\n @classmethod\n def 
from_fasta(cls, string: str) -> \"Sequence\":\n \"\"\"Create a sequence object from a fasta formatted file. _single sequence only_\n\n Examples:\n >>> fasta_string = '>test\\\\nACGT'\n >>> Sequence.from_fasta(fasta_string)\n Sequence(header='test', \\\nalphabet=Alphabet(name='DNA', members='-?acgtnACGNT'))\n\n Arguments:\n string (str)\n\n Returns:\n Sequence\n \"\"\"\n lines = string.strip().split(\"\\n\")\n header = lines[0][1:]\n sequence = \"\".join(lines[1:])\n return cls(header, sequence)\n\n def to_fasta(self, linewidth: int = 80) -> str:\n \"\"\"Make fasta formatted sequence entry\n\n Returns:\n str: sequence in fasta format\n \"\"\"\n sequence_lines = \"\\n\".join(re.findall(f\".{{1,{linewidth}}}\", self.sequence))\n return f\">{self.header}\\n{sequence_lines}\"\n
Bases: SequenceReader
picea/sequence.py
class BatchSequenceReader(SequenceReader):\n def __init__(\n self,\n string: str = None,\n filename: str = None,\n filetype: str = None,\n batchsize: int = 10,\n ) -> None:\n \"\"\"[summary]\n\n Args:\n string (str, optional): [description]. Defaults to None.\n filename (str, optional): [description]. Defaults to None.\n filetype (str, optional): [description]. Defaults to None.\n batchsize (int, optional): [description]. Defaults to 10.\n\n Returns:\n [type]: [description]\n\n Yields:\n [type]: [description]\n \"\"\"\n super().__init__(string, filename, filetype)\n self.batchsize = batchsize\n self._currentbatch = SequenceCollection()\n\n def __iter__(self) -> Iterable[\"SequenceCollection\"]:\n for s in self._iter():\n self._currentbatch[s.header] = s\n if len(self._currentbatch) == self.batchsize:\n yield self._currentbatch\n self._currentbatch = SequenceCollection()\n\n def __next__(self) -> \"SequenceCollection\":\n currentbatch = self._currentbatch\n self._currentbatch = SequenceCollection()\n if len(currentbatch) == self.batchsize:\n return currentbatch\n for s in self._iter():\n currentbatch[s.header] = s\n if len(currentbatch) == self.batchsize:\n yield currentbatch\n
(Partially) Abstract Base Class for sequence collections. Classes extending from this base class should override __setitem__
, __getitem__
, __delitem__
, headers
, and n_seqs
.
If the above methods are implemented, this automatically enables the following methods: from_fasta
, to_fasta
, from_json
, to_json
.
Parameters:
Name Type Description Defaultsequences
Optional[Iterable[Tuple[str, str]]]
Iterable of (header, sequence) tuples. Defaults to None.
None
sequence_annotation
Optional[SequenceAnnotation]
picea SequenceAnnotation object. Defaults to None.\n
None
Raises:
Type DescriptionNotImplementedError
Abstract Base Class cannot be initialized and serves as a template only
Source code inpicea/sequence.py
class AbstractSequenceCollection(metaclass=ABCMeta):\n \"\"\"\n (Partially) Abstract Base Class for sequence collections.\n Classes extending from this baseclass should override\n `__setitem__`, `__getitem__`, `__delitem__`, `headers`, and `n_seqs`.\n\n If the above methods are implemented, this automatically enables the\n following methods: `from_fasta`, `to_fasta`, `from_json`, `to_json`.\n\n Args:\n sequences (Optional[Iterable[Tuple[str, str]]], optional):\n Iterable of (header, sequence) tuples. Defaults to None.\n sequence_annotation (Optional[SequenceAnnotation]):\n picea SequenceAnnotation object. Defaults to None.\n\n Raises:\n NotImplementedError: Abstract Base Class can not be initialized\n and serves as a template only\n \"\"\"\n\n @abstractmethod\n def __init__(\n self,\n sequences: Optional[Iterable[Sequence]] = None,\n sequence_annotation: Optional[\"SequenceAnnotation\"] = None,\n ) -> None:\n raise NotImplementedError(\n (\"Classes extending from AbstractSequenceCollection should \" \"implement __init__ method\")\n )\n\n @abstractmethod\n def __setitem__(self, header: str, seq: str) -> None:\n raise NotImplementedError(\n (\"Classes extending from AbstractSequenceCollection should \" \"implement __setitem__ method\")\n )\n\n @abstractmethod\n def __getitem__(self, header: str) -> Sequence:\n raise NotImplementedError(\n (\"Classes extending from AbstractSequenceCollection should \" \"implement __getitem__ method\")\n )\n\n @abstractmethod\n def __delitem__(self, header: str) -> None:\n raise NotImplementedError(\n (\"Classes extending from AbstractSequenceCollection should \" \"implement __delitem__ method\")\n )\n\n def __iter__(self) -> Iterable[Sequence]:\n for header in self.headers:\n yield self[header]\n\n def __len__(self) -> int:\n return len(self.headers)\n\n def __add__(self: SequenceType, other: SequenceType) -> SequenceType:\n new_collection = self.__class__()\n return new_collection\n\n @property\n @abstractmethod\n def headers(self) 
-> List[str]:\n \"\"\"List of sequences headers.\n Overridden in subclasses.\n\n Raises:\n NotImplementedError\n\n Returns:\n List[str]: List of sequence headers\n \"\"\"\n raise NotImplementedError(\n (\"Classes extending from AbstractSequenceCollection should \" \"implement headers property\")\n )\n\n @property\n def iloc(self) -> SequenceIndex:\n \"\"\"[summary]\n\n Returns:\n SequenceIndex: [description]\n \"\"\"\n return SequenceIndex(self)\n\n @property\n def sequences(self) -> List[str]:\n \"\"\"List of sequences without headers\n\n Returns:\n List[str]: list of sequences\n \"\"\"\n return [self[header].sequence for header in self.headers]\n\n @property\n @abstractmethod\n def n_seqs(self) -> int:\n \"\"\"Return the number of sequences in the collection.\n Overridden in subclasses\n\n Raises:\n NotImplementedError\n\n Returns:\n int: number of sequences\n \"\"\"\n raise NotImplementedError(\n (\"Classes extending from AbstractSequenceCollection should \" \"implement n_seqs property\")\n )\n\n @classmethod\n def from_sequence_iter(cls, sequence_iter: Iterable[Sequence]) -> \"SequenceCollection\":\n \"\"\"[summary]\n\n Raises:\n NotImplementedError: [description]\n\n Returns:\n [type]: [description]\n \"\"\"\n sequencecollection = cls()\n for seq in sequence_iter:\n sequencecollection[seq.header] = seq.sequence\n return sequencecollection\n\n @classmethod\n def from_fasta(\n cls,\n filename: str = None,\n string: str = None,\n ) -> \"SequenceCollection\":\n \"\"\"Parse a fasta formatted string into a SequenceCollection object\n\n Keyword Arguments:\n filename {String} -- filename string (default: {None})\n string {String} -- fasta formatted string (default: {None})\n\n Returns:\n SequenceCollection -- SequenceCollection instance\n \"\"\"\n sequencecollection = cls()\n\n for seq in SequenceReader(string=string, filename=filename, filetype=\"fasta\"):\n sequencecollection[seq.header] = seq.sequence\n return sequencecollection\n\n def to_fasta(self, linewidth: 
int = 80) -> str:\n \"\"\"Get a fasta-formatted string of the sequence collection\n\n Returns:\n str: Multi-line fasta-formatted string\n \"\"\"\n return \"\\n\".join([seq.to_fasta(linewidth=linewidth) for seq in self])\n\n @classmethod\n def from_json(cls, filename: Optional[str] = None, string: Optional[str] = None) -> \"SequenceCollection\":\n \"\"\"[summary]\n\n Keyword Arguments:\n string {String} -- JSON formatted string\n\n Returns:\n SequenceCollection -- SequenceCollection instance\n \"\"\"\n sequencecollection = cls()\n\n for seq in SequenceReader(string=string, filename=filename, filetype=\"json\"):\n sequencecollection[seq.header] = seq.sequence\n\n return sequencecollection\n\n def to_json(self, indent: Optional[int] = None) -> str:\n \"\"\"[summary]\n\n Returns:\n str: [description]\n \"\"\"\n gene_dicts = [seq.to_dict() for seq in self]\n return json.dumps(gene_dicts, indent=indent)\n\n @abstractmethod\n def pop(self, header: str) -> Sequence:\n \"\"\"[summary]\n\n Args:\n header (str): [description]\n\n Returns:\n Sequence: [description]\n \"\"\"\n raise NotImplementedError((\"Classes extending from AbstractSequenceCollection should \" \"implement pop method\"))\n\n def batch_rename(self, rename_func: Callable[[str], str]) -> None:\n \"\"\"Rename all headers by calling `rename_func` on each header\n\n Args:\n rename_func (Callable): [description]\n \"\"\"\n for header in self.headers:\n s: Sequence = self.pop(header)\n s.header = rename_func(s.header)\n self[s.header] = s.sequence\n
Bases: AbstractSequenceCollection
A container for multiple (unaligned) DNA or amino acid sequences
Source code inpicea/sequence.py
class SequenceCollection(AbstractSequenceCollection):\n \"\"\"\n A container for multiple (unaligned) DNA or amino acid sequences\n \"\"\"\n\n def __init__(\n self: \"SequenceCollection\",\n sequences: Iterable[Tuple[str, str]] = None,\n sequence_annotation: \"SequenceAnnotation\" = None,\n ):\n self._collection = dict()\n if sequences:\n for header, sequence in sequences:\n self[header] = sequence\n self.sequence_annotation = sequence_annotation\n\n def __setitem__(self, header: str, seq: str) -> None:\n if header in self._collection:\n warn(f'Turning duplicate header \"{header}\" into unique header')\n new_header = header\n modifier = 0\n while new_header in self.headers:\n modifier += 1\n new_header = f\"{header}_{modifier}\"\n header = new_header\n self._collection[header] = seq\n\n def __getitem__(self, header: str) -> Sequence:\n sequence = self._collection[header]\n return Sequence(header, sequence)\n\n def __delitem__(self, header: str) -> None:\n del self._collection[header]\n\n @property\n def headers(self) -> List[str]:\n return list(self._collection.keys())\n\n @property\n def n_seqs(self) -> int:\n return len(self._collection.keys())\n\n def align(\n self, method: Optional[str] = \"mafft\", method_kwargs: Optional[Mapping[str, str]] = None\n ) -> \"MultipleSequenceAlignment\":\n \"\"\"[summary]\n\n Args:\n method (str, optional): [description]. Defaults to 'mafft'.\n method_kwargs (Mapping[str, str], optional): [description]. 
\\\n Defaults to dict().\n\n Returns:\n [type]: [description]\n \"\"\"\n if not method_kwargs:\n method_kwargs = dict()\n fasta = self.to_fasta()\n command = [method, *chain(*method_kwargs.items()), \"-\"]\n process = Popen(command, stdin=PIPE, stdout=PIPE, stderr=PIPE)\n stdout, _ = process.communicate(input=fasta.encode())\n aligned_fasta = stdout.decode().strip()\n return MultipleSequenceAlignment.from_fasta(string=aligned_fasta)\n\n def pop(self, header: str) -> Sequence:\n sequence = self._collection.pop(header)\n return Sequence(header, sequence)\n
Bases: SequenceCollection
A container for multiple aligned DNA or amino acid sequences
Source code inpicea/sequence.py
class MultipleSequenceAlignment(SequenceCollection):\n \"\"\"\n A container for multiple aligned DNA or amino acid sequences\n \"\"\"\n\n def __init__(\n self,\n sequences: Optional[Iterable[Sequence]] = None,\n sequence_annotation: Optional[\"SequenceAnnotation\"] = None,\n ) -> None:\n super(MultipleSequenceAlignment).__init__()\n self._collection = np.empty((0, 0), dtype=\"uint8\")\n self._header_idx = dict()\n if sequences:\n for seq in sequences:\n self[seq.header] = seq.sequence\n # if sequence_annotation:\n # sequence_annotation.sequence_collection = self\n self.sequence_annotation = sequence_annotation\n\n def __setitem__(self, header: str, seq: str) -> None:\n seq = seq.encode()\n if header in self._header_idx:\n warn(f'Turning duplicate header \"{header}\" into unique header')\n new_header = header\n modifier = 0\n while new_header in self._header_idx:\n modifier += 1\n new_header = f\"{header}_{modifier}\"\n header = new_header\n n_seq, n_char = self._collection.shape\n if n_seq == 0:\n self._collection = np.array([[*seq]], dtype=\"uint8\")\n else:\n len_diff = len(seq) - n_char\n\n filler1 = np.array([[*b\"-\"] * len_diff], dtype=\"uint8\")\n arr = np.hstack((self._collection, np.repeat(filler1, n_seq, axis=0)))\n\n filler2 = np.array([*b\"-\"] * -len_diff, dtype=\"uint8\")\n new_row = np.array([[*seq, *filler2]], dtype=\"uint8\")\n\n arr = np.vstack((arr, new_row))\n self._collection = arr\n self._header_idx[header] = n_seq\n\n def __getitem__(self, header: str) -> Sequence:\n idx = self._header_idx[header]\n n_chars = self._collection.shape[1]\n sequence = self._collection[idx].view(f\"S{n_chars}\")[0].decode()\n return Sequence(header, sequence)\n\n @property\n def headers(self) -> List[str]:\n return list(self._header_idx.keys())\n\n @property\n def n_seqs(self) -> int:\n return self._collection.shape[0]\n\n @property\n def n_chars(self) -> int:\n return self._collection.shape[1]\n\n @property\n def shape(self) -> int:\n return 
self._collection.shape\n\n def to_nexus(self) -> str:\n \"\"\" \"\"\"\n sequences = \"\\n\".join([f\"{s.header} {s.sequence}\" for s in self])\n return (\n \"begin data;\"\n f\"\\tdimensions ntax={self.n_seqs} nchar={self.n_chars};\"\n \"\\tformat datatype=dna gap=-;\"\n \"\\tmatrix\"\n f\"\\t{sequences}\"\n \"\\t;\"\n \"end;\"\n )\n\n def pop(self, header: str) -> Sequence:\n pop_idx = self._header_idx[header]\n n_chars = self._collection.shape[1]\n sequence = self._collection[pop_idx].view(f\"S{n_chars}\")[0].decode()\n del self._header_idx[header]\n self._header_idx = {h: (idx if idx < pop_idx else idx - 1) for h, idx in self._header_idx.items()}\n self._collection = np.delete(self._collection, (pop_idx,), axis=0)\n return Sequence(header, sequence)\n\n def pairwise_distances(self, distance_measure: str = \"identity\") -> npt.NDArray[np.float64]:\n pass\n
Bases: DirectedAcyclicGraph
picea/sequence.py
class SequenceAnnotation(DirectedAcyclicGraph):\n def __init__(self, sequence: Optional[\"Sequence\"] = None) -> None:\n \"\"\"[summary]\n\n Args:\n sequence (Optional[Sequence], optional): [description]. Defaults\\\n to None.\n \"\"\"\n super().__init__()\n if sequence:\n sequence.annotation = self\n self.sequence = sequence\n self._gff_headers = list()\n\n @property\n def intervals(self):\n return list(self)\n\n def _link_parents(self) -> None:\n \"\"\"\n Add explicit link from parent to child intervals\n GFF/GTF files only contain links of child to parent\n This modifies elements in place\n \"\"\"\n for interval in self:\n if interval.parent:\n for parent_ID in interval.parent:\n try:\n parent = self[parent_ID]\n except KeyError as exc:\n raise KeyError(\n f\"Interval {interval.ID} is listing {parent_ID} \"\n \"as Parent, but parent could not be found.\"\n ) from exc\n parent._children.append(interval.ID)\n\n @classmethod\n def from_gtf(\n cls,\n filename: Optional[str] = None,\n string: Optional[str] = None,\n sequence: Optional[\"Sequence\"] = None,\n link_parents: Optional[bool] = True,\n ) -> \"SequenceAnnotation\":\n \"\"\"[summary]\n\n Raises:\n IndexError: [description]\n IndexError: [description]\n\n Returns:\n [type]: [description]\n \"\"\"\n assert filename or string\n assert not (filename and string)\n sequence_annotation = cls(sequence=sequence)\n header = True\n\n # start with just reading all intervals\n if filename:\n with open(filename) as filehandle:\n string = filehandle.read()\n for line_number, line in enumerate(string.split(\"\\n\")):\n line = line.strip()\n if not line:\n continue\n if line[0] == \"#\":\n if header:\n sequence_annotation._gff_headers.append(line)\n continue\n else:\n header = False\n interval = SequenceInterval.from_gtf_line(gtf_line=line, line_number=line_number)\n interval._container = sequence_annotation\n sequence_annotation[interval.ID] = interval\n # fix missing gene and transcript intervals\n transcript_child_counter 
= Counter()\n new_intervals = dict()\n for interval in sequence_annotation:\n gene_id = interval.gff_attributes[\"gene_id\"][0]\n transcript_id = interval.gff_attributes[\"transcript_id\"][0]\n interval_type = interval.interval_type\n id_tuple = (gene_id, transcript_id, interval_type)\n child_count = transcript_child_counter[id_tuple]\n transcript_child_counter.update([id_tuple])\n interval._ID = f\"{transcript_id}.{interval_type}_{child_count}\"\n if transcript_id not in new_intervals:\n # new transcript interval\n transcript_interval = deepcopy(interval)\n transcript_interval._container = interval._container\n transcript_interval._ID = transcript_id\n transcript_interval.interval_type = \"mRNA\"\n transcript_interval.parent = [gene_id]\n # new gene interval\n gene_interval = deepcopy(interval)\n gene_interval._container = interval._container\n gene_interval._ID = gene_id\n gene_interval.interval_type = \"gene\"\n gene_interval.parent = None\n\n new_intervals[transcript_id] = transcript_interval\n new_intervals[gene_id] = gene_interval\n\n interval.parent = [transcript_id]\n new_intervals[interval.ID] = interval\n sequence_annotation._intervals = new_intervals\n\n # set children\n if link_parents:\n sequence_annotation._link_parents()\n\n # fix gene and transcript start and stop coordinates\n genes = sequence_annotation.groupby(\"interval_type\")[\"gene\"]\n for gene in genes:\n # fix gene first\n start = 10e9\n end = 0\n for child in gene.children:\n start = min(start, child.start)\n end = max(end, child.end)\n gene.start = start\n gene.end = end\n\n # fix transcripts\n transcripts = gene.children.groupby(\"interval_type\")[\"mRNA\"]\n for transcript in transcripts:\n start = 10e9\n end = 0\n for child in transcript.children:\n start = min(start, child.start)\n end = max(end, child.end)\n transcript.end = end\n transcript.start = start\n\n return sequence_annotation\n\n def to_gtf(self) -> str:\n return \"\\n\".join(interval.to_gtf_line() for interval in 
self)\n\n @classmethod\n def from_gff(\n cls,\n filename: Optional[str] = None,\n string: Optional[str] = None,\n sequence: Optional[\"Sequence\"] = None,\n link_parents: bool = True,\n ) -> \"SequenceAnnotation\":\n \"\"\"[summary]\n\n Args:\n filename ([type], optional): [description]. Defaults to None.\n string ([type], optional): [description]. Defaults to None.\n sequence ([type], optional): [description].\n Defaults to None.\n\n Returns:\n [type]: [description]\n \"\"\"\n assert filename or string\n assert not (filename and string)\n sequence_annotation = cls(sequence=sequence)\n header = True\n if filename:\n with open(filename) as filehandle:\n string = filehandle.read()\n for line_number, line in enumerate(string.split(\"\\n\")):\n line = line.strip()\n if not line:\n continue\n if line == \"##FASTA\":\n break\n if line[0] == \"#\":\n if header:\n sequence_annotation._gff_headers.append(line)\n continue\n else:\n header = False\n\n interval = SequenceInterval.from_gff_line(gff_line=line, line_number=line_number)\n interval._container = sequence_annotation\n sequence_annotation[interval.ID] = interval\n\n if link_parents:\n sequence_annotation._link_parents()\n\n return sequence_annotation\n\n def to_gff(self) -> str:\n \"\"\"[summary]\n\n Returns:\n str: [description]\n \"\"\"\n return \"\".join(interval.to_gff_line(trailing_newline=True) for interval in self)\n\n @classmethod\n def from_json(\n cls,\n filename: Optional[str] = None,\n string: Optional[str] = None,\n sequence: Optional[\"Sequence\"] = None,\n ) -> \"SequenceAnnotation\":\n \"\"\"[summary]\"\"\"\n assert filename or string\n assert not (filename and string)\n if filename:\n with open(filename) as filehandle:\n string = filehandle.read()\n\n sequence_annotation = cls(sequence=sequence)\n\n gene_dicts = json.loads(string)\n assert isinstance(gene_dicts, list)\n\n for top_dict in gene_dicts:\n child_dicts = top_dict.pop(\"children\", list())\n top_interval = 
SequenceInterval.from_dict(interval_dict=top_dict)\n top_interval._container = sequence_annotation\n sequence_annotation[top_interval.ID] = top_interval\n for child_dict in child_dicts:\n child_interval = SequenceInterval.from_dict(interval_dict=child_dict)\n child_interval._container = sequence_annotation\n sequence_annotation[child_interval.ID] = child_interval\n for interval in sequence_annotation:\n if interval.parent:\n for parent_ID in interval.parent:\n try:\n parent = sequence_annotation[parent_ID]\n except IndexError as err:\n raise IndexError(\n \"Interval {interval.ID} is listing {parent_ID} \" \"as Parent, but parent could not be found.\"\n ) from err\n parent._children.append(interval.ID)\n return sequence_annotation\n\n def to_json(self, indent: Optional[int] = None) -> str:\n \"\"\"[summary]\n\n Returns:\n str: [description]\n \"\"\"\n interval_dicts = [interval.to_dict() for interval in self]\n return json.dumps(interval_dicts, indent=indent)\n
Bases: DAGElement
picea/sequence.py
class SequenceInterval(DAGElement):\n _predefined_gff3_attributes = (\n \"ID\",\n \"name\",\n \"alias\",\n \"parent\",\n \"target\",\n \"gap\",\n \"derives_from\",\n \"note\",\n \"dbxref\",\n \"ontology_term\",\n \"is_circular\",\n )\n _fixed_gff3_fields = (\n \"seqid\",\n \"source\",\n \"interval_type\",\n \"start\",\n \"end\",\n \"score\",\n \"strand\",\n \"phase\",\n )\n _gtf_interval_types = dict(mRNA=\"transcript\")\n\n def __init__(\n self,\n ID: Optional[str] = None,\n seqid: Optional[str] = None,\n source: Optional[str] = None,\n interval_type: Optional[str] = None,\n start: Optional[int] = None,\n end: Optional[int] = None,\n score: Optional[float] = None,\n strand: Optional[str] = None,\n phase: Optional[str] = None,\n children: Optional[List[str]] = None,\n container: Optional[SequenceAnnotation] = None,\n **kwargs,\n ):\n \"\"\"[summary]\n\n Args:\n ID (Optional[str], optional): [description]. Defaults to None.\n seqid (Optional[str], optional): [description]. Defaults to None.\n source (Optional[str], optional): [description]. Defaults to None.\n interval_type (Optional[str], optional): [description]. Defaults\n to None.\n start (Optional[int], optional): [description]. Defaults to None.\n end (Optional[int], optional): [description]. Defaults to None.\n score (Optional[float], optional): [description]. Defaults to None.\n strand (Optional[str], optional): [description]. Defaults to None.\n phase (Optional[str], optional): [description]. Defaults to\n None.\n children (Optional[List], optional): [description]. 
Defaults to\n None.\n container (Optional[SequenceAnnotation], optional): [description].\n Defaults to None.\n \"\"\"\n # interval ID is a property (see below) with getter and setter\n # self._ID = ID\n # self._original_ID = ID\n parents = kwargs.pop(\"parent\", None)\n super().__init__(ID=ID, children=children, container=container, parents=parents)\n\n # Standard gff fields\n self.seqid = seqid\n self.source = source\n self.interval_type = interval_type\n self.start = start\n self.end = end\n self.score = score\n self.strand = strand\n self.phase = phase\n\n # Set attributes with predefined meanings in the gff spec to None\n for attr in self._predefined_gff3_attributes:\n # ID and parent are handled separately in DAG superclass\n if attr in {\"ID\", \"parent\"}:\n continue\n self[attr] = kwargs.get(attr, None)\n\n # Any additional attributes\n for key, value in kwargs.items():\n self[key] = value\n\n # Additional fields, used internally\n # self._container = container\n # if children is None:\n # children = []\n # self._children = children\n\n def __repr__(self):\n return (\n f\"<SequenceInterval type={self.interval_type} \"\n f\"ID={self.ID} \"\n f\"loc={self.seqid}..{self.start}..{self.end}..{self.strand} \"\n f\"at {hex(id(self))}>\"\n )\n\n @property\n def parent(self):\n return self._parents\n\n @parent.setter\n def parent(self, parent_ID: Union[List[str], str]):\n if isinstance(parent_ID, str):\n parent_ID = [parent_ID]\n self._parents = parent_ID\n\n @property\n def gff_attributes(self) -> Dict[str, str]:\n gff_attributes = {\n attr: self[attr] # dictionary comprehension\n for attr in self.__dict__\n if attr not in self._fixed_gff3_fields # skip column 1-8 in gff3\n and attr\n not in (\n \"_parents\",\n \"_children\",\n \"_container\",\n \"_ID\",\n \"_original_ID\",\n ) # internal use only\n and self[attr] is not None # no empty attributes\n }\n\n # Add attributes handled by DAG\n gff_attributes[\"ID\"] = [self.ID]\n if self._parents:\n 
gff_attributes[\"Parent\"] = self._parents\n\n return gff_attributes\n\n @property\n def gtf_attributes(self) -> Dict[str, str]:\n def get_gtf_type(gff_interval_type):\n return self._gtf_interval_types.get(gff_interval_type, gff_interval_type)\n\n if self.parents:\n parent_ids = {f\"{get_gtf_type(parent.interval_type)}_id\": parent.ID for parent in self.parents}\n else:\n parent_ids = dict()\n return {**self.gff_attributes, **parent_ids}\n\n @classmethod\n def from_gtf_line(cls, gtf_line: Optional[str] = None, line_number: Optional[int] = None) -> \"SequenceInterval\":\n \"\"\"[summary]\n\n Returns:\n [type]: [description]\n\n Yields:\n [type]: [description]\n \"\"\"\n return cls.from_gff_line(gtf_line, line_number, parse_gtf_attribute_string)\n\n def to_gtf_line(self) -> str:\n \"\"\"[summary]\n\n Returns:\n str: [description]\n \"\"\"\n interval_type = self._gtf_interval_types.get(self.interval_type, self.interval_type)\n return \"\\t\".join(\n [\n self.seqid,\n self.source,\n interval_type,\n str(self.start),\n str(self.end),\n str(self.score),\n self.strand,\n str(self.phase),\n format_gtf_attribute_string(self.gtf_attributes),\n ]\n )\n\n @classmethod\n def from_gff_line(\n cls,\n gff_line: Optional[str] = None,\n line_number: Optional[int] = None,\n attribute_parser: Callable = parse_gff_attribute_string,\n ) -> \"SequenceInterval\":\n \"\"\"[summary]\n\n Args:\n gff_line (Optional[str], optional): [description]. Defaults\n to None.\n line_number (Optional[int], optional): [description]. 
Defaults\n to None.\n\n Returns:\n [type]: [description]\n \"\"\"\n gff_parts = gff_line.split(\"\\t\")\n assert len(gff_parts) == 9, gff_parts\n seqid, source, interval_type, start, end, score, strand, phase = gff_parts[:8]\n try:\n start = int(start)\n end = int(end)\n except ValueError as err:\n error = \"GFF start and end fields must be integer\"\n if line_number:\n error = f\"{error}, gff line {line_number}\"\n raise ValueError(error) from err\n\n if score != \".\":\n try:\n score = float(score)\n except ValueError as err:\n error = \"GFF score field must be a float\"\n if line_number:\n error = f\"{error}, gff line {line_number}\"\n raise ValueError(error) from err\n\n if strand not in (\"+\", \"-\", \".\"):\n error = 'GFF strand must be one of \"+\", \"-\" or \".\"'\n if line_number:\n error = f\"{error}, gff line {line_number}\"\n raise ValueError(error)\n\n if phase not in (\"0\", \"1\", \"2\", \".\"):\n error = 'GFF phase must be one of \"0\", \"1\", \"2\" or \".\"'\n if line_number:\n error = f\"{error}, gff line {line_number}\"\n raise ValueError(error)\n elif phase != \".\":\n phase = int(phase)\n\n # Disable phase checking of CDS for now...\n # if interval_type == 'CDS' and phase not in ('0', '1', '2'):\n # error = 'GFF intervals of type CDS must have phase of\\\n # \"0\", \"1\" or \"2\"'\n # if line_number:\n # error = f'{error}, gff line {line_number}'\n # raise ValueError(error)\n\n attributes = attribute_parser(gff_parts[8])\n\n ID = attributes.pop(\"ID\", [str(uuid.uuid4())])[0]\n\n return cls(\n seqid=seqid,\n source=source,\n interval_type=interval_type,\n start=start,\n end=end,\n score=score,\n strand=strand,\n phase=phase,\n ID=ID,\n **attributes,\n )\n\n def to_gff_line(self, trailing_newline: bool = False) -> str:\n \"\"\"[summary]\n\n Returns:\n str: [description]\n \"\"\"\n # attributes = dict(ID=self.ID, **self.gff_attributes)\n\n gff_line = \"\\t\".join(\n [\n self.seqid,\n self.source,\n self.interval_type,\n str(self.start),\n 
str(self.end),\n str(self.score),\n self.strand,\n str(self.phase),\n format_gff_attribute_string(self.gff_attributes),\n ]\n )\n if trailing_newline:\n gff_line = f\"{gff_line}\\n\"\n return gff_line\n\n @classmethod\n def from_dict(cls, interval_dict: Dict[str, Any]) -> \"SequenceInterval\":\n \"\"\"[summary]\n Args:\n interval_dict\n\n Returns:\n [type]: [description]\n \"\"\"\n attributes = interval_dict.pop(\"attributes\", dict())\n return cls(**interval_dict, **attributes)\n\n def to_dict(self, include_children: bool = False) -> Dict[str, Any]:\n \"\"\"[summary]\n\n Returns:\n Dict[str, Any]: [description]\n \"\"\"\n attributes = dict(**self.gff_attributes)\n attributes.pop(\"ID\")\n interval_dict = dict(\n ID=self.ID,\n seqid=self.seqid,\n source=self.source,\n interval_type=self.interval_type,\n start=self.start,\n end=self.end,\n score=self.score,\n strand=self.strand,\n phase=self.phase,\n attributes=attributes,\n )\n if include_children:\n children = [child.to_dict() for child in self.children[1:]]\n interval_dict[\"children\"] = children\n return interval_dict\n\n def to_json(self, include_children: bool = False, indent: Optional[int] = None) -> str:\n \"\"\"[summary]\n\n Args:\n include_children (bool, optional): [description]. Defaults to \\\n False.\n\n Returns:\n str: [description]\n \"\"\"\n return json.dumps(self.to_dict(include_children=include_children), indent=indent)\n
"},{"location":"API/sequence/#picea.Alphabet.complement","title":"complement(sequence)
","text":"Returns complementary strand of DNA or RNA sequence strings
Examples:
>>> DNA = Alphabet('DNA', 'ACGT')\n>>> DNA.complement('AACTACG')\n'TTGATGC'\n
Parameters:
Name Type Description Defaultsequence
str
Sequence string
requiredReturns:
Name Type Descriptionstr
str
complementary strand sequence string
Source code inpicea/sequence.py
def complement(self, sequence: str) -> str:\n \"\"\"Returns complementary strand of DNA or RNA sequence strings\n\n Examples:\n >>> DNA = Alphabet('DNA', 'ACGT')\n >>> DNA.complement('AACTACG')\n 'TTGATGC'\n\n Args:\n sequence (str): Sequence string\n\n Returns:\n str: complementary strand sequence string\n \"\"\"\n if self.name == \"DNA\":\n complement = dict(zip(\"acgtnACGTN-?\", \"tgcanTGCAN-?\"))\n elif self.name == \"RNA\":\n complement = dict(zip(\"acgunACGUN-?\", \"ugcanUGCAN-?\"))\n else:\n raise TypeError(\"Cannot complement non-DNA or non-RNA alphabet\")\n return \"\".join(complement[s] for s in sequence)\n
"},{"location":"API/sequence/#picea.Alphabet.score","title":"score(sequence, match=1.0, mismatch=-1.0, n_chars=100)
","text":"Scores how well a sequence matches an alphabet by summing (mis)matches of sequence letters that are not in the alphabet and (mis)matches of alphabet letters that are not in the sequence.
Parameters:
Name Type Description Defaultsequence
str
Sequence string for which to determine how well it fits the alphabet
requiredmatch
float
match score. Defaults to 1.0.
1.0
mismatch
float
mismatch score. Defaults to -1.0.
-1.0
n_chars
int
number of sequence characters to use in scoring. Large numbers incur a significant computational cost.
100
Returns:
Type Descriptionfloat
Score of how well a sequence matches the alphabet
Source code inpicea/sequence.py
def score(\n self,\n sequence: str,\n match: float = 1.0,\n mismatch: float = -1.0,\n n_chars: int = 100,\n) -> float:\n \"\"\"Scores how well a sequence matches an alphabet by summing \\\n (mis)matches of sequence letters that are not in the alphabet \\\n and (mis)matches of alphabet letters that are not in the sequence.\n\n Args:\n sequence (str): Sequence string for which to determine how well \\\n it fits the alphabet\n match (float, optional): match score. Defaults to 1.0.\n mismatch (float, optional): mismatch score. Defaults to -1.0.\n n_chars (int, optional): number of sequence characters to use in \\\n scoring. Large numbers incur a significant computational cost.\n\n Returns:\n (float): Score of how well a sequence matches the alphabet\n \"\"\"\n return sum(match if s in self else mismatch for s in sequence[:n_chars]) + sum(\n match if s in sequence[:n_chars] else mismatch for s in self\n )\n
"},{"location":"API/sequence/#picea.Alphabet.translate","title":"translate(sequence)
","text":"Translate DNA or RNA sequence string to amino acid string
Examples:
>>> DNA = Alphabet('DNA', 'ACGT')\n>>> DNA.translate('ATGACGACGTAA')\n'MTT*'\n
Parameters:
Name Type Description Defaultsequence
str
Sequence string (sequence length must be multiple of 3)
requiredReturns:
Name Type Descriptionstr
str
Amino acid string
Source code inpicea/sequence.py
def translate(self, sequence: str) -> str:\n \"\"\"Translate DNA or RNA sequence string to amino acid string\n\n Examples:\n >>> DNA = Alphabet('DNA', 'ACGT')\n >>> DNA.translate('ATGACGACGTAA')\n 'MTT*'\n\n Args:\n sequence (str): Sequence string (sequence length must be multiple of 3)\n\n Returns:\n str: Amino acid string\n \"\"\"\n if self.name not in (\"DNA\", \"RNA\"):\n raise TypeError(\"Cannot translate non-DNA or non-RNA alphabet\")\n codons = re.findall(\"...\", sequence.upper())\n return \"\".join(TRANSLATION.get(codon, \"X\") for codon in codons)\n
"},{"location":"API/sequence/#picea.Alphabet.validate","title":"validate(sequence)
","text":"Determine whether a sequence strictly fits an alphabet
Parameters:
Name Type Description Defaultsequence
str
Sequence string
requiredReturns:
Name Type Descriptionbool
bool
true if all characters in sequence are in the alphabet
Source code inpicea/sequence.py
def validate(self, sequence: str) -> bool:\n \"\"\"Determine whether a sequence strictly fits an alphabet\n\n Args:\n sequence (str): Sequence string\n\n Returns:\n bool: true if all characters in sequence are in the alphabet\n \"\"\"\n return sum(1 if s not in self else 0 for s in sequence) == 0\n
"},{"location":"API/sequence/#picea.Sequence.__len__","title":"__len__()
","text":"Length of the sequence
Examples:
>>> s = Sequence('test_dna', 'ACGTA')\n>>> len(s)\n5\n
Source code in picea/sequence.py
def __len__(self):\n \"\"\"Length of the sequence\n\n Examples:\n >>> s = Sequence('test_dna', 'ACGTA')\n >>> len(s)\n 5\n \"\"\"\n return len(self.sequence)\n
"},{"location":"API/sequence/#picea.Sequence.from_fasta","title":"from_fasta(string)
classmethod
","text":"Create a sequence object from a fasta formatted file. single sequence only
Examples:
>>> fasta_string = '>test\\nACGT'\n>>> Sequence.from_fasta(fasta_string)\nSequence(header='test', alphabet=Alphabet(name='DNA', members='-?acgtnACGNT'))\n
Returns:
Type DescriptionSequence
Sequence
Source code inpicea/sequence.py
@classmethod\n def from_fasta(cls, string: str) -> \"Sequence\":\n \"\"\"Create a sequence object from a fasta formatted file. _single sequence only_\n\n Examples:\n >>> fasta_string = '>test\\\\nACGT'\n >>> Sequence.from_fasta(fasta_string)\n Sequence(header='test', \\\nalphabet=Alphabet(name='DNA', members='-?acgtnACGNT'))\n\n Arguments:\n string (str)\n\n Returns:\n Sequence\n \"\"\"\n lines = string.strip().split(\"\\n\")\n header = lines[0][1:]\n sequence = \"\".join(lines[1:])\n return cls(header, sequence)\n
"},{"location":"API/sequence/#picea.Sequence.to_dict","title":"to_dict()
","text":"Make dictionary with header and sequence elements
Examples:
>>> s = Sequence('test', 'ACGTA')\n>>> s.to_dict()\n{'header': 'test', 'sequence': 'ACGTA'}\n
Returns:
Type DescriptionDict[str, str]
Dict[str, str]: sequence dictionary
Source code inpicea/sequence.py
def to_dict(self) -> Dict[str, str]:\n \"\"\"Make dictionary with header and sequence elements\n\n Examples:\n >>> s = Sequence('test', 'ACGTA')\n >>> s.to_dict()\n {'header': 'test', 'sequence': 'ACGTA'}\n\n Returns:\n Dict[str, str]: sequence dictionary\n \"\"\"\n return dict(header=self.header, sequence=self.sequence)\n
"},{"location":"API/sequence/#picea.Sequence.to_fasta","title":"to_fasta(linewidth=80)
","text":"Make fasta formatted sequence entry
Returns:
Name Type Descriptionstr
str
sequence in fasta format
Source code inpicea/sequence.py
def to_fasta(self, linewidth: int = 80) -> str:\n \"\"\"Make fasta formatted sequence entry\n\n Returns:\n str: sequence in fasta format\n \"\"\"\n sequence_lines = \"\\n\".join(re.findall(f\".{{1,{linewidth}}}\", self.sequence))\n return f\">{self.header}\\n{sequence_lines}\"\n
"},{"location":"API/sequence/#picea.BatchSequenceReader.__init__","title":"__init__(string=None, filename=None, filetype=None, batchsize=10)
","text":"[summary]
Parameters:
Name Type Description Defaultstring
str
[description]. Defaults to None.
None
filename
str
[description]. Defaults to None.
None
filetype
str
[description]. Defaults to None.
None
batchsize
int
[description]. Defaults to 10.
10
Returns:
Type DescriptionNone
Yields:
Type DescriptionNone
Source code in picea/sequence.py
def __init__(\n self,\n string: str = None,\n filename: str = None,\n filetype: str = None,\n batchsize: int = 10,\n) -> None:\n \"\"\"[summary]\n\n Args:\n string (str, optional): [description]. Defaults to None.\n filename (str, optional): [description]. Defaults to None.\n filetype (str, optional): [description]. Defaults to None.\n batchsize (int, optional): [description]. Defaults to 10.\n\n Returns:\n [type]: [description]\n\n Yields:\n [type]: [description]\n \"\"\"\n super().__init__(string, filename, filetype)\n self.batchsize = batchsize\n self._currentbatch = SequenceCollection()\n
"},{"location":"API/sequence/#picea.AbstractSequenceCollection.headers","title":"headers: List[str]
abstractmethod
property
","text":"List of sequences headers. Overridden in subclasses.
Returns:
Type DescriptionList[str]
List[str]: List of sequence headers
"},{"location":"API/sequence/#picea.AbstractSequenceCollection.iloc","title":"iloc: SequenceIndex
property
","text":"[summary]
Returns:
Name Type DescriptionSequenceIndex
SequenceIndex
[description]
"},{"location":"API/sequence/#picea.AbstractSequenceCollection.n_seqs","title":"n_seqs: int
abstractmethod
property
","text":"Return the number of sequences in the collection. Overridden in subclasses
Returns:
Name Type Descriptionint
int
number of sequences
"},{"location":"API/sequence/#picea.AbstractSequenceCollection.sequences","title":"sequences: List[str]
property
","text":"List of sequences without headers
Returns:
Type DescriptionList[str]
List[str]: list of sequences
"},{"location":"API/sequence/#picea.AbstractSequenceCollection.batch_rename","title":"batch_rename(rename_func)
","text":"Rename all headers by calling rename_func
on each header
Parameters:
Name Type Description Defaultrename_func
Callable
[description]
required Source code inpicea/sequence.py
def batch_rename(self, rename_func: Callable[[str], str]) -> None:\n \"\"\"Rename all headers by calling `rename_func` on each header\n\n Args:\n rename_func (Callable): [description]\n \"\"\"\n for header in self.headers:\n s: Sequence = self.pop(header)\n s.header = rename_func(s.header)\n self[s.header] = s.sequence\n
"},{"location":"API/sequence/#picea.AbstractSequenceCollection.from_fasta","title":"from_fasta(filename=None, string=None)
classmethod
","text":"Parse a fasta formatted string into a SequenceCollection object
Other Parameters:
Name Type Descriptionfilename
{String} -- filename string (default
{None})
string
{String} -- fasta formatted string (default
{None})
Returns:
Type DescriptionSequenceCollection
SequenceCollection -- SequenceCollection instance
Source code inpicea/sequence.py
@classmethod\ndef from_fasta(\n cls,\n filename: str = None,\n string: str = None,\n) -> \"SequenceCollection\":\n \"\"\"Parse a fasta formatted string into a SequenceCollection object\n\n Keyword Arguments:\n filename {String} -- filename string (default: {None})\n string {String} -- fasta formatted string (default: {None})\n\n Returns:\n SequenceCollection -- SequenceCollection instance\n \"\"\"\n sequencecollection = cls()\n\n for seq in SequenceReader(string=string, filename=filename, filetype=\"fasta\"):\n sequencecollection[seq.header] = seq.sequence\n return sequencecollection\n
"},{"location":"API/sequence/#picea.AbstractSequenceCollection.from_json","title":"from_json(filename=None, string=None)
classmethod
","text":"[summary]
Returns:
Type DescriptionSequenceCollection
SequenceCollection -- SequenceCollection instance
Source code inpicea/sequence.py
@classmethod\ndef from_json(cls, filename: Optional[str] = None, string: Optional[str] = None) -> \"SequenceCollection\":\n \"\"\"[summary]\n\n Keyword Arguments:\n string {String} -- JSON formatted string\n\n Returns:\n SequenceCollection -- SequenceCollection instance\n \"\"\"\n sequencecollection = cls()\n\n for seq in SequenceReader(string=string, filename=filename, filetype=\"json\"):\n sequencecollection[seq.header] = seq.sequence\n\n return sequencecollection\n
"},{"location":"API/sequence/#picea.AbstractSequenceCollection.from_sequence_iter","title":"from_sequence_iter(sequence_iter)
classmethod
","text":"[summary]
Raises:
Type DescriptionNotImplementedError
[description]
Returns:
Type DescriptionSequenceCollection
Source code in picea/sequence.py
@classmethod\ndef from_sequence_iter(cls, sequence_iter: Iterable[Sequence]) -> \"SequenceCollection\":\n \"\"\"[summary]\n\n Raises:\n NotImplementedError: [description]\n\n Returns:\n [type]: [description]\n \"\"\"\n sequencecollection = cls()\n for seq in sequence_iter:\n sequencecollection[seq.header] = seq.sequence\n return sequencecollection\n
"},{"location":"API/sequence/#picea.AbstractSequenceCollection.pop","title":"pop(header)
abstractmethod
","text":"[summary]
Parameters:
Name Type Description Defaultheader
str
[description]
requiredReturns:
Name Type DescriptionSequence
Sequence
[description]
Source code inpicea/sequence.py
@abstractmethod\ndef pop(self, header: str) -> Sequence:\n \"\"\"[summary]\n\n Args:\n header (str): [description]\n\n Returns:\n Sequence: [description]\n \"\"\"\n raise NotImplementedError((\"Classes extending from AbstractSequenceCollection should \" \"implement pop method\"))\n
"},{"location":"API/sequence/#picea.AbstractSequenceCollection.to_fasta","title":"to_fasta(linewidth=80)
","text":"Get a fasta-formatted string of the sequence collection
Returns:
Name Type Descriptionstr
str
Multi-line fasta-formatted string
Source code inpicea/sequence.py
def to_fasta(self, linewidth: int = 80) -> str:\n \"\"\"Get a fasta-formatted string of the sequence collection\n\n Returns:\n str: Multi-line fasta-formatted string\n \"\"\"\n return \"\\n\".join([seq.to_fasta(linewidth=linewidth) for seq in self])\n
"},{"location":"API/sequence/#picea.AbstractSequenceCollection.to_json","title":"to_json(indent=None)
","text":"[summary]
Returns:
Name Type Descriptionstr
str
[description]
Source code inpicea/sequence.py
def to_json(self, indent: Optional[int] = None) -> str:\n \"\"\"[summary]\n\n Returns:\n str: [description]\n \"\"\"\n gene_dicts = [seq.to_dict() for seq in self]\n return json.dumps(gene_dicts, indent=indent)\n
"},{"location":"API/sequence/#picea.SequenceCollection.align","title":"align(method='mafft', method_kwargs=None)
","text":"[summary]
Parameters:
Name Type Description Defaultmethod
str
[description]. Defaults to 'mafft'.
'mafft'
method_kwargs
Mapping[str, str]
[description]. Defaults to dict().
None
Returns:
Type DescriptionMultipleSequenceAlignment
Source code in picea/sequence.py
def align(\n self, method: Optional[str] = \"mafft\", method_kwargs: Optional[Mapping[str, str]] = None\n) -> \"MultipleSequenceAlignment\":\n \"\"\"[summary]\n\n Args:\n method (str, optional): [description]. Defaults to 'mafft'.\n method_kwargs (Mapping[str, str], optional): [description]. \\\n Defaults to dict().\n\n Returns:\n [type]: [description]\n \"\"\"\n if not method_kwargs:\n method_kwargs = dict()\n fasta = self.to_fasta()\n command = [method, *chain(*method_kwargs.items()), \"-\"]\n process = Popen(command, stdin=PIPE, stdout=PIPE, stderr=PIPE)\n stdout, _ = process.communicate(input=fasta.encode())\n aligned_fasta = stdout.decode().strip()\n return MultipleSequenceAlignment.from_fasta(string=aligned_fasta)\n
"},{"location":"API/sequence/#picea.MultipleSequenceAlignment.to_nexus","title":"to_nexus()
","text":"Source code in picea/sequence.py
def to_nexus(self) -> str:\n \"\"\" \"\"\"\n sequences = \"\\n\".join([f\"{s.header} {s.sequence}\" for s in self])\n return (\n \"begin data;\"\n f\"\\tdimensions ntax={self.n_seqs} nchar={self.n_chars};\"\n \"\\tformat datatype=dna gap=-;\"\n \"\\tmatrix\"\n f\"\\t{sequences}\"\n \"\\t;\"\n \"end;\"\n )\n
"},{"location":"API/sequence/#picea.SequenceAnnotation.__init__","title":"__init__(sequence=None)
","text":"[summary]
Parameters:
Name Type Description Defaultsequence
Optional[Sequence]
[description]. Defaults to None.
None
Source code in picea/sequence.py
def __init__(self, sequence: Optional[\"Sequence\"] = None) -> None:\n \"\"\"[summary]\n\n Args:\n sequence (Optional[Sequence], optional): [description]. Defaults\\\n to None.\n \"\"\"\n super().__init__()\n if sequence:\n sequence.annotation = self\n self.sequence = sequence\n self._gff_headers = list()\n
"},{"location":"API/sequence/#picea.SequenceAnnotation.from_gff","title":"from_gff(filename=None, string=None, sequence=None, link_parents=True)
classmethod
","text":"[summary]
Parameters:
Name Type Description Defaultfilename
[type]
[description]. Defaults to None.
None
string
[type]
[description]. Defaults to None.
None
sequence
[type]
[description]. Defaults to None.
None
Returns:
Type DescriptionSequenceAnnotation
Source code in picea/sequence.py
@classmethod\ndef from_gff(\n cls,\n filename: Optional[str] = None,\n string: Optional[str] = None,\n sequence: Optional[\"Sequence\"] = None,\n link_parents: bool = True,\n) -> \"SequenceAnnotation\":\n \"\"\"[summary]\n\n Args:\n filename ([type], optional): [description]. Defaults to None.\n string ([type], optional): [description]. Defaults to None.\n sequence ([type], optional): [description].\n Defaults to None.\n\n Returns:\n [type]: [description]\n \"\"\"\n assert filename or string\n assert not (filename and string)\n sequence_annotation = cls(sequence=sequence)\n header = True\n if filename:\n with open(filename) as filehandle:\n string = filehandle.read()\n for line_number, line in enumerate(string.split(\"\\n\")):\n line = line.strip()\n if not line:\n continue\n if line == \"##FASTA\":\n break\n if line[0] == \"#\":\n if header:\n sequence_annotation._gff_headers.append(line)\n continue\n else:\n header = False\n\n interval = SequenceInterval.from_gff_line(gff_line=line, line_number=line_number)\n interval._container = sequence_annotation\n sequence_annotation[interval.ID] = interval\n\n if link_parents:\n sequence_annotation._link_parents()\n\n return sequence_annotation\n
"},{"location":"API/sequence/#picea.SequenceAnnotation.from_gtf","title":"from_gtf(filename=None, string=None, sequence=None, link_parents=True)
classmethod
","text":"[summary]
Raises:
Type DescriptionIndexError
[description]
IndexError
[description]
Returns:
Type DescriptionSequenceAnnotation
Source code in picea/sequence.py
@classmethod\ndef from_gtf(\n cls,\n filename: Optional[str] = None,\n string: Optional[str] = None,\n sequence: Optional[\"Sequence\"] = None,\n link_parents: Optional[bool] = True,\n) -> \"SequenceAnnotation\":\n \"\"\"[summary]\n\n Raises:\n IndexError: [description]\n IndexError: [description]\n\n Returns:\n [type]: [description]\n \"\"\"\n assert filename or string\n assert not (filename and string)\n sequence_annotation = cls(sequence=sequence)\n header = True\n\n # start with just reading all intervals\n if filename:\n with open(filename) as filehandle:\n string = filehandle.read()\n for line_number, line in enumerate(string.split(\"\\n\")):\n line = line.strip()\n if not line:\n continue\n if line[0] == \"#\":\n if header:\n sequence_annotation._gff_headers.append(line)\n continue\n else:\n header = False\n interval = SequenceInterval.from_gtf_line(gtf_line=line, line_number=line_number)\n interval._container = sequence_annotation\n sequence_annotation[interval.ID] = interval\n # fix missing gene and transcript intervals\n transcript_child_counter = Counter()\n new_intervals = dict()\n for interval in sequence_annotation:\n gene_id = interval.gff_attributes[\"gene_id\"][0]\n transcript_id = interval.gff_attributes[\"transcript_id\"][0]\n interval_type = interval.interval_type\n id_tuple = (gene_id, transcript_id, interval_type)\n child_count = transcript_child_counter[id_tuple]\n transcript_child_counter.update([id_tuple])\n interval._ID = f\"{transcript_id}.{interval_type}_{child_count}\"\n if transcript_id not in new_intervals:\n # new transcript interval\n transcript_interval = deepcopy(interval)\n transcript_interval._container = interval._container\n transcript_interval._ID = transcript_id\n transcript_interval.interval_type = \"mRNA\"\n transcript_interval.parent = [gene_id]\n # new gene interval\n gene_interval = deepcopy(interval)\n gene_interval._container = interval._container\n gene_interval._ID = gene_id\n gene_interval.interval_type = 
\"gene\"\n gene_interval.parent = None\n\n new_intervals[transcript_id] = transcript_interval\n new_intervals[gene_id] = gene_interval\n\n interval.parent = [transcript_id]\n new_intervals[interval.ID] = interval\n sequence_annotation._intervals = new_intervals\n\n # set children\n if link_parents:\n sequence_annotation._link_parents()\n\n # fix gene and transcript start and stop coordinates\n genes = sequence_annotation.groupby(\"interval_type\")[\"gene\"]\n for gene in genes:\n # fix gene first\n start = 10e9\n end = 0\n for child in gene.children:\n start = min(start, child.start)\n end = max(end, child.end)\n gene.start = start\n gene.end = end\n\n # fix transcripts\n transcripts = gene.children.groupby(\"interval_type\")[\"mRNA\"]\n for transcript in transcripts:\n start = 10e9\n end = 0\n for child in transcript.children:\n start = min(start, child.start)\n end = max(end, child.end)\n transcript.end = end\n transcript.start = start\n\n return sequence_annotation\n
"},{"location":"API/sequence/#picea.SequenceAnnotation.from_json","title":"from_json(filename=None, string=None, sequence=None)
classmethod
","text":"[summary]
Source code inpicea/sequence.py
@classmethod\ndef from_json(\n cls,\n filename: Optional[str] = None,\n string: Optional[str] = None,\n sequence: Optional[\"Sequence\"] = None,\n) -> \"SequenceAnnotation\":\n \"\"\"[summary]\"\"\"\n assert filename or string\n assert not (filename and string)\n if filename:\n with open(filename) as filehandle:\n string = filehandle.read()\n\n sequence_annotation = cls(sequence=sequence)\n\n gene_dicts = json.loads(string)\n assert isinstance(gene_dicts, list)\n\n for top_dict in gene_dicts:\n child_dicts = top_dict.pop(\"children\", list())\n top_interval = SequenceInterval.from_dict(interval_dict=top_dict)\n top_interval._container = sequence_annotation\n sequence_annotation[top_interval.ID] = top_interval\n for child_dict in child_dicts:\n child_interval = SequenceInterval.from_dict(interval_dict=child_dict)\n child_interval._container = sequence_annotation\n sequence_annotation[child_interval.ID] = child_interval\n for interval in sequence_annotation:\n if interval.parent:\n for parent_ID in interval.parent:\n try:\n parent = sequence_annotation[parent_ID]\n except IndexError as err:\n raise IndexError(\n \"Interval {interval.ID} is listing {parent_ID} \" \"as Parent, but parent could not be found.\"\n ) from err\n parent._children.append(interval.ID)\n return sequence_annotation\n
"},{"location":"API/sequence/#picea.SequenceAnnotation.to_gff","title":"to_gff()
","text":"[summary]
Returns:
Name Type Descriptionstr
str
[description]
Source code inpicea/sequence.py
def to_gff(self) -> str:\n \"\"\"[summary]\n\n Returns:\n str: [description]\n \"\"\"\n return \"\".join(interval.to_gff_line(trailing_newline=True) for interval in self)\n
"},{"location":"API/sequence/#picea.SequenceAnnotation.to_json","title":"to_json(indent=None)
","text":"[summary]
Returns:
Name Type Descriptionstr
str
[description]
Source code inpicea/sequence.py
def to_json(self, indent: Optional[int] = None) -> str:\n \"\"\"[summary]\n\n Returns:\n str: [description]\n \"\"\"\n interval_dicts = [interval.to_dict() for interval in self]\n return json.dumps(interval_dicts, indent=indent)\n
"},{"location":"API/sequence/#picea.SequenceInterval.__init__","title":"__init__(ID=None, seqid=None, source=None, interval_type=None, start=None, end=None, score=None, strand=None, phase=None, children=None, container=None, **kwargs)
","text":"[summary]
Parameters:
Name Type Description DefaultID
Optional[str]
[description]. Defaults to None.
None
seqid
Optional[str]
[description]. Defaults to None.
None
source
Optional[str]
[description]. Defaults to None.
None
interval_type
Optional[str]
[description]. Defaults to None.
None
start
Optional[int]
[description]. Defaults to None.
None
end
Optional[int]
[description]. Defaults to None.
None
score
Optional[float]
[description]. Defaults to None.
None
strand
Optional[str]
[description]. Defaults to None.
None
phase
Optional[str]
[description]. Defaults to None.
None
children
Optional[List]
[description]. Defaults to None.
None
container
Optional[SequenceAnnotation]
[description]. Defaults to None.
None
Source code in picea/sequence.py
def __init__(\n self,\n ID: Optional[str] = None,\n seqid: Optional[str] = None,\n source: Optional[str] = None,\n interval_type: Optional[str] = None,\n start: Optional[int] = None,\n end: Optional[int] = None,\n score: Optional[float] = None,\n strand: Optional[str] = None,\n phase: Optional[str] = None,\n children: Optional[List[str]] = None,\n container: Optional[SequenceAnnotation] = None,\n **kwargs,\n):\n \"\"\"[summary]\n\n Args:\n ID (Optional[str], optional): [description]. Defaults to None.\n seqid (Optional[str], optional): [description]. Defaults to None.\n source (Optional[str], optional): [description]. Defaults to None.\n interval_type (Optional[str], optional): [description]. Defaults\n to None.\n start (Optional[int], optional): [description]. Defaults to None.\n end (Optional[int], optional): [description]. Defaults to None.\n score (Optional[float], optional): [description]. Defaults to None.\n strand (Optional[str], optional): [description]. Defaults to None.\n phase (Optional[str], optional): [description]. Defaults to\n None.\n children (Optional[List], optional): [description]. 
Defaults to\n None.\n container (Optional[SequenceAnnotation], optional): [description].\n Defaults to None.\n \"\"\"\n # interval ID is a property (see below) with getter and setter\n # self._ID = ID\n # self._original_ID = ID\n parents = kwargs.pop(\"parent\", None)\n super().__init__(ID=ID, children=children, container=container, parents=parents)\n\n # Standard gff fields\n self.seqid = seqid\n self.source = source\n self.interval_type = interval_type\n self.start = start\n self.end = end\n self.score = score\n self.strand = strand\n self.phase = phase\n\n # Set attributes with predefined meanings in the gff spec to None\n for attr in self._predefined_gff3_attributes:\n # ID and parent are handled separately in DAG superclass\n if attr in {\"ID\", \"parent\"}:\n continue\n self[attr] = kwargs.get(attr, None)\n\n # Any additional attributes\n for key, value in kwargs.items():\n self[key] = value\n
"},{"location":"API/sequence/#picea.SequenceInterval.from_dict","title":"from_dict(interval_dict)
classmethod
","text":"[summary] Args: interval_dict
Returns:
Type DescriptionSequenceInterval
Source code in picea/sequence.py
@classmethod\ndef from_dict(cls, interval_dict: Dict[str, Any]) -> \"SequenceInterval\":\n \"\"\"[summary]\n Args:\n interval_dict\n\n Returns:\n [type]: [description]\n \"\"\"\n attributes = interval_dict.pop(\"attributes\", dict())\n return cls(**interval_dict, **attributes)\n
"},{"location":"API/sequence/#picea.SequenceInterval.from_gff_line","title":"from_gff_line(gff_line=None, line_number=None, attribute_parser=parse_gff_attribute_string)
classmethod
","text":"[summary]
Parameters:
Name Type Description Defaultgff_line
Optional[str]
[description]. Defaults to None.
None
line_number
Optional[int]
[description]. Defaults to None.
None
Returns:
Type DescriptionSequenceInterval
Source code in picea/sequence.py
@classmethod\ndef from_gff_line(\n cls,\n gff_line: Optional[str] = None,\n line_number: Optional[int] = None,\n attribute_parser: Callable = parse_gff_attribute_string,\n) -> \"SequenceInterval\":\n \"\"\"[summary]\n\n Args:\n gff_line (Optional[str], optional): [description]. Defaults\n to None.\n line_number (Optional[int], optional): [description]. Defaults\n to None.\n\n Returns:\n [type]: [description]\n \"\"\"\n gff_parts = gff_line.split(\"\\t\")\n assert len(gff_parts) == 9, gff_parts\n seqid, source, interval_type, start, end, score, strand, phase = gff_parts[:8]\n try:\n start = int(start)\n end = int(end)\n except ValueError as err:\n error = \"GFF start and end fields must be integer\"\n if line_number:\n error = f\"{error}, gff line {line_number}\"\n raise ValueError(error) from err\n\n if score != \".\":\n try:\n score = float(score)\n except ValueError as err:\n error = \"GFF score field must be a float\"\n if line_number:\n error = f\"{error}, gff line {line_number}\"\n raise ValueError(error) from err\n\n if strand not in (\"+\", \"-\", \".\"):\n error = 'GFF strand must be one of \"+\", \"-\" or \".\"'\n if line_number:\n error = f\"{error}, gff line {line_number}\"\n raise ValueError(error)\n\n if phase not in (\"0\", \"1\", \"2\", \".\"):\n error = 'GFF phase must be one of \"0\", \"1\", \"2\" or \".\"'\n if line_number:\n error = f\"{error}, gff line {line_number}\"\n raise ValueError(error)\n elif phase != \".\":\n phase = int(phase)\n\n # Disable phase checking of CDS for now...\n # if interval_type == 'CDS' and phase not in ('0', '1', '2'):\n # error = 'GFF intervals of type CDS must have phase of\\\n # \"0\", \"1\" or \"2\"'\n # if line_number:\n # error = f'{error}, gff line {line_number}'\n # raise ValueError(error)\n\n attributes = attribute_parser(gff_parts[8])\n\n ID = attributes.pop(\"ID\", [str(uuid.uuid4())])[0]\n\n return cls(\n seqid=seqid,\n source=source,\n interval_type=interval_type,\n start=start,\n end=end,\n score=score,\n 
strand=strand,\n phase=phase,\n ID=ID,\n **attributes,\n )\n
"},{"location":"API/sequence/#picea.SequenceInterval.from_gtf_line","title":"from_gtf_line(gtf_line=None, line_number=None)
classmethod
","text":"[summary]
Returns:
Type DescriptionSequenceInterval
Yields:
Type DescriptionSequenceInterval
Source code in picea/sequence.py
@classmethod\ndef from_gtf_line(cls, gtf_line: Optional[str] = None, line_number: Optional[int] = None) -> \"SequenceInterval\":\n \"\"\"[summary]\n\n Returns:\n [type]: [description]\n\n Yields:\n [type]: [description]\n \"\"\"\n return cls.from_gff_line(gtf_line, line_number, parse_gtf_attribute_string)\n
"},{"location":"API/sequence/#picea.SequenceInterval.to_dict","title":"to_dict(include_children=False)
","text":"[summary]
Returns:
Type DescriptionDict[str, Any]
Dict[str, Any]: [description]
Source code inpicea/sequence.py
def to_dict(self, include_children: bool = False) -> Dict[str, Any]:\n \"\"\"[summary]\n\n Returns:\n Dict[str, Any]: [description]\n \"\"\"\n attributes = dict(**self.gff_attributes)\n attributes.pop(\"ID\")\n interval_dict = dict(\n ID=self.ID,\n seqid=self.seqid,\n source=self.source,\n interval_type=self.interval_type,\n start=self.start,\n end=self.end,\n score=self.score,\n strand=self.strand,\n phase=self.phase,\n attributes=attributes,\n )\n if include_children:\n children = [child.to_dict() for child in self.children[1:]]\n interval_dict[\"children\"] = children\n return interval_dict\n
"},{"location":"API/sequence/#picea.SequenceInterval.to_gff_line","title":"to_gff_line(trailing_newline=False)
","text":"[summary]
Returns:
Name Type Descriptionstr
str
[description]
Source code inpicea/sequence.py
def to_gff_line(self, trailing_newline: bool = False) -> str:\n \"\"\"[summary]\n\n Returns:\n str: [description]\n \"\"\"\n # attributes = dict(ID=self.ID, **self.gff_attributes)\n\n gff_line = \"\\t\".join(\n [\n self.seqid,\n self.source,\n self.interval_type,\n str(self.start),\n str(self.end),\n str(self.score),\n self.strand,\n str(self.phase),\n format_gff_attribute_string(self.gff_attributes),\n ]\n )\n if trailing_newline:\n gff_line = f\"{gff_line}\\n\"\n return gff_line\n
"},{"location":"API/sequence/#picea.SequenceInterval.to_gtf_line","title":"to_gtf_line()
","text":"[summary]
Returns:
Name Type Descriptionstr
str
[description]
Source code inpicea/sequence.py
def to_gtf_line(self) -> str:\n \"\"\"[summary]\n\n Returns:\n str: [description]\n \"\"\"\n interval_type = self._gtf_interval_types.get(self.interval_type, self.interval_type)\n return \"\\t\".join(\n [\n self.seqid,\n self.source,\n interval_type,\n str(self.start),\n str(self.end),\n str(self.score),\n self.strand,\n str(self.phase),\n format_gtf_attribute_string(self.gtf_attributes),\n ]\n )\n
"},{"location":"API/sequence/#picea.SequenceInterval.to_json","title":"to_json(include_children=False, indent=None)
","text":"[summary]
Parameters:
Name Type Description Defaultinclude_children
bool
[description]. Defaults to False.
False
Returns:
Name Type Descriptionstr
str
[description]
Source code inpicea/sequence.py
def to_json(self, include_children: bool = False, indent: Optional[int] = None) -> str:\n \"\"\"[summary]\n\n Args:\n include_children (bool, optional): [description]. Defaults to \\\n False.\n\n Returns:\n str: [description]\n \"\"\"\n return json.dumps(self.to_dict(include_children=include_children), indent=indent)\n
"},{"location":"API/tree/","title":"Tree","text":"Recursive Tree object
Source code inpicea/tree.py
@dataclass\nclass Tree:\n \"\"\"Recursive Tree object\"\"\"\n\n name: Optional[str] = None\n length: Optional[float] = None\n children: Optional[List[\"Tree\"]] = field(default_factory=list)\n\n ID: InitVar[Optional[int]] = None\n depth: InitVar[Optional[int]] = None\n parent: InitVar[Optional[\"Tree\"]] = None\n cumulative_length: InitVar[Optional[float]] = None\n\n def __post_init__(self, ID, *args, **kwargs):\n \"\"\"[summary]\n\n Args:\n ID ([type]): [description]\n \"\"\"\n self.ID = ID\n\n @property\n def loc(self) -> \"Tree\":\n \"\"\"Name based index\n\n Example:\n >>> from picea import Tree\n >>> newick = '(((a,b),(c,d)),e);'\n >>> tree = Tree.from_newick(newick)\n >>> tree.loc['a']\n Tree(name='a', length=None, children=[])\n\n Returns:\n Tree: tree node matching name\n\n Raises:\n IndexError\n \"\"\"\n return TreeIndex(iterator=self.depth_first, eq_func=lambda node, name: node.name == name)\n\n @property\n def iloc(self) -> \"Tree\":\n \"\"\"Index based index\n\n Example:\n >>> from picea import Tree\n >>> newick = '(((a,b),(c,d)),e);'\n >>> tree = Tree.from_newick(newick)\n >>> tree.iloc[2]\n Tree(name='', length=None, children=[Tree(name='a', length=None, \\\nchildren=[]), Tree(name='b', length=None, children=[])])\n\n Returns:\n Tree: tree node matching index\n \"\"\"\n return TreeIndex(iterator=self.depth_first, eq_func=lambda node, index: node.ID == index)\n\n @property\n def root(self) -> \"Tree\":\n \"\"\"Root node of the (sub)tree\n\n Returns:\n Tree: Root node\n \"\"\"\n root = self\n while root.parent:\n root = root.parent\n return root\n\n @property\n def nodes(self) -> List[\"Tree\"]:\n \"\"\"A list of all tree nodes in breadth-first order\n\n Returns:\n list: A list of all tree nodes\n \"\"\"\n return list(self.breadth_first())\n\n @property\n def leaves(self) -> List[\"Tree\"]:\n \"\"\"A list of leaf nodes only\n\n Returns:\n list: A list of leaf nodes only\n \"\"\"\n return [n for n in self.nodes if not n.children]\n\n @property\n def 
links(self) -> List[Tuple[\"Tree\", \"Tree\"]]:\n \"\"\"A list of all (parent, child) combinations\n\n Returns:\n list: All (parent,child) combinations\n \"\"\"\n _links = []\n for node in self.nodes:\n if node.children:\n for child in node.children:\n _links.append((node, child))\n return _links\n\n @classmethod\n def from_newick(cls, string: Optional[str] = None, filename: Optional[str] = None) -> \"Tree\":\n \"\"\"Parse a newick formatted string into a Tree object\n\n Arguments:\n newick_string (string): Newick formatted tree string\n\n Returns:\n Tree: Tree object\n \"\"\"\n assert filename or string\n assert not (filename and string)\n if filename:\n with open(filename) as filehandle:\n string = filehandle.read()\n tokens: list[str] = re.split(r\"\\s*(;|\\(|\\)|,|:)\\s*\", string)\n ID = 0\n tree = cls(ID=ID)\n ancestors: list[Tree] = list()\n found_branchlengths = False\n for i, token in enumerate(tokens):\n if token == \"(\":\n ID += 1\n subtree = cls(ID=ID)\n tree.children = [subtree]\n ancestors.append(tree)\n tree = subtree\n elif token == \",\":\n ID += 1\n subtree = cls(ID=ID)\n ancestors[-1].children.append(subtree)\n tree = subtree\n elif token == \")\":\n tree = ancestors.pop()\n else:\n previous_token = tokens[i - 1]\n if previous_token in (\"(\", \")\", \",\"):\n tree.name = token\n elif previous_token == \":\":\n found_branchlengths = True\n tree.length = float(token)\n tree.cumulative_length = 0.0\n tree.depth = 0\n queue: list[Tree] = [tree]\n while queue:\n node = queue.pop(0)\n if found_branchlengths:\n if node.length is None:\n warn(\n \"Found branchlengths on some parts of the tree, but node \"\n f\"{node.ID} has no branchlength specified, setting to \"\n \"branchlength 0.0\"\n )\n node.length = 0.0\n node.cumulative_length = 0.0\n for child in node.children:\n child.parent = node\n child.depth = node.depth + 1\n if child.length:\n child.cumulative_length = node.cumulative_length + abs(child.length)\n queue += node.children\n\n return 
tree\n\n def to_newick(self, branch_lengths: bool = False) -> str:\n \"\"\"Make a Newick formatted string\n\n Args:\n branch_lengths (bool, optional): Whether to include branch lengths\\\n in the Newick string. Defaults to True.\n\n Returns:\n String: Newick formatted tree string\n \"\"\"\n if self.name:\n name = str(self.name)\n else:\n name = \"\"\n\n if self.children:\n subtree_string = \",\".join([c.to_newick(branch_lengths=branch_lengths) for c in self.children])\n newick = f\"({subtree_string}){name}\"\n else:\n newick = name\n\n if branch_lengths and self.ID != 0:\n length = self.length\n if length is None:\n warn(\n \"Trying to write branch length for node that has no branch length \\\n set, defaulting to zero length branch.\"\n )\n length = 0\n if length == 0:\n length = int(0)\n newick += f\":{length}\"\n\n if self == self.root:\n newick += \";\"\n\n return newick\n\n @classmethod\n def from_sklearn(cls, clustering) -> \"Tree\":\n \"\"\"Read a tree from sklearn agglomerative clustering\n\n Args:\n clustering (sklearn object): sklearn agglomerative clustering\\\n object.\n\n Returns:\n Tree: Tree object\n \"\"\"\n nodes = clustering.children_\n n_leaves = nodes.shape[0] + 1\n tree = cls(ID=nodes.shape[0] * 2)\n\n queue = [tree]\n while queue:\n node = queue.pop(0)\n if node.ID < n_leaves:\n node.name = str(node.ID)\n continue\n for child_ID in nodes[node.ID - n_leaves]:\n child = cls(ID=child_ID)\n child.parent = node\n node.children.append(child)\n queue += node.children\n\n return tree\n\n def to_sklearn(self):\n # TODO\n raise NotImplementedError()\n\n @classmethod\n def from_json(cls):\n # TODO\n raise NotImplementedError()\n\n def to_json(self, indent: Optional[int] = None) -> str:\n return json.dumps(self.to_dict(), indent=indent)\n\n @classmethod\n def from_dict(cls, tree_dict):\n # TODO\n raise NotImplementedError()\n # tree = cls()\n # return tree\n\n def to_dict(self) -> TreeDict:\n \"\"\"[summary]\n\n Returns:\n TreeDict: [description]\n 
\"\"\"\n return asdict(self)\n\n def breadth_first(self) -> Generator[\"Tree\", None, None]:\n \"\"\"Generator implementing breadth first search starting at root node\"\"\"\n queue = [self]\n while queue:\n node = queue.pop(0)\n queue += node.children\n yield node\n\n def depth_first(self, post_order: bool = True) -> Generator[\"Tree\", None, None]:\n \"\"\"Generator implementing depth first search in either post- or\n pre-order traversel\n\n Keyword Arguments:\n post_order (bool, optional): Depth first search in post-order\n traversal or not. Defaults to True\n \"\"\"\n if not post_order:\n yield self\n for child in self.children:\n yield from child.depth_first(post_order=post_order)\n if post_order:\n yield self\n\n def rename_leaves(self, rename_func: Callable, inplace: bool = True) -> Optional[\"Tree\"]:\n \"\"\"[summary]\"\"\"\n tree = self if inplace else deepcopy(self)\n for leaf in tree.leaves:\n leaf.name = rename_func(leaf.name)\n
"},{"location":"API/tree/#picea.Tree.iloc","title":"iloc: Tree
property
","text":"Index based index
Examplefrom picea import Tree newick = '(((a,b),(c,d)),e);' tree = Tree.from_newick(newick) tree.iloc[2] Tree(name='', length=None, children=[Tree(name='a', length=None, children=[]), Tree(name='b', length=None, children=[])])
Returns:
Name Type DescriptionTree
Tree
tree node matching index
"},{"location":"API/tree/#picea.Tree.leaves","title":"leaves: List[Tree]
property
","text":"A list of leaf nodes only
Returns:
Name Type Descriptionlist
List[Tree]
A list of leaf nodes only
"},{"location":"API/tree/#picea.Tree.links","title":"links: List[Tuple[Tree, Tree]]
property
","text":"A list of all (parent, child) combinations
Returns:
Name Type Descriptionlist
List[Tuple[Tree, Tree]]
All (parent,child) combinations
"},{"location":"API/tree/#picea.Tree.loc","title":"loc: Tree
property
","text":"Name based index
Examplefrom picea import Tree newick = '(((a,b),(c,d)),e);' tree = Tree.from_newick(newick) tree.loc['a'] Tree(name='a', length=None, children=[])
Returns:
Name Type DescriptionTree
Tree
tree node matching name
"},{"location":"API/tree/#picea.Tree.nodes","title":"nodes: List[Tree]
property
","text":"A list of all tree nodes in breadth-first order
Returns:
Name Type Descriptionlist
List[Tree]
A list of all tree nodes
"},{"location":"API/tree/#picea.Tree.root","title":"root: Tree
property
","text":"Root node of the (sub)tree
Returns:
Name Type DescriptionTree
Tree
Root node
"},{"location":"API/tree/#picea.Tree.__post_init__","title":"__post_init__(ID, *args, **kwargs)
","text":"[summary]
Parameters:
Name Type Description DefaultID
[type]
[description]
required Source code inpicea/tree.py
def __post_init__(self, ID, *args, **kwargs):\n \"\"\"[summary]\n\n Args:\n ID ([type]): [description]\n \"\"\"\n self.ID = ID\n
"},{"location":"API/tree/#picea.Tree.breadth_first","title":"breadth_first()
","text":"Generator implementing breadth first search starting at root node
Source code inpicea/tree.py
def breadth_first(self) -> Generator[\"Tree\", None, None]:\n \"\"\"Generator implementing breadth first search starting at root node\"\"\"\n queue = [self]\n while queue:\n node = queue.pop(0)\n queue += node.children\n yield node\n
"},{"location":"API/tree/#picea.Tree.depth_first","title":"depth_first(post_order=True)
","text":"Generator implementing depth first search in either post- or pre-order traversel
Other Parameters:
Name Type Descriptionpost_order
bool
Depth first search in post-order traversal or not. Defaults to True.
Source code inpicea/tree.py
def depth_first(self, post_order: bool = True) -> Generator[\"Tree\", None, None]:\n \"\"\"Generator implementing depth first search in either post- or\n pre-order traversel\n\n Keyword Arguments:\n post_order (bool, optional): Depth first search in post-order\n traversal or not. Defaults to True\n \"\"\"\n if not post_order:\n yield self\n for child in self.children:\n yield from child.depth_first(post_order=post_order)\n if post_order:\n yield self\n
"},{"location":"API/tree/#picea.Tree.from_newick","title":"from_newick(string=None, filename=None)
classmethod
","text":"Parse a newick formatted string into a Tree object
Parameters:
Name Type Description Defaultnewick_string
string
Newick formatted tree string
requiredReturns:
Name Type DescriptionTree
Tree
Tree object
Source code inpicea/tree.py
@classmethod\ndef from_newick(cls, string: Optional[str] = None, filename: Optional[str] = None) -> \"Tree\":\n \"\"\"Parse a newick formatted string into a Tree object\n\n Arguments:\n newick_string (string): Newick formatted tree string\n\n Returns:\n Tree: Tree object\n \"\"\"\n assert filename or string\n assert not (filename and string)\n if filename:\n with open(filename) as filehandle:\n string = filehandle.read()\n tokens: list[str] = re.split(r\"\\s*(;|\\(|\\)|,|:)\\s*\", string)\n ID = 0\n tree = cls(ID=ID)\n ancestors: list[Tree] = list()\n found_branchlengths = False\n for i, token in enumerate(tokens):\n if token == \"(\":\n ID += 1\n subtree = cls(ID=ID)\n tree.children = [subtree]\n ancestors.append(tree)\n tree = subtree\n elif token == \",\":\n ID += 1\n subtree = cls(ID=ID)\n ancestors[-1].children.append(subtree)\n tree = subtree\n elif token == \")\":\n tree = ancestors.pop()\n else:\n previous_token = tokens[i - 1]\n if previous_token in (\"(\", \")\", \",\"):\n tree.name = token\n elif previous_token == \":\":\n found_branchlengths = True\n tree.length = float(token)\n tree.cumulative_length = 0.0\n tree.depth = 0\n queue: list[Tree] = [tree]\n while queue:\n node = queue.pop(0)\n if found_branchlengths:\n if node.length is None:\n warn(\n \"Found branchlengths on some parts of the tree, but node \"\n f\"{node.ID} has no branchlength specified, setting to \"\n \"branchlength 0.0\"\n )\n node.length = 0.0\n node.cumulative_length = 0.0\n for child in node.children:\n child.parent = node\n child.depth = node.depth + 1\n if child.length:\n child.cumulative_length = node.cumulative_length + abs(child.length)\n queue += node.children\n\n return tree\n
"},{"location":"API/tree/#picea.Tree.from_sklearn","title":"from_sklearn(clustering)
classmethod
","text":"Read a tree from sklearn agglomerative clustering
Parameters:
Name Type Description Defaultclustering
sklearn object
sklearn agglomerative clustering object.
requiredReturns:
Name Type DescriptionTree
Tree
Tree object
Source code inpicea/tree.py
@classmethod\ndef from_sklearn(cls, clustering) -> \"Tree\":\n \"\"\"Read a tree from sklearn agglomerative clustering\n\n Args:\n clustering (sklearn object): sklearn agglomerative clustering\\\n object.\n\n Returns:\n Tree: Tree object\n \"\"\"\n nodes = clustering.children_\n n_leaves = nodes.shape[0] + 1\n tree = cls(ID=nodes.shape[0] * 2)\n\n queue = [tree]\n while queue:\n node = queue.pop(0)\n if node.ID < n_leaves:\n node.name = str(node.ID)\n continue\n for child_ID in nodes[node.ID - n_leaves]:\n child = cls(ID=child_ID)\n child.parent = node\n node.children.append(child)\n queue += node.children\n\n return tree\n
"},{"location":"API/tree/#picea.Tree.rename_leaves","title":"rename_leaves(rename_func, inplace=True)
","text":"[summary]
Source code inpicea/tree.py
def rename_leaves(self, rename_func: Callable, inplace: bool = True) -> Optional[\"Tree\"]:\n \"\"\"[summary]\"\"\"\n tree = self if inplace else deepcopy(self)\n for leaf in tree.leaves:\n leaf.name = rename_func(leaf.name)\n
"},{"location":"API/tree/#picea.Tree.to_dict","title":"to_dict()
","text":"[summary]
Returns:
Name Type DescriptionTreeDict
TreeDict
[description]
Source code inpicea/tree.py
def to_dict(self) -> TreeDict:\n \"\"\"[summary]\n\n Returns:\n TreeDict: [description]\n \"\"\"\n return asdict(self)\n
"},{"location":"API/tree/#picea.Tree.to_newick","title":"to_newick(branch_lengths=False)
","text":"Make a Newick formatted string
Parameters:
Name Type Description Defaultbranch_lengths
bool
Whether to include branch lengths in the Newick string. Defaults to False.
False
Returns:
Name Type DescriptionString
str
Newick formatted tree string
Source code inpicea/tree.py
def to_newick(self, branch_lengths: bool = False) -> str:\n \"\"\"Make a Newick formatted string\n\n Args:\n branch_lengths (bool, optional): Whether to include branch lengths\\\n in the Newick string. Defaults to True.\n\n Returns:\n String: Newick formatted tree string\n \"\"\"\n if self.name:\n name = str(self.name)\n else:\n name = \"\"\n\n if self.children:\n subtree_string = \",\".join([c.to_newick(branch_lengths=branch_lengths) for c in self.children])\n newick = f\"({subtree_string}){name}\"\n else:\n newick = name\n\n if branch_lengths and self.ID != 0:\n length = self.length\n if length is None:\n warn(\n \"Trying to write branch length for node that has no branch length \\\n set, defaulting to zero length branch.\"\n )\n length = 0\n if length == 0:\n length = int(0)\n newick += f\":{length}\"\n\n if self == self.root:\n newick += \";\"\n\n return newick\n
"},{"location":"examples/ontology.pct/","title":"Ontology.pct","text":"This notebook shows how to work with biological ontologies such as the sequence ontology or the gene ontology.
In\u00a0[1]: Copied!import requests\nimport sys\nsys.path.insert(0, '../../')\nimport picea\npicea.__version__\nimport requests import sys sys.path.insert(0, '../../') import picea picea.__version__ Out[1]:
'0.0.27'In\u00a0[2]: Copied!
obo_url = (\n 'https://raw.githubusercontent.com/The-Sequence-Ontology/'\n 'SO-Ontologies/master/Ontology_Files/so.obo'\n)\nr = requests.get(obo_url)\nr\nobo_url = ( 'https://raw.githubusercontent.com/The-Sequence-Ontology/' 'SO-Ontologies/master/Ontology_Files/so.obo' ) r = requests.get(obo_url) r Out[2]:
<Response [200]>In\u00a0[3]: Copied!
r.text.split('\\n')[:100]\nr.text.split('\\n')[:100] Out[3]:
['format-version: 1.2',\n 'data-version: 2024-04-10',\n 'date: 10:04:2024 18:16',\n 'saved-by: Evan Christensen',\n 'subsetdef: Alliance_of_Genome_Resources \"Alliance of Genome Resources Gene Biotype Slim\"',\n 'subsetdef: biosapiens \"biosapiens protein feature ontology\"',\n 'subsetdef: DBVAR \"database of genomic structural variation\"',\n 'subsetdef: SOFA \"SO feature annotation\"',\n 'synonymtypedef: aa1 \"amino acid 1 letter code\"',\n 'synonymtypedef: aa3 \"amino acid 3 letter code\"',\n 'synonymtypedef: AAMOD \"amino acid modification\"',\n 'synonymtypedef: AGR \"Alliance of Genome Resources\"',\n 'synonymtypedef: BS \"biosapiens\"',\n 'synonymtypedef: dbsnp \"dbsnp variant terms\"',\n 'synonymtypedef: dbvar \"DBVAR\"',\n 'synonymtypedef: ebi_variants \"ensembl variant terms\"',\n 'synonymtypedef: RNAMOD \"RNA modification\" EXACT',\n 'synonymtypedef: VAR \"variant annotation term\"',\n 'default-namespace: sequence',\n 'ontology: so',\n 'property_value: IAO:0000700 SO:0000110',\n 'property_value: IAO:0000700 SO:0000400',\n 'property_value: IAO:0000700 SO:0001060',\n 'property_value: IAO:0000700 SO:0001260',\n '',\n '[Term]',\n 'id: SO:0000000',\n 'name: Sequence_Ontology',\n 'subset: SOFA',\n 'is_obsolete: true',\n '',\n '[Term]',\n 'id: SO:00000000002382',\n 'name: 5_prime_UTR_uORF_variant',\n 'def: \"A 5\\' UTR variant within an upstream open reading frame.\" [PMID:32461616, PMID:32926138]',\n 'comment: Added 10 Apr 2024 at the request of Sarah Hunt (EBI). See GitHub Issue #647.',\n 'is_a: SO:0001623 ! 5_prime_UTR_variant',\n 'created_by: evan',\n 'creation_date: 2024-04-10T17:49:03Z',\n '',\n '[Term]',\n 'id: SO:0000001',\n 'name: region',\n 'def: \"A sequence_feature with an extent greater than zero. A nucleotide region is composed of bases and a polypeptide region is composed of amino acids.\" [SO:ke]',\n 'subset: SOFA',\n 'synonym: \"sequence\" EXACT []',\n 'is_a: SO:0000110 ! 
sequence_feature',\n '',\n '[Term]',\n 'id: SO:00000010002382',\n 'name: 5_prime_UTR_uORF_stop_codon_variant',\n 'def: \"A 5\\' UTR variant where a stop codon in an upstream open reading frame is introduced, moved or lost.\" [PMID:32461616, PMID:32926138]',\n 'comment: Added 10 Apr 2024 at the request of Sarah Hunt (EBI). See GitHub Issue #622.',\n 'is_a: SO:00000000002382 ! 5_prime_UTR_uORF_variant',\n 'created_by: evan',\n 'creation_date: 2024-04-10T17:56:17Z',\n '',\n '[Term]',\n 'id: SO:0000002',\n 'name: sequence_secondary_structure',\n 'def: \"A folded sequence.\" [SO:ke]',\n 'synonym: \"INSDC_feature:misc_structure\" EXACT []',\n 'synonym: \"sequence secondary structure\" EXACT []',\n 'is_a: SO:0001411 ! biological_region',\n '',\n '[Term]',\n 'id: SO:00000020002382',\n 'name: 5_prime_UTR_uORF_frameshift_variant',\n 'def: \"A 5\\' UTR variant which disrupts the translation of an upstream open reading frame because the number of nucleotides inserted or deleted is not a multiple of three.\" [PMID:32461616, PMID:32926138]',\n 'comment: Added 10 Apr 2024 at the request of Sarah Hunt (EBI). See GitHub Issue #621.',\n 'synonym: \"uFrameshift (UTRannotator)\" EXACT []',\n 'is_a: SO:00000000002382 ! 5_prime_UTR_uORF_variant',\n 'created_by: evan',\n 'creation_date: 2024-04-10T17:58:40Z',\n '',\n '[Term]',\n 'id: SO:0000003',\n 'name: G_quartet',\n 'def: \"G-quartets are unusual nucleic acid structures consisting of a planar arrangement where each guanine is hydrogen bonded by hoogsteen pairing to another guanine in the quartet.\" [http://www.ncbi.nlm.nih.gov/pubmed/7919797?dopt=Abstract]',\n 'synonym: \"G quartet\" EXACT []',\n 'synonym: \"G tetrad\" EXACT []',\n 'synonym: \"G-quadruplex\" EXACT []',\n 'synonym: \"G-quartet\" EXACT []',\n 'synonym: \"G-tetrad\" EXACT []',\n 'synonym: \"G_quadruplex\" EXACT []',\n 'synonym: \"guanine tetrad\" EXACT []',\n 'xref: http://en.wikipedia.org/wiki/G-quadruplex \"wiki\"',\n 'is_a: SO:0000002 ! 
sequence_secondary_structure',\n '',\n '[Term]',\n 'id: SO:00000030002382',\n 'name: 5_prime_UTR_uORF_stop_codon_gain_variant',\n 'def: \"A 5\\' UTR variant where a premature stop codon is gained in an upstream open reading frame.\" [PMID:32461616, PMID:32926138]',\n 'comment: Added 10 Apr 2024 at the request of Sarah Hunt (EBI). See GitHub Issue #624.',\n 'synonym: \"uSTOP_gained\" EXACT [] {comment=\"UTRannotator\"}',\n 'is_a: SO:00000010002382 ! 5_prime_UTR_uORF_stop_codon_variant',\n 'created_by: evan',\n 'creation_date: 2024-04-10T18:01:42Z',\n '',\n '[Term]']In\u00a0[4]: Copied!
so = picea.Ontology.from_obo(string=r.text)\nso = picea.Ontology.from_obo(string=r.text) In\u00a0[5]: Copied!
ids = [el.ID for el in so['SO:0000866'].parents.elements]\nids = [el.ID for el in so['SO:0000866'].parents.elements] In\u00a0[6]: Copied!
'SO:0000866' in {el.ID for so_id in ids for el in so[so_id].children.elements}\n'SO:0000866' in {el.ID for so_id in ids for el in so[so_id].children.elements} Out[6]:
TrueIn\u00a0[7]: Copied!
len(so)\nlen(so) Out[7]:
2513In\u00a0[8]: Copied!
url = 'http://purl.obolibrary.org/obo/go.obo'\n# url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'\nr = requests.get(url)\ngo = picea.Ontology.from_obo(string=r.text)\nlen(go.elements)\nurl = 'http://purl.obolibrary.org/obo/go.obo' # url = 'http://purl.obolibrary.org/obo/go/go-basic.obo' r = requests.get(url) go = picea.Ontology.from_obo(string=r.text) len(go.elements)
/home/runner/work/picea/picea/docs/examples/../../picea/ontology.py:32: UserWarning: Accessed GO term by alt ID GO:0008150, returning main GO term with ID GO:0000004\n warnings.warn(f\"Accessed GO term by alt ID {ID}, \" f\"returning main GO term with ID {alt_id}\")\n/home/runner/work/picea/picea/docs/examples/../../picea/ontology.py:32: UserWarning: Accessed GO term by alt ID GO:0003674, returning main GO term with ID GO:0005554\n warnings.warn(f\"Accessed GO term by alt ID {ID}, \" f\"returning main GO term with ID {alt_id}\")\n/home/runner/work/picea/picea/docs/examples/../../picea/ontology.py:32: UserWarning: Accessed GO term by alt ID GO:0005575, returning main GO term with ID GO:0008372\n warnings.warn(f\"Accessed GO term by alt ID {ID}, \" f\"returning main GO term with ID {alt_id}\")\nOut[8]:
45667In\u00a0[9]: Copied!
[(term.ID, term.name, len(term.parents)) for term in go['GO:0048316'].parents]\n[(term.ID, term.name, len(term.parents)) for term in go['GO:0048316'].parents] Out[9]:
[('GO:0009791', ['post-embryonic development'], 5),\n ('GO:0032501', ['multicellular organismal process'], 1),\n ('GO:0000004', ['biological_process'], 0),\n ('GO:0007275', ['multicellular organism development'], 4),\n ('GO:0048856', ['anatomical structure development'], 2),\n ('GO:0032502', ['developmental process'], 1),\n ('GO:0048608', ['reproductive structure development'], 9),\n ('GO:0003006', ['developmental process involved in reproduction'], 3),\n ('GO:0022414', ['reproductive process'], 1),\n ('GO:0061458', ['reproductive system development'], 6),\n ('GO:0048731', ['system development'], 5),\n ('GO:0010154', ['fruit development'], 10)]In\u00a0[10]: Copied!
go['GO:0048316'].children\ngo['GO:0048316'].children Out[10]:
<picea.ontology.Ontology at 0x7f8a40ee3ac0>In\u00a0[11]: Copied!
import networkx as nx\nnx.__version__\nimport networkx as nx nx.__version__
\n---------------------------------------------------------------------------\nModuleNotFoundError Traceback (most recent call last)\nCell In[11], line 1\n----> 1 import networkx as nx\n 2 nx.__version__\n\nModuleNotFoundError: No module named 'networkx'In\u00a0[12]: Copied!
graph = nx.DiGraph()\nfor term in [go['GO:0048316'], *go['GO:0048316'].children]:\n graph.add_node(term.ID, name=term.name)\n for child_ID in term._children:\n graph.add_edge(term.ID, child_ID)\nlayout = nx.planar_layout(graph)\nnx.draw(graph, pos=layout, node_shape='s')\ngraph = nx.DiGraph() for term in [go['GO:0048316'], *go['GO:0048316'].children]: graph.add_node(term.ID, name=term.name) for child_ID in term._children: graph.add_edge(term.ID, child_ID) layout = nx.planar_layout(graph) nx.draw(graph, pos=layout, node_shape='s')
\n---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\nCell In[12], line 1\n----> 1 graph = nx.DiGraph()\n 2 for term in [go['GO:0048316'], *go['GO:0048316'].children]:\n 3 graph.add_node(term.ID, name=term.name)\n\nNameError: name 'nx' is not definedIn\u00a0[13]: Copied!
import sys\n!{sys.executable} -m pip install pygraphviz\nnx.nx_agraph.to_agraph(graph)\nimport sys !{sys.executable} -m pip install pygraphviz nx.nx_agraph.to_agraph(graph)
Collecting pygraphviz\r\n
Downloading pygraphviz-1.13.tar.gz (104 kB)\r\n \u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501 0.0/104.6 kB ? eta -:--:--
\r \u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501 104.6/104.6 kB 6.5 MB/s eta 0:00:00\r\n
Installing build dependencies ... -
\b \b\\
\b \b|
\b \b/
\b \bdone\r\n
Getting requirements to build wheel ... -
\b \bdone\r\n
Installing backend dependencies ... -
\b \b\\
\b \bdone\r\n
Preparing metadata (pyproject.toml) ... -
\b \bdone\r\nBuilding wheels for collected packages: pygraphviz\r\n
Building wheel for pygraphviz (pyproject.toml) ... -
\b \b\\\b \berror\r\n error: subprocess-exited-with-error\r\n \r\n \u00d7 Building wheel for pygraphviz (pyproject.toml) did not run successfully.\r\n \u2502 exit code: 1\r\n \u2570\u2500> [61 lines of output]\r\n running bdist_wheel\r\n running build\r\n running build_py\r\n creating build\r\n creating build/lib.linux-x86_64-cpython-310\r\n creating build/lib.linux-x86_64-cpython-310/pygraphviz\r\n copying pygraphviz/agraph.py -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n copying pygraphviz/testing.py -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n copying pygraphviz/scraper.py -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n copying pygraphviz/__init__.py -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n copying pygraphviz/graphviz.py -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n creating build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_node_attributes.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_graph.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/__init__.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_repr_mimebundle.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_unicode.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_readwrite.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_edge_attributes.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_layout.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_scraper.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_close.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_clear.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n 
copying pygraphviz/tests/test_attribute_defaults.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_drawing.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_html.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_subgraph.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_string.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n running egg_info\r\n writing pygraphviz.egg-info/PKG-INFO\r\n writing dependency_links to pygraphviz.egg-info/dependency_links.txt\r\n writing top-level names to pygraphviz.egg-info/top_level.txt\r\n reading manifest file 'pygraphviz.egg-info/SOURCES.txt'\r\n reading manifest template 'MANIFEST.in'\r\n warning: no files found matching '*.swg'\r\n warning: no files found matching '*.png' under directory 'doc'\r\n warning: no files found matching '*.html' under directory 'doc'\r\n warning: no files found matching '*.txt' under directory 'doc'\r\n warning: no files found matching '*.css' under directory 'doc'\r\n warning: no previously-included files matching '*~' found anywhere in distribution\r\n warning: no previously-included files matching '*.pyc' found anywhere in distribution\r\n warning: no previously-included files matching '.svn' found anywhere in distribution\r\n no previously-included directories found matching 'doc/build'\r\n adding license file 'LICENSE'\r\n writing manifest file 'pygraphviz.egg-info/SOURCES.txt'\r\n copying pygraphviz/graphviz.i -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n copying pygraphviz/graphviz_wrap.c -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n running build_ext\r\n building 'pygraphviz._graphviz' extension\r\n creating build/temp.linux-x86_64-cpython-310\r\n creating build/temp.linux-x86_64-cpython-310/pygraphviz\r\n x86_64-linux-gnu-gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g 
-fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -fPIC -DSWIG_PYTHON_STRICT_BYTE_CHAR -I/home/runner/.cache/pypoetry/virtualenvs/picea-ox5U8VzY-py3.10/include -I/usr/include/python3.10 -c pygraphviz/graphviz_wrap.c -o build/temp.linux-x86_64-cpython-310/pygraphviz/graphviz_wrap.o\r\n pygraphviz/graphviz_wrap.c:9: warning: \"SWIG_PYTHON_STRICT_BYTE_CHAR\" redefined\r\n 9 | #define SWIG_PYTHON_STRICT_BYTE_CHAR\r\n |\r\n <command-line>: note: this is the location of the previous definition\r\n pygraphviz/graphviz_wrap.c:3023:10: fatal error: graphviz/cgraph.h: No such file or directory\r\n 3023 | #include \"graphviz/cgraph.h\"\r\n | ^~~~~~~~~~~~~~~~~~~\r\n compilation terminated.\r\n error: command '/usr/bin/x86_64-linux-gnu-gcc' failed with exit code 1\r\n [end of output]\r\n \r\n note: This error originates from a subprocess, and is likely not a problem with pip.\r\n ERROR: Failed building wheel for pygraphviz\r\nFailed to build pygraphviz\r\nERROR: Could not build wheels for pygraphviz, which is required to install pyproject.toml-based projects\r\n
\n---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\nCell In[13], line 3\n 1 import sys\n 2 get_ipython().system('{sys.executable} -m pip install pygraphviz')\n----> 3 nx.nx_agraph.to_agraph(graph)\n\nNameError: name 'nx' is not definedIn\u00a0[14]: Copied!
[(term.ID, term.name) for term in go['GO:0048316'].children]\n[(term.ID, term.name) for term in go['GO:0048316'].children] Out[14]:
[('GO:0009793', ['embryo development ending in seed dormancy']),\n ('GO:0009942', ['longitudinal axis specification']),\n ('GO:0010069', ['zygote asymmetric cytokinesis in embryo sac']),\n ('GO:0010262', ['somatic embryogenesis']),\n ('GO:0010654', ['apical cell fate commitment']),\n ('GO:0048508', ['embryonic meristem development']),\n ('GO:0010065', ['primary meristem tissue development']),\n ('GO:0010066', ['ground meristem histogenesis']),\n ('GO:0010067', ['procambium histogenesis']),\n ('GO:0010068', ['protoderm histogenesis']),\n ('GO:0010071', ['root meristem specification']),\n ('GO:0010072', ['primary shoot apical meristem specification']),\n ('GO:0048825', ['cotyledon development']),\n ('GO:0048826', ['cotyledon morphogenesis']),\n ('GO:0010588', ['cotyledon vascular tissue pattern formation']),\n ('GO:0009960', ['endosperm development']),\n ('GO:0010214', ['seed coat development']),\n ('GO:0048359',\n ['mucilage metabolic process involved in seed coat development']),\n ('GO:0048354',\n ['mucilage biosynthetic process involved in seed coat development']),\n ('GO:0010344', ['seed oilbody biogenesis']),\n ('GO:0010431', ['seed maturation']),\n ('GO:0010162', ['seed dormancy process']),\n ('GO:0010231', ['maintenance of seed dormancy']),\n ('GO:0098755', ['maintenance of seed dormancy by absisic acid']),\n ('GO:0048700', ['acquisition of desiccation tolerance in seed']),\n ('GO:0048838', ['release of seed from dormancy']),\n ('GO:1990068', ['seed dehydration']),\n ('GO:0048317', ['seed morphogenesis']),\n ('GO:0080001', ['mucilage extrusion from seed coat']),\n ('GO:0080112', ['seed growth']),\n ('GO:0090376', ['seed trichome differentiation']),\n ('GO:0090377', ['seed trichome initiation']),\n ('GO:0090378', ['seed trichome elongation']),\n ('GO:0090379',\n ['secondary cell wall biogenesis involved in seed trichome differentiation']),\n ('GO:0090380', ['seed trichome maturation']),\n ('GO:0140547', ['acquisition of seed longevity'])]In\u00a0[15]: Copied!
go['GO:0010431'].__dict__\ngo['GO:0010431'].__dict__ Out[15]:
{'_ID': 'GO:0010431',\n '_original_ID': 'GO:0010431',\n '_container': <picea.ontology.Ontology at 0x7f8a1bfd6380>,\n '_children': ['GO:0010162', 'GO:1990068'],\n '_parents': ['GO:0003006', 'GO:0021700', 'GO:0048609', 'GO:0048316'],\n 'name': ['seed maturation'],\n 'def': ['\"A process in seed development that occurs after embryogenesis by which a quiescent state is established in a seed. Seed maturation is characterized by storage compound accumulation, acquisition of desiccation tolerance, growth arrest and the entry into a dormancy period of variable length that is broken upon germination.\" [PMID:16096971]'],\n 'alt_id': [],\n 'namespace': ['biological_process'],\n 'is_a': ['GO:0003006 ! developmental process involved in reproduction',\n 'GO:0021700 ! developmental maturation',\n 'GO:0048609 ! multicellular organismal reproductive process'],\n 'relationship': ['part_of GO:0048316 ! seed development']}In\u00a0[16]: Copied!
go['GO:0048316'].__dict__\ngo['GO:0048316'].__dict__ Out[16]:
{'_ID': 'GO:0048316',\n '_original_ID': 'GO:0048316',\n '_container': <picea.ontology.Ontology at 0x7f8a1bfd6380>,\n '_children': ['GO:0009793',\n 'GO:0009960',\n 'GO:0010214',\n 'GO:0010344',\n 'GO:0010431',\n 'GO:0048317',\n 'GO:0080001',\n 'GO:0080112',\n 'GO:0090376',\n 'GO:0140547'],\n '_parents': ['GO:0009791', 'GO:0048608', 'GO:0010154'],\n 'name': ['seed development'],\n 'def': ['\"The process whose specific outcome is the progression of the seed over time, from its formation to the mature structure. A seed is a propagating organ formed in the sexual reproductive cycle of gymnosperms and angiosperms, consisting of a protective coat enclosing an embryo and food reserves.\" [GOC:jid, PO:0009010]'],\n 'alt_id': [],\n 'namespace': ['biological_process'],\n 'is_a': ['GO:0009791 ! post-embryonic development',\n 'GO:0048608 ! reproductive structure development'],\n 'relationship': ['part_of GO:0010154 ! fruit development']}In\u00a0[17]: Copied!
go['GO:0048316'].children._elements.keys()\ngo['GO:0048316'].children._elements.keys() Out[17]:
dict_keys(['GO:0009793', 'GO:0009942', 'GO:0010069', 'GO:0010262', 'GO:0010654', 'GO:0048508', 'GO:0010065', 'GO:0010066', 'GO:0010067', 'GO:0010068', 'GO:0010071', 'GO:0010072', 'GO:0048825', 'GO:0048826', 'GO:0010588', 'GO:0009960', 'GO:0010214', 'GO:0048359', 'GO:0048354', 'GO:0010344', 'GO:0010431', 'GO:0010162', 'GO:0010231', 'GO:0098755', 'GO:0048700', 'GO:0048838', 'GO:1990068', 'GO:0048317', 'GO:0080001', 'GO:0080112', 'GO:0090376', 'GO:0090377', 'GO:0090378', 'GO:0090379', 'GO:0090380', 'GO:0140547'])In\u00a0[18]: Copied!
[(term.ID,term.name) for term in go if term.__dict__.get('alt_id') and term._parents]\n[(term.ID,term.name) for term in go if term.__dict__.get('alt_id') and term._parents] Out[18]:
[('GO:0036422', ['heptaprenyl diphosphate synthase activity']),\n ('GO:0000010', ['heptaprenyl diphosphate synthase activity']),\n ('GO:1905121', ['mitotic spindle elongation']),\n ('GO:0000022', ['mitotic spindle elongation']),\n ('GO:0000946', ['tRNA binding']),\n ('GO:0000049', ['tRNA binding']),\n ('GO:0006594', ['urea cycle']),\n ('GO:0000050', ['urea cycle']),\n ('GO:0006871', ['urea cycle']),\n ('GO:0000055', ['ribosomal large subunit export from nucleus']),\n ('GO:0000057', ['ribosomal large subunit export from nucleus']),\n ('GO:0000058', ['ribosomal small subunit export from nucleus']),\n ('GO:0000056', ['ribosomal small subunit export from nucleus']),\n ('GO:0000070', ['mitotic sister chromatid segregation']),\n ('GO:0016359', ['mitotic sister chromatid segregation']),\n ('GO:0000073', ['initial mitotic spindle pole body separation']),\n ('GO:0030475', ['initial mitotic spindle pole body separation']),\n ('GO:0071779', ['cell cycle checkpoint signaling']),\n ('GO:0000075', ['cell cycle checkpoint signaling']),\n ('GO:0072395', ['cell cycle checkpoint signaling']),\n ('GO:0031576', ['cell cycle checkpoint signaling']),\n ('GO:0072404', ['cell cycle checkpoint signaling']),\n ('GO:0072407', ['cell cycle checkpoint signaling']),\n ('GO:0072437', ['DNA replication checkpoint signaling']),\n ('GO:0000076', ['DNA replication checkpoint signaling']),\n ('GO:0072422', ['DNA damage checkpoint signaling']),\n ('GO:0000077', ['DNA damage checkpoint signaling']),\n ('GO:0000095',\n ['S-adenosyl-L-methionine transmembrane transporter activity']),\n ('GO:0015177',\n ['S-adenosyl-L-methionine transmembrane transporter activity']),\n ('GO:0015178', ['S-methylmethionine transmembrane transporter activity']),\n ('GO:0000100', ['S-methylmethionine transmembrane transporter activity']),\n ('GO:0000103', ['sulfate assimilation']),\n ('GO:0019378', ['sulfate assimilation']),\n ('GO:0000104', ['succinate dehydrogenase activity']),\n ('GO:0019739', ['succinate dehydrogenase 
activity']),\n ('GO:0010553', ['negative regulation of transcription by RNA polymerase II']),\n ('GO:0045816', ['negative regulation of transcription by RNA polymerase II']),\n ('GO:0000122', ['negative regulation of transcription by RNA polymerase II']),\n ('GO:0000124', ['SAGA complex']),\n ('GO:0000125', ['SAGA complex']),\n ('GO:0030914', ['SAGA complex']),\n ('GO:0043690', ['flocculation']),\n ('GO:0000128', ['flocculation']),\n ('GO:0032128', ['flocculation']),\n ('GO:0036281', ['flocculation']),\n ('GO:0043689', ['flocculation']),\n ('GO:0000501', ['flocculation']),\n ('GO:0036282', ['flocculation']),\n ('GO:0030607', ['establishment of mitotic spindle orientation']),\n ('GO:0000132', ['establishment of mitotic spindle orientation']),\n ('GO:0030609', ['establishment of mitotic spindle orientation']),\n ('GO:0000146', ['microfilament motor activity']),\n ('GO:0030898', ['microfilament motor activity']),\n ('GO:0000154', ['rRNA modification']),\n ('GO:0016548', ['rRNA modification']),\n ('GO:0009096', ['tryptophan biosynthetic process']),\n ('GO:0000162', ['tryptophan biosynthetic process']),\n ('GO:0007255', ['MAPK cascade']),\n ('GO:0000165', ['MAPK cascade']),\n ('GO:0043790', ['rRNA (adenine-N6,N6-)-dimethyltransferase activity']),\n ('GO:0000179', ['rRNA (adenine-N6,N6-)-dimethyltransferase activity']),\n ('GO:0000212', ['meiotic spindle organization']),\n ('GO:0043147', ['meiotic spindle organization']),\n ('GO:0008665', [\"tRNA 2'-phosphotransferase activity\"]),\n ('GO:0000215', [\"tRNA 2'-phosphotransferase activity\"]),\n ('GO:0000351', ['spliceosomal tri-snRNP complex assembly']),\n ('GO:0000355', ['spliceosomal tri-snRNP complex assembly']),\n ('GO:0000244', ['spliceosomal tri-snRNP complex assembly']),\n ('GO:0050576', ['3-keto sterol reductase activity']),\n ('GO:0000253', ['3-keto sterol reductase activity']),\n ('GO:0005051', ['peroxisome targeting sequence binding']),\n ('GO:0000268', ['peroxisome targeting sequence binding']),\n 
('GO:0009284', ['peptidoglycan metabolic process']),\n ('GO:0000270', ['peptidoglycan metabolic process']),\n ('GO:0044244', ['polysaccharide catabolic process']),\n ('GO:0000272', ['polysaccharide catabolic process']),\n ('GO:0000278', ['mitotic cell cycle']),\n ('GO:0007067', ['mitotic cell cycle']),\n ('GO:0030452', ['RNA fragment catabolic process']),\n ('GO:0000292', ['RNA fragment catabolic process']),\n ('GO:0009043', ['xanthine phosphoribosyltransferase activity']),\n ('GO:0000310', ['xanthine phosphoribosyltransferase activity']),\n ('GO:0000338', ['protein deneddylation']),\n ('GO:0010388', ['protein deneddylation']),\n ('GO:0000348', ['mRNA branch site recognition']),\n ('GO:0000370', ['mRNA branch site recognition']),\n ('GO:0000371', ['mRNA branch site recognition']),\n ('GO:0000356',\n ['generation of catalytic spliceosome for first transesterification step']),\n ('GO:0000357',\n ['generation of catalytic spliceosome for first transesterification step']),\n ('GO:0000349',\n ['generation of catalytic spliceosome for first transesterification step']),\n ('GO:0000350',\n ['generation of catalytic spliceosome for second transesterification step']),\n ('GO:0000359',\n ['generation of catalytic spliceosome for second transesterification step']),\n ('GO:0000358',\n ['generation of catalytic spliceosome for second transesterification step']),\n ('GO:0000360', ['cis assembly of pre-catalytic spliceosome']),\n ('GO:0000361', ['cis assembly of pre-catalytic spliceosome']),\n ('GO:0000354', ['cis assembly of pre-catalytic spliceosome']),\n ('GO:0000385', ['RNA splicing, via transesterification reactions']),\n ('GO:0000375', ['RNA splicing, via transesterification reactions']),\n ('GO:0031202', ['RNA splicing, via transesterification reactions']),\n ('GO:0000396',\n ['spliceosome conformational change to release U4 (or U4atac) and U1 (or U11)']),\n ('GO:0000388',\n ['spliceosome conformational change to release U4 (or U4atac) and U1 (or U11)']),\n ('GO:0000397',\n 
['spliceosome conformational change to release U4 (or U4atac) and U1 (or U11)']),\n ('GO:0000383', [\"mRNA 3'-splice site recognition\"]),\n ('GO:0000389', [\"mRNA 3'-splice site recognition\"]),\n ('GO:0000382', [\"mRNA 3'-splice site recognition\"]),\n ('GO:0000390', ['spliceosomal complex disassembly']),\n ('GO:0000391', ['spliceosomal complex disassembly']),\n ('GO:0000392', ['spliceosomal complex disassembly']),\n ('GO:0000369', [\"mRNA 5'-splice site recognition\"]),\n ('GO:0000395', [\"mRNA 5'-splice site recognition\"]),\n ('GO:0000368', [\"mRNA 5'-splice site recognition\"]),\n ('GO:0006375', ['mRNA splicing, via spliceosome']),\n ('GO:0000398', ['mRNA splicing, via spliceosome']),\n ('GO:0006374', ['mRNA splicing, via spliceosome']),\n ('GO:0000420', ['RNA polymerase IV complex']),\n ('GO:0000418', ['RNA polymerase IV complex']),\n ('GO:0000419', ['RNA polymerase V complex']),\n ('GO:0080137', ['RNA polymerase V complex']),\n ('GO:0000443', ['core TFIIH complex portion of holo TFIIH complex']),\n ('GO:0000438', ['core TFIIH complex portion of holo TFIIH complex']),\n ('GO:0000441', ['transcription factor TFIIH core complex']),\n ('GO:0000439', ['transcription factor TFIIH core complex']),\n ('GO:0000440', ['core TFIIH complex portion of NEF3 complex']),\n ('GO:0000442', ['core TFIIH complex portion of NEF3 complex']),\n ('GO:0000818', ['MIS12/MIND type complex']),\n ('GO:0000444', ['MIS12/MIND type complex']),\n ('GO:0000490',\n ['cleavage in ITS2 between 5.8S rRNA and LSU-rRNA of tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)']),\n ('GO:0000448',\n ['cleavage in ITS2 between 5.8S rRNA and LSU-rRNA of tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)']),\n ('GO:1990041',\n ['maturation of SSU-rRNA from tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)']),\n ('GO:0000462',\n ['maturation of SSU-rRNA from tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)']),\n ('GO:0019004',\n ['oxidized pyrimidine nucleobase 
lesion DNA N-glycosylase activity']),\n ('GO:0000703',\n ['oxidized pyrimidine nucleobase lesion DNA N-glycosylase activity']),\n ('GO:0016924', ['double-strand break repair via homologous recombination']),\n ('GO:0000724', ['double-strand break repair via homologous recombination']),\n ('GO:0007335', ['karyogamy']),\n ('GO:0000741', ['karyogamy']),\n ('GO:0000743',\n ['nuclear migration involved in conjugation with cellular fusion']),\n ('GO:0006946',\n ['nuclear migration involved in conjugation with cellular fusion']),\n ('GO:0007322', ['conjugation with cellular fusion']),\n ('GO:0007333', ['conjugation with cellular fusion']),\n ('GO:0030461', ['conjugation with cellular fusion']),\n ('GO:0000747', ['conjugation with cellular fusion']),\n ('GO:0030477', ['conjugation with cellular fusion']),\n ('GO:0000749',\n ['response to pheromone triggering conjugation with cellular fusion']),\n ('GO:0007328',\n ['response to pheromone triggering conjugation with cellular fusion']),\n ('GO:0030434',\n ['response to pheromone triggering conjugation with cellular fusion']),\n ('GO:0007330',\n ['pheromone-dependent signal transduction involved in conjugation with cellular fusion']),\n ('GO:0030454',\n ['pheromone-dependent signal transduction involved in conjugation with cellular fusion']),\n ('GO:0000750',\n ['pheromone-dependent signal transduction involved in conjugation with cellular fusion']),\n ('GO:0030571', ['mitotic cell cycle G1 arrest in response to pheromone']),\n ('GO:0000751', ['mitotic cell cycle G1 arrest in response to pheromone']),\n ('GO:0007334',\n ['agglutination involved in conjugation with cellular fusion']),\n ('GO:0000752',\n ['agglutination involved in conjugation with cellular fusion']),\n ('GO:0007332',\n ['cell morphogenesis involved in conjugation with cellular fusion']),\n ('GO:0000753',\n ['cell morphogenesis involved in conjugation with cellular fusion']),\n ('GO:0030453',\n ['adaptation of signaling pathway by response to pheromone involved 
in conjugation with cellular fusion']),\n ('GO:0007331',\n ['adaptation of signaling pathway by response to pheromone involved in conjugation with cellular fusion']),\n ('GO:0000754',\n ['adaptation of signaling pathway by response to pheromone involved in conjugation with cellular fusion']),\n ('GO:0000218', ['cytogamy']),\n ('GO:0030462', ['cytogamy']),\n ('GO:0000755', ['cytogamy']),\n ('GO:0007325', ['peptide pheromone export']),\n ('GO:0000770', ['peptide pheromone export']),\n ('GO:0097521', ['chromosome, centromeric region']),\n ('GO:0000775', ['chromosome, centromeric region']),\n ('GO:0000776', ['kinetochore']),\n ('GO:0031617', ['kinetochore']),\n ('GO:0005699', ['kinetochore']),\n ('GO:0000778', ['kinetochore']),\n ('GO:0000777', ['kinetochore']),\n ('GO:0000779', ['condensed chromosome, centromeric region']),\n ('GO:0000780', ['condensed chromosome, centromeric region']),\n ('GO:0000781', ['chromosome, telomeric region']),\n ('GO:0000784', ['chromosome, telomeric region']),\n ('GO:0000790', ['chromatin']),\n ('GO:0005717', ['chromatin']),\n ('GO:0000789', ['chromatin']),\n ('GO:0000785', ['chromatin']),\n ('GO:0005718', ['nucleosome']),\n ('GO:0000786', ['nucleosome']),\n ('GO:0000787', ['nucleosome']),\n ('GO:0000788', ['nucleosome']),\n ('GO:0000791', ['euchromatin']),\n ('GO:0005719', ['euchromatin']),\n ('GO:0035327', ['euchromatin']),\n ('GO:0005720', ['heterochromatin']),\n ('GO:0035328', ['heterochromatin']),\n ('GO:0000792', ['heterochromatin']),\n ('GO:0000795', ['synaptonemal complex']),\n ('GO:0005716', ['synaptonemal complex']),\n ('GO:0008621', ['condensin complex']),\n ('GO:0005676', ['condensin complex']),\n ('GO:0061814', ['condensin complex']),\n ('GO:0008620', ['condensin complex']),\n ('GO:0000799', ['condensin complex']),\n ('GO:0000796', ['condensin complex']),\n ('GO:0000797', ['condensin complex']),\n ('GO:0007148', ['cell morphogenesis']),\n ('GO:0045791', ['cell morphogenesis']),\n ('GO:0000902', ['cell morphogenesis']),\n 
('GO:0045790', ['cell morphogenesis']),\n ('GO:0000910', ['cytokinesis']),\n ('GO:0007104', ['cytokinesis']),\n ('GO:0033205', ['cytokinesis']),\n ('GO:0016288', ['cytokinesis']),\n ('GO:0045573', ['actomyosin contractile ring assembly']),\n ('GO:2000708', ['actomyosin contractile ring assembly']),\n ('GO:0000915', ['actomyosin contractile ring assembly']),\n ('GO:0071937', ['division septum assembly']),\n ('GO:1902411', ['division septum assembly']),\n ('GO:0000917', ['division septum assembly']),\n ('GO:0000920', ['septum digestion after cytokinesis']),\n ('GO:2000695', ['septum digestion after cytokinesis']),\n ('GO:1902409', ['septum digestion after cytokinesis']),\n ('GO:0000922', ['spindle pole']),\n ('GO:0030615', ['spindle pole']),\n ('GO:0008274', ['gamma-tubulin ring complex']),\n ('GO:0055031', ['gamma-tubulin ring complex']),\n ('GO:0000926', ['gamma-tubulin ring complex']),\n ('GO:0061494', ['gamma-tubulin ring complex']),\n ('GO:0000931', ['gamma-tubulin ring complex']),\n ('GO:0055033', ['gamma-tubulin ring complex']),\n ('GO:0000929', ['gamma-tubulin ring complex']),\n ('GO:0055032', ['gamma-tubulin ring complex']),\n ('GO:0000924', ['gamma-tubulin ring complex']),\n ('GO:0000925', ['gamma-tubulin ring complex']),\n ('GO:0043187', ['division septum']),\n ('GO:0000935', ['division septum']),\n ('GO:0000939', ['inner kinetochore']),\n ('GO:0000941', ['inner kinetochore']),\n ('GO:0000942', ['outer kinetochore']),\n ('GO:0000940', ['outer kinetochore']),\n ('GO:0001017', ['transcription cis-regulatory region binding']),\n ('GO:0044212', ['transcription cis-regulatory region binding']),\n ('GO:0000975', ['transcription cis-regulatory region binding']),\n ('GO:0000984', ['transcription cis-regulatory region binding']),\n ('GO:0000976', ['transcription cis-regulatory region binding']),\n ('GO:0001012',\n ['RNA polymerase II transcription regulatory region sequence-specific DNA binding']),\n ('GO:0000977',\n ['RNA polymerase II transcription regulatory 
region sequence-specific DNA binding']),\n ('GO:0000978',\n ['RNA polymerase II cis-regulatory region sequence-specific DNA binding']),\n ('GO:0000980',\n ['RNA polymerase II cis-regulatory region sequence-specific DNA binding']),\n ('GO:0003705',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0000981',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0001201',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0000982',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0001203',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0001133',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0001200',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0001202',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0000987', ['cis-regulatory region sequence-specific DNA binding']),\n ('GO:0001159', ['cis-regulatory region sequence-specific DNA binding']),\n ('GO:0001150', ['cis-regulatory region sequence-specific DNA binding']),\n ('GO:0035326', ['cis-regulatory region sequence-specific DNA binding']),\n ('GO:0001158', ['cis-regulatory region sequence-specific DNA binding']),\n ('GO:0000986', ['cis-regulatory region sequence-specific DNA binding']),\n ('GO:0001034',\n ['RNA polymerase III general transcription initiation factor activity']),\n ('GO:0000995',\n ['RNA polymerase III general transcription initiation factor activity']),\n ('GO:0001002',\n ['RNA polymerase III type 1 promoter sequence-specific DNA binding']),\n ('GO:0001030',\n ['RNA polymerase III type 1 promoter sequence-specific DNA binding']),\n ('GO:0001031',\n ['RNA polymerase III type 2 promoter sequence-specific DNA binding']),\n ('GO:0001003',\n ['RNA polymerase III type 2 promoter sequence-specific DNA binding']),\n 
('GO:0001032',\n ['RNA polymerase III type 3 promoter sequence-specific DNA binding']),\n ('GO:0001006',\n ['RNA polymerase III type 3 promoter sequence-specific DNA binding']),\n ('GO:0001045', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0070363', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0070362', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0070361', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0070364', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0001044', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0000997', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0001018', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0001037',\n ['RNA polymerase III hybrid type promoter sequence-specific DNA binding']),\n ('GO:0001039',\n ['RNA polymerase III hybrid type promoter sequence-specific DNA binding']),\n ('GO:0000985', ['core promoter sequence-specific DNA binding']),\n ('GO:0001046', ['core promoter sequence-specific DNA binding']),\n ('GO:0001047', ['core promoter sequence-specific DNA binding']),\n ('GO:0001122', ['promoter clearance during DNA-templated transcription']),\n ('GO:0001109', ['promoter clearance during DNA-templated transcription']),\n ('GO:0001112', ['DNA-templated transcription open complex formation']),\n ('GO:0001127', ['DNA-templated transcription open complex formation']),\n ('GO:0001145',\n ['transcription termination site sequence-specific DNA binding']),\n ('GO:0001148',\n ['transcription termination site sequence-specific DNA binding']),\n ('GO:0001160',\n ['transcription termination site sequence-specific DNA binding']),\n ('GO:0001147',\n ['transcription termination site sequence-specific DNA binding']),\n ('GO:0001146',\n ['transcription termination site sequence-specific DNA binding']),\n ('GO:0044213',\n ['intronic transcription regulatory region sequence-specific DNA 
binding']),\n ('GO:0001161',\n ['intronic transcription regulatory region sequence-specific DNA binding']),\n ('GO:0001163',\n ['RNA polymerase I transcription regulatory region sequence-specific DNA binding']),\n ('GO:0001013',\n ['RNA polymerase I transcription regulatory region sequence-specific DNA binding']),\n ('GO:0001164',\n ['RNA polymerase I core promoter sequence-specific DNA binding']),\n ('GO:0001187',\n ['RNA polymerase I core promoter sequence-specific DNA binding']),\n ('GO:0001165',\n ['RNA polymerase I cis-regulatory region sequence-specific DNA binding']),\n ('GO:0001166',\n ['RNA polymerase I cis-regulatory region sequence-specific DNA binding']),\n ('GO:0001173', ['DNA-templated transcriptional start site selection']),\n ('GO:0001176', ['DNA-templated transcriptional start site selection']),\n ('GO:0001182', ['RNA polymerase I promoter clearance']),\n ('GO:0001184', ['RNA polymerase I promoter clearance']),\n ('GO:0001188', ['RNA polymerase I preinitiation complex assembly']),\n ('GO:0001189', ['RNA polymerase I preinitiation complex assembly']),\n ('GO:0001194',\n ['maintenance of transcriptional fidelity during transcription elongation']),\n ('GO:0001192',\n ['maintenance of transcriptional fidelity during transcription elongation']),\n ('GO:0001216', ['DNA-binding transcription activator activity']),\n ('GO:0001140', ['DNA-binding transcription activator activity']),\n ('GO:0001215', ['DNA-binding transcription activator activity']),\n ('GO:0001219', ['DNA-binding transcription repressor activity']),\n ('GO:0001141', ['DNA-binding transcription repressor activity']),\n ('GO:0001217', ['DNA-binding transcription repressor activity']),\n ('GO:0001220', ['DNA-binding transcription repressor activity']),\n ('GO:0001218', ['DNA-binding transcription repressor activity']),\n ('GO:0001224', ['transcription coregulator binding']),\n ('GO:0001221', ['transcription coregulator binding']),\n ('GO:0001226', ['transcription corepressor binding']),\n 
('GO:0001222', ['transcription corepressor binding']),\n ('GO:0001225', ['transcription coactivator binding']),\n ('GO:0001223', ['transcription coactivator binding']),\n ('GO:0001206',\n ['DNA-binding transcription repressor activity, RNA polymerase II-specific']),\n ('GO:0001210',\n ['DNA-binding transcription repressor activity, RNA polymerase II-specific']),\n ('GO:0001227',\n ['DNA-binding transcription repressor activity, RNA polymerase II-specific']),\n ('GO:0001214',\n ['DNA-binding transcription repressor activity, RNA polymerase II-specific']),\n ('GO:0001078',\n ['DNA-binding transcription repressor activity, RNA polymerase II-specific']),\n ('GO:0001211',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0001228',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0001212',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0001077',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0001205',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0001209',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0001213',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0001410', ['chlamydospore formation']),\n ('GO:0055027', ['chlamydospore formation']),\n ('GO:0042833', ['response to protozoan']),\n ('GO:0001562', ['response to protozoan']),\n ('GO:0001590',\n ['dopamine neurotransmitter receptor activity, coupled via Gs']),\n ('GO:0001588',\n ['dopamine neurotransmitter receptor activity, coupled via Gs']),\n ('GO:0001589',\n ['dopamine neurotransmitter receptor activity, coupled via Gs']),\n ('GO:0001592',\n ['dopamine neurotransmitter receptor activity, coupled via Gi/Go']),\n ('GO:0001591',\n ['dopamine neurotransmitter receptor activity, coupled via Gi/Go']),\n ('GO:0001670',\n ['dopamine 
neurotransmitter receptor activity, coupled via Gi/Go']),\n ('GO:0001593',\n ['dopamine neurotransmitter receptor activity, coupled via Gi/Go']),\n ('GO:0001611', ['G protein-coupled adenosine receptor activity']),\n ('GO:0001613', ['G protein-coupled adenosine receptor activity']),\n ('GO:0008501', ['G protein-coupled adenosine receptor activity']),\n ('GO:0001609', ['G protein-coupled adenosine receptor activity']),\n ('GO:0001612', ['G protein-coupled adenosine receptor activity']),\n ('GO:0001610', ['G protein-coupled adenosine receptor activity']),\n ('GO:0035586', ['purinergic nucleotide receptor activity']),\n ('GO:0001614', ['purinergic nucleotide receptor activity']),\n ('GO:0045032', ['G protein-coupled ADP receptor activity']),\n ('GO:0001621', ['G protein-coupled ADP receptor activity']),\n ('GO:0016522',\n ['pituitary adenylate cyclase-activating polypeptide receptor activity']),\n ('GO:0001634',\n ['pituitary adenylate cyclase-activating polypeptide receptor activity']),\n ('GO:0001644', ['cAMP receptor activity']),\n ('GO:0001646', ['cAMP receptor activity']),\n ('GO:0001654', ['eye development']),\n ('GO:0042460', ['eye development']),\n ('GO:0001673', ['male germ cell nucleus']),\n ('GO:0043081', ['male germ cell nucleus']),\n ('GO:0001674', ['female germ cell nucleus']),\n ('GO:0043080', ['female germ cell nucleus']),\n ('GO:0001694', ['histamine biosynthetic process']),\n ('GO:0001693', ['histamine biosynthetic process']),\n ('GO:0001702', ['gastrulation with mouth forming second']),\n ('GO:0048276', ['gastrulation with mouth forming second']),\n ('GO:0010003', ['gastrulation with mouth forming second']),\n ('GO:0007457', ['insect visual primordium formation']),\n ('GO:0001744', ['insect visual primordium formation']),\n ('GO:0048049', ['insect visual primordium development']),\n ('GO:0001748', ['insect visual primordium development']),\n ('GO:0007459', ['compound eye photoreceptor fate commitment']),\n ('GO:0001752', ['compound eye photoreceptor 
fate commitment']),\n ('GO:0001806', ['type IV hypersensitivity']),\n ('GO:0016069', ['type IV hypersensitivity']),\n ('GO:0042089', ['cytokine production']),\n ('GO:0050663', ['cytokine production']),\n ('GO:0001816', ['cytokine production']),\n ('GO:0042032', ['cytokine production']),\n ('GO:0042107', ['cytokine production']),\n ('GO:0001817', ['regulation of cytokine production']),\n ('GO:0050707', ['regulation of cytokine production']),\n ('GO:0042035', ['regulation of cytokine production']),\n ('GO:0001818', ['negative regulation of cytokine production']),\n ('GO:0042036', ['negative regulation of cytokine production']),\n ('GO:0050710', ['negative regulation of cytokine production']),\n ('GO:0050715', ['positive regulation of cytokine production']),\n ('GO:0001819', ['positive regulation of cytokine production']),\n ('GO:0042108', ['positive regulation of cytokine production']),\n ('GO:0001679', ['neural tube formation']),\n ('GO:0001841', ['neural tube formation']),\n ('GO:0001872', ['(1->3)-beta-D-glucan binding']),\n ('GO:0080087', ['(1->3)-beta-D-glucan binding']),\n ('GO:0001942', ['hair follicle development']),\n ('GO:0001943', ['hair follicle development']),\n ('GO:0002003', ['angiotensin maturation']),\n ('GO:0002005', ['angiotensin maturation']),\n ('GO:1900920', ['regulation of L-glutamate import across plasma membrane']),\n ('GO:0002036', ['regulation of L-glutamate import across plasma membrane']),\n ('GO:1900921',\n ['negative regulation of L-glutamate import across plasma membrane']),\n ('GO:0002037',\n ['negative regulation of L-glutamate import across plasma membrane']),\n ('GO:0002038',\n ['positive regulation of L-glutamate import across plasma membrane']),\n ('GO:1900922',\n ['positive regulation of L-glutamate import across plasma membrane']),\n ('GO:0002046', ['opsin binding']),\n ('GO:0016030', ['opsin binding']),\n ('GO:0002163', ['dystroglycan binding']),\n ('GO:0002166', ['dystroglycan binding']),\n ('GO:0002162', ['dystroglycan 
binding']),\n ('GO:0002214', ['defense response to insect']),\n ('GO:0002213', ['defense response to insect']),\n ('GO:0002215', ['defense response to nematode']),\n ('GO:0002216', ['defense response to nematode']),\n ('GO:0002219', ['activation of innate immune response']),\n ('GO:0002218', ['activation of innate immune response']),\n ('GO:0002375', ['cytokine production involved in immune response']),\n ('GO:0002374', ['cytokine production involved in immune response']),\n ('GO:0002367', ['cytokine production involved in immune response']),\n ('GO:0048305', ['immunoglobulin production']),\n ('GO:0002377', ['immunoglobulin production']),\n ('GO:0002378', ['immunoglobulin production']),\n ('GO:0002381',\n ['immunoglobulin production involved in immunoglobulin-mediated immune response']),\n ('GO:0002379',\n ['immunoglobulin production involved in immunoglobulin-mediated immune response']),\n ('GO:0002380',\n ['immunoglobulin production involved in immunoglobulin-mediated immune response']),\n ('GO:0002385', ['mucosal immune response']),\n ('GO:0002422', ['mucosal immune response']),\n ('GO:0002386', ['mucosal immune response']),\n ('GO:0002390',\n ['platelet activating factor production involved in inflammatory response']),\n ('GO:0002535',\n ['platelet activating factor production involved in inflammatory response']),\n ('GO:0002391',\n ['platelet activating factor production involved in inflammatory response']),\n ('GO:0002392',\n ['platelet activating factor production involved in inflammatory response']),\n ('GO:0042087', ['leukocyte mediated immunity']),\n ('GO:0002443', ['leukocyte mediated immunity']),\n ('GO:0019723', ['leukocyte mediated immunity']),\n ('GO:0002640', ['regulation of immunoglobulin production']),\n ('GO:0051023', ['regulation of immunoglobulin production']),\n ('GO:0002637', ['regulation of immunoglobulin production']),\n ('GO:0002638', ['negative regulation of immunoglobulin production']),\n ('GO:0051025', ['negative regulation of 
immunoglobulin production']),\n ('GO:0002641', ['negative regulation of immunoglobulin production']),\n ('GO:0051024', ['positive regulation of immunoglobulin production']),\n ('GO:0002642', ['positive regulation of immunoglobulin production']),\n ('GO:0002639', ['positive regulation of immunoglobulin production']),\n ('GO:0002715', ['regulation of natural killer cell mediated immunity']),\n ('GO:0045845', ['regulation of natural killer cell mediated immunity']),\n ('GO:0002716',\n ['negative regulation of natural killer cell mediated immunity']),\n ('GO:0030102',\n ['negative regulation of natural killer cell mediated immunity']),\n ('GO:0045846',\n ['positive regulation of natural killer cell mediated immunity']),\n ('GO:0002717',\n ['positive regulation of natural killer cell mediated immunity']),\n ('GO:0002742',\n ['regulation of cytokine production involved in immune response']),\n ('GO:0002718',\n ['regulation of cytokine production involved in immune response']),\n ('GO:0002739',\n ['regulation of cytokine production involved in immune response']),\n ('GO:0002740',\n ['negative regulation of cytokine production involved in immune response']),\n ('GO:0002719',\n ['negative regulation of cytokine production involved in immune response']),\n ('GO:0002743',\n ['negative regulation of cytokine production involved in immune response']),\n ('GO:0002720',\n ['positive regulation of cytokine production involved in immune response']),\n ('GO:0002744',\n ['positive regulation of cytokine production involved in immune response']),\n ('GO:0002741',\n ['positive regulation of cytokine production involved in immune response']),\n ('GO:0002753',\n ['cytoplasmic pattern recognition receptor signaling pathway']),\n ('GO:0039528',\n ['cytoplasmic pattern recognition receptor signaling pathway']),\n ('GO:0009870', ['innate immune response-activating signaling pathway']),\n ('GO:0002758', ['innate immune response-activating signaling pathway']),\n ('GO:0010204', ['innate immune 
response-activating signaling pathway']),\n ('GO:0070526', ['tRNA threonylcarbamoyladenosine modification']),\n ('GO:0002949', ['tRNA threonylcarbamoyladenosine modification']),\n ('GO:0010802', ['respiratory system process']),\n ('GO:0003016', ['respiratory system process']),\n ('GO:0003121', ['epinephrine-mediated vasodilation']),\n ('GO:0003124', ['epinephrine-mediated vasodilation']),\n ('GO:0003123', ['epinephrine-mediated vasodilation']),\n ('GO:0003125', ['norepinephrine-mediated vasodilation']),\n ('GO:0003122', ['norepinephrine-mediated vasodilation']),\n ('GO:0003126', ['norepinephrine-mediated vasodilation']),\n ('GO:0036142', ['cilium movement']),\n ('GO:0003341', ['cilium movement']),\n ('GO:0003352', ['regulation of cilium movement']),\n ('GO:1900172', ['regulation of cilium movement']),\n ('GO:1900174', ['positive regulation of cilium movement']),\n ('GO:0003353', ['positive regulation of cilium movement']),\n ('GO:0003354', ['negative regulation of cilium movement']),\n ('GO:1900173', ['negative regulation of cilium movement']),\n ('GO:0036144', ['regulation of cilium beat frequency']),\n ('GO:0003356', ['regulation of cilium beat frequency']),\n ('GO:0001789', ['sphingosine-1-phosphate receptor signaling pathway']),\n ('GO:0003376', ['sphingosine-1-phosphate receptor signaling pathway']),\n ('GO:0000496', ['nucleic acid binding']),\n ('GO:0003676', ['nucleic acid binding']),\n ('GO:0003677', ['DNA binding']),\n ('GO:0043566', ['DNA binding']),\n ('GO:0003679', ['DNA helicase activity']),\n ('GO:0004003', ['DNA helicase activity']),\n ('GO:0003678', ['DNA helicase activity']),\n ('GO:0033170', ['DNA clamp loader activity']),\n ('GO:0003689', ['DNA clamp loader activity']),\n ('GO:0003698', ['single-stranded DNA binding']),\n ('GO:0003697', ['single-stranded DNA binding']),\n ('GO:0003699', ['single-stranded DNA binding']),\n ('GO:0001071', ['DNA-binding transcription factor activity']),\n ('GO:0001204', ['DNA-binding transcription factor 
activity']),\n ('GO:0000130', ['DNA-binding transcription factor activity']),\n ('GO:0001151', ['DNA-binding transcription factor activity']),\n ('GO:0003700', ['DNA-binding transcription factor activity']),\n ('GO:0001131', ['DNA-binding transcription factor activity']),\n ('GO:0001130', ['DNA-binding transcription factor activity']),\n ('GO:0001199', ['DNA-binding transcription factor activity']),\n ('GO:0003712', ['transcription coregulator activity']),\n ('GO:0016455', ['transcription coregulator activity']),\n ('GO:0001104', ['transcription coregulator activity']),\n ('GO:0003713', ['transcription coactivator activity']),\n ('GO:0001105', ['transcription coactivator activity']),\n ('GO:0003714', ['transcription corepressor activity']),\n ('GO:0001106', ['transcription corepressor activity']),\n ('GO:0003723', ['RNA binding']),\n ('GO:0044822', ['RNA binding']),\n ('GO:0000498', ['RNA binding']),\n ('GO:0004004', ['RNA helicase activity']),\n ('GO:0003724', ['RNA helicase activity']),\n ('GO:0003726', ['double-stranded RNA adenosine deaminase activity']),\n ('GO:0003971', ['double-stranded RNA adenosine deaminase activity']),\n ('GO:0003727', ['single-stranded RNA binding']),\n ('GO:0003728', ['single-stranded RNA binding']),\n ('GO:0000499', ['mRNA binding']),\n ('GO:0003729', ['mRNA binding']),\n ('GO:0003742', ['structural constituent of ribosome']),\n ('GO:0003741', ['structural constituent of ribosome']),\n ('GO:0003738', ['structural constituent of ribosome']),\n ('GO:0003737', ['structural constituent of ribosome']),\n ('GO:0003736', ['structural constituent of ribosome']),\n ('GO:0003740', ['structural constituent of ribosome']),\n ('GO:0003739', ['structural constituent of ribosome']),\n ('GO:0003735', ['structural constituent of ribosome']),\n ('GO:0003745', ['translation initiation factor activity']),\n ('GO:0003744', ['translation initiation factor activity']),\n ('GO:0003743', ['translation initiation factor activity']),\n ('GO:0003746', 
['translation elongation factor activity']),\n ('GO:0008182', ['translation elongation factor activity']),\n ('GO:0008183', ['translation elongation factor activity']),\n ('GO:0003748', ['translation release factor activity']),\n ('GO:0003749', ['translation release factor activity']),\n ('GO:0003747', ['translation release factor activity']),\n ('GO:0003755', ['peptidyl-prolyl cis-trans isomerase activity']),\n ('GO:0042028', ['peptidyl-prolyl cis-trans isomerase activity']),\n ('GO:0004752', ['peptidyl-prolyl cis-trans isomerase activity']),\n ('GO:0003756', ['protein disulfide isomerase activity']),\n ('GO:0006467', ['protein disulfide isomerase activity']),\n ('GO:1990939', ['microtubule motor activity']),\n ('GO:0003777', ['microtubule motor activity']),\n ('GO:0102101', ['sterol 24-C-methyltransferase activity']),\n ('GO:0003838', ['sterol 24-C-methyltransferase activity']),\n ('GO:0004469', ['1-acylglycerol-3-phosphate O-acyltransferase activity']),\n ('GO:0003841', ['1-acylglycerol-3-phosphate O-acyltransferase activity']),\n ('GO:0003843', ['1,3-beta-D-glucan synthase activity']),\n ('GO:0009981', ['1,3-beta-D-glucan synthase activity']),\n ('GO:0047119', ['2-methylbutanoyl-CoA dehydrogenase activity']),\n ('GO:0003853', ['2-methylbutanoyl-CoA dehydrogenase activity']),\n ('GO:0003863',\n ['3-methyl-2-oxobutanoate dehydrogenase (2-methylpropanoyl-transferring) activity']),\n ('GO:0003826',\n ['3-methyl-2-oxobutanoate dehydrogenase (2-methylpropanoyl-transferring) activity']),\n ('GO:0003878', ['ATP citrate synthase activity']),\n ('GO:0046913', ['ATP citrate synthase activity']),\n ('GO:0008326', ['DNA (cytosine-5-)-methyltransferase activity']),\n ('GO:0003886', ['DNA (cytosine-5-)-methyltransferase activity']),\n ('GO:0003891', ['DNA-directed DNA polymerase activity']),\n ('GO:0003889', ['DNA-directed DNA polymerase activity']),\n ('GO:0003895', ['DNA-directed DNA polymerase activity']),\n ('GO:0015999', ['DNA-directed DNA polymerase activity']),\n 
('GO:0003893', ['DNA-directed DNA polymerase activity']),\n ('GO:0019984', ['DNA-directed DNA polymerase activity']),\n ('GO:0016000', ['DNA-directed DNA polymerase activity']),\n ('GO:0003888', ['DNA-directed DNA polymerase activity']),\n ('GO:0008723', ['DNA-directed DNA polymerase activity']),\n ('GO:0016449', ['DNA-directed DNA polymerase activity']),\n ('GO:0003887', ['DNA-directed DNA polymerase activity']),\n ('GO:0016450', ['DNA-directed DNA polymerase activity']),\n ('GO:0003890', ['DNA-directed DNA polymerase activity']),\n ('GO:0016451', ['DNA-directed DNA polymerase activity']),\n ('GO:0003894', ['DNA-directed DNA polymerase activity']),\n ('GO:0016452', ['DNA-directed DNA polymerase activity']),\n ('GO:0016448', ['DNA-directed DNA polymerase activity']),\n ('GO:0003898', ['DNA primase activity']),\n ('GO:0003897', ['DNA primase activity']),\n ('GO:0003896', ['DNA primase activity']),\n ('GO:0000129', [\"DNA-directed 5'-3' RNA polymerase activity\"]),\n ('GO:0003899', [\"DNA-directed 5'-3' RNA polymerase activity\"]),\n ('GO:0004036', ['alkylbase DNA N-glycosylase activity']),\n ('GO:0003905', ['alkylbase DNA N-glycosylase activity']),\n ('GO:0009387', ['DNA topoisomerase activity']),\n ('GO:0003916', ['DNA topoisomerase activity']),\n ('GO:0003918',\n ['DNA topoisomerase type II (double strand cut, ATP-hydrolyzing) activity']),\n ('GO:0061505',\n ['DNA topoisomerase type II (double strand cut, ATP-hydrolyzing) activity']),\n ('GO:0061745', ['GTPase activity']),\n ('GO:0003924', ['GTPase activity']),\n ('GO:0003925', ['G protein activity']),\n ('GO:0003927', ['G protein activity']),\n ('GO:0003959', ['NADPH dehydrogenase activity']),\n ('GO:0008468', ['NADPH dehydrogenase activity']),\n ('GO:0016660', ['NADPH dehydrogenase activity']),\n ('GO:0019282', ['O-acetylhomoserine aminocarboxypropyltransferase activity']),\n ('GO:0003961', ['O-acetylhomoserine aminocarboxypropyltransferase activity']),\n ('GO:0003962', ['cystathionine gamma-synthase 
activity']),\n ('GO:0000505', ['cystathionine gamma-synthase activity']),\n ('GO:0052852', ['(S)-2-hydroxy-acid oxidase activity']),\n ('GO:0052854', ['(S)-2-hydroxy-acid oxidase activity']),\n ('GO:0052853', ['(S)-2-hydroxy-acid oxidase activity']),\n ('GO:0008891', ['(S)-2-hydroxy-acid oxidase activity']),\n ('GO:0003973', ['(S)-2-hydroxy-acid oxidase activity']),\n ('GO:0003992',\n ['N2-acetyl-L-ornithine:2-oxoglutarate 5-aminotransferase activity']),\n ('GO:0047318',\n ['N2-acetyl-L-ornithine:2-oxoglutarate 5-aminotransferase activity']),\n ('GO:0052632', ['aconitate hydratase activity']),\n ('GO:0003994', ['aconitate hydratase activity']),\n ('GO:0019109', ['acyl-CoA dehydrogenase activity']),\n ('GO:0003995', ['acyl-CoA dehydrogenase activity']),\n ('GO:0004031', ['aldehyde oxidase activity']),\n ('GO:0050250', ['aldehyde oxidase activity']),\n ('GO:0019850', ['aminoacyl-tRNA hydrolase activity']),\n ('GO:0019851', ['aminoacyl-tRNA hydrolase activity']),\n ('GO:0004045', ['aminoacyl-tRNA hydrolase activity']),\n ('GO:0004057', ['arginyl-tRNA--protein transferase activity']),\n ('GO:0042172', ['arginyl-tRNA--protein transferase activity']),\n ('GO:0016400', ['aromatic-L-amino-acid decarboxylase activity']),\n ('GO:0004058', ['aromatic-L-amino-acid decarboxylase activity']),\n ('GO:0004094', ['carnitine O-acetyltransferase activity']),\n ('GO:0004093', ['carnitine O-acetyltransferase activity']),\n ('GO:0004092', ['carnitine O-acetyltransferase activity']),\n ('GO:0004096', ['catalase activity']),\n ('GO:0016953', ['catalase activity']),\n ('GO:0016952', ['catalase activity']),\n ('GO:0004097', ['catechol oxidase activity']),\n ('GO:0036263', ['catechol oxidase activity']),\n ('GO:0036264', ['catechol oxidase activity']),\n ('GO:0102316', ['catechol oxidase activity']),\n ('GO:0004123', ['cystathionine gamma-lyase activity']),\n ('GO:0016225', ['cystathionine gamma-lyase activity']),\n ('GO:0008461', ['delta(3)-delta(2)-enoyl-CoA isomerase activity']),\n 
('GO:0004165', ['delta(3)-delta(2)-enoyl-CoA isomerase activity']),\n ('GO:0004166',\n ['dolichyl-phosphate alpha-N-acetylglucosaminyltransferase activity']),\n ('GO:0004101',\n ['dolichyl-phosphate alpha-N-acetylglucosaminyltransferase activity']),\n ('GO:0048059', ['dopachrome isomerase activity']),\n ('GO:0004167', ['dopachrome isomerase activity']),\n ('GO:0016809', ['endopeptidase activity']),\n ('GO:0004175', ['endopeptidase activity']),\n ('GO:0004176', ['ATP-dependent peptidase activity']),\n ('GO:0004280', ['ATP-dependent peptidase activity']),\n ('GO:0004300', ['enoyl-CoA hydratase activity']),\n ('GO:0016510', ['enoyl-CoA hydratase activity']),\n ('GO:0004315', ['3-oxoacyl-[acyl-carrier-protein] synthase activity']),\n ('GO:0033817', ['3-oxoacyl-[acyl-carrier-protein] synthase activity']),\n ('GO:0034566', ['formamidase activity']),\n ('GO:0004328', ['formamidase activity']),\n ('GO:0004330', ['fructose-2,6-bisphosphate 2-phosphatase activity']),\n ('GO:0004331', ['fructose-2,6-bisphosphate 2-phosphatase activity']),\n ('GO:0008708', ['glucose dehydrogenase activity']),\n ('GO:0004344', ['glucose dehydrogenase activity']),\n ('GO:0004350', ['glutamate-5-semialdehyde dehydrogenase activity']),\n ('GO:0001513', ['glutamate-5-semialdehyde dehydrogenase activity']),\n ('GO:0004376', ['glycolipid mannosyltransferase activity']),\n ('GO:0004580', ['glycolipid mannosyltransferase activity']),\n ('GO:0019106', ['glycylpeptide N-tetradecanoyltransferase activity']),\n ('GO:0004379', ['glycylpeptide N-tetradecanoyltransferase activity']),\n ('GO:0004386', ['helicase activity']),\n ('GO:0008026', ['helicase activity']),\n ('GO:0004402', ['histone acetyltransferase activity']),\n ('GO:0004404', ['histone acetyltransferase activity']),\n ('GO:0046971', ['histone acetyltransferase activity']),\n ('GO:0004403', ['histone acetyltransferase activity']),\n ('GO:0004406', ['histone acetyltransferase activity']),\n ('GO:0043166', ['histone acetyltransferase activity']),\n 
('GO:0004405', ['histone acetyltransferase activity']),\n ('GO:0004420', ['hydroxymethylglutaryl-CoA reductase (NADPH) activity']),\n ('GO:0042282', ['hydroxymethylglutaryl-CoA reductase (NADPH) activity']),\n ('GO:0016315', ['phosphatidylinositol-3-phosphate phosphatase activity']),\n ('GO:0004438', ['phosphatidylinositol-3-phosphate phosphatase activity']),\n ('GO:0001668',\n ['phosphatidylinositol-4,5-bisphosphate 5-phosphatase activity']),\n ('GO:0004439',\n ['phosphatidylinositol-4,5-bisphosphate 5-phosphatase activity']),\n ('GO:0004467', ['long-chain fatty acid-CoA ligase activity']),\n ('GO:0003996', ['long-chain fatty acid-CoA ligase activity']),\n ('GO:0016619', ['malate dehydrogenase (decarboxylating) (NAD+) activity']),\n ('GO:0004471', ['malate dehydrogenase (decarboxylating) (NAD+) activity']),\n ('GO:0004472', ['malate dehydrogenase (decarboxylating) (NAD+) activity']),\n ('GO:0004479', ['methionyl-tRNA formyltransferase activity']),\n ('GO:0070128', ['methionyl-tRNA formyltransferase activity']),\n ('GO:0001718', ['methionyl-tRNA formyltransferase activity']),\n ('GO:0008702', ['methylenetetrahydrofolate reductase (NAD(P)H) activity']),\n ('GO:0004489', ['methylenetetrahydrofolate reductase (NAD(P)H) activity']),\n ('GO:0004499', ['N,N-dimethylaniline monooxygenase activity']),\n ('GO:0047076', ['N,N-dimethylaniline monooxygenase activity']),\n ('GO:0004523', ['RNA-DNA hybrid ribonuclease activity']),\n ('GO:0004524', ['RNA-DNA hybrid ribonuclease activity']),\n ('GO:0008857', ['exonuclease activity']),\n ('GO:0004527', ['exonuclease activity']),\n ('GO:0004536', ['DNA nuclease activity']),\n ('GO:0004537', ['DNA nuclease activity']),\n ('GO:0004553', ['hydrolase activity, hydrolyzing O-glycosyl compounds']),\n ('GO:0016800', ['hydrolase activity, hydrolyzing O-glycosyl compounds']),\n ('GO:0103025', ['alpha-amylase activity']),\n ('GO:0004556', ['alpha-amylase activity']),\n ('GO:0004562', ['alpha-1,4-glucosidase activity']),\n ('GO:0004558', 
['alpha-1,4-glucosidase activity']),\n ('GO:0016982', ['alpha-1,4-glucosidase activity']),\n ('GO:0016686', ['peroxidase activity']),\n ('GO:0004601', ['peroxidase activity']),\n ('GO:0016687', ['peroxidase activity']),\n ('GO:0016685', ['peroxidase activity']),\n ('GO:0016693', ['peroxidase activity']),\n ('GO:0004602', ['glutathione peroxidase activity']),\n ('GO:0016224', ['glutathione peroxidase activity']),\n ('GO:0004615', ['phosphomannomutase activity']),\n ('GO:0008971', ['phosphomannomutase activity']),\n ('GO:0045126', ['lysophospholipase activity']),\n ('GO:0004622', ['lysophospholipase activity']),\n ('GO:0102568', ['phospholipase A2 activity']),\n ('GO:0004623', ['phospholipase A2 activity']),\n ('GO:0102567', ['phospholipase A2 activity']),\n ('GO:0004629', ['phospholipase C activity']),\n ('GO:0042298', ['phospholipase C activity']),\n ('GO:0004648',\n ['O-phospho-L-serine:2-oxoglutarate aminotransferase activity']),\n ('GO:0004646',\n ['O-phospho-L-serine:2-oxoglutarate aminotransferase activity']),\n ('GO:0004660', ['protein farnesyltransferase activity']),\n ('GO:0018223', ['protein farnesyltransferase activity']),\n ('GO:0004661', ['protein geranylgeranyltransferase activity']),\n ('GO:0018224', ['protein geranylgeranyltransferase activity']),\n ('GO:0018225',\n ['protein C-terminal S-isoprenylcysteine carboxyl O-methyltransferase activity']),\n ('GO:0004671',\n ['protein C-terminal S-isoprenylcysteine carboxyl O-methyltransferase activity']),\n ('GO:0004672', ['protein kinase activity']),\n ('GO:0050222', ['protein kinase activity']),\n ('GO:0008896', ['protein histidine kinase activity']),\n ('GO:0004673', ['protein histidine kinase activity']),\n ('GO:0004696', ['protein serine/threonine kinase activity']),\n ('GO:0004695', ['protein serine/threonine kinase activity']),\n ('GO:0106311', ['protein serine/threonine kinase activity']),\n ('GO:0004700', ['protein serine/threonine kinase activity']),\n ('GO:0004674', ['protein serine/threonine 
kinase activity']),\n ('GO:0004683', ['calmodulin-dependent protein kinase activity']),\n ('GO:0004684', ['calmodulin-dependent protein kinase activity']),\n ('GO:0004685', ['calmodulin-dependent protein kinase activity']),\n ('GO:0004688', ['calmodulin-dependent protein kinase activity']),\n ('GO:0008606', ['phosphorylase kinase activity']),\n ('GO:0004689', ['phosphorylase kinase activity']),\n ('GO:0004691', ['cAMP-dependent protein kinase activity']),\n ('GO:0008602', ['cAMP-dependent protein kinase activity']),\n ('GO:0016537', ['cyclin-dependent protein serine/threonine kinase activity']),\n ('GO:0004693', ['cyclin-dependent protein serine/threonine kinase activity']),\n ('GO:0004701', ['diacylglycerol-dependent serine/threonine kinase activity']),\n ('GO:0004697', ['diacylglycerol-dependent serine/threonine kinase activity']),\n ('GO:0004678', ['G protein-coupled receptor kinase activity']),\n ('GO:0004703', ['G protein-coupled receptor kinase activity']),\n ('GO:0016909', ['MAP kinase activity']),\n ('GO:0016908', ['MAP kinase activity']),\n ('GO:0008339', ['MAP kinase activity']),\n ('GO:0008338', ['MAP kinase activity']),\n ('GO:0004707', ['MAP kinase activity']),\n ('GO:0004709', ['MAP kinase kinase kinase activity']),\n ('GO:0004710', ['MAP kinase kinase kinase activity']),\n ('GO:0004718', ['protein tyrosine kinase activity']),\n ('GO:0004713', ['protein tyrosine kinase activity']),\n ('GO:0018056', ['protein-lysine 6-oxidase activity']),\n ('GO:0004720', ['protein-lysine 6-oxidase activity']),\n ('GO:0000158', ['protein serine/threonine phosphatase activity']),\n ('GO:0015071', ['protein serine/threonine phosphatase activity']),\n ('GO:0030357', ['protein serine/threonine phosphatase activity']),\n ('GO:0106306', ['protein serine/threonine phosphatase activity']),\n ('GO:0008598', ['protein serine/threonine phosphatase activity']),\n ('GO:0000163', ['protein serine/threonine phosphatase activity']),\n ('GO:0030360', ['protein serine/threonine 
phosphatase activity']),\n ('GO:0004724', ['protein serine/threonine phosphatase activity']),\n ('GO:0106307', ['protein serine/threonine phosphatase activity']),\n ('GO:0008600', ['protein serine/threonine phosphatase activity']),\n ('GO:0004722', ['protein serine/threonine phosphatase activity']),\n ('GO:0030358', ['protein serine/threonine phosphatase activity']),\n ('GO:0030361', ['protein serine/threonine phosphatase activity']),\n ('GO:0008596',\n ['calcium-dependent protein serine/threonine phosphatase activity']),\n ('GO:0004723',\n ['calcium-dependent protein serine/threonine phosphatase activity']),\n ('GO:0019906',\n ['[pyruvate dehydrogenase (acetyl-transferring)]-phosphatase activity']),\n ('GO:0004741',\n ['[pyruvate dehydrogenase (acetyl-transferring)]-phosphatase activity']),\n ('GO:0004742', ['dihydrolipoyllysine-residue acetyltransferase activity']),\n ('GO:0030523', ['dihydrolipoyllysine-residue acetyltransferase activity']),\n ('GO:0016959',\n ['ribonucleoside-diphosphate reductase activity, thioredoxin disulfide as acceptor']),\n ('GO:0016960',\n ['ribonucleoside-diphosphate reductase activity, thioredoxin disulfide as acceptor']),\n ('GO:0016961',\n ['ribonucleoside-diphosphate reductase activity, thioredoxin disulfide as acceptor']),\n ('GO:0004748',\n ['ribonucleoside-diphosphate reductase activity, thioredoxin disulfide as acceptor']),\n ('GO:0004761', ['serine-pyruvate transaminase activity']),\n ('GO:0004762', ['serine-pyruvate transaminase activity']),\n ('GO:0004760', ['serine-pyruvate transaminase activity']),\n ('GO:0004763', ['serine-pyruvate transaminase activity']),\n ('GO:0030231', ['sphingomyelin phosphodiesterase activity']),\n ('GO:0030230', ['sphingomyelin phosphodiesterase activity']),\n ('GO:0004767', ['sphingomyelin phosphodiesterase activity']),\n ('GO:0004768', ['stearoyl-CoA 9-desaturase activity']),\n ('GO:0016214', ['stearoyl-CoA 9-desaturase activity']),\n ('GO:0043735', ['stearoyl-CoA 9-desaturase activity']),\n 
('GO:0017066', ['sterol O-acyltransferase activity']),\n ('GO:0004772', ['sterol O-acyltransferase activity']),\n ('GO:0008952', ['succinate-semialdehyde dehydrogenase (NAD+) activity']),\n ('GO:0004777', ['succinate-semialdehyde dehydrogenase (NAD+) activity']),\n ('GO:0008382', ['superoxide dismutase activity']),\n ('GO:0016954', ['superoxide dismutase activity']),\n ('GO:0004784', ['superoxide dismutase activity']),\n ('GO:0008383', ['superoxide dismutase activity']),\n ('GO:0004785', ['superoxide dismutase activity']),\n ('GO:0004796', ['thromboxane-A synthase activity']),\n ('GO:0008400', ['thromboxane-A synthase activity']),\n ('GO:0004803', ['transposase activity']),\n ('GO:0004804', ['transposase activity']),\n ('GO:0016425',\n ['tRNA (5-methylaminomethyl-2-thiouridylate)(34)-methyltransferase activity']),\n ('GO:0004808',\n ['tRNA (5-methylaminomethyl-2-thiouridylate)(34)-methyltransferase activity']),\n ('GO:0016437', ['CCA tRNA nucleotidyltransferase activity']),\n ('GO:0004810', ['CCA tRNA nucleotidyltransferase activity']),\n ('GO:0017100', ['aminoacyl-tRNA ligase activity']),\n ('GO:0004812', ['aminoacyl-tRNA ligase activity']),\n ('GO:0016876', ['aminoacyl-tRNA ligase activity']),\n ('GO:0004426', ['tryptophan 2,3-dioxygenase activity']),\n ('GO:0004833', ['tryptophan 2,3-dioxygenase activity']),\n ('GO:0004842', ['ubiquitin-protein transferase activity']),\n ('GO:0004841', ['ubiquitin-protein transferase activity']),\n ('GO:0004840', ['ubiquitin-protein transferase activity']),\n ('GO:0036459', ['cysteine-type deubiquitinase activity']),\n ('GO:0004843', ['cysteine-type deubiquitinase activity']),\n ('GO:0004857', ['enzyme inhibitor activity']),\n ('GO:0048551', ['enzyme inhibitor activity']),\n ('GO:0004864', ['protein phosphatase inhibitor activity']),\n ('GO:1990681', ['protein phosphatase inhibitor activity']),\n ('GO:0004870', ['cysteine-type endopeptidase inhibitor activity']),\n ('GO:0004869', ['cysteine-type endopeptidase inhibitor 
activity']),\n ('GO:0004942', ['complement receptor activity']),\n ('GO:0004875', ['complement receptor activity']),\n ('GO:0004943', ['complement component C3a receptor activity']),\n ('GO:0004876', ['complement component C3a receptor activity']),\n ('GO:0004944', ['complement component C5a receptor activity']),\n ('GO:0004878', ['complement component C5a receptor activity']),\n ('GO:0038050', ['nuclear receptor activity']),\n ('GO:0004879', ['nuclear receptor activity']),\n ('GO:0008434', ['nuclear receptor activity']),\n ('GO:0003708', ['nuclear receptor activity']),\n ('GO:0004882', ['nuclear receptor activity']),\n ('GO:0038052', ['nuclear receptor activity']),\n ('GO:0004887', ['nuclear receptor activity']),\n ('GO:0038051', ['nuclear receptor activity']),\n ('GO:0004880', ['nuclear receptor activity']),\n ('GO:0004886', ['nuclear receptor activity']),\n ('GO:0004884', ['nuclear receptor activity']),\n ('GO:0004888', ['transmembrane signaling receptor activity']),\n ('GO:0004926', ['transmembrane signaling receptor activity']),\n ('GO:0099600', ['transmembrane signaling receptor activity']),\n ('GO:0004896', ['cytokine receptor activity']),\n ('GO:0004907', ['cytokine receptor activity']),\n ('GO:0030525',\n ['granulocyte macrophage colony-stimulating factor receptor activity']),\n ('GO:0004901',\n ['granulocyte macrophage colony-stimulating factor receptor activity']),\n ('GO:0004902', ['granulocyte colony-stimulating factor receptor activity']),\n ('GO:0030524', ['granulocyte colony-stimulating factor receptor activity']),\n ('GO:0004909', ['interleukin-1, type I, activating receptor activity']),\n ('GO:0019967', ['interleukin-1, type I, activating receptor activity']),\n ('GO:0004910', ['interleukin-1, type II, blocking receptor activity']),\n ('GO:0019968', ['interleukin-1, type II, blocking receptor activity']),\n ('GO:0004923', ['leukemia inhibitory factor receptor activity']),\n ('GO:0004899', ['leukemia inhibitory factor receptor activity']),\n 
('GO:0001622', ['G protein-coupled receptor activity']),\n ('GO:0001624', ['G protein-coupled receptor activity']),\n ('GO:0016526', ['G protein-coupled receptor activity']),\n ('GO:0004930', ['G protein-coupled receptor activity']),\n ('GO:0001623', ['G protein-coupled receptor activity']),\n ('GO:0001625', ['G protein-coupled receptor activity']),\n ('GO:0001600', ['endothelin receptor activity']),\n ('GO:0004962', ['endothelin receptor activity']),\n ('GO:0001599', ['endothelin receptor activity']),\n ('GO:0004964', ['luteinizing hormone receptor activity']),\n ('GO:0004976', ['luteinizing hormone receptor activity']),\n ('GO:0004993', ['G protein-coupled serotonin receptor activity']),\n ('GO:0001585', ['G protein-coupled serotonin receptor activity']),\n ('GO:0016609', ['G protein-coupled serotonin receptor activity']),\n ('GO:0005000', ['vasopressin receptor activity']),\n ('GO:0016931', ['vasopressin receptor activity']),\n ('GO:0005023', ['epidermal growth factor receptor activity']),\n ('GO:0005006', ['epidermal growth factor receptor activity']),\n ('GO:0005021', ['vascular endothelial growth factor receptor activity']),\n ('GO:0036329', ['vascular endothelial growth factor receptor activity']),\n ('GO:0036326', ['vascular endothelial growth factor receptor activity']),\n ('GO:0036330', ['vascular endothelial growth factor receptor activity']),\n ('GO:0036328', ['vascular endothelial growth factor receptor activity']),\n ('GO:0036327', ['vascular endothelial growth factor receptor activity']),\n ('GO:0005032', ['tumor necrosis factor receptor activity']),\n ('GO:0005033', ['tumor necrosis factor receptor activity']),\n ('GO:0005031', ['tumor necrosis factor receptor activity']),\n ('GO:0005041', ['low-density lipoprotein particle receptor activity']),\n ('GO:0008032', ['low-density lipoprotein particle receptor activity']),\n ('GO:0005048', ['signal sequence binding']),\n ('GO:0008249', ['signal sequence binding']),\n ('GO:0008262', ['nuclear export 
signal receptor activity']),\n ('GO:0005049', ['nuclear export signal receptor activity']),\n ('GO:0005069',\n ['transmembrane receptor protein tyrosine kinase adaptor activity']),\n ('GO:0005068',\n ['transmembrane receptor protein tyrosine kinase adaptor activity']),\n ('GO:0072568', ['protein kinase C binding']),\n ('GO:0072569', ['protein kinase C binding']),\n ('GO:0097024', ['protein kinase C binding']),\n ('GO:0005080', ['protein kinase C binding']),\n ('GO:0005087', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0019839', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0017112', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0005088', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0017132', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0005090', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0017034', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0008321', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0016219', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0005086', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0030676', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0016220', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0005085', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0008433', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0005089', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0030675', ['GTPase activator activity']),\n ('GO:0008060', ['GTPase activator activity']),\n ('GO:0005097', ['GTPase activator activity']),\n ('GO:0046582', ['GTPase activator activity']),\n ('GO:0005096', ['GTPase activator activity']),\n ('GO:0005100', ['GTPase activator activity']),\n ('GO:0005098', ['GTPase activator activity']),\n ('GO:0005099', ['GTPase activator activity']),\n ('GO:0017123', ['GTPase activator activity']),\n ('GO:0005101', ['GTPase activator activity']),\n ('GO:0005162', ['fibroblast growth factor receptor 
binding']),\n ('GO:0001521', ['fibroblast growth factor receptor binding']),\n ('GO:0005104', ['fibroblast growth factor receptor binding']),\n ('GO:0005109', ['frizzled binding']),\n ('GO:0005110', ['frizzled binding']),\n ('GO:0008185', ['epidermal growth factor receptor binding']),\n ('GO:0005154', ['epidermal growth factor receptor binding']),\n ('GO:0005067', ['insulin-like growth factor receptor binding']),\n ('GO:0005159', ['insulin-like growth factor receptor binding']),\n ('GO:0005478', ['transporter activity']),\n ('GO:0005215', ['transporter activity']),\n ('GO:0005220',\n ['inositol 1,4,5-trisphosphate-gated calcium channel activity']),\n ('GO:0008095',\n ['inositol 1,4,5-trisphosphate-gated calcium channel activity']),\n ('GO:0015285', ['gap junction channel activity']),\n ('GO:0005243', ['gap junction channel activity']),\n ('GO:0015286', ['gap junction channel activity']),\n ('GO:0005245', ['voltage-gated calcium channel activity']),\n ('GO:0015270', ['voltage-gated calcium channel activity']),\n ('GO:0010173', ['voltage-gated calcium channel activity']),\n ('GO:0005224', ['intracellularly ATP-gated chloride channel activity']),\n ('GO:0005260', ['intracellularly ATP-gated chloride channel activity']),\n ('GO:0005261', ['monoatomic cation channel activity']),\n ('GO:0015281', ['monoatomic cation channel activity']),\n ('GO:0015338', ['monoatomic cation channel activity']),\n ('GO:0015206', ['allantoin:proton symporter activity']),\n ('GO:0005274', ['allantoin:proton symporter activity']),\n ('GO:0005275', ['amine transmembrane transporter activity']),\n ('GO:0005279', ['amine transmembrane transporter activity']),\n ('GO:0005283', ['amino acid:sodium symporter activity']),\n ('GO:0005285', ['amino acid:sodium symporter activity']),\n ('GO:0005284', ['amino acid:sodium symporter activity']),\n ('GO:0005282', ['neutral L-amino acid:sodium symporter activity']),\n ('GO:0005295', ['neutral L-amino acid:sodium symporter activity']),\n ('GO:0015508', 
['L-tyrosine transmembrane transporter activity']),\n ('GO:0005302', ['L-tyrosine transmembrane transporter activity']),\n ('GO:0005310', ['dicarboxylic acid transmembrane transporter activity']),\n ('GO:0015365', ['dicarboxylic acid transmembrane transporter activity']),\n ('GO:0005312', ['dicarboxylic acid transmembrane transporter activity']),\n ('GO:1901677', ['phosphate transmembrane transporter activity']),\n ('GO:0005315', ['phosphate transmembrane transporter activity']),\n ('GO:0005317', ['phosphate transmembrane transporter activity']),\n ('GO:0008562', ['long-chain fatty acid transmembrane transporter activity']),\n ('GO:0005325', ['long-chain fatty acid transmembrane transporter activity']),\n ('GO:0005324', ['long-chain fatty acid transmembrane transporter activity']),\n ('GO:0005330', ['dopamine:sodium symporter activity']),\n ('GO:0005329', ['dopamine:sodium symporter activity']),\n ('GO:0005334', ['norepinephrine:sodium symporter activity']),\n ('GO:0005333', ['norepinephrine:sodium symporter activity']),\n ('GO:0015222', ['serotonin:sodium:chloride symporter activity']),\n ('GO:0005335', ['serotonin:sodium:chloride symporter activity']),\n ('GO:0005336', ['serotonin:sodium:chloride symporter activity']),\n ('GO:0005339', ['nucleotide-sugar transmembrane transporter activity']),\n ('GO:0005338', ['nucleotide-sugar transmembrane transporter activity']),\n ('GO:0005341', ['nucleotide-sulfate transmembrane transporter activity']),\n ('GO:0005340', ['nucleotide-sulfate transmembrane transporter activity']),\n ('GO:0005344', ['oxygen carrier activity']),\n ('GO:0015033', ['oxygen carrier activity']),\n ('GO:0005348', ['ATP transmembrane transporter activity']),\n ('GO:0005347', ['ATP transmembrane transporter activity']),\n ('GO:0015542', ['carbohydrate:proton symporter activity']),\n ('GO:0005351', ['carbohydrate:proton symporter activity']),\n ('GO:0005403', ['carbohydrate:proton symporter activity']),\n ('GO:0019192', ['fructose transmembrane 
transporter activity']),\n ('GO:0015585', ['fructose transmembrane transporter activity']),\n ('GO:0005353', ['fructose transmembrane transporter activity']),\n ('GO:0015579', ['glucose transmembrane transporter activity']),\n ('GO:0005355', ['glucose transmembrane transporter activity']),\n ('GO:0005356', ['glucose:proton symporter activity']),\n ('GO:0005361', ['glucose:proton symporter activity']),\n ('GO:0015581', ['maltose transmembrane transporter activity']),\n ('GO:0005363', ['maltose transmembrane transporter activity']),\n ('GO:0005371',\n ['tricarboxylate secondary active transmembrane transporter activity']),\n ('GO:0005370',\n ['tricarboxylate secondary active transmembrane transporter activity']),\n ('GO:0005379', ['copper ion transmembrane transporter activity']),\n ('GO:0005380', ['copper ion transmembrane transporter activity']),\n ('GO:0005378', ['copper ion transmembrane transporter activity']),\n ('GO:0005375', ['copper ion transmembrane transporter activity']),\n ('GO:0015088', ['copper ion transmembrane transporter activity']),\n ('GO:0016033', ['iron ion transmembrane transporter activity']),\n ('GO:0097689', ['iron ion transmembrane transporter activity']),\n ('GO:0005382', ['iron ion transmembrane transporter activity']),\n ('GO:0005381', ['iron ion transmembrane transporter activity']),\n ('GO:0008522', ['nucleoside:sodium symporter activity']),\n ('GO:0005415', ['nucleoside:sodium symporter activity']),\n ('GO:0005436', ['sodium:phosphate symporter activity']),\n ('GO:0015321', ['sodium:phosphate symporter activity']),\n ('GO:0005471', ['ATP:ADP antiporter activity']),\n ...]In\u00a0[19]: Copied!
[(term.ID,term.name) for term in go if not term.parents and term.children]\n[(term.ID,term.name) for term in go if not term.parents and term.children] Out[19]:
[('GO:0005554', ['molecular_function']),\n ('GO:0008372', ['cellular_component']),\n ('GO:0000004', ['biological_process'])]In\u00a0[20]: Copied!
go['GO:0005554'].__dict__\ngo['GO:0005554'].__dict__ Out[20]:
{'_ID': 'GO:0005554',\n '_original_ID': 'GO:0005554',\n '_container': <picea.ontology.Ontology at 0x7f8a1bfd6380>,\n '_children': ['GO:0003774',\n 'GO:0003824',\n 'GO:0005198',\n 'GO:0005478',\n 'GO:0005215',\n 'GO:0005488',\n 'GO:0009055',\n 'GO:0009054',\n 'GO:0009053',\n 'GO:0016209',\n 'GO:0038024',\n 'GO:0044183',\n 'GO:0045182',\n 'GO:0045735',\n 'GO:0060089',\n 'GO:0060090',\n 'GO:0032947',\n 'GO:0090729',\n 'GO:0050827',\n 'GO:0098772',\n 'GO:0140104',\n 'GO:0140110',\n 'GO:0140223',\n 'GO:0140299',\n 'GO:0140313',\n 'GO:0140489',\n 'GO:0140522',\n 'GO:0140657',\n 'GO:0140691',\n 'GO:0140776',\n 'GO:0140777',\n 'GO:0034292',\n 'GO:0034291',\n 'GO:0140911',\n 'GO:0034290',\n 'GO:0140912',\n 'GO:0141047',\n 'GO:0180020',\n 'GO:0180024'],\n '_parents': [],\n 'name': ['molecular_function'],\n 'def': ['\"A molecular process that can be carried out by the action of a single macromolecular machine, usually via direct physical interactions with other molecular entities. Function in this sense denotes an action, or activity, that a gene product (or a complex) performs.\" [GOC:pdt]'],\n 'alt_id': ['GO:0003674'],\n 'namespace': ['molecular_function'],\n 'comment': [\"Note that, in addition to forming the root of the molecular function ontology, this term is recommended for the annotation of gene products whose molecular function is unknown. When this term is used for annotation, it indicates that no information was available about the molecular function of the gene product annotated as of the date the annotation was made; the evidence code 'no data' (ND), is used to indicate this. Despite its name, this is not a type of 'function' in the sense typically defined by upper ontologies such as Basic Formal Ontology (BFO). 
It is instead a BFO:process carried out by a single gene product or complex.\"],\n 'subset': ['goslim_candida',\n 'goslim_chembl',\n 'goslim_metagenomics',\n 'goslim_pir',\n 'goslim_plant',\n 'goslim_yeast'],\n 'synonym': ['\"molecular function\" EXACT []']}In\u00a0[21]: Copied!
go['GO:0003674'].__dict__\ngo['GO:0003674'].__dict__ Out[21]:
{'_ID': 'GO:0005554',\n '_original_ID': 'GO:0005554',\n '_container': <picea.ontology.Ontology at 0x7f8a1bfd6380>,\n '_children': ['GO:0003774',\n 'GO:0003824',\n 'GO:0005198',\n 'GO:0005478',\n 'GO:0005215',\n 'GO:0005488',\n 'GO:0009055',\n 'GO:0009054',\n 'GO:0009053',\n 'GO:0016209',\n 'GO:0038024',\n 'GO:0044183',\n 'GO:0045182',\n 'GO:0045735',\n 'GO:0060089',\n 'GO:0060090',\n 'GO:0032947',\n 'GO:0090729',\n 'GO:0050827',\n 'GO:0098772',\n 'GO:0140104',\n 'GO:0140110',\n 'GO:0140223',\n 'GO:0140299',\n 'GO:0140313',\n 'GO:0140489',\n 'GO:0140522',\n 'GO:0140657',\n 'GO:0140691',\n 'GO:0140776',\n 'GO:0140777',\n 'GO:0034292',\n 'GO:0034291',\n 'GO:0140911',\n 'GO:0034290',\n 'GO:0140912',\n 'GO:0141047',\n 'GO:0180020',\n 'GO:0180024'],\n '_parents': [],\n 'name': ['molecular_function'],\n 'def': ['\"A molecular process that can be carried out by the action of a single macromolecular machine, usually via direct physical interactions with other molecular entities. Function in this sense denotes an action, or activity, that a gene product (or a complex) performs.\" [GOC:pdt]'],\n 'alt_id': ['GO:0003674'],\n 'namespace': ['molecular_function'],\n 'comment': [\"Note that, in addition to forming the root of the molecular function ontology, this term is recommended for the annotation of gene products whose molecular function is unknown. When this term is used for annotation, it indicates that no information was available about the molecular function of the gene product annotated as of the date the annotation was made; the evidence code 'no data' (ND), is used to indicate this. Despite its name, this is not a type of 'function' in the sense typically defined by upper ontologies such as Basic Formal Ontology (BFO). 
It is instead a BFO:process carried out by a single gene product or complex.\"],\n 'subset': ['goslim_candida',\n 'goslim_chembl',\n 'goslim_metagenomics',\n 'goslim_pir',\n 'goslim_plant',\n 'goslim_yeast'],\n 'synonym': ['\"molecular function\" EXACT []']}"},{"location":"examples/ontology.pct/","title":"Ontology.pct","text":"
This notebook shows how to work with biological ontologies such as the sequence ontology or the gene ontology.
In\u00a0[1]: Copied!import sys\n\nimport requests\n\nsys.path.insert(0, '../../')\nimport picea\n\npicea.__version__\nimport sys import requests sys.path.insert(0, '../../') import picea picea.__version__ Out[1]:
'0.0.27'In\u00a0[2]: Copied!
obo_url = (\n 'https://raw.githubusercontent.com/The-Sequence-Ontology/'\n 'SO-Ontologies/master/Ontology_Files/so.obo'\n)\nr = requests.get(obo_url)\nr\nobo_url = ( 'https://raw.githubusercontent.com/The-Sequence-Ontology/' 'SO-Ontologies/master/Ontology_Files/so.obo' ) r = requests.get(obo_url) r Out[2]:
<Response [200]>In\u00a0[3]: Copied!
r.text.split('\\n')[:100]\nr.text.split('\\n')[:100] Out[3]:
['format-version: 1.2',\n 'data-version: 2024-04-10',\n 'date: 10:04:2024 18:16',\n 'saved-by: Evan Christensen',\n 'subsetdef: Alliance_of_Genome_Resources \"Alliance of Genome Resources Gene Biotype Slim\"',\n 'subsetdef: biosapiens \"biosapiens protein feature ontology\"',\n 'subsetdef: DBVAR \"database of genomic structural variation\"',\n 'subsetdef: SOFA \"SO feature annotation\"',\n 'synonymtypedef: aa1 \"amino acid 1 letter code\"',\n 'synonymtypedef: aa3 \"amino acid 3 letter code\"',\n 'synonymtypedef: AAMOD \"amino acid modification\"',\n 'synonymtypedef: AGR \"Alliance of Genome Resources\"',\n 'synonymtypedef: BS \"biosapiens\"',\n 'synonymtypedef: dbsnp \"dbsnp variant terms\"',\n 'synonymtypedef: dbvar \"DBVAR\"',\n 'synonymtypedef: ebi_variants \"ensembl variant terms\"',\n 'synonymtypedef: RNAMOD \"RNA modification\" EXACT',\n 'synonymtypedef: VAR \"variant annotation term\"',\n 'default-namespace: sequence',\n 'ontology: so',\n 'property_value: IAO:0000700 SO:0000110',\n 'property_value: IAO:0000700 SO:0000400',\n 'property_value: IAO:0000700 SO:0001060',\n 'property_value: IAO:0000700 SO:0001260',\n '',\n '[Term]',\n 'id: SO:0000000',\n 'name: Sequence_Ontology',\n 'subset: SOFA',\n 'is_obsolete: true',\n '',\n '[Term]',\n 'id: SO:00000000002382',\n 'name: 5_prime_UTR_uORF_variant',\n 'def: \"A 5\\' UTR variant within an upstream open reading frame.\" [PMID:32461616, PMID:32926138]',\n 'comment: Added 10 Apr 2024 at the request of Sarah Hunt (EBI). See GitHub Issue #647.',\n 'is_a: SO:0001623 ! 5_prime_UTR_variant',\n 'created_by: evan',\n 'creation_date: 2024-04-10T17:49:03Z',\n '',\n '[Term]',\n 'id: SO:0000001',\n 'name: region',\n 'def: \"A sequence_feature with an extent greater than zero. A nucleotide region is composed of bases and a polypeptide region is composed of amino acids.\" [SO:ke]',\n 'subset: SOFA',\n 'synonym: \"sequence\" EXACT []',\n 'is_a: SO:0000110 ! 
sequence_feature',\n '',\n '[Term]',\n 'id: SO:00000010002382',\n 'name: 5_prime_UTR_uORF_stop_codon_variant',\n 'def: \"A 5\\' UTR variant where a stop codon in an upstream open reading frame is introduced, moved or lost.\" [PMID:32461616, PMID:32926138]',\n 'comment: Added 10 Apr 2024 at the request of Sarah Hunt (EBI). See GitHub Issue #622.',\n 'is_a: SO:00000000002382 ! 5_prime_UTR_uORF_variant',\n 'created_by: evan',\n 'creation_date: 2024-04-10T17:56:17Z',\n '',\n '[Term]',\n 'id: SO:0000002',\n 'name: sequence_secondary_structure',\n 'def: \"A folded sequence.\" [SO:ke]',\n 'synonym: \"INSDC_feature:misc_structure\" EXACT []',\n 'synonym: \"sequence secondary structure\" EXACT []',\n 'is_a: SO:0001411 ! biological_region',\n '',\n '[Term]',\n 'id: SO:00000020002382',\n 'name: 5_prime_UTR_uORF_frameshift_variant',\n 'def: \"A 5\\' UTR variant which disrupts the translation of an upstream open reading frame because the number of nucleotides inserted or deleted is not a multiple of three.\" [PMID:32461616, PMID:32926138]',\n 'comment: Added 10 Apr 2024 at the request of Sarah Hunt (EBI). See GitHub Issue #621.',\n 'synonym: \"uFrameshift (UTRannotator)\" EXACT []',\n 'is_a: SO:00000000002382 ! 5_prime_UTR_uORF_variant',\n 'created_by: evan',\n 'creation_date: 2024-04-10T17:58:40Z',\n '',\n '[Term]',\n 'id: SO:0000003',\n 'name: G_quartet',\n 'def: \"G-quartets are unusual nucleic acid structures consisting of a planar arrangement where each guanine is hydrogen bonded by hoogsteen pairing to another guanine in the quartet.\" [http://www.ncbi.nlm.nih.gov/pubmed/7919797?dopt=Abstract]',\n 'synonym: \"G quartet\" EXACT []',\n 'synonym: \"G tetrad\" EXACT []',\n 'synonym: \"G-quadruplex\" EXACT []',\n 'synonym: \"G-quartet\" EXACT []',\n 'synonym: \"G-tetrad\" EXACT []',\n 'synonym: \"G_quadruplex\" EXACT []',\n 'synonym: \"guanine tetrad\" EXACT []',\n 'xref: http://en.wikipedia.org/wiki/G-quadruplex \"wiki\"',\n 'is_a: SO:0000002 ! 
sequence_secondary_structure',\n '',\n '[Term]',\n 'id: SO:00000030002382',\n 'name: 5_prime_UTR_uORF_stop_codon_gain_variant',\n 'def: \"A 5\\' UTR variant where a premature stop codon is gained in an upstream open reading frame.\" [PMID:32461616, PMID:32926138]',\n 'comment: Added 10 Apr 2024 at the request of Sarah Hunt (EBI). See GitHub Issue #624.',\n 'synonym: \"uSTOP_gained\" EXACT [] {comment=\"UTRannotator\"}',\n 'is_a: SO:00000010002382 ! 5_prime_UTR_uORF_stop_codon_variant',\n 'created_by: evan',\n 'creation_date: 2024-04-10T18:01:42Z',\n '',\n '[Term]']In\u00a0[4]: Copied!
so = picea.Ontology.from_obo(string=r.text)\nso = picea.Ontology.from_obo(string=r.text) In\u00a0[5]: Copied!
ids = [el.ID for el in so['SO:0000866'].parents.elements]\nids = [el.ID for el in so['SO:0000866'].parents.elements] In\u00a0[6]: Copied!
'SO:0000866' in {el.ID for so_id in ids for el in so[so_id].children.elements}\n'SO:0000866' in {el.ID for so_id in ids for el in so[so_id].children.elements} Out[6]:
TrueIn\u00a0[7]: Copied!
len(so)\nlen(so) Out[7]:
2513In\u00a0[8]: Copied!
url = 'http://purl.obolibrary.org/obo/go.obo'\n# url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'\nr = requests.get(url)\ngo = picea.Ontology.from_obo(string=r.text)\nlen(go.elements)\nurl = 'http://purl.obolibrary.org/obo/go.obo' # url = 'http://purl.obolibrary.org/obo/go/go-basic.obo' r = requests.get(url) go = picea.Ontology.from_obo(string=r.text) len(go.elements)
/home/runner/work/picea/picea/picea/ontology.py:32: UserWarning: Accessed GO term by alt ID GO:0008150, returning main GO term with ID GO:0000004\n warnings.warn(f\"Accessed GO term by alt ID {ID}, \" f\"returning main GO term with ID {alt_id}\")\n/home/runner/work/picea/picea/picea/ontology.py:32: UserWarning: Accessed GO term by alt ID GO:0003674, returning main GO term with ID GO:0005554\n warnings.warn(f\"Accessed GO term by alt ID {ID}, \" f\"returning main GO term with ID {alt_id}\")\n/home/runner/work/picea/picea/picea/ontology.py:32: UserWarning: Accessed GO term by alt ID GO:0005575, returning main GO term with ID GO:0008372\n warnings.warn(f\"Accessed GO term by alt ID {ID}, \" f\"returning main GO term with ID {alt_id}\")\nOut[8]:
45667In\u00a0[9]: Copied!
[(term.ID, term.name, len(term.parents)) for term in go['GO:0048316'].parents]\n[(term.ID, term.name, len(term.parents)) for term in go['GO:0048316'].parents] Out[9]:
[('GO:0009791', ['post-embryonic development'], 5),\n ('GO:0032501', ['multicellular organismal process'], 1),\n ('GO:0000004', ['biological_process'], 0),\n ('GO:0007275', ['multicellular organism development'], 4),\n ('GO:0048856', ['anatomical structure development'], 2),\n ('GO:0032502', ['developmental process'], 1),\n ('GO:0048608', ['reproductive structure development'], 9),\n ('GO:0003006', ['developmental process involved in reproduction'], 3),\n ('GO:0022414', ['reproductive process'], 1),\n ('GO:0061458', ['reproductive system development'], 6),\n ('GO:0048731', ['system development'], 5),\n ('GO:0010154', ['fruit development'], 10)]In\u00a0[10]: Copied!
go['GO:0048316'].children\ngo['GO:0048316'].children Out[10]:
<picea.ontology.Ontology at 0x7f4598e2cf10>In\u00a0[11]: Copied!
import networkx as nx\n\nnx.__version__\nimport networkx as nx nx.__version__
\n---------------------------------------------------------------------------\nModuleNotFoundError Traceback (most recent call last)\nCell In[11], line 1\n----> 1 import networkx as nx\n 3 nx.__version__\n\nModuleNotFoundError: No module named 'networkx'In\u00a0[12]: Copied!
graph = nx.DiGraph()\nfor term in [go['GO:0048316'], *go['GO:0048316'].children]:\n graph.add_node(term.ID, name=term.name)\n for child_ID in term._children:\n graph.add_edge(term.ID, child_ID)\nlayout = nx.planar_layout(graph)\nnx.draw(graph, pos=layout, node_shape='s')\ngraph = nx.DiGraph() for term in [go['GO:0048316'], *go['GO:0048316'].children]: graph.add_node(term.ID, name=term.name) for child_ID in term._children: graph.add_edge(term.ID, child_ID) layout = nx.planar_layout(graph) nx.draw(graph, pos=layout, node_shape='s')
\n---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\nCell In[12], line 1\n----> 1 graph = nx.DiGraph()\n 2 for term in [go['GO:0048316'], *go['GO:0048316'].children]:\n 3 graph.add_node(term.ID, name=term.name)\n\nNameError: name 'nx' is not definedIn\u00a0[13]: Copied!
import sys\n\n!{sys.executable} -m pip install pygraphviz\nnx.nx_agraph.to_agraph(graph)\nimport sys !{sys.executable} -m pip install pygraphviz nx.nx_agraph.to_agraph(graph)
Collecting pygraphviz\r\n Using cached pygraphviz-1.13.tar.gz (104 kB)\r\n
Installing build dependencies ... -
\b \b\\
\b \b|
\b \bdone\r\n
Getting requirements to build wheel ... -
\b \bdone\r\n
Installing backend dependencies ... -
\b \b\\
\b \bdone\r\n
Preparing metadata (pyproject.toml) ... -\b \bdone\r\n
Building wheels for collected packages: pygraphviz\r\n
Building wheel for pygraphviz (pyproject.toml) ... -
\b \berror\r\n error: subprocess-exited-with-error\r\n \r\n \u00d7 Building wheel for pygraphviz (pyproject.toml) did not run successfully.\r\n \u2502 exit code: 1\r\n \u2570\u2500> [61 lines of output]\r\n running bdist_wheel\r\n running build\r\n running build_py\r\n creating build\r\n creating build/lib.linux-x86_64-cpython-310\r\n creating build/lib.linux-x86_64-cpython-310/pygraphviz\r\n copying pygraphviz/agraph.py -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n copying pygraphviz/testing.py -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n copying pygraphviz/scraper.py -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n copying pygraphviz/__init__.py -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n copying pygraphviz/graphviz.py -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n creating build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_node_attributes.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_graph.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/__init__.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_repr_mimebundle.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_unicode.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_readwrite.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_edge_attributes.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_layout.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_scraper.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_close.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_clear.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying 
pygraphviz/tests/test_attribute_defaults.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_drawing.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_html.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_subgraph.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n copying pygraphviz/tests/test_string.py -> build/lib.linux-x86_64-cpython-310/pygraphviz/tests\r\n running egg_info\r\n writing pygraphviz.egg-info/PKG-INFO\r\n writing dependency_links to pygraphviz.egg-info/dependency_links.txt\r\n writing top-level names to pygraphviz.egg-info/top_level.txt\r\n reading manifest file 'pygraphviz.egg-info/SOURCES.txt'\r\n reading manifest template 'MANIFEST.in'\r\n warning: no files found matching '*.swg'\r\n warning: no files found matching '*.png' under directory 'doc'\r\n warning: no files found matching '*.html' under directory 'doc'\r\n warning: no files found matching '*.txt' under directory 'doc'\r\n warning: no files found matching '*.css' under directory 'doc'\r\n warning: no previously-included files matching '*~' found anywhere in distribution\r\n warning: no previously-included files matching '*.pyc' found anywhere in distribution\r\n warning: no previously-included files matching '.svn' found anywhere in distribution\r\n no previously-included directories found matching 'doc/build'\r\n adding license file 'LICENSE'\r\n writing manifest file 'pygraphviz.egg-info/SOURCES.txt'\r\n copying pygraphviz/graphviz.i -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n copying pygraphviz/graphviz_wrap.c -> build/lib.linux-x86_64-cpython-310/pygraphviz\r\n running build_ext\r\n building 'pygraphviz._graphviz' extension\r\n creating build/temp.linux-x86_64-cpython-310\r\n creating build/temp.linux-x86_64-cpython-310/pygraphviz\r\n x86_64-linux-gnu-gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g 
-fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -fPIC -DSWIG_PYTHON_STRICT_BYTE_CHAR -I/home/runner/.cache/pypoetry/virtualenvs/picea-ox5U8VzY-py3.10/include -I/usr/include/python3.10 -c pygraphviz/graphviz_wrap.c -o build/temp.linux-x86_64-cpython-310/pygraphviz/graphviz_wrap.o\r\n pygraphviz/graphviz_wrap.c:9: warning: \"SWIG_PYTHON_STRICT_BYTE_CHAR\" redefined\r\n 9 | #define SWIG_PYTHON_STRICT_BYTE_CHAR\r\n |\r\n <command-line>: note: this is the location of the previous definition\r\n pygraphviz/graphviz_wrap.c:3023:10: fatal error: graphviz/cgraph.h: No such file or directory\r\n 3023 | #include \"graphviz/cgraph.h\"\r\n | ^~~~~~~~~~~~~~~~~~~\r\n compilation terminated.\r\n error: command '/usr/bin/x86_64-linux-gnu-gcc' failed with exit code 1\r\n [end of output]\r\n \r\n note: This error originates from a subprocess, and is likely not a problem with pip.\r\n ERROR: Failed building wheel for pygraphviz\r\nFailed to build pygraphviz\r\nERROR: Could not build wheels for pygraphviz, which is required to install pyproject.toml-based projects\r\n
\n---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\nCell In[13], line 4\n 1 import sys\n 3 get_ipython().system('{sys.executable} -m pip install pygraphviz')\n----> 4 nx.nx_agraph.to_agraph(graph)\n\nNameError: name 'nx' is not definedIn\u00a0[14]: Copied!
[(term.ID, term.name) for term in go['GO:0048316'].children]\n[(term.ID, term.name) for term in go['GO:0048316'].children] Out[14]:
[('GO:0009793', ['embryo development ending in seed dormancy']),\n ('GO:0009942', ['longitudinal axis specification']),\n ('GO:0010069', ['zygote asymmetric cytokinesis in embryo sac']),\n ('GO:0010262', ['somatic embryogenesis']),\n ('GO:0010654', ['apical cell fate commitment']),\n ('GO:0048508', ['embryonic meristem development']),\n ('GO:0010065', ['primary meristem tissue development']),\n ('GO:0010066', ['ground meristem histogenesis']),\n ('GO:0010067', ['procambium histogenesis']),\n ('GO:0010068', ['protoderm histogenesis']),\n ('GO:0010071', ['root meristem specification']),\n ('GO:0010072', ['primary shoot apical meristem specification']),\n ('GO:0048825', ['cotyledon development']),\n ('GO:0048826', ['cotyledon morphogenesis']),\n ('GO:0010588', ['cotyledon vascular tissue pattern formation']),\n ('GO:0009960', ['endosperm development']),\n ('GO:0010214', ['seed coat development']),\n ('GO:0048359',\n ['mucilage metabolic process involved in seed coat development']),\n ('GO:0048354',\n ['mucilage biosynthetic process involved in seed coat development']),\n ('GO:0010344', ['seed oilbody biogenesis']),\n ('GO:0010431', ['seed maturation']),\n ('GO:0010162', ['seed dormancy process']),\n ('GO:0010231', ['maintenance of seed dormancy']),\n ('GO:0098755', ['maintenance of seed dormancy by absisic acid']),\n ('GO:0048700', ['acquisition of desiccation tolerance in seed']),\n ('GO:0048838', ['release of seed from dormancy']),\n ('GO:1990068', ['seed dehydration']),\n ('GO:0048317', ['seed morphogenesis']),\n ('GO:0080001', ['mucilage extrusion from seed coat']),\n ('GO:0080112', ['seed growth']),\n ('GO:0090376', ['seed trichome differentiation']),\n ('GO:0090377', ['seed trichome initiation']),\n ('GO:0090378', ['seed trichome elongation']),\n ('GO:0090379',\n ['secondary cell wall biogenesis involved in seed trichome differentiation']),\n ('GO:0090380', ['seed trichome maturation']),\n ('GO:0140547', ['acquisition of seed longevity'])]In\u00a0[15]: Copied!
go['GO:0010431'].__dict__\ngo['GO:0010431'].__dict__ Out[15]:
{'_ID': 'GO:0010431',\n '_original_ID': 'GO:0010431',\n '_container': <picea.ontology.Ontology at 0x7f4598e2cac0>,\n '_children': ['GO:0010162', 'GO:1990068'],\n '_parents': ['GO:0003006', 'GO:0021700', 'GO:0048609', 'GO:0048316'],\n 'name': ['seed maturation'],\n 'def': ['\"A process in seed development that occurs after embryogenesis by which a quiescent state is established in a seed. Seed maturation is characterized by storage compound accumulation, acquisition of desiccation tolerance, growth arrest and the entry into a dormancy period of variable length that is broken upon germination.\" [PMID:16096971]'],\n 'alt_id': [],\n 'namespace': ['biological_process'],\n 'is_a': ['GO:0003006 ! developmental process involved in reproduction',\n 'GO:0021700 ! developmental maturation',\n 'GO:0048609 ! multicellular organismal reproductive process'],\n 'relationship': ['part_of GO:0048316 ! seed development']}In\u00a0[16]: Copied!
go['GO:0048316'].__dict__\ngo['GO:0048316'].__dict__ Out[16]:
{'_ID': 'GO:0048316',\n '_original_ID': 'GO:0048316',\n '_container': <picea.ontology.Ontology at 0x7f4598e2cac0>,\n '_children': ['GO:0009793',\n 'GO:0009960',\n 'GO:0010214',\n 'GO:0010344',\n 'GO:0010431',\n 'GO:0048317',\n 'GO:0080001',\n 'GO:0080112',\n 'GO:0090376',\n 'GO:0140547'],\n '_parents': ['GO:0009791', 'GO:0048608', 'GO:0010154'],\n 'name': ['seed development'],\n 'def': ['\"The process whose specific outcome is the progression of the seed over time, from its formation to the mature structure. A seed is a propagating organ formed in the sexual reproductive cycle of gymnosperms and angiosperms, consisting of a protective coat enclosing an embryo and food reserves.\" [GOC:jid, PO:0009010]'],\n 'alt_id': [],\n 'namespace': ['biological_process'],\n 'is_a': ['GO:0009791 ! post-embryonic development',\n 'GO:0048608 ! reproductive structure development'],\n 'relationship': ['part_of GO:0010154 ! fruit development']}In\u00a0[17]: Copied!
go['GO:0048316'].children._elements.keys()\ngo['GO:0048316'].children._elements.keys() Out[17]:
dict_keys(['GO:0009793', 'GO:0009942', 'GO:0010069', 'GO:0010262', 'GO:0010654', 'GO:0048508', 'GO:0010065', 'GO:0010066', 'GO:0010067', 'GO:0010068', 'GO:0010071', 'GO:0010072', 'GO:0048825', 'GO:0048826', 'GO:0010588', 'GO:0009960', 'GO:0010214', 'GO:0048359', 'GO:0048354', 'GO:0010344', 'GO:0010431', 'GO:0010162', 'GO:0010231', 'GO:0098755', 'GO:0048700', 'GO:0048838', 'GO:1990068', 'GO:0048317', 'GO:0080001', 'GO:0080112', 'GO:0090376', 'GO:0090377', 'GO:0090378', 'GO:0090379', 'GO:0090380', 'GO:0140547'])In\u00a0[18]: Copied!
[(term.ID,term.name) for term in go if term.__dict__.get('alt_id') and term._parents]\n[(term.ID,term.name) for term in go if term.__dict__.get('alt_id') and term._parents] Out[18]:
[('GO:0000010', ['heptaprenyl diphosphate synthase activity']),\n ('GO:0036422', ['heptaprenyl diphosphate synthase activity']),\n ('GO:1905121', ['mitotic spindle elongation']),\n ('GO:0000022', ['mitotic spindle elongation']),\n ('GO:0000946', ['tRNA binding']),\n ('GO:0000049', ['tRNA binding']),\n ('GO:0000050', ['urea cycle']),\n ('GO:0006594', ['urea cycle']),\n ('GO:0006871', ['urea cycle']),\n ('GO:0000055', ['ribosomal large subunit export from nucleus']),\n ('GO:0000057', ['ribosomal large subunit export from nucleus']),\n ('GO:0000058', ['ribosomal small subunit export from nucleus']),\n ('GO:0000056', ['ribosomal small subunit export from nucleus']),\n ('GO:0016359', ['mitotic sister chromatid segregation']),\n ('GO:0000070', ['mitotic sister chromatid segregation']),\n ('GO:0000073', ['initial mitotic spindle pole body separation']),\n ('GO:0030475', ['initial mitotic spindle pole body separation']),\n ('GO:0031576', ['cell cycle checkpoint signaling']),\n ('GO:0071779', ['cell cycle checkpoint signaling']),\n ('GO:0072407', ['cell cycle checkpoint signaling']),\n ('GO:0072404', ['cell cycle checkpoint signaling']),\n ('GO:0000075', ['cell cycle checkpoint signaling']),\n ('GO:0072395', ['cell cycle checkpoint signaling']),\n ('GO:0000076', ['DNA replication checkpoint signaling']),\n ('GO:0072437', ['DNA replication checkpoint signaling']),\n ('GO:0072422', ['DNA damage checkpoint signaling']),\n ('GO:0000077', ['DNA damage checkpoint signaling']),\n ('GO:0015177',\n ['S-adenosyl-L-methionine transmembrane transporter activity']),\n ('GO:0000095',\n ['S-adenosyl-L-methionine transmembrane transporter activity']),\n ('GO:0015178', ['S-methylmethionine transmembrane transporter activity']),\n ('GO:0000100', ['S-methylmethionine transmembrane transporter activity']),\n ('GO:0000103', ['sulfate assimilation']),\n ('GO:0019378', ['sulfate assimilation']),\n ('GO:0000104', ['succinate dehydrogenase activity']),\n ('GO:0019739', ['succinate dehydrogenase 
activity']),\n ('GO:0045816', ['negative regulation of transcription by RNA polymerase II']),\n ('GO:0000122', ['negative regulation of transcription by RNA polymerase II']),\n ('GO:0010553', ['negative regulation of transcription by RNA polymerase II']),\n ('GO:0030914', ['SAGA complex']),\n ('GO:0000124', ['SAGA complex']),\n ('GO:0000125', ['SAGA complex']),\n ('GO:0036282', ['flocculation']),\n ('GO:0000501', ['flocculation']),\n ('GO:0032128', ['flocculation']),\n ('GO:0036281', ['flocculation']),\n ('GO:0043690', ['flocculation']),\n ('GO:0000128', ['flocculation']),\n ('GO:0043689', ['flocculation']),\n ('GO:0030609', ['establishment of mitotic spindle orientation']),\n ('GO:0000132', ['establishment of mitotic spindle orientation']),\n ('GO:0030607', ['establishment of mitotic spindle orientation']),\n ('GO:0030898', ['microfilament motor activity']),\n ('GO:0000146', ['microfilament motor activity']),\n ('GO:0016548', ['rRNA modification']),\n ('GO:0000154', ['rRNA modification']),\n ('GO:0000162', ['tryptophan biosynthetic process']),\n ('GO:0009096', ['tryptophan biosynthetic process']),\n ('GO:0000165', ['MAPK cascade']),\n ('GO:0007255', ['MAPK cascade']),\n ('GO:0043790', ['rRNA (adenine-N6,N6-)-dimethyltransferase activity']),\n ('GO:0000179', ['rRNA (adenine-N6,N6-)-dimethyltransferase activity']),\n ('GO:0000212', ['meiotic spindle organization']),\n ('GO:0043147', ['meiotic spindle organization']),\n ('GO:0000215', [\"tRNA 2'-phosphotransferase activity\"]),\n ('GO:0008665', [\"tRNA 2'-phosphotransferase activity\"]),\n ('GO:0000355', ['spliceosomal tri-snRNP complex assembly']),\n ('GO:0000244', ['spliceosomal tri-snRNP complex assembly']),\n ('GO:0000351', ['spliceosomal tri-snRNP complex assembly']),\n ('GO:0000253', ['3-keto sterol reductase activity']),\n ('GO:0050576', ['3-keto sterol reductase activity']),\n ('GO:0005051', ['peroxisome targeting sequence binding']),\n ('GO:0000268', ['peroxisome targeting sequence binding']),\n 
('GO:0000270', ['peptidoglycan metabolic process']),\n ('GO:0009284', ['peptidoglycan metabolic process']),\n ('GO:0000272', ['polysaccharide catabolic process']),\n ('GO:0044244', ['polysaccharide catabolic process']),\n ('GO:0000278', ['mitotic cell cycle']),\n ('GO:0007067', ['mitotic cell cycle']),\n ('GO:0030452', ['RNA fragment catabolic process']),\n ('GO:0000292', ['RNA fragment catabolic process']),\n ('GO:0000310', ['xanthine phosphoribosyltransferase activity']),\n ('GO:0009043', ['xanthine phosphoribosyltransferase activity']),\n ('GO:0000338', ['protein deneddylation']),\n ('GO:0010388', ['protein deneddylation']),\n ('GO:0000371', ['mRNA branch site recognition']),\n ('GO:0000370', ['mRNA branch site recognition']),\n ('GO:0000348', ['mRNA branch site recognition']),\n ('GO:0000357',\n ['generation of catalytic spliceosome for first transesterification step']),\n ('GO:0000356',\n ['generation of catalytic spliceosome for first transesterification step']),\n ('GO:0000349',\n ['generation of catalytic spliceosome for first transesterification step']),\n ('GO:0000350',\n ['generation of catalytic spliceosome for second transesterification step']),\n ('GO:0000358',\n ['generation of catalytic spliceosome for second transesterification step']),\n ('GO:0000359',\n ['generation of catalytic spliceosome for second transesterification step']),\n ('GO:0000354', ['cis assembly of pre-catalytic spliceosome']),\n ('GO:0000361', ['cis assembly of pre-catalytic spliceosome']),\n ('GO:0000360', ['cis assembly of pre-catalytic spliceosome']),\n ('GO:0000375', ['RNA splicing, via transesterification reactions']),\n ('GO:0031202', ['RNA splicing, via transesterification reactions']),\n ('GO:0000385', ['RNA splicing, via transesterification reactions']),\n ('GO:0000397',\n ['spliceosome conformational change to release U4 (or U4atac) and U1 (or U11)']),\n ('GO:0000396',\n ['spliceosome conformational change to release U4 (or U4atac) and U1 (or U11)']),\n ('GO:0000388',\n 
['spliceosome conformational change to release U4 (or U4atac) and U1 (or U11)']),\n ('GO:0000382', [\"mRNA 3'-splice site recognition\"]),\n ('GO:0000389', [\"mRNA 3'-splice site recognition\"]),\n ('GO:0000383', [\"mRNA 3'-splice site recognition\"]),\n ('GO:0000391', ['spliceosomal complex disassembly']),\n ('GO:0000390', ['spliceosomal complex disassembly']),\n ('GO:0000392', ['spliceosomal complex disassembly']),\n ('GO:0000395', [\"mRNA 5'-splice site recognition\"]),\n ('GO:0000368', [\"mRNA 5'-splice site recognition\"]),\n ('GO:0000369', [\"mRNA 5'-splice site recognition\"]),\n ('GO:0006375', ['mRNA splicing, via spliceosome']),\n ('GO:0006374', ['mRNA splicing, via spliceosome']),\n ('GO:0000398', ['mRNA splicing, via spliceosome']),\n ('GO:0000420', ['RNA polymerase IV complex']),\n ('GO:0000418', ['RNA polymerase IV complex']),\n ('GO:0000419', ['RNA polymerase V complex']),\n ('GO:0080137', ['RNA polymerase V complex']),\n ('GO:0000443', ['core TFIIH complex portion of holo TFIIH complex']),\n ('GO:0000438', ['core TFIIH complex portion of holo TFIIH complex']),\n ('GO:0000439', ['transcription factor TFIIH core complex']),\n ('GO:0000441', ['transcription factor TFIIH core complex']),\n ('GO:0000440', ['core TFIIH complex portion of NEF3 complex']),\n ('GO:0000442', ['core TFIIH complex portion of NEF3 complex']),\n ('GO:0000444', ['MIS12/MIND type complex']),\n ('GO:0000818', ['MIS12/MIND type complex']),\n ('GO:0000490',\n ['cleavage in ITS2 between 5.8S rRNA and LSU-rRNA of tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)']),\n ('GO:0000448',\n ['cleavage in ITS2 between 5.8S rRNA and LSU-rRNA of tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)']),\n ('GO:0000462',\n ['maturation of SSU-rRNA from tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)']),\n ('GO:1990041',\n ['maturation of SSU-rRNA from tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)']),\n ('GO:0000703',\n ['oxidized pyrimidine nucleobase 
lesion DNA N-glycosylase activity']),\n ('GO:0019004',\n ['oxidized pyrimidine nucleobase lesion DNA N-glycosylase activity']),\n ('GO:0016924', ['double-strand break repair via homologous recombination']),\n ('GO:0000724', ['double-strand break repair via homologous recombination']),\n ('GO:0007335', ['karyogamy']),\n ('GO:0000741', ['karyogamy']),\n ('GO:0006946',\n ['nuclear migration involved in conjugation with cellular fusion']),\n ('GO:0000743',\n ['nuclear migration involved in conjugation with cellular fusion']),\n ('GO:0030461', ['conjugation with cellular fusion']),\n ('GO:0007333', ['conjugation with cellular fusion']),\n ('GO:0000747', ['conjugation with cellular fusion']),\n ('GO:0007322', ['conjugation with cellular fusion']),\n ('GO:0030477', ['conjugation with cellular fusion']),\n ('GO:0030434',\n ['response to pheromone triggering conjugation with cellular fusion']),\n ('GO:0007328',\n ['response to pheromone triggering conjugation with cellular fusion']),\n ('GO:0000749',\n ['response to pheromone triggering conjugation with cellular fusion']),\n ('GO:0007330',\n ['pheromone-dependent signal transduction involved in conjugation with cellular fusion']),\n ('GO:0030454',\n ['pheromone-dependent signal transduction involved in conjugation with cellular fusion']),\n ('GO:0000750',\n ['pheromone-dependent signal transduction involved in conjugation with cellular fusion']),\n ('GO:0000751', ['mitotic cell cycle G1 arrest in response to pheromone']),\n ('GO:0030571', ['mitotic cell cycle G1 arrest in response to pheromone']),\n ('GO:0000752',\n ['agglutination involved in conjugation with cellular fusion']),\n ('GO:0007334',\n ['agglutination involved in conjugation with cellular fusion']),\n ('GO:0000753',\n ['cell morphogenesis involved in conjugation with cellular fusion']),\n ('GO:0007332',\n ['cell morphogenesis involved in conjugation with cellular fusion']),\n ('GO:0030453',\n ['adaptation of signaling pathway by response to pheromone involved 
in conjugation with cellular fusion']),\n ('GO:0000754',\n ['adaptation of signaling pathway by response to pheromone involved in conjugation with cellular fusion']),\n ('GO:0007331',\n ['adaptation of signaling pathway by response to pheromone involved in conjugation with cellular fusion']),\n ('GO:0000218', ['cytogamy']),\n ('GO:0030462', ['cytogamy']),\n ('GO:0000755', ['cytogamy']),\n ('GO:0007325', ['peptide pheromone export']),\n ('GO:0000770', ['peptide pheromone export']),\n ('GO:0097521', ['chromosome, centromeric region']),\n ('GO:0000775', ['chromosome, centromeric region']),\n ('GO:0031617', ['kinetochore']),\n ('GO:0000778', ['kinetochore']),\n ('GO:0000777', ['kinetochore']),\n ('GO:0005699', ['kinetochore']),\n ('GO:0000776', ['kinetochore']),\n ('GO:0000779', ['condensed chromosome, centromeric region']),\n ('GO:0000780', ['condensed chromosome, centromeric region']),\n ('GO:0000781', ['chromosome, telomeric region']),\n ('GO:0000784', ['chromosome, telomeric region']),\n ('GO:0000789', ['chromatin']),\n ('GO:0005717', ['chromatin']),\n ('GO:0000785', ['chromatin']),\n ('GO:0000790', ['chromatin']),\n ('GO:0000787', ['nucleosome']),\n ('GO:0005718', ['nucleosome']),\n ('GO:0000788', ['nucleosome']),\n ('GO:0000786', ['nucleosome']),\n ('GO:0005719', ['euchromatin']),\n ('GO:0035327', ['euchromatin']),\n ('GO:0000791', ['euchromatin']),\n ('GO:0005720', ['heterochromatin']),\n ('GO:0035328', ['heterochromatin']),\n ('GO:0000792', ['heterochromatin']),\n ('GO:0005716', ['synaptonemal complex']),\n ('GO:0000795', ['synaptonemal complex']),\n ('GO:0008620', ['condensin complex']),\n ('GO:0000799', ['condensin complex']),\n ('GO:0000797', ['condensin complex']),\n ('GO:0008621', ['condensin complex']),\n ('GO:0061814', ['condensin complex']),\n ('GO:0000796', ['condensin complex']),\n ('GO:0005676', ['condensin complex']),\n ('GO:0000902', ['cell morphogenesis']),\n ('GO:0045790', ['cell morphogenesis']),\n ('GO:0007148', ['cell morphogenesis']),\n 
('GO:0045791', ['cell morphogenesis']),\n ('GO:0016288', ['cytokinesis']),\n ('GO:0000910', ['cytokinesis']),\n ('GO:0033205', ['cytokinesis']),\n ('GO:0007104', ['cytokinesis']),\n ('GO:0000915', ['actomyosin contractile ring assembly']),\n ('GO:0045573', ['actomyosin contractile ring assembly']),\n ('GO:2000708', ['actomyosin contractile ring assembly']),\n ('GO:0071937', ['division septum assembly']),\n ('GO:1902411', ['division septum assembly']),\n ('GO:0000917', ['division septum assembly']),\n ('GO:1902409', ['septum digestion after cytokinesis']),\n ('GO:0000920', ['septum digestion after cytokinesis']),\n ('GO:2000695', ['septum digestion after cytokinesis']),\n ('GO:0000922', ['spindle pole']),\n ('GO:0030615', ['spindle pole']),\n ('GO:0061494', ['gamma-tubulin ring complex']),\n ('GO:0000925', ['gamma-tubulin ring complex']),\n ('GO:0000926', ['gamma-tubulin ring complex']),\n ('GO:0000931', ['gamma-tubulin ring complex']),\n ('GO:0000929', ['gamma-tubulin ring complex']),\n ('GO:0008274', ['gamma-tubulin ring complex']),\n ('GO:0055033', ['gamma-tubulin ring complex']),\n ('GO:0055031', ['gamma-tubulin ring complex']),\n ('GO:0055032', ['gamma-tubulin ring complex']),\n ('GO:0000924', ['gamma-tubulin ring complex']),\n ('GO:0000935', ['division septum']),\n ('GO:0043187', ['division septum']),\n ('GO:0000939', ['inner kinetochore']),\n ('GO:0000941', ['inner kinetochore']),\n ('GO:0000940', ['outer kinetochore']),\n ('GO:0000942', ['outer kinetochore']),\n ('GO:0044212', ['transcription cis-regulatory region binding']),\n ('GO:0000984', ['transcription cis-regulatory region binding']),\n ('GO:0000975', ['transcription cis-regulatory region binding']),\n ('GO:0000976', ['transcription cis-regulatory region binding']),\n ('GO:0001017', ['transcription cis-regulatory region binding']),\n ('GO:0000977',\n ['RNA polymerase II transcription regulatory region sequence-specific DNA binding']),\n ('GO:0001012',\n ['RNA polymerase II transcription regulatory 
region sequence-specific DNA binding']),\n ('GO:0000980',\n ['RNA polymerase II cis-regulatory region sequence-specific DNA binding']),\n ('GO:0000978',\n ['RNA polymerase II cis-regulatory region sequence-specific DNA binding']),\n ('GO:0001203',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0000982',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0003705',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0001202',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0001133',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0001201',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0000981',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0001200',\n ['DNA-binding transcription factor activity, RNA polymerase II-specific']),\n ('GO:0000986', ['cis-regulatory region sequence-specific DNA binding']),\n ('GO:0001158', ['cis-regulatory region sequence-specific DNA binding']),\n ('GO:0035326', ['cis-regulatory region sequence-specific DNA binding']),\n ('GO:0001159', ['cis-regulatory region sequence-specific DNA binding']),\n ('GO:0000987', ['cis-regulatory region sequence-specific DNA binding']),\n ('GO:0001150', ['cis-regulatory region sequence-specific DNA binding']),\n ('GO:0000995',\n ['RNA polymerase III general transcription initiation factor activity']),\n ('GO:0001034',\n ['RNA polymerase III general transcription initiation factor activity']),\n ('GO:0001002',\n ['RNA polymerase III type 1 promoter sequence-specific DNA binding']),\n ('GO:0001030',\n ['RNA polymerase III type 1 promoter sequence-specific DNA binding']),\n ('GO:0001003',\n ['RNA polymerase III type 2 promoter sequence-specific DNA binding']),\n ('GO:0001031',\n ['RNA polymerase III type 2 promoter sequence-specific DNA binding']),\n 
('GO:0001032',\n ['RNA polymerase III type 3 promoter sequence-specific DNA binding']),\n ('GO:0001006',\n ['RNA polymerase III type 3 promoter sequence-specific DNA binding']),\n ('GO:0070363', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0070364', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0001045', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0070361', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0001018', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0001044', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0070362', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0000997', ['mitochondrial promoter sequence-specific DNA binding']),\n ('GO:0001039',\n ['RNA polymerase III hybrid type promoter sequence-specific DNA binding']),\n ('GO:0001037',\n ['RNA polymerase III hybrid type promoter sequence-specific DNA binding']),\n ('GO:0001046', ['core promoter sequence-specific DNA binding']),\n ('GO:0000985', ['core promoter sequence-specific DNA binding']),\n ('GO:0001047', ['core promoter sequence-specific DNA binding']),\n ('GO:0001109', ['promoter clearance during DNA-templated transcription']),\n ('GO:0001122', ['promoter clearance during DNA-templated transcription']),\n ('GO:0001127', ['DNA-templated transcription open complex formation']),\n ('GO:0001112', ['DNA-templated transcription open complex formation']),\n ('GO:0001160',\n ['transcription termination site sequence-specific DNA binding']),\n ('GO:0001145',\n ['transcription termination site sequence-specific DNA binding']),\n ('GO:0001148',\n ['transcription termination site sequence-specific DNA binding']),\n ('GO:0001147',\n ['transcription termination site sequence-specific DNA binding']),\n ('GO:0001146',\n ['transcription termination site sequence-specific DNA binding']),\n ('GO:0044213',\n ['intronic transcription regulatory region sequence-specific DNA 
binding']),\n ('GO:0001161',\n ['intronic transcription regulatory region sequence-specific DNA binding']),\n ('GO:0001163',\n ['RNA polymerase I transcription regulatory region sequence-specific DNA binding']),\n ('GO:0001013',\n ['RNA polymerase I transcription regulatory region sequence-specific DNA binding']),\n ('GO:0001187',\n ['RNA polymerase I core promoter sequence-specific DNA binding']),\n ('GO:0001164',\n ['RNA polymerase I core promoter sequence-specific DNA binding']),\n ('GO:0001166',\n ['RNA polymerase I cis-regulatory region sequence-specific DNA binding']),\n ('GO:0001165',\n ['RNA polymerase I cis-regulatory region sequence-specific DNA binding']),\n ('GO:0001173', ['DNA-templated transcriptional start site selection']),\n ('GO:0001176', ['DNA-templated transcriptional start site selection']),\n ('GO:0001184', ['RNA polymerase I promoter clearance']),\n ('GO:0001182', ['RNA polymerase I promoter clearance']),\n ('GO:0001189', ['RNA polymerase I preinitiation complex assembly']),\n ('GO:0001188', ['RNA polymerase I preinitiation complex assembly']),\n ('GO:0001192',\n ['maintenance of transcriptional fidelity during transcription elongation']),\n ('GO:0001194',\n ['maintenance of transcriptional fidelity during transcription elongation']),\n ('GO:0001216', ['DNA-binding transcription activator activity']),\n ('GO:0001140', ['DNA-binding transcription activator activity']),\n ('GO:0001215', ['DNA-binding transcription activator activity']),\n ('GO:0001220', ['DNA-binding transcription repressor activity']),\n ('GO:0001141', ['DNA-binding transcription repressor activity']),\n ('GO:0001219', ['DNA-binding transcription repressor activity']),\n ('GO:0001217', ['DNA-binding transcription repressor activity']),\n ('GO:0001218', ['DNA-binding transcription repressor activity']),\n ('GO:0001224', ['transcription coregulator binding']),\n ('GO:0001221', ['transcription coregulator binding']),\n ('GO:0001226', ['transcription corepressor binding']),\n 
('GO:0001222', ['transcription corepressor binding']),\n ('GO:0001223', ['transcription coactivator binding']),\n ('GO:0001225', ['transcription coactivator binding']),\n ('GO:0001227',\n ['DNA-binding transcription repressor activity, RNA polymerase II-specific']),\n ('GO:0001206',\n ['DNA-binding transcription repressor activity, RNA polymerase II-specific']),\n ('GO:0001210',\n ['DNA-binding transcription repressor activity, RNA polymerase II-specific']),\n ('GO:0001214',\n ['DNA-binding transcription repressor activity, RNA polymerase II-specific']),\n ('GO:0001078',\n ['DNA-binding transcription repressor activity, RNA polymerase II-specific']),\n ('GO:0001205',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0001077',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0001211',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0001212',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0001213',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0001209',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0001228',\n ['DNA-binding transcription activator activity, RNA polymerase II-specific']),\n ('GO:0055027', ['chlamydospore formation']),\n ('GO:0001410', ['chlamydospore formation']),\n ('GO:0042833', ['response to protozoan']),\n ('GO:0001562', ['response to protozoan']),\n ('GO:0001590',\n ['dopamine neurotransmitter receptor activity, coupled via Gs']),\n ('GO:0001588',\n ['dopamine neurotransmitter receptor activity, coupled via Gs']),\n ('GO:0001589',\n ['dopamine neurotransmitter receptor activity, coupled via Gs']),\n ('GO:0001591',\n ['dopamine neurotransmitter receptor activity, coupled via Gi/Go']),\n ('GO:0001670',\n ['dopamine neurotransmitter receptor activity, coupled via Gi/Go']),\n ('GO:0001592',\n ['dopamine 
neurotransmitter receptor activity, coupled via Gi/Go']),\n ('GO:0001593',\n ['dopamine neurotransmitter receptor activity, coupled via Gi/Go']),\n ('GO:0001610', ['G protein-coupled adenosine receptor activity']),\n ('GO:0001612', ['G protein-coupled adenosine receptor activity']),\n ('GO:0001613', ['G protein-coupled adenosine receptor activity']),\n ('GO:0001611', ['G protein-coupled adenosine receptor activity']),\n ('GO:0001609', ['G protein-coupled adenosine receptor activity']),\n ('GO:0008501', ['G protein-coupled adenosine receptor activity']),\n ('GO:0001614', ['purinergic nucleotide receptor activity']),\n ('GO:0035586', ['purinergic nucleotide receptor activity']),\n ('GO:0001621', ['G protein-coupled ADP receptor activity']),\n ('GO:0045032', ['G protein-coupled ADP receptor activity']),\n ('GO:0001634',\n ['pituitary adenylate cyclase-activating polypeptide receptor activity']),\n ('GO:0016522',\n ['pituitary adenylate cyclase-activating polypeptide receptor activity']),\n ('GO:0001644', ['cAMP receptor activity']),\n ('GO:0001646', ['cAMP receptor activity']),\n ('GO:0001654', ['eye development']),\n ('GO:0042460', ['eye development']),\n ('GO:0043081', ['male germ cell nucleus']),\n ('GO:0001673', ['male germ cell nucleus']),\n ('GO:0043080', ['female germ cell nucleus']),\n ('GO:0001674', ['female germ cell nucleus']),\n ('GO:0001693', ['histamine biosynthetic process']),\n ('GO:0001694', ['histamine biosynthetic process']),\n ('GO:0010003', ['gastrulation with mouth forming second']),\n ('GO:0001702', ['gastrulation with mouth forming second']),\n ('GO:0048276', ['gastrulation with mouth forming second']),\n ('GO:0007457', ['insect visual primordium formation']),\n ('GO:0001744', ['insect visual primordium formation']),\n ('GO:0048049', ['insect visual primordium development']),\n ('GO:0001748', ['insect visual primordium development']),\n ('GO:0007459', ['compound eye photoreceptor fate commitment']),\n ('GO:0001752', ['compound eye photoreceptor 
fate commitment']),\n ('GO:0016069', ['type IV hypersensitivity']),\n ('GO:0001806', ['type IV hypersensitivity']),\n ('GO:0050663', ['cytokine production']),\n ('GO:0042032', ['cytokine production']),\n ('GO:0042089', ['cytokine production']),\n ('GO:0001816', ['cytokine production']),\n ('GO:0042107', ['cytokine production']),\n ('GO:0042035', ['regulation of cytokine production']),\n ('GO:0050707', ['regulation of cytokine production']),\n ('GO:0001817', ['regulation of cytokine production']),\n ('GO:0042036', ['negative regulation of cytokine production']),\n ('GO:0001818', ['negative regulation of cytokine production']),\n ('GO:0050710', ['negative regulation of cytokine production']),\n ('GO:0050715', ['positive regulation of cytokine production']),\n ('GO:0001819', ['positive regulation of cytokine production']),\n ('GO:0042108', ['positive regulation of cytokine production']),\n ('GO:0001679', ['neural tube formation']),\n ('GO:0001841', ['neural tube formation']),\n ('GO:0001872', ['(1->3)-beta-D-glucan binding']),\n ('GO:0080087', ['(1->3)-beta-D-glucan binding']),\n ('GO:0001943', ['hair follicle development']),\n ('GO:0001942', ['hair follicle development']),\n ('GO:0002003', ['angiotensin maturation']),\n ('GO:0002005', ['angiotensin maturation']),\n ('GO:0002036', ['regulation of L-glutamate import across plasma membrane']),\n ('GO:1900920', ['regulation of L-glutamate import across plasma membrane']),\n ('GO:0002037',\n ['negative regulation of L-glutamate import across plasma membrane']),\n ('GO:1900921',\n ['negative regulation of L-glutamate import across plasma membrane']),\n ('GO:1900922',\n ['positive regulation of L-glutamate import across plasma membrane']),\n ('GO:0002038',\n ['positive regulation of L-glutamate import across plasma membrane']),\n ('GO:0016030', ['opsin binding']),\n ('GO:0002046', ['opsin binding']),\n ('GO:0002162', ['dystroglycan binding']),\n ('GO:0002166', ['dystroglycan binding']),\n ('GO:0002163', ['dystroglycan 
binding']),\n ('GO:0002214', ['defense response to insect']),\n ('GO:0002213', ['defense response to insect']),\n ('GO:0002216', ['defense response to nematode']),\n ('GO:0002215', ['defense response to nematode']),\n ('GO:0002219', ['activation of innate immune response']),\n ('GO:0002218', ['activation of innate immune response']),\n ('GO:0002367', ['cytokine production involved in immune response']),\n ('GO:0002374', ['cytokine production involved in immune response']),\n ('GO:0002375', ['cytokine production involved in immune response']),\n ('GO:0048305', ['immunoglobulin production']),\n ('GO:0002377', ['immunoglobulin production']),\n ('GO:0002378', ['immunoglobulin production']),\n ('GO:0002381',\n ['immunoglobulin production involved in immunoglobulin-mediated immune response']),\n ('GO:0002379',\n ['immunoglobulin production involved in immunoglobulin-mediated immune response']),\n ('GO:0002380',\n ['immunoglobulin production involved in immunoglobulin-mediated immune response']),\n ('GO:0002422', ['mucosal immune response']),\n ('GO:0002385', ['mucosal immune response']),\n ('GO:0002386', ['mucosal immune response']),\n ('GO:0002391',\n ['platelet activating factor production involved in inflammatory response']),\n ('GO:0002535',\n ['platelet activating factor production involved in inflammatory response']),\n ('GO:0002390',\n ['platelet activating factor production involved in inflammatory response']),\n ('GO:0002392',\n ['platelet activating factor production involved in inflammatory response']),\n ('GO:0042087', ['leukocyte mediated immunity']),\n ('GO:0019723', ['leukocyte mediated immunity']),\n ('GO:0002443', ['leukocyte mediated immunity']),\n ('GO:0002640', ['regulation of immunoglobulin production']),\n ('GO:0002637', ['regulation of immunoglobulin production']),\n ('GO:0051023', ['regulation of immunoglobulin production']),\n ('GO:0002641', ['negative regulation of immunoglobulin production']),\n ('GO:0051025', ['negative regulation of 
immunoglobulin production']),\n ('GO:0002638', ['negative regulation of immunoglobulin production']),\n ('GO:0002642', ['positive regulation of immunoglobulin production']),\n ('GO:0051024', ['positive regulation of immunoglobulin production']),\n ('GO:0002639', ['positive regulation of immunoglobulin production']),\n ('GO:0002715', ['regulation of natural killer cell mediated immunity']),\n ('GO:0045845', ['regulation of natural killer cell mediated immunity']),\n ('GO:0030102',\n ['negative regulation of natural killer cell mediated immunity']),\n ('GO:0002716',\n ['negative regulation of natural killer cell mediated immunity']),\n ('GO:0045846',\n ['positive regulation of natural killer cell mediated immunity']),\n ('GO:0002717',\n ['positive regulation of natural killer cell mediated immunity']),\n ('GO:0002742',\n ['regulation of cytokine production involved in immune response']),\n ('GO:0002739',\n ['regulation of cytokine production involved in immune response']),\n ('GO:0002718',\n ['regulation of cytokine production involved in immune response']),\n ('GO:0002740',\n ['negative regulation of cytokine production involved in immune response']),\n ('GO:0002743',\n ['negative regulation of cytokine production involved in immune response']),\n ('GO:0002719',\n ['negative regulation of cytokine production involved in immune response']),\n ('GO:0002720',\n ['positive regulation of cytokine production involved in immune response']),\n ('GO:0002744',\n ['positive regulation of cytokine production involved in immune response']),\n ('GO:0002741',\n ['positive regulation of cytokine production involved in immune response']),\n ('GO:0039528',\n ['cytoplasmic pattern recognition receptor signaling pathway']),\n ('GO:0002753',\n ['cytoplasmic pattern recognition receptor signaling pathway']),\n ('GO:0002758', ['innate immune response-activating signaling pathway']),\n ('GO:0009870', ['innate immune response-activating signaling pathway']),\n ('GO:0010204', ['innate immune 
response-activating signaling pathway']),\n ('GO:0070526', ['tRNA threonylcarbamoyladenosine modification']),\n ('GO:0002949', ['tRNA threonylcarbamoyladenosine modification']),\n ('GO:0010802', ['respiratory system process']),\n ('GO:0003016', ['respiratory system process']),\n ('GO:0003123', ['epinephrine-mediated vasodilation']),\n ('GO:0003121', ['epinephrine-mediated vasodilation']),\n ('GO:0003124', ['epinephrine-mediated vasodilation']),\n ('GO:0003126', ['norepinephrine-mediated vasodilation']),\n ('GO:0003125', ['norepinephrine-mediated vasodilation']),\n ('GO:0003122', ['norepinephrine-mediated vasodilation']),\n ('GO:0036142', ['cilium movement']),\n ('GO:0003341', ['cilium movement']),\n ('GO:1900172', ['regulation of cilium movement']),\n ('GO:0003352', ['regulation of cilium movement']),\n ('GO:0003353', ['positive regulation of cilium movement']),\n ('GO:1900174', ['positive regulation of cilium movement']),\n ('GO:0003354', ['negative regulation of cilium movement']),\n ('GO:1900173', ['negative regulation of cilium movement']),\n ('GO:0036144', ['regulation of cilium beat frequency']),\n ('GO:0003356', ['regulation of cilium beat frequency']),\n ('GO:0001789', ['sphingosine-1-phosphate receptor signaling pathway']),\n ('GO:0003376', ['sphingosine-1-phosphate receptor signaling pathway']),\n ('GO:0000496', ['nucleic acid binding']),\n ('GO:0003676', ['nucleic acid binding']),\n ('GO:0043566', ['DNA binding']),\n ('GO:0003677', ['DNA binding']),\n ('GO:0003679', ['DNA helicase activity']),\n ('GO:0004003', ['DNA helicase activity']),\n ('GO:0003678', ['DNA helicase activity']),\n ('GO:0003689', ['DNA clamp loader activity']),\n ('GO:0033170', ['DNA clamp loader activity']),\n ('GO:0003697', ['single-stranded DNA binding']),\n ('GO:0003698', ['single-stranded DNA binding']),\n ('GO:0003699', ['single-stranded DNA binding']),\n ('GO:0001199', ['DNA-binding transcription factor activity']),\n ('GO:0001131', ['DNA-binding transcription factor 
activity']),\n ('GO:0001071', ['DNA-binding transcription factor activity']),\n ('GO:0001151', ['DNA-binding transcription factor activity']),\n ('GO:0003700', ['DNA-binding transcription factor activity']),\n ('GO:0001204', ['DNA-binding transcription factor activity']),\n ('GO:0000130', ['DNA-binding transcription factor activity']),\n ('GO:0001130', ['DNA-binding transcription factor activity']),\n ('GO:0003712', ['transcription coregulator activity']),\n ('GO:0016455', ['transcription coregulator activity']),\n ('GO:0001104', ['transcription coregulator activity']),\n ('GO:0001105', ['transcription coactivator activity']),\n ('GO:0003713', ['transcription coactivator activity']),\n ('GO:0003714', ['transcription corepressor activity']),\n ('GO:0001106', ['transcription corepressor activity']),\n ('GO:0003723', ['RNA binding']),\n ('GO:0000498', ['RNA binding']),\n ('GO:0044822', ['RNA binding']),\n ('GO:0003724', ['RNA helicase activity']),\n ('GO:0004004', ['RNA helicase activity']),\n ('GO:0003726', ['double-stranded RNA adenosine deaminase activity']),\n ('GO:0003971', ['double-stranded RNA adenosine deaminase activity']),\n ('GO:0003728', ['single-stranded RNA binding']),\n ('GO:0003727', ['single-stranded RNA binding']),\n ('GO:0000499', ['mRNA binding']),\n ('GO:0003729', ['mRNA binding']),\n ('GO:0003738', ['structural constituent of ribosome']),\n ('GO:0003735', ['structural constituent of ribosome']),\n ('GO:0003737', ['structural constituent of ribosome']),\n ('GO:0003739', ['structural constituent of ribosome']),\n ('GO:0003742', ['structural constituent of ribosome']),\n ('GO:0003741', ['structural constituent of ribosome']),\n ('GO:0003740', ['structural constituent of ribosome']),\n ('GO:0003736', ['structural constituent of ribosome']),\n ('GO:0003744', ['translation initiation factor activity']),\n ('GO:0003745', ['translation initiation factor activity']),\n ('GO:0003743', ['translation initiation factor activity']),\n ('GO:0008182', 
['translation elongation factor activity']),\n ('GO:0003746', ['translation elongation factor activity']),\n ('GO:0008183', ['translation elongation factor activity']),\n ('GO:0003749', ['translation release factor activity']),\n ('GO:0003747', ['translation release factor activity']),\n ('GO:0003748', ['translation release factor activity']),\n ('GO:0003755', ['peptidyl-prolyl cis-trans isomerase activity']),\n ('GO:0004752', ['peptidyl-prolyl cis-trans isomerase activity']),\n ('GO:0042028', ['peptidyl-prolyl cis-trans isomerase activity']),\n ('GO:0003756', ['protein disulfide isomerase activity']),\n ('GO:0006467', ['protein disulfide isomerase activity']),\n ('GO:0003777', ['microtubule motor activity']),\n ('GO:1990939', ['microtubule motor activity']),\n ('GO:0102101', ['sterol 24-C-methyltransferase activity']),\n ('GO:0003838', ['sterol 24-C-methyltransferase activity']),\n ('GO:0003841', ['1-acylglycerol-3-phosphate O-acyltransferase activity']),\n ('GO:0004469', ['1-acylglycerol-3-phosphate O-acyltransferase activity']),\n ('GO:0003843', ['1,3-beta-D-glucan synthase activity']),\n ('GO:0009981', ['1,3-beta-D-glucan synthase activity']),\n ('GO:0003853', ['2-methylbutanoyl-CoA dehydrogenase activity']),\n ('GO:0047119', ['2-methylbutanoyl-CoA dehydrogenase activity']),\n ('GO:0003826',\n ['3-methyl-2-oxobutanoate dehydrogenase (2-methylpropanoyl-transferring) activity']),\n ('GO:0003863',\n ['3-methyl-2-oxobutanoate dehydrogenase (2-methylpropanoyl-transferring) activity']),\n ('GO:0003878', ['ATP citrate synthase activity']),\n ('GO:0046913', ['ATP citrate synthase activity']),\n ('GO:0003886', ['DNA (cytosine-5-)-methyltransferase activity']),\n ('GO:0008326', ['DNA (cytosine-5-)-methyltransferase activity']),\n ('GO:0016000', ['DNA-directed DNA polymerase activity']),\n ('GO:0016452', ['DNA-directed DNA polymerase activity']),\n ('GO:0003888', ['DNA-directed DNA polymerase activity']),\n ('GO:0016448', ['DNA-directed DNA polymerase activity']),\n 
('GO:0003887', ['DNA-directed DNA polymerase activity']),\n ('GO:0019984', ['DNA-directed DNA polymerase activity']),\n ('GO:0016450', ['DNA-directed DNA polymerase activity']),\n ('GO:0003893', ['DNA-directed DNA polymerase activity']),\n ('GO:0016451', ['DNA-directed DNA polymerase activity']),\n ('GO:0015999', ['DNA-directed DNA polymerase activity']),\n ('GO:0003891', ['DNA-directed DNA polymerase activity']),\n ('GO:0003894', ['DNA-directed DNA polymerase activity']),\n ('GO:0008723', ['DNA-directed DNA polymerase activity']),\n ('GO:0003895', ['DNA-directed DNA polymerase activity']),\n ('GO:0003890', ['DNA-directed DNA polymerase activity']),\n ('GO:0016449', ['DNA-directed DNA polymerase activity']),\n ('GO:0003889', ['DNA-directed DNA polymerase activity']),\n ('GO:0003897', ['DNA primase activity']),\n ('GO:0003896', ['DNA primase activity']),\n ('GO:0003898', ['DNA primase activity']),\n ('GO:0003899', [\"DNA-directed 5'-3' RNA polymerase activity\"]),\n ('GO:0000129', [\"DNA-directed 5'-3' RNA polymerase activity\"]),\n ('GO:0003905', ['alkylbase DNA N-glycosylase activity']),\n ('GO:0004036', ['alkylbase DNA N-glycosylase activity']),\n ('GO:0003916', ['DNA topoisomerase activity']),\n ('GO:0009387', ['DNA topoisomerase activity']),\n ('GO:0003918',\n ['DNA topoisomerase type II (double strand cut, ATP-hydrolyzing) activity']),\n ('GO:0061505',\n ['DNA topoisomerase type II (double strand cut, ATP-hydrolyzing) activity']),\n ('GO:0061745', ['GTPase activity']),\n ('GO:0003924', ['GTPase activity']),\n ('GO:0003925', ['G protein activity']),\n ('GO:0003927', ['G protein activity']),\n ('GO:0016660', ['NADPH dehydrogenase activity']),\n ('GO:0008468', ['NADPH dehydrogenase activity']),\n ('GO:0003959', ['NADPH dehydrogenase activity']),\n ('GO:0003961', ['O-acetylhomoserine aminocarboxypropyltransferase activity']),\n ('GO:0019282', ['O-acetylhomoserine aminocarboxypropyltransferase activity']),\n ('GO:0003962', ['cystathionine gamma-synthase 
activity']),\n ('GO:0000505', ['cystathionine gamma-synthase activity']),\n ('GO:0052854', ['(S)-2-hydroxy-acid oxidase activity']),\n ('GO:0052852', ['(S)-2-hydroxy-acid oxidase activity']),\n ('GO:0008891', ['(S)-2-hydroxy-acid oxidase activity']),\n ('GO:0052853', ['(S)-2-hydroxy-acid oxidase activity']),\n ('GO:0003973', ['(S)-2-hydroxy-acid oxidase activity']),\n ('GO:0047318',\n ['N2-acetyl-L-ornithine:2-oxoglutarate 5-aminotransferase activity']),\n ('GO:0003992',\n ['N2-acetyl-L-ornithine:2-oxoglutarate 5-aminotransferase activity']),\n ('GO:0003994', ['aconitate hydratase activity']),\n ('GO:0052632', ['aconitate hydratase activity']),\n ('GO:0003995', ['acyl-CoA dehydrogenase activity']),\n ('GO:0019109', ['acyl-CoA dehydrogenase activity']),\n ('GO:0004031', ['aldehyde oxidase activity']),\n ('GO:0050250', ['aldehyde oxidase activity']),\n ('GO:0019851', ['aminoacyl-tRNA hydrolase activity']),\n ('GO:0019850', ['aminoacyl-tRNA hydrolase activity']),\n ('GO:0004045', ['aminoacyl-tRNA hydrolase activity']),\n ('GO:0004057', ['arginyl-tRNA--protein transferase activity']),\n ('GO:0042172', ['arginyl-tRNA--protein transferase activity']),\n ('GO:0016400', ['aromatic-L-amino-acid decarboxylase activity']),\n ('GO:0004058', ['aromatic-L-amino-acid decarboxylase activity']),\n ('GO:0004093', ['carnitine O-acetyltransferase activity']),\n ('GO:0004094', ['carnitine O-acetyltransferase activity']),\n ('GO:0004092', ['carnitine O-acetyltransferase activity']),\n ('GO:0004096', ['catalase activity']),\n ('GO:0016953', ['catalase activity']),\n ('GO:0016952', ['catalase activity']),\n ('GO:0102316', ['catechol oxidase activity']),\n ('GO:0036263', ['catechol oxidase activity']),\n ('GO:0004097', ['catechol oxidase activity']),\n ('GO:0036264', ['catechol oxidase activity']),\n ('GO:0004123', ['cystathionine gamma-lyase activity']),\n ('GO:0016225', ['cystathionine gamma-lyase activity']),\n ('GO:0004165', ['delta(3)-delta(2)-enoyl-CoA isomerase activity']),\n 
('GO:0008461', ['delta(3)-delta(2)-enoyl-CoA isomerase activity']),\n ('GO:0004166',\n ['dolichyl-phosphate alpha-N-acetylglucosaminyltransferase activity']),\n ('GO:0004101',\n ['dolichyl-phosphate alpha-N-acetylglucosaminyltransferase activity']),\n ('GO:0004167', ['dopachrome isomerase activity']),\n ('GO:0048059', ['dopachrome isomerase activity']),\n ('GO:0004175', ['endopeptidase activity']),\n ('GO:0016809', ['endopeptidase activity']),\n ('GO:0004280', ['ATP-dependent peptidase activity']),\n ('GO:0004176', ['ATP-dependent peptidase activity']),\n ('GO:0016510', ['enoyl-CoA hydratase activity']),\n ('GO:0004300', ['enoyl-CoA hydratase activity']),\n ('GO:0004315', ['3-oxoacyl-[acyl-carrier-protein] synthase activity']),\n ('GO:0033817', ['3-oxoacyl-[acyl-carrier-protein] synthase activity']),\n ('GO:0034566', ['formamidase activity']),\n ('GO:0004328', ['formamidase activity']),\n ('GO:0004330', ['fructose-2,6-bisphosphate 2-phosphatase activity']),\n ('GO:0004331', ['fructose-2,6-bisphosphate 2-phosphatase activity']),\n ('GO:0004344', ['glucose dehydrogenase activity']),\n ('GO:0008708', ['glucose dehydrogenase activity']),\n ('GO:0001513', ['glutamate-5-semialdehyde dehydrogenase activity']),\n ('GO:0004350', ['glutamate-5-semialdehyde dehydrogenase activity']),\n ('GO:0004376', ['glycolipid mannosyltransferase activity']),\n ('GO:0004580', ['glycolipid mannosyltransferase activity']),\n ('GO:0019106', ['glycylpeptide N-tetradecanoyltransferase activity']),\n ('GO:0004379', ['glycylpeptide N-tetradecanoyltransferase activity']),\n ('GO:0004386', ['helicase activity']),\n ('GO:0008026', ['helicase activity']),\n ('GO:0004405', ['histone acetyltransferase activity']),\n ('GO:0043166', ['histone acetyltransferase activity']),\n ('GO:0004404', ['histone acetyltransferase activity']),\n ('GO:0004403', ['histone acetyltransferase activity']),\n ('GO:0004406', ['histone acetyltransferase activity']),\n ('GO:0046971', ['histone acetyltransferase activity']),\n 
('GO:0004402', ['histone acetyltransferase activity']),\n ('GO:0042282', ['hydroxymethylglutaryl-CoA reductase (NADPH) activity']),\n ('GO:0004420', ['hydroxymethylglutaryl-CoA reductase (NADPH) activity']),\n ('GO:0016315', ['phosphatidylinositol-3-phosphate phosphatase activity']),\n ('GO:0004438', ['phosphatidylinositol-3-phosphate phosphatase activity']),\n ('GO:0004439',\n ['phosphatidylinositol-4,5-bisphosphate 5-phosphatase activity']),\n ('GO:0001668',\n ['phosphatidylinositol-4,5-bisphosphate 5-phosphatase activity']),\n ('GO:0003996', ['long-chain fatty acid-CoA ligase activity']),\n ('GO:0004467', ['long-chain fatty acid-CoA ligase activity']),\n ('GO:0004471', ['malate dehydrogenase (decarboxylating) (NAD+) activity']),\n ('GO:0004472', ['malate dehydrogenase (decarboxylating) (NAD+) activity']),\n ('GO:0016619', ['malate dehydrogenase (decarboxylating) (NAD+) activity']),\n ('GO:0001718', ['methionyl-tRNA formyltransferase activity']),\n ('GO:0004479', ['methionyl-tRNA formyltransferase activity']),\n ('GO:0070128', ['methionyl-tRNA formyltransferase activity']),\n ('GO:0004489', ['methylenetetrahydrofolate reductase (NAD(P)H) activity']),\n ('GO:0008702', ['methylenetetrahydrofolate reductase (NAD(P)H) activity']),\n ('GO:0004499', ['N,N-dimethylaniline monooxygenase activity']),\n ('GO:0047076', ['N,N-dimethylaniline monooxygenase activity']),\n ('GO:0004524', ['RNA-DNA hybrid ribonuclease activity']),\n ('GO:0004523', ['RNA-DNA hybrid ribonuclease activity']),\n ('GO:0004527', ['exonuclease activity']),\n ('GO:0008857', ['exonuclease activity']),\n ('GO:0004536', ['DNA nuclease activity']),\n ('GO:0004537', ['DNA nuclease activity']),\n ('GO:0016800', ['hydrolase activity, hydrolyzing O-glycosyl compounds']),\n ('GO:0004553', ['hydrolase activity, hydrolyzing O-glycosyl compounds']),\n ('GO:0103025', ['alpha-amylase activity']),\n ('GO:0004556', ['alpha-amylase activity']),\n ('GO:0004558', ['alpha-1,4-glucosidase activity']),\n ('GO:0016982', 
['alpha-1,4-glucosidase activity']),\n ('GO:0004562', ['alpha-1,4-glucosidase activity']),\n ('GO:0004601', ['peroxidase activity']),\n ('GO:0016686', ['peroxidase activity']),\n ('GO:0016693', ['peroxidase activity']),\n ('GO:0016687', ['peroxidase activity']),\n ('GO:0016685', ['peroxidase activity']),\n ('GO:0004602', ['glutathione peroxidase activity']),\n ('GO:0016224', ['glutathione peroxidase activity']),\n ('GO:0008971', ['phosphomannomutase activity']),\n ('GO:0004615', ['phosphomannomutase activity']),\n ('GO:0045126', ['lysophospholipase activity']),\n ('GO:0004622', ['lysophospholipase activity']),\n ('GO:0004623', ['phospholipase A2 activity']),\n ('GO:0102568', ['phospholipase A2 activity']),\n ('GO:0102567', ['phospholipase A2 activity']),\n ('GO:0004629', ['phospholipase C activity']),\n ('GO:0042298', ['phospholipase C activity']),\n ('GO:0004646',\n ['O-phospho-L-serine:2-oxoglutarate aminotransferase activity']),\n ('GO:0004648',\n ['O-phospho-L-serine:2-oxoglutarate aminotransferase activity']),\n ('GO:0018223', ['protein farnesyltransferase activity']),\n ('GO:0004660', ['protein farnesyltransferase activity']),\n ('GO:0018224', ['protein geranylgeranyltransferase activity']),\n ('GO:0004661', ['protein geranylgeranyltransferase activity']),\n ('GO:0018225',\n ['protein C-terminal S-isoprenylcysteine carboxyl O-methyltransferase activity']),\n ('GO:0004671',\n ['protein C-terminal S-isoprenylcysteine carboxyl O-methyltransferase activity']),\n ('GO:0050222', ['protein kinase activity']),\n ('GO:0004672', ['protein kinase activity']),\n ('GO:0008896', ['protein histidine kinase activity']),\n ('GO:0004673', ['protein histidine kinase activity']),\n ('GO:0004700', ['protein serine/threonine kinase activity']),\n ('GO:0106311', ['protein serine/threonine kinase activity']),\n ('GO:0004674', ['protein serine/threonine kinase activity']),\n ('GO:0004695', ['protein serine/threonine kinase activity']),\n ('GO:0004696', ['protein serine/threonine 
kinase activity']),\n ('GO:0004688', ['calmodulin-dependent protein kinase activity']),\n ('GO:0004684', ['calmodulin-dependent protein kinase activity']),\n ('GO:0004683', ['calmodulin-dependent protein kinase activity']),\n ('GO:0004685', ['calmodulin-dependent protein kinase activity']),\n ('GO:0004689', ['phosphorylase kinase activity']),\n ('GO:0008606', ['phosphorylase kinase activity']),\n ('GO:0008602', ['cAMP-dependent protein kinase activity']),\n ('GO:0004691', ['cAMP-dependent protein kinase activity']),\n ('GO:0004693', ['cyclin-dependent protein serine/threonine kinase activity']),\n ('GO:0016537', ['cyclin-dependent protein serine/threonine kinase activity']),\n ('GO:0004701', ['diacylglycerol-dependent serine/threonine kinase activity']),\n ('GO:0004697', ['diacylglycerol-dependent serine/threonine kinase activity']),\n ('GO:0004678', ['G protein-coupled receptor kinase activity']),\n ('GO:0004703', ['G protein-coupled receptor kinase activity']),\n ('GO:0016908', ['MAP kinase activity']),\n ('GO:0016909', ['MAP kinase activity']),\n ('GO:0004707', ['MAP kinase activity']),\n ('GO:0008339', ['MAP kinase activity']),\n ('GO:0008338', ['MAP kinase activity']),\n ('GO:0004709', ['MAP kinase kinase kinase activity']),\n ('GO:0004710', ['MAP kinase kinase kinase activity']),\n ('GO:0004718', ['protein tyrosine kinase activity']),\n ('GO:0004713', ['protein tyrosine kinase activity']),\n ('GO:0018056', ['protein-lysine 6-oxidase activity']),\n ('GO:0004720', ['protein-lysine 6-oxidase activity']),\n ('GO:0000158', ['protein serine/threonine phosphatase activity']),\n ('GO:0004724', ['protein serine/threonine phosphatase activity']),\n ('GO:0030360', ['protein serine/threonine phosphatase activity']),\n ('GO:0008600', ['protein serine/threonine phosphatase activity']),\n ('GO:0030361', ['protein serine/threonine phosphatase activity']),\n ('GO:0106307', ['protein serine/threonine phosphatase activity']),\n ('GO:0000163', ['protein serine/threonine 
phosphatase activity']),\n ('GO:0030357', ['protein serine/threonine phosphatase activity']),\n ('GO:0106306', ['protein serine/threonine phosphatase activity']),\n ('GO:0008598', ['protein serine/threonine phosphatase activity']),\n ('GO:0015071', ['protein serine/threonine phosphatase activity']),\n ('GO:0030358', ['protein serine/threonine phosphatase activity']),\n ('GO:0004722', ['protein serine/threonine phosphatase activity']),\n ('GO:0004723',\n ['calcium-dependent protein serine/threonine phosphatase activity']),\n ('GO:0008596',\n ['calcium-dependent protein serine/threonine phosphatase activity']),\n ('GO:0019906',\n ['[pyruvate dehydrogenase (acetyl-transferring)]-phosphatase activity']),\n ('GO:0004741',\n ['[pyruvate dehydrogenase (acetyl-transferring)]-phosphatase activity']),\n ('GO:0030523', ['dihydrolipoyllysine-residue acetyltransferase activity']),\n ('GO:0004742', ['dihydrolipoyllysine-residue acetyltransferase activity']),\n ('GO:0016959',\n ['ribonucleoside-diphosphate reductase activity, thioredoxin disulfide as acceptor']),\n ('GO:0016961',\n ['ribonucleoside-diphosphate reductase activity, thioredoxin disulfide as acceptor']),\n ('GO:0004748',\n ['ribonucleoside-diphosphate reductase activity, thioredoxin disulfide as acceptor']),\n ('GO:0016960',\n ['ribonucleoside-diphosphate reductase activity, thioredoxin disulfide as acceptor']),\n ('GO:0004763', ['serine-pyruvate transaminase activity']),\n ('GO:0004762', ['serine-pyruvate transaminase activity']),\n ('GO:0004761', ['serine-pyruvate transaminase activity']),\n ('GO:0004760', ['serine-pyruvate transaminase activity']),\n ('GO:0004767', ['sphingomyelin phosphodiesterase activity']),\n ('GO:0030230', ['sphingomyelin phosphodiesterase activity']),\n ('GO:0030231', ['sphingomyelin phosphodiesterase activity']),\n ('GO:0016214', ['stearoyl-CoA 9-desaturase activity']),\n ('GO:0004768', ['stearoyl-CoA 9-desaturase activity']),\n ('GO:0043735', ['stearoyl-CoA 9-desaturase activity']),\n 
('GO:0004772', ['sterol O-acyltransferase activity']),\n ('GO:0017066', ['sterol O-acyltransferase activity']),\n ('GO:0004777', ['succinate-semialdehyde dehydrogenase (NAD+) activity']),\n ('GO:0008952', ['succinate-semialdehyde dehydrogenase (NAD+) activity']),\n ('GO:0016954', ['superoxide dismutase activity']),\n ('GO:0004784', ['superoxide dismutase activity']),\n ('GO:0008383', ['superoxide dismutase activity']),\n ('GO:0004785', ['superoxide dismutase activity']),\n ('GO:0008382', ['superoxide dismutase activity']),\n ('GO:0004796', ['thromboxane-A synthase activity']),\n ('GO:0008400', ['thromboxane-A synthase activity']),\n ('GO:0004803', ['transposase activity']),\n ('GO:0004804', ['transposase activity']),\n ('GO:0016425',\n ['tRNA (5-methylaminomethyl-2-thiouridylate)(34)-methyltransferase activity']),\n ('GO:0004808',\n ['tRNA (5-methylaminomethyl-2-thiouridylate)(34)-methyltransferase activity']),\n ('GO:0016437', ['CCA tRNA nucleotidyltransferase activity']),\n ('GO:0004810', ['CCA tRNA nucleotidyltransferase activity']),\n ('GO:0017100', ['aminoacyl-tRNA ligase activity']),\n ('GO:0004812', ['aminoacyl-tRNA ligase activity']),\n ('GO:0016876', ['aminoacyl-tRNA ligase activity']),\n ('GO:0004833', ['tryptophan 2,3-dioxygenase activity']),\n ('GO:0004426', ['tryptophan 2,3-dioxygenase activity']),\n ('GO:0004841', ['ubiquitin-protein transferase activity']),\n ('GO:0004840', ['ubiquitin-protein transferase activity']),\n ('GO:0004842', ['ubiquitin-protein transferase activity']),\n ('GO:0004843', ['cysteine-type deubiquitinase activity']),\n ('GO:0036459', ['cysteine-type deubiquitinase activity']),\n ('GO:0048551', ['enzyme inhibitor activity']),\n ('GO:0004857', ['enzyme inhibitor activity']),\n ('GO:1990681', ['protein phosphatase inhibitor activity']),\n ('GO:0004864', ['protein phosphatase inhibitor activity']),\n ('GO:0004870', ['cysteine-type endopeptidase inhibitor activity']),\n ('GO:0004869', ['cysteine-type endopeptidase inhibitor 
activity']),\n ('GO:0004875', ['complement receptor activity']),\n ('GO:0004942', ['complement receptor activity']),\n ('GO:0004876', ['complement component C3a receptor activity']),\n ('GO:0004943', ['complement component C3a receptor activity']),\n ('GO:0004944', ['complement component C5a receptor activity']),\n ('GO:0004878', ['complement component C5a receptor activity']),\n ('GO:0003708', ['nuclear receptor activity']),\n ('GO:0004879', ['nuclear receptor activity']),\n ('GO:0004880', ['nuclear receptor activity']),\n ('GO:0038052', ['nuclear receptor activity']),\n ('GO:0004886', ['nuclear receptor activity']),\n ('GO:0004887', ['nuclear receptor activity']),\n ('GO:0004882', ['nuclear receptor activity']),\n ('GO:0004884', ['nuclear receptor activity']),\n ('GO:0008434', ['nuclear receptor activity']),\n ('GO:0038050', ['nuclear receptor activity']),\n ('GO:0038051', ['nuclear receptor activity']),\n ('GO:0004888', ['transmembrane signaling receptor activity']),\n ('GO:0099600', ['transmembrane signaling receptor activity']),\n ('GO:0004926', ['transmembrane signaling receptor activity']),\n ('GO:0004896', ['cytokine receptor activity']),\n ('GO:0004907', ['cytokine receptor activity']),\n ('GO:0030525',\n ['granulocyte macrophage colony-stimulating factor receptor activity']),\n ('GO:0004901',\n ['granulocyte macrophage colony-stimulating factor receptor activity']),\n ('GO:0030524', ['granulocyte colony-stimulating factor receptor activity']),\n ('GO:0004902', ['granulocyte colony-stimulating factor receptor activity']),\n ('GO:0019967', ['interleukin-1, type I, activating receptor activity']),\n ('GO:0004909', ['interleukin-1, type I, activating receptor activity']),\n ('GO:0004910', ['interleukin-1, type II, blocking receptor activity']),\n ('GO:0019968', ['interleukin-1, type II, blocking receptor activity']),\n ('GO:0004923', ['leukemia inhibitory factor receptor activity']),\n ('GO:0004899', ['leukemia inhibitory factor receptor activity']),\n 
('GO:0001624', ['G protein-coupled receptor activity']),\n ('GO:0004930', ['G protein-coupled receptor activity']),\n ('GO:0001622', ['G protein-coupled receptor activity']),\n ('GO:0001623', ['G protein-coupled receptor activity']),\n ('GO:0016526', ['G protein-coupled receptor activity']),\n ('GO:0001625', ['G protein-coupled receptor activity']),\n ('GO:0001600', ['endothelin receptor activity']),\n ('GO:0004962', ['endothelin receptor activity']),\n ('GO:0001599', ['endothelin receptor activity']),\n ('GO:0004964', ['luteinizing hormone receptor activity']),\n ('GO:0004976', ['luteinizing hormone receptor activity']),\n ('GO:0001585', ['G protein-coupled serotonin receptor activity']),\n ('GO:0016609', ['G protein-coupled serotonin receptor activity']),\n ('GO:0004993', ['G protein-coupled serotonin receptor activity']),\n ('GO:0016931', ['vasopressin receptor activity']),\n ('GO:0005000', ['vasopressin receptor activity']),\n ('GO:0005023', ['epidermal growth factor receptor activity']),\n ('GO:0005006', ['epidermal growth factor receptor activity']),\n ('GO:0036328', ['vascular endothelial growth factor receptor activity']),\n ('GO:0036329', ['vascular endothelial growth factor receptor activity']),\n ('GO:0005021', ['vascular endothelial growth factor receptor activity']),\n ('GO:0036327', ['vascular endothelial growth factor receptor activity']),\n ('GO:0036330', ['vascular endothelial growth factor receptor activity']),\n ('GO:0036326', ['vascular endothelial growth factor receptor activity']),\n ('GO:0005033', ['tumor necrosis factor receptor activity']),\n ('GO:0005031', ['tumor necrosis factor receptor activity']),\n ('GO:0005032', ['tumor necrosis factor receptor activity']),\n ('GO:0005041', ['low-density lipoprotein particle receptor activity']),\n ('GO:0008032', ['low-density lipoprotein particle receptor activity']),\n ('GO:0008249', ['signal sequence binding']),\n ('GO:0005048', ['signal sequence binding']),\n ('GO:0008262', ['nuclear export 
signal receptor activity']),\n ('GO:0005049', ['nuclear export signal receptor activity']),\n ('GO:0005068',\n ['transmembrane receptor protein tyrosine kinase adaptor activity']),\n ('GO:0005069',\n ['transmembrane receptor protein tyrosine kinase adaptor activity']),\n ('GO:0097024', ['protein kinase C binding']),\n ('GO:0072569', ['protein kinase C binding']),\n ('GO:0005080', ['protein kinase C binding']),\n ('GO:0072568', ['protein kinase C binding']),\n ('GO:0008321', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0017034', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0005088', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0005090', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0016220', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0017132', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0016219', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0005085', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0008433', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0005087', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0030676', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0005089', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0005086', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0017112', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0019839', ['guanyl-nucleotide exchange factor activity']),\n ('GO:0005100', ['GTPase activator activity']),\n ('GO:0008060', ['GTPase activator activity']),\n ('GO:0005101', ['GTPase activator activity']),\n ('GO:0005096', ['GTPase activator activity']),\n ('GO:0005097', ['GTPase activator activity']),\n ('GO:0005098', ['GTPase activator activity']),\n ('GO:0030675', ['GTPase activator activity']),\n ('GO:0017123', ['GTPase activator activity']),\n ('GO:0005099', ['GTPase activator activity']),\n ('GO:0046582', ['GTPase activator activity']),\n ('GO:0001521', ['fibroblast growth factor receptor 
binding']),\n ('GO:0005162', ['fibroblast growth factor receptor binding']),\n ('GO:0005104', ['fibroblast growth factor receptor binding']),\n ('GO:0005110', ['frizzled binding']),\n ('GO:0005109', ['frizzled binding']),\n ('GO:0005154', ['epidermal growth factor receptor binding']),\n ('GO:0008185', ['epidermal growth factor receptor binding']),\n ('GO:0005067', ['insulin-like growth factor receptor binding']),\n ('GO:0005159', ['insulin-like growth factor receptor binding']),\n ('GO:0005478', ['transporter activity']),\n ('GO:0005215', ['transporter activity']),\n ('GO:0008095',\n ['inositol 1,4,5-trisphosphate-gated calcium channel activity']),\n ('GO:0005220',\n ['inositol 1,4,5-trisphosphate-gated calcium channel activity']),\n ('GO:0005243', ['gap junction channel activity']),\n ('GO:0015286', ['gap junction channel activity']),\n ('GO:0015285', ['gap junction channel activity']),\n ('GO:0005245', ['voltage-gated calcium channel activity']),\n ('GO:0015270', ['voltage-gated calcium channel activity']),\n ('GO:0010173', ['voltage-gated calcium channel activity']),\n ('GO:0005260', ['intracellularly ATP-gated chloride channel activity']),\n ('GO:0005224', ['intracellularly ATP-gated chloride channel activity']),\n ('GO:0015281', ['monoatomic cation channel activity']),\n ('GO:0015338', ['monoatomic cation channel activity']),\n ('GO:0005261', ['monoatomic cation channel activity']),\n ('GO:0015206', ['allantoin:proton symporter activity']),\n ('GO:0005274', ['allantoin:proton symporter activity']),\n ('GO:0005279', ['amine transmembrane transporter activity']),\n ('GO:0005275', ['amine transmembrane transporter activity']),\n ('GO:0005283', ['amino acid:sodium symporter activity']),\n ('GO:0005284', ['amino acid:sodium symporter activity']),\n ('GO:0005285', ['amino acid:sodium symporter activity']),\n ('GO:0005295', ['neutral L-amino acid:sodium symporter activity']),\n ('GO:0005282', ['neutral L-amino acid:sodium symporter activity']),\n ('GO:0015508', 
['L-tyrosine transmembrane transporter activity']),\n ('GO:0005302', ['L-tyrosine transmembrane transporter activity']),\n ('GO:0005312', ['dicarboxylic acid transmembrane transporter activity']),\n ('GO:0015365', ['dicarboxylic acid transmembrane transporter activity']),\n ('GO:0005310', ['dicarboxylic acid transmembrane transporter activity']),\n ('GO:1901677', ['phosphate transmembrane transporter activity']),\n ('GO:0005317', ['phosphate transmembrane transporter activity']),\n ('GO:0005315', ['phosphate transmembrane transporter activity']),\n ('GO:0005325', ['long-chain fatty acid transmembrane transporter activity']),\n ('GO:0008562', ['long-chain fatty acid transmembrane transporter activity']),\n ('GO:0005324', ['long-chain fatty acid transmembrane transporter activity']),\n ('GO:0005330', ['dopamine:sodium symporter activity']),\n ('GO:0005329', ['dopamine:sodium symporter activity']),\n ('GO:0005333', ['norepinephrine:sodium symporter activity']),\n ('GO:0005334', ['norepinephrine:sodium symporter activity']),\n ('GO:0005336', ['serotonin:sodium:chloride symporter activity']),\n ('GO:0005335', ['serotonin:sodium:chloride symporter activity']),\n ('GO:0015222', ['serotonin:sodium:chloride symporter activity']),\n ('GO:0005338', ['nucleotide-sugar transmembrane transporter activity']),\n ('GO:0005339', ['nucleotide-sugar transmembrane transporter activity']),\n ('GO:0005341', ['nucleotide-sulfate transmembrane transporter activity']),\n ('GO:0005340', ['nucleotide-sulfate transmembrane transporter activity']),\n ('GO:0005344', ['oxygen carrier activity']),\n ('GO:0015033', ['oxygen carrier activity']),\n ('GO:0005348', ['ATP transmembrane transporter activity']),\n ('GO:0005347', ['ATP transmembrane transporter activity']),\n ('GO:0005403', ['carbohydrate:proton symporter activity']),\n ('GO:0005351', ['carbohydrate:proton symporter activity']),\n ('GO:0015542', ['carbohydrate:proton symporter activity']),\n ('GO:0019192', ['fructose transmembrane 
transporter activity']),\n ('GO:0015585', ['fructose transmembrane transporter activity']),\n ('GO:0005353', ['fructose transmembrane transporter activity']),\n ('GO:0015579', ['glucose transmembrane transporter activity']),\n ('GO:0005355', ['glucose transmembrane transporter activity']),\n ('GO:0005361', ['glucose:proton symporter activity']),\n ('GO:0005356', ['glucose:proton symporter activity']),\n ('GO:0015581', ['maltose transmembrane transporter activity']),\n ('GO:0005363', ['maltose transmembrane transporter activity']),\n ('GO:0005371',\n ['tricarboxylate secondary active transmembrane transporter activity']),\n ('GO:0005370',\n ['tricarboxylate secondary active transmembrane transporter activity']),\n ('GO:0005379', ['copper ion transmembrane transporter activity']),\n ('GO:0015088', ['copper ion transmembrane transporter activity']),\n ('GO:0005378', ['copper ion transmembrane transporter activity']),\n ('GO:0005375', ['copper ion transmembrane transporter activity']),\n ('GO:0005380', ['copper ion transmembrane transporter activity']),\n ('GO:0016033', ['iron ion transmembrane transporter activity']),\n ('GO:0097689', ['iron ion transmembrane transporter activity']),\n ('GO:0005381', ['iron ion transmembrane transporter activity']),\n ('GO:0005382', ['iron ion transmembrane transporter activity']),\n ('GO:0008522', ['nucleoside:sodium symporter activity']),\n ('GO:0005415', ['nucleoside:sodium symporter activity']),\n ('GO:0005436', ['sodium:phosphate symporter activity']),\n ('GO:0015321', ['sodium:phosphate symporter activity']),\n ('GO:0005349', ['ATP:ADP antiporter activity']),\n ...]In\u00a0[19]: Copied!
[(term.ID,term.name) for term in go if not term.parents and term.children]\n[(term.ID,term.name) for term in go if not term.parents and term.children] Out[19]:
[('GO:0005554', ['molecular_function']),\n ('GO:0008372', ['cellular_component']),\n ('GO:0000004', ['biological_process'])]In\u00a0[20]: Copied!
go['GO:0005554'].__dict__\ngo['GO:0005554'].__dict__ Out[20]:
{'_ID': 'GO:0005554',\n '_original_ID': 'GO:0005554',\n '_container': <picea.ontology.Ontology at 0x7f4598e2cac0>,\n '_children': ['GO:0003774',\n 'GO:0003824',\n 'GO:0005198',\n 'GO:0005478',\n 'GO:0005215',\n 'GO:0005488',\n 'GO:0009054',\n 'GO:0009053',\n 'GO:0009055',\n 'GO:0016209',\n 'GO:0038024',\n 'GO:0044183',\n 'GO:0045182',\n 'GO:0045735',\n 'GO:0060089',\n 'GO:0060090',\n 'GO:0032947',\n 'GO:0050827',\n 'GO:0090729',\n 'GO:0098772',\n 'GO:0140104',\n 'GO:0140110',\n 'GO:0140223',\n 'GO:0140299',\n 'GO:0140313',\n 'GO:0140489',\n 'GO:0140522',\n 'GO:0140657',\n 'GO:0140691',\n 'GO:0140776',\n 'GO:0140777',\n 'GO:0034290',\n 'GO:0034291',\n 'GO:0034292',\n 'GO:0140911',\n 'GO:0140912',\n 'GO:0141047',\n 'GO:0180020',\n 'GO:0180024'],\n '_parents': [],\n 'name': ['molecular_function'],\n 'def': ['\"A molecular process that can be carried out by the action of a single macromolecular machine, usually via direct physical interactions with other molecular entities. Function in this sense denotes an action, or activity, that a gene product (or a complex) performs.\" [GOC:pdt]'],\n 'alt_id': ['GO:0003674'],\n 'namespace': ['molecular_function'],\n 'comment': [\"Note that, in addition to forming the root of the molecular function ontology, this term is recommended for the annotation of gene products whose molecular function is unknown. When this term is used for annotation, it indicates that no information was available about the molecular function of the gene product annotated as of the date the annotation was made; the evidence code 'no data' (ND), is used to indicate this. Despite its name, this is not a type of 'function' in the sense typically defined by upper ontologies such as Basic Formal Ontology (BFO). 
It is instead a BFO:process carried out by a single gene product or complex.\"],\n 'subset': ['goslim_candida',\n 'goslim_chembl',\n 'goslim_metagenomics',\n 'goslim_pir',\n 'goslim_plant',\n 'goslim_yeast'],\n 'synonym': ['\"molecular function\" EXACT []']}In\u00a0[21]: Copied!
go['GO:0003674'].__dict__\ngo['GO:0003674'].__dict__ Out[21]:
{'_ID': 'GO:0005554',\n '_original_ID': 'GO:0005554',\n '_container': <picea.ontology.Ontology at 0x7f4598e2cac0>,\n '_children': ['GO:0003774',\n 'GO:0003824',\n 'GO:0005198',\n 'GO:0005478',\n 'GO:0005215',\n 'GO:0005488',\n 'GO:0009054',\n 'GO:0009053',\n 'GO:0009055',\n 'GO:0016209',\n 'GO:0038024',\n 'GO:0044183',\n 'GO:0045182',\n 'GO:0045735',\n 'GO:0060089',\n 'GO:0060090',\n 'GO:0032947',\n 'GO:0050827',\n 'GO:0090729',\n 'GO:0098772',\n 'GO:0140104',\n 'GO:0140110',\n 'GO:0140223',\n 'GO:0140299',\n 'GO:0140313',\n 'GO:0140489',\n 'GO:0140522',\n 'GO:0140657',\n 'GO:0140691',\n 'GO:0140776',\n 'GO:0140777',\n 'GO:0034290',\n 'GO:0034291',\n 'GO:0034292',\n 'GO:0140911',\n 'GO:0140912',\n 'GO:0141047',\n 'GO:0180020',\n 'GO:0180024'],\n '_parents': [],\n 'name': ['molecular_function'],\n 'def': ['\"A molecular process that can be carried out by the action of a single macromolecular machine, usually via direct physical interactions with other molecular entities. Function in this sense denotes an action, or activity, that a gene product (or a complex) performs.\" [GOC:pdt]'],\n 'alt_id': ['GO:0003674'],\n 'namespace': ['molecular_function'],\n 'comment': [\"Note that, in addition to forming the root of the molecular function ontology, this term is recommended for the annotation of gene products whose molecular function is unknown. When this term is used for annotation, it indicates that no information was available about the molecular function of the gene product annotated as of the date the annotation was made; the evidence code 'no data' (ND), is used to indicate this. Despite its name, this is not a type of 'function' in the sense typically defined by upper ontologies such as Basic Formal Ontology (BFO). 
It is instead a BFO:process carried out by a single gene product or complex.\"],\n 'subset': ['goslim_candida',\n 'goslim_chembl',\n 'goslim_metagenomics',\n 'goslim_pir',\n 'goslim_plant',\n 'goslim_yeast'],\n 'synonym': ['\"molecular function\" EXACT []']}"},{"location":"examples/sequence_annotation/","title":"Sequence annotation","text":"In\u00a0[1]: Copied!
import sys\nsys.path.insert(0, '../../')\nimport picea\nfrom picea import SequenceAnnotation\npicea.__version__\nimport sys sys.path.insert(0, '../../') import picea from picea import SequenceAnnotation picea.__version__ Out[1]:
'0.0.27'In\u00a0[2]: Copied!
gff3 = (\n # '##gff-version 3.1.26\\n'\n # '##sequence-region ctg123 1 1497228\\n'\n 'ctg123\\t.\\tgene\\t1000\\t9000\\t.\\t+\\t.\\tID=gene00001;Name=EDEN\\n'\n 'ctg123\\t.\\tTF_binding_site\\t1000\\t1012\\t.\\t+\\t.\\tID=tfbs00001;Parent=gene00001\\n' # noqa\n 'ctg123\\t.\\tmRNA\\t1050\\t9000\\t.\\t+\\t.\\tID=mRNA00001;Parent=gene00001;Name=EDEN.1\\n' # noqa\n 'ctg123\\t.\\tmRNA\\t1050\\t9000\\t.\\t+\\t.\\tID=mRNA00002;Parent=gene00001;Name=EDEN.2\\n' # noqa\n 'ctg123\\t.\\tmRNA\\t1300\\t9000\\t.\\t+\\t.\\tID=mRNA00003;Parent=gene00001;Name=EDEN.3\\n' # noqa\n 'ctg123\\t.\\texon\\t1300\\t1500\\t.\\t+\\t.\\tID=exon00001;Parent=mRNA00003\\n'\n 'ctg123\\t.\\texon\\t1050\\t1500\\t.\\t+\\t.\\tID=exon00002;Parent=mRNA00001,mRNA00002\\n' # noqa\n 'ctg123\\t.\\texon\\t3000\\t3902\\t.\\t+\\t.\\tID=exon00003;Parent=mRNA00001,mRNA00003\\n' # noqa\n 'ctg123\\t.\\texon\\t5000\\t5500\\t.\\t+\\t.\\tID=exon00004;Parent=mRNA00001,mRNA00002,mRNA00003\\n' # noqa\n 'ctg123\\t.\\texon\\t7000\\t9000\\t.\\t+\\t.\\tID=exon00005;Parent=mRNA00001,mRNA00002,mRNA00003\\n' # noqa\n 'ctg123\\t.\\tCDS\\t1201\\t1500\\t.\\t+\\t0\\tID=cds00001.1;Parent=mRNA00001;Name=edenprotein.1\\n' # noqa\n 'ctg123\\t.\\tCDS\\t3000\\t3902\\t.\\t+\\t0\\tID=cds00001.2;Parent=mRNA00001;Name=edenprotein.1\\n' # noqa\n 'ctg123\\t.\\tCDS\\t5000\\t5500\\t.\\t+\\t0\\tID=cds00001.3;Parent=mRNA00001;Name=edenprotein.1\\n' # noqa\n 'ctg123\\t.\\tCDS\\t7000\\t7600\\t.\\t+\\t0\\tID=cds00001.4;Parent=mRNA00001;Name=edenprotein.1\\n' # noqa\n 'ctg123\\t.\\tCDS\\t1201\\t1500\\t.\\t+\\t0\\tID=cds00002.1;Parent=mRNA00002;Name=edenprotein.2\\n' # noqa\n 'ctg123\\t.\\tCDS\\t5000\\t5500\\t.\\t+\\t0\\tID=cds00002.2;Parent=mRNA00002;Name=edenprotein.2\\n' # noqa\n 'ctg123\\t.\\tCDS\\t7000\\t7600\\t.\\t+\\t0\\tID=cds00002.3;Parent=mRNA00002;Name=edenprotein.2\\n' # noqa\n 'ctg123\\t.\\tCDS\\t3301\\t3902\\t.\\t+\\t0\\tID=cds00003.1;Parent=mRNA00003;Name=edenprotein.3\\n' # noqa\n 
'ctg123\\t.\\tCDS\\t5000\\t5500\\t.\\t+\\t1\\tID=cds00003.2;Parent=mRNA00003;Name=edenprotein.3\\n' # noqa\n 'ctg123\\t.\\tCDS\\t7000\\t7600\\t.\\t+\\t1\\tID=cds00003.3;Parent=mRNA00003;Name=edenprotein.3\\n' # noqa\n 'ctg123\\t.\\tCDS\\t3391\\t3902\\t.\\t+\\t0\\tID=cds00004.1;Parent=mRNA00003;Name=edenprotein.4\\n' # noqa\n 'ctg123\\t.\\tCDS\\t5000\\t5500\\t.\\t+\\t1\\tID=cds00004.2;Parent=mRNA00003;Name=edenprotein.4\\n' # noqa\n 'ctg123\\t.\\tCDS\\t7000\\t7600\\t.\\t+\\t1\\tID=cds00004.3;Parent=mRNA00003;Name=edenprotein.4\\n' # noqa\n)\n\nann = SequenceAnnotation.from_gff(string=gff3)\nann['mRNA00003'].parents.elements\ngff3 = ( # '##gff-version 3.1.26\\n' # '##sequence-region ctg123 1 1497228\\n' 'ctg123\\t.\\tgene\\t1000\\t9000\\t.\\t+\\t.\\tID=gene00001;Name=EDEN\\n' 'ctg123\\t.\\tTF_binding_site\\t1000\\t1012\\t.\\t+\\t.\\tID=tfbs00001;Parent=gene00001\\n' # noqa 'ctg123\\t.\\tmRNA\\t1050\\t9000\\t.\\t+\\t.\\tID=mRNA00001;Parent=gene00001;Name=EDEN.1\\n' # noqa 'ctg123\\t.\\tmRNA\\t1050\\t9000\\t.\\t+\\t.\\tID=mRNA00002;Parent=gene00001;Name=EDEN.2\\n' # noqa 'ctg123\\t.\\tmRNA\\t1300\\t9000\\t.\\t+\\t.\\tID=mRNA00003;Parent=gene00001;Name=EDEN.3\\n' # noqa 'ctg123\\t.\\texon\\t1300\\t1500\\t.\\t+\\t.\\tID=exon00001;Parent=mRNA00003\\n' 'ctg123\\t.\\texon\\t1050\\t1500\\t.\\t+\\t.\\tID=exon00002;Parent=mRNA00001,mRNA00002\\n' # noqa 'ctg123\\t.\\texon\\t3000\\t3902\\t.\\t+\\t.\\tID=exon00003;Parent=mRNA00001,mRNA00003\\n' # noqa 'ctg123\\t.\\texon\\t5000\\t5500\\t.\\t+\\t.\\tID=exon00004;Parent=mRNA00001,mRNA00002,mRNA00003\\n' # noqa 'ctg123\\t.\\texon\\t7000\\t9000\\t.\\t+\\t.\\tID=exon00005;Parent=mRNA00001,mRNA00002,mRNA00003\\n' # noqa 'ctg123\\t.\\tCDS\\t1201\\t1500\\t.\\t+\\t0\\tID=cds00001.1;Parent=mRNA00001;Name=edenprotein.1\\n' # noqa 'ctg123\\t.\\tCDS\\t3000\\t3902\\t.\\t+\\t0\\tID=cds00001.2;Parent=mRNA00001;Name=edenprotein.1\\n' # noqa 'ctg123\\t.\\tCDS\\t5000\\t5500\\t.\\t+\\t0\\tID=cds00001.3;Parent=mRNA00001;Name=edenprotein.1\\n' # noqa 
'ctg123\\t.\\tCDS\\t7000\\t7600\\t.\\t+\\t0\\tID=cds00001.4;Parent=mRNA00001;Name=edenprotein.1\\n' # noqa 'ctg123\\t.\\tCDS\\t1201\\t1500\\t.\\t+\\t0\\tID=cds00002.1;Parent=mRNA00002;Name=edenprotein.2\\n' # noqa 'ctg123\\t.\\tCDS\\t5000\\t5500\\t.\\t+\\t0\\tID=cds00002.2;Parent=mRNA00002;Name=edenprotein.2\\n' # noqa 'ctg123\\t.\\tCDS\\t7000\\t7600\\t.\\t+\\t0\\tID=cds00002.3;Parent=mRNA00002;Name=edenprotein.2\\n' # noqa 'ctg123\\t.\\tCDS\\t3301\\t3902\\t.\\t+\\t0\\tID=cds00003.1;Parent=mRNA00003;Name=edenprotein.3\\n' # noqa 'ctg123\\t.\\tCDS\\t5000\\t5500\\t.\\t+\\t1\\tID=cds00003.2;Parent=mRNA00003;Name=edenprotein.3\\n' # noqa 'ctg123\\t.\\tCDS\\t7000\\t7600\\t.\\t+\\t1\\tID=cds00003.3;Parent=mRNA00003;Name=edenprotein.3\\n' # noqa 'ctg123\\t.\\tCDS\\t3391\\t3902\\t.\\t+\\t0\\tID=cds00004.1;Parent=mRNA00003;Name=edenprotein.4\\n' # noqa 'ctg123\\t.\\tCDS\\t5000\\t5500\\t.\\t+\\t1\\tID=cds00004.2;Parent=mRNA00003;Name=edenprotein.4\\n' # noqa 'ctg123\\t.\\tCDS\\t7000\\t7600\\t.\\t+\\t1\\tID=cds00004.3;Parent=mRNA00003;Name=edenprotein.4\\n' # noqa ) ann = SequenceAnnotation.from_gff(string=gff3) ann['mRNA00003'].parents.elements Out[2]:
[<SequenceInterval type=gene ID=gene00001 loc=ctg123..1000..9000..+ at 0x7f217c13b3d0>]In\u00a0[3]: Copied!
ann['mRNA00003'].children.elements\nann['mRNA00003'].children.elements Out[3]:
[<SequenceInterval type=exon ID=exon00001 loc=ctg123..1300..1500..+ at 0x7f217c13b070>,\n <SequenceInterval type=exon ID=exon00003 loc=ctg123..3000..3902..+ at 0x7f217c13af20>,\n <SequenceInterval type=exon ID=exon00004 loc=ctg123..5000..5500..+ at 0x7f217c13aec0>,\n <SequenceInterval type=exon ID=exon00005 loc=ctg123..7000..9000..+ at 0x7f217c13ae90>,\n <SequenceInterval type=CDS ID=cds00003.1 loc=ctg123..3301..3902..+ at 0x7f217c13a620>,\n <SequenceInterval type=CDS ID=cds00003.2 loc=ctg123..5000..5500..+ at 0x7f217c13a650>,\n <SequenceInterval type=CDS ID=cds00003.3 loc=ctg123..7000..7600..+ at 0x7f217c13a8c0>,\n <SequenceInterval type=CDS ID=cds00004.1 loc=ctg123..3391..3902..+ at 0x7f217c13b670>,\n <SequenceInterval type=CDS ID=cds00004.2 loc=ctg123..5000..5500..+ at 0x7f217c13b0d0>,\n <SequenceInterval type=CDS ID=cds00004.3 loc=ctg123..7000..7600..+ at 0x7f217c13af50>]In\u00a0[4]: Copied!
ann['cds00004.3'].gff_attributes\nann['cds00004.3'].gff_attributes Out[4]:
{'name': ['edenprotein.4'], 'ID': ['cds00004.3'], 'Parent': ['mRNA00003']}In\u00a0[5]: Copied!
ann['cds00004.3'].to_gff_line()\nann['cds00004.3'].to_gff_line() Out[5]:
'ctg123\\t.\\tCDS\\t7000\\t7600\\t.\\t+\\t1\\tID=cds00004.3;Parent=mRNA00003;Name=edenprotein.4'"},{"location":"examples/sequence_interval/","title":"Sequence interval","text":"In\u00a0[1]: Copied!
import sys\nsys.path.insert(0, '../../')\nimport picea\nfrom picea import SequenceInterval\npicea.__version__\nimport sys sys.path.insert(0, '../../') import picea from picea import SequenceInterval picea.__version__ Out[1]:
'0.0.27'In\u00a0[2]: Copied!
interval = SequenceInterval.from_gff_line('ctg123\\t.\\tgene\\t1000\\t9000\\t.\\t+\\t.\\tID=gene00001;Name=EDEN')\ninterval\ninterval = SequenceInterval.from_gff_line('ctg123\\t.\\tgene\\t1000\\t9000\\t.\\t+\\t.\\tID=gene00001;Name=EDEN') interval Out[2]:
<SequenceInterval type=gene ID=gene00001 loc=ctg123..1000..9000..+ at 0x7fbec4082e00>In\u00a0[3]: Copied!
interval.to_gff_line()\ninterval.to_gff_line() Out[3]:
'ctg123\\t.\\tgene\\t1000\\t9000\\t.\\t+\\t.\\tID=gene00001;Name=EDEN'In\u00a0[4]: Copied!
interval['parent']\ninterval['parent']
\n---------------------------------------------------------------------------\nKeyError Traceback (most recent call last)\nCell In[4], line 1\n----> 1 interval['parent']\n\nFile ~/work/picea/picea/docs/examples/../../picea/dag.py:36, in DAGElement.__getitem__(self, key)\n 35 def __getitem__(self, key):\n---> 36 return self.__dict__[key]\n\nKeyError: 'parent'"},{"location":"examples/tree/","title":"Tree","text":"In\u00a0[1]: Copied!
import sys\nsys.path.insert(0, '../../')\nimport picea\nfrom picea import Tree, treeplot\nfrom matplotlib import pyplot as plt\npicea.__version__\nimport sys sys.path.insert(0, '../../') import picea from picea import Tree, treeplot from matplotlib import pyplot as plt picea.__version__ Out[1]:
'0.0.27'In\u00a0[2]: Copied!
!pwd\n!pwd
/home/runner/work/picea/picea/docs/examples\r\nIn\u00a0[3]: Copied!
tree = Tree.from_newick(filename='./data/tree.newick')\n\nfig, [[ax1,ax2],[ax3,ax4]] = plt.subplots(ncols=2, nrows=2, figsize=(20,20))\n\ntreeplot(tree, style='square', ax=ax1)\ntreeplot(tree, style='triangular', ltr=False, ax=ax2)\ntreeplot(tree, style='square', branchlengths=False, ax=ax3)\ntreeplot(tree, style='radial', ax=ax4)\ntree = Tree.from_newick(filename='./data/tree.newick') fig, [[ax1,ax2],[ax3,ax4]] = plt.subplots(ncols=2, nrows=2, figsize=(20,20)) treeplot(tree, style='square', ax=ax1) treeplot(tree, style='triangular', ltr=False, ax=ax2) treeplot(tree, style='square', branchlengths=False, ax=ax3) treeplot(tree, style='radial', ax=ax4)
/home/runner/work/picea/picea/docs/examples/../../picea/tree.py:177: UserWarning: Found branchlengths on some parts of the tree, but node 0 has no branchlength specified, setting to branchlength 0.0\n warn(\nOut[3]:
<Axes: >In\u00a0[4]: Copied!
0.4 / 25\n0.4 / 25 Out[4]:
0.016In\u00a0[5]: Copied!
x_min,x_max = ax3.get_xlim()\nx_max - x_min, .1 * (x_max - x_min), (x_min,x_max)\nx_min,x_max = ax3.get_xlim() x_max - x_min, .1 * (x_max - x_min), (x_min,x_max) Out[5]:
(25.09, 2.5090000000000003, (-0.52, 24.57))In\u00a0[6]: Copied!
from sklearn.cluster import AgglomerativeClustering\nimport numpy as np\nX = np.array([[1, 2], [1, 4], [1, 0],\n [4, 2], [4, 4], [4, 0]])\nclustering = AgglomerativeClustering().fit(X)\nclustering.labels_\nfrom sklearn.cluster import AgglomerativeClustering import numpy as np X = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [4, 4], [4, 0]]) clustering = AgglomerativeClustering().fit(X) clustering.labels_ Out[6]:
array([1, 1, 1, 0, 0, 0])In\u00a0[7]: Copied!
tree = Tree(children=[Tree(),Tree()])\nfor t in tree.depth_first():\n print(t.ID,t.name)\ntree = Tree(children=[Tree(),Tree()]) for t in tree.depth_first(): print(t.ID,t.name)
None None\nNone None\nNone None\nIn\u00a0[8]: Copied!
t.iloc[None]\nt.iloc[None] Out[8]:
Tree(name=None, length=None, children=[])In\u00a0[9]: Copied!
tree = Tree.from_sklearn(clustering)\ntree.to_newick(branch_lengths=False)\ntree = Tree.from_sklearn(clustering) tree.to_newick(branch_lengths=False) Out[9]:
'((2,(0,1)),(4,(3,5)));'In\u00a0[10]: Copied!
Tree(**tree.to_dict())\nTree(**tree.to_dict()) Out[10]:
Tree(name=None, length=None, children=[{'name': None, 'length': None, 'children': [{'name': '2', 'length': None, 'children': []}, {'name': None, 'length': None, 'children': [{'name': '0', 'length': None, 'children': []}, {'name': '1', 'length': None, 'children': []}]}]}, {'name': None, 'length': None, 'children': [{'name': '4', 'length': None, 'children': []}, {'name': None, 'length': None, 'children': [{'name': '3', 'length': None, 'children': []}, {'name': '5', 'length': None, 'children': []}]}]}])In\u00a0[11]: Copied!
tree.iloc[1].name = 'long name'\ntree.iloc[1].name = 'long name' In\u00a0[12]: Copied!
print(tree.to_json(indent=2))\nprint(tree.to_json(indent=2))
{\n \"name\": null,\n \"length\": null,\n \"children\": [\n {\n \"name\": null,\n \"length\": null,\n \"children\": [\n {\n \"name\": \"2\",\n \"length\": null,\n \"children\": []\n },\n {\n \"name\": null,\n \"length\": null,\n \"children\": [\n {\n \"name\": \"0\",\n \"length\": null,\n \"children\": []\n },\n {\n \"name\": \"long name\",\n \"length\": null,\n \"children\": []\n }\n ]\n }\n ]\n },\n {\n \"name\": null,\n \"length\": null,\n \"children\": [\n {\n \"name\": \"4\",\n \"length\": null,\n \"children\": []\n },\n {\n \"name\": null,\n \"length\": null,\n \"children\": [\n {\n \"name\": \"3\",\n \"length\": null,\n \"children\": []\n },\n {\n \"name\": \"5\",\n \"length\": null,\n \"children\": []\n }\n ]\n }\n ]\n }\n ]\n}\nIn\u00a0[13]: Copied!
from matplotlib import pyplot as plt\nfig, [ax1, ax2, ax3] = plt.subplots(ncols=3,figsize=(15, 5))\n\npicea.treeplot(tree, style='radial', ltr=False, ax=ax1)\npicea.treeplot(tree, style='square', ltr=True, ax=ax2)\npicea.treeplot(tree, style='triangular', ltr=False, ax=ax3)\n\nfor ax in (ax1,ax2,ax3):\n ax.scatter((0,0),(0,0),c='red')\nfrom matplotlib import pyplot as plt fig, [ax1, ax2, ax3] = plt.subplots(ncols=3,figsize=(15, 5)) picea.treeplot(tree, style='radial', ltr=False, ax=ax1) picea.treeplot(tree, style='square', ltr=True, ax=ax2) picea.treeplot(tree, style='triangular', ltr=False, ax=ax3) for ax in (ax1,ax2,ax3): ax.scatter((0,0),(0,0),c='red')
\n---------------------------------------------------------------------------\nTypeError Traceback (most recent call last)\nCell In[13], line 4\n 1 from matplotlib import pyplot as plt\n 2 fig, [ax1, ax2, ax3] = plt.subplots(ncols=3,figsize=(15, 5))\n----> 4 picea.treeplot(tree, style='radial', ltr=False, ax=ax1)\n 5 picea.treeplot(tree, style='square', ltr=True, ax=ax2)\n 6 picea.treeplot(tree, style='triangular', ltr=False, ax=ax3)\n\nFile ~/work/picea/picea/docs/examples/../../picea/tree.py:475, in treeplot(tree, style, branchlengths, ltr, node_labels, leaf_labels, leaf_marker, leaf_marker_fill, leaf_marker_edge, branch_linestyle, ax, return_layout)\n 437 def treeplot(\n 438 tree: Tree,\n 439 style: TreeStyle = TreeStyle.square,\n (...)\n 449 return_layout: bool = False,\n 450 ) -> Union[Ax, Tuple[Ax, LayoutDict]]:\n 451 \"\"\"[summary]\n 452 \n 453 Args:\n (...)\n 473 Union[Ax, Tuple[Ax, LayoutDict]]: [description]\n 474 \"\"\"\n--> 475 layout = calculate_tree_layout(tree=tree, style=style, ltr=ltr, branchlengths=branchlengths)\n 477 if not ax:\n 478 _, ax = plt.subplots(figsize=(6, 6))\n\nFile ~/work/picea/picea/docs/examples/../../picea/tree.py:417, in calculate_tree_layout(tree, style, ltr, branchlengths)\n 415 node_coords.x = increment + max(child_x_coords)\n 416 else:\n--> 417 node_coords.x = min(child_x_coords) - increment\n 418 else:\n 419 if previous_node:\n\nTypeError: '<' not supported between instances of 'NoneType' and 'NoneType'In\u00a0[14]: Copied!
fig, ax = plt.subplots(figsize=(10, 10))\npicea.treeplot(tree, style='radial', ax=ax)\n\nax.scatter((0,0),(0,0),c='red')\nfig, ax = plt.subplots(figsize=(10, 10)) picea.treeplot(tree, style='radial', ax=ax) ax.scatter((0,0),(0,0),c='red')
\n---------------------------------------------------------------------------\nTypeError Traceback (most recent call last)\nCell In[14], line 2\n 1 fig, ax = plt.subplots(figsize=(10, 10))\n----> 2 picea.treeplot(tree, style='radial', ax=ax)\n 4 ax.scatter((0,0),(0,0),c='red')\n\nFile ~/work/picea/picea/docs/examples/../../picea/tree.py:475, in treeplot(tree, style, branchlengths, ltr, node_labels, leaf_labels, leaf_marker, leaf_marker_fill, leaf_marker_edge, branch_linestyle, ax, return_layout)\n 437 def treeplot(\n 438 tree: Tree,\n 439 style: TreeStyle = TreeStyle.square,\n (...)\n 449 return_layout: bool = False,\n 450 ) -> Union[Ax, Tuple[Ax, LayoutDict]]:\n 451 \"\"\"[summary]\n 452 \n 453 Args:\n (...)\n 473 Union[Ax, Tuple[Ax, LayoutDict]]: [description]\n 474 \"\"\"\n--> 475 layout = calculate_tree_layout(tree=tree, style=style, ltr=ltr, branchlengths=branchlengths)\n 477 if not ax:\n 478 _, ax = plt.subplots(figsize=(6, 6))\n\nFile ~/work/picea/picea/docs/examples/../../picea/tree.py:415, in calculate_tree_layout(tree, style, ltr, branchlengths)\n 413 increment = node.length if branchlengths else 1.0\n 414 if ltr:\n--> 415 node_coords.x = increment + max(child_x_coords)\n 416 else:\n 417 node_coords.x = min(child_x_coords) - increment\n\nTypeError: '>' not supported between instances of 'NoneType' and 'NoneType'In\u00a0[15]: Copied!
import numpy as np\nfrom dataclasses import dataclass\n\n@dataclass\nclass TwoDCoordinate():\n x: float = 0.0\n y: float = 0.0\n \n def __iter__(self):\n yield from (self.x, self.y)\n \n def to_polar(self):\n return TwoDCoordinate(\n x = self.x * np.cos(self.y), \n y = self.x * np.sin(self.y)\n )\n \n def to_cartesian(self):\n return TwoDCoordinate(\n x = np.sqrt(self.x ** 2 + self.y ** 2),\n y = np.arctan2(self.y, self.x)\n )\n\nc = TwoDCoordinate(x=1, y=1)\n\nc\nimport numpy as np from dataclasses import dataclass @dataclass class TwoDCoordinate(): x: float = 0.0 y: float = 0.0 def __iter__(self): yield from (self.x, self.y) def to_polar(self): return TwoDCoordinate( x = self.x * np.cos(self.y), y = self.x * np.sin(self.y) ) def to_cartesian(self): return TwoDCoordinate( x = np.sqrt(self.x ** 2 + self.y ** 2), y = np.arctan2(self.y, self.x) ) c = TwoDCoordinate(x=1, y=1) c Out[15]:
TwoDCoordinate(x=1, y=1)In\u00a0[16]: Copied!
c.to_cartesian().to_polar()\nc.to_cartesian().to_polar() Out[16]:
TwoDCoordinate(x=1.0000000000000002, y=1.0)In\u00a0[17]: Copied!
c.to_polar().to_cartesian()\nc.to_polar().to_cartesian() Out[17]:
TwoDCoordinate(x=1.0, y=1.0)In\u00a0[18]: Copied!
grid = np.array([\n [TwoDCoordinate(x,y) for x in np.arange(0, 1.2, .2)] \n for y in np.arange(0, np.pi, .1)\n]).flatten()\n\nfig,[ax1,ax2] = plt.subplots(ncols=2, figsize=(20,5))\n\n\nax1.scatter(*zip(*[[*p] for p in grid]))\nax2.scatter(*zip(*[[*p.to_polar()] for p in grid]))\n\npoints = np.array([TwoDCoordinate(x, x*2) for x in np.arange(0., 1.05, .05)])\n\nax1.scatter(*zip(*[[*p] for p in points]))\nax2.scatter(*zip(*[[*p.to_polar()] for p in points]))\ngrid = np.array([ [TwoDCoordinate(x,y) for x in np.arange(0, 1.2, .2)] for y in np.arange(0, np.pi, .1) ]).flatten() fig,[ax1,ax2] = plt.subplots(ncols=2, figsize=(20,5)) ax1.scatter(*zip(*[[*p] for p in grid])) ax2.scatter(*zip(*[[*p.to_polar()] for p in grid])) points = np.array([TwoDCoordinate(x, x*2) for x in np.arange(0., 1.05, .05)]) ax1.scatter(*zip(*[[*p] for p in points])) ax2.scatter(*zip(*[[*p.to_polar()] for p in points])) Out[18]:
<matplotlib.collections.PathCollection at 0x7f076221da50>In\u00a0[19]: Copied!
seq = picea.SequenceCollection.from_fasta(filename='./data/HCT.fasta')\n_msa = seq.align()\nmsa = _msa._collection\nmsa.shape\nseq = picea.SequenceCollection.from_fasta(filename='./data/HCT.fasta') _msa = seq.align() msa = _msa._collection msa.shape
\n---------------------------------------------------------------------------\nFileNotFoundError Traceback (most recent call last)\nCell In[19], line 2\n 1 seq = picea.SequenceCollection.from_fasta(filename='./data/HCT.fasta')\n----> 2 _msa = seq.align()\n 3 msa = _msa._collection\n 4 msa.shape\n\nFile ~/work/picea/picea/docs/examples/../../picea/sequence.py:1518, in SequenceCollection.align(self, method, method_kwargs)\n 1516 fasta = self.to_fasta()\n 1517 command = [method, *chain(*method_kwargs.items()), \"-\"]\n-> 1518 process = Popen(command, stdin=PIPE, stdout=PIPE, stderr=PIPE)\n 1519 stdout, _ = process.communicate(input=fasta.encode())\n 1520 aligned_fasta = stdout.decode().strip()\n\nFile /usr/lib/python3.10/subprocess.py:971, in Popen.__init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, user, group, extra_groups, encoding, errors, text, umask, pipesize)\n 967 if self.text_mode:\n 968 self.stderr = io.TextIOWrapper(self.stderr,\n 969 encoding=encoding, errors=errors)\n--> 971 self._execute_child(args, executable, preexec_fn, close_fds,\n 972 pass_fds, cwd, env,\n 973 startupinfo, creationflags, shell,\n 974 p2cread, p2cwrite,\n 975 c2pread, c2pwrite,\n 976 errread, errwrite,\n 977 restore_signals,\n 978 gid, gids, uid, umask,\n 979 start_new_session)\n 980 except:\n 981 # Cleanup if the child failed starting.\n 982 for f in filter(None, (self.stdin, self.stdout, self.stderr)):\n\nFile /usr/lib/python3.10/subprocess.py:1863, in Popen._execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, restore_signals, gid, gids, uid, umask, start_new_session)\n 1861 if errno_num != 0:\n 1862 err_msg = os.strerror(errno_num)\n-> 1863 raise child_exception_type(errno_num, err_msg, err_filename)\n 1864 raise 
child_exception_type(err_msg)\n\nFileNotFoundError: [Errno 2] No such file or directory: 'mafft'In\u00a0[20]: Copied!
import numpy as np\nnp.sum(v_equals(msa[...,None], msa.T[None,...]),axis=1)\nimport numpy as np np.sum(v_equals(msa[...,None], msa.T[None,...]),axis=1)
\n---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\nCell In[20], line 2\n 1 import numpy as np\n----> 2 np.sum(v_equals(msa[...,None], msa.T[None,...]),axis=1)\n\nNameError: name 'v_equals' is not definedIn\u00a0[21]: Copied!
np.sum(np.equal(msa[...,np.newaxis], msa.T[np.newaxis,...]),axis=1)\nnp.sum(np.equal(msa[...,np.newaxis], msa.T[np.newaxis,...]),axis=1)
\n---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\nCell In[21], line 1\n----> 1 np.sum(np.equal(msa[...,np.newaxis], msa.T[np.newaxis,...]),axis=1)\n\nNameError: name 'msa' is not definedIn\u00a0[22]: Copied!
np.equal(msa[...,np.newaxis], msa.T[np.newaxis,...]).shape\nnp.equal(msa[...,np.newaxis], msa.T[np.newaxis,...]).shape
\n---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\nCell In[22], line 1\n----> 1 np.equal(msa[...,np.newaxis], msa.T[np.newaxis,...]).shape\n\nNameError: name 'msa' is not definedIn\u00a0[23]: Copied!
msa[...,np.newaxis].shape, msa.T[np.newaxis,...].shape\nmsa[...,np.newaxis].shape, msa.T[np.newaxis,...].shape
\n---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\nCell In[23], line 1\n----> 1 msa[...,np.newaxis].shape, msa.T[np.newaxis,...].shape\n\nNameError: name 'msa' is not definedIn\u00a0[24]: Copied!
#a = np.random.rand(3,4)\n#b = a.T\n\na = msa\nb = msa.T\n\ndef equals(x,y):\n return x == y\n\ndef lt(x,y):\n return x<y\n\ndef subst(x,y):\n #print(x,y)\n return substitution_scores[x][y]\n\nv_equals = np.vectorize(equals)\nv_lt = np.vectorize(lt)\nv_subst = np.vectorize(subst)\n\nnp.sum(v_subst(a[...,None], b[None,...]), axis=1)\n#a = np.random.rand(3,4) #b = a.T a = msa b = msa.T def equals(x,y): return x == y def lt(x,y): return x
\n---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\nCell In[24], line 4\n 1 #a = np.random.rand(3,4)\n 2 #b = a.T\n----> 4 a = msa\n 5 b = msa.T\n 7 def equals(x,y):\n\nNameError: name 'msa' is not definedIn\u00a0[25]: Copied!
blosum62_str = \"\"\"\n# Matrix made by matblas from blosum62.iij\n# * column uses minimum score\n# BLOSUM Clustered Scoring Matrix in 1/2 Bit Units\n# Blocks Database = /data/blocks_5.0/blocks.dat\n# Cluster Percentage: >= 62\n# Entropy = 0.6979, Expected = -0.5209\n A R N D C Q E G H I L K M F P S T W Y V B Z X *\nA 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4 \nR -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4 \nN -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4 \nD -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4 \nC 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4 \nQ -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4 \nE -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 \nG 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4 \nH -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4 \nI -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4 \nL -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4 \nK -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4 \nM -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4 \nF -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4 \nP -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4 \nS 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4 \nT 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4 \nW -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4 \nY -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4 \nV 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4 \nB -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4 \nZ -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 \nX 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4 \n* -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 
1\n\"\"\"\nblosum62_str = \"\"\" # Matrix made by matblas from blosum62.iij # * column uses minimum score # BLOSUM Clustered Scoring Matrix in 1/2 Bit Units # Blocks Database = /data/blocks_5.0/blocks.dat # Cluster Percentage: >= 62 # Entropy = 0.6979, Expected = -0.5209 A R N D C Q E G H I L K M F P S T W Y V B Z X * A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4 R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4 N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4 D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4 C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4 Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4 E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4 H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4 I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4 L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4 K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4 M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4 F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4 P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4 S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4 T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4 W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4 Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4 V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4 B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4 Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4 * -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1 \"\"\" In\u00a0[26]: Copied!
lines = (line for line in blosum62_str.split('\\n') if line)\nmatrix_lines = (line.strip().split() for line in lines if line[0] != '#')\n\nsubstitution_scores = defaultdict(lambda: defaultdict(int))\nfor idx,matrix_line in enumerate(matrix_lines):\n if idx == 0:\n aas = matrix_line\n encoded_aas = np.array([*''.join(aas).encode()], dtype=np.uint8)\n aa_encoding = dict(zip(aas, encoded_aas))\n continue\n [aa,*scores] = matrix_line\n substitution_scores[aa_encoding[aa]].update(dict(zip(encoded_aas, scores)))\nsubstitution_scores.keys()\nlines = (line for line in blosum62_str.split('\\n') if line) matrix_lines = (line.strip().split() for line in lines if line[0] != '#') substitution_scores = defaultdict(lambda: defaultdict(int)) for idx,matrix_line in enumerate(matrix_lines): if idx == 0: aas = matrix_line encoded_aas = np.array([*''.join(aas).encode()], dtype=np.uint8) aa_encoding = dict(zip(aas, encoded_aas)) continue [aa,*scores] = matrix_line substitution_scores[aa_encoding[aa]].update(dict(zip(encoded_aas, scores))) substitution_scores.keys()
\n---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\nCell In[26], line 4\n 1 lines = (line for line in blosum62_str.split('\\n') if line)\n 2 matrix_lines = (line.strip().split() for line in lines if line[0] != '#')\n----> 4 substitution_scores = defaultdict(lambda: defaultdict(int))\n 5 for idx,matrix_line in enumerate(matrix_lines):\n 6 if idx == 0:\n\nNameError: name 'defaultdict' is not definedIn\u00a0[27]: Copied!
d = defaultdict(lambda: defaultdict(int))\nd[0].update(dict(a=1))\nd\nd = defaultdict(lambda: defaultdict(int)) d[0].update(dict(a=1)) d
\n---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\nCell In[27], line 1\n----> 1 d = defaultdict(lambda: defaultdict(int))\n 2 d[0].update(dict(a=1))\n 3 d\n\nNameError: name 'defaultdict' is not definedIn\u00a0[28]: Copied!
aa_encoding\naa_encoding
\n---------------------------------------------------------------------------\nNameError Traceback (most recent call last)\nCell In[28], line 1\n----> 1 aa_encoding\n\nNameError: name 'aa_encoding' is not definedIn\u00a0[29]: Copied!
np.array([45],dtype=np.uint8).view('S1')[0].decode()\nnp.array([45],dtype=np.uint8).view('S1')[0].decode() Out[29]:
'-'"}]} \ No newline at end of file