From 115a705581c2a5abded769c2f697345f4abfb395 Mon Sep 17 00:00:00 2001 From: Yves Gaetan Nana Teukam <56451020+yvesnana@users.noreply.github.com> Date: Fri, 6 Sep 2024 18:38:04 +0200 Subject: [PATCH] feat: using enzeptional via pypi. (#252) Signed-off-by: nanayves --- examples/enzeptional/data.csv | 106 --- examples/enzeptional/example_enzeptional.py | 165 +++-- requirements.txt | 1 + setup.cfg | 4 + src/gt4sd/frameworks/enzeptional/__init__.py | 16 +- src/gt4sd/frameworks/enzeptional/core.py | 669 ------------------ .../frameworks/enzeptional/processing.py | 586 --------------- .../frameworks/enzeptional/tests/test_core.py | 70 +- .../enzeptional/tests/test_processing.py | 117 ++- .../frameworks/gflownet/ml/models/mxmnet.py | 2 +- 10 files changed, 267 insertions(+), 1469 deletions(-) delete mode 100644 examples/enzeptional/data.csv delete mode 100644 src/gt4sd/frameworks/enzeptional/core.py delete mode 100644 src/gt4sd/frameworks/enzeptional/processing.py diff --git a/examples/enzeptional/data.csv b/examples/enzeptional/data.csv deleted file mode 100644 index 66f082648..000000000 --- a/examples/enzeptional/data.csv +++ /dev/null @@ -1,106 +0,0 @@ -substrates,products,sequences,intervals -NCC(=O)O,CC(=O)C(C(=O)[O-])N,MRGEFYQQLTNDLETARAEGLFKEERIITSAQQADITVADGSHVINFCANNYLGLANHPDLIAAAKAGMDSHGFGMASVRFICGTQDSHKELEQKLAAFLGMEDAILYSSCFDANGGLFETLLGAEDAIISDALNHASIIDGVRLCKAKRYRYANNDMQELEARLKEAREAGARHVLIATDGVFSMDGVIANLKGVCDLADKYDALVMVDDSHAVGFVGENGRGSHEYCDVMGRVDIITGTLGKALGGASGGYTAARKEVVEWLRQRSRPYLFSNSLAPAIVAASIKVLEMVEAGSELRDRLWANARQFREQMSAAGFTLAGADHAIIPVMLGDAVVAQKFARELQKEGIYVTGFFYPVVPKGQARIRTQMSAAHTPEQITRAVEAFTRIGKQLGVIA,"[(50, 52), (77, 80), (92, 92), (96, 96), (107, 107), (109, 110), (113, 116), (136, 139), (141, 141), (180, 184), (187, 188), (209, 209), (214, 215), (217, 218), (223, 226), (238, 240), (245, 253), (269, 273), (276, 277), (281, 281), (285, 285), (325, 325), (327, 327), (368, 368), (370, 370)]" 
-CC(=CCC/C(=C/CC/C(=C/COP(=O)(O)OP(=O)(O)O)/C)/C)C,CC(=CCC[C@@]1([C@H]2CC[C@H](C2)C1=C)C)C,MDSSTATAMTAPFIDPTDHVNLKTDTDASENRRMGNYKPSIWNYDFLQSLATHHNIVEERHLKLAEKLKGQVKFMFGAPMEPLAKLELVDVVQRLGLNHLFETEIKEALFSIYKDGSNGWWFGHLHATSLRFRLLRQCGLFIPQDVFKTFQNKTGEFDMKLCDNVKGLLSLYEASYLGWKGENILDEAKAFTTKCLKSAWENISEKWLAKRVKHALALPLHWRVPRIEARWFIEAYEQEANMNPTLLKLAKLDFNMVQSIHQKEIGELARWWVTTGLDKLAFARNNLLQSYMWSCAIASDPKFKLARETIVEIGSVLTVVDDGYDVYGSIDELDLYTSSVERWSCVEIDKLPNTLKLIFMSMFNKTNEVGLRVQHERGYNSIPTFIKAWVEQCKSYQKEARWFHGGHTPPLEEYSLNGLVSIGFPLLLITGYVAIAENEAALDKVHPLPDLLHYSSLLSRLINDIGTSPDEMARGDNLKSIHCYMNETGASEEVAREHIKGVIEENWKILNQCCFDQSQFQEPFITFNLNSVRGSHFFYEFGDGFGVTDSWTKVDMKSVLIDPIPLGEE,"[(44, 44), (281, 283), (285, 286), (317, 320), (322, 324), (326, 327), (396, 396), (399, 399), (414, 414), (418, 419), (421, 422), (456, 459), (461, 462), (464, 466), (468, 468), (477, 477), (479, 479), (481, 481), (496, 496), (539, 539), (546, 546), (548, 548)]" -CC(=CCOP(=O)(O)OP(=O)(O)O)C,O=P([O-])([O-])OP(=O)([O-])[O-],MTADELVFFVNGKKVVEKNADPETTLLVYLRRKLGLCGTKLGCGEGGCGACTVMISKYDRLQNKIVHFSVNACLAPICSLHHVAVTTVEGIGNTQKLHPVQERIARSHGSQCGFCTPGIVMSMYTLLRNQPEPTVEEIENAFQGNLCRCTGYRPILQGFRTFAKDGGCCGGSGNNPNCCMNQTKDQTVSLSPSLFNPEDFKPLDPTQEPIFPPELLRLKDTPQKKLRFEGERVTWIQASTMEELLDLKAQHPDAKLVVGNTEIGIEMKFKNMLFPLIVCPAWIPELNSVVHGPEGISFGASCPLSLVESVLAEEIAKLPEQKTEVFRGVMEQLRWFAGKQVKSVASIGGNIITASPISDLNPVFMASGAKLTLVSRGTRRTVRMDHTFFPGYRKTLLRPEEILLSIEIPYSKEGEFFSAFKQASRREDDIAKVTSGMRVLFKPGTIEVQELSLCFGGMADRTISALKTTPKQLSKSWNEELLQSVCAGLAEELQLAPDAPGGMVEFRRTLTLSFFFKFYLTVLQKLGRADLEDMCGKLDPTFASATLLFQKDPPANVQLFQEVPKDQSEEDMVGRPLPHLAANMQASGEAVYCDDIPRYENELSLRLVTSTRAHAKITSIDTSEAKKVPGFVCFLTAEDVPNSNATGLFNDETVFAKDEVTCVGHIIGAVVADTPEHAQRAARGVKITYEDLPAIITIQDAINNNSFYGSEIKIEKGDLKKGFSEADNVVSGELYIGGQEHFYLETNCTIAVPKGEAGEMELFVSTQNTMKTQSFVAKMLGVPDNRIVVRVKRMGGGFGGKETRSTVVSTALALAAHKTGRPVRCMLDRDEDMLITGGRHPFLAKYKVGFMKTGTVVALEVAHFSNGGNTEDLSRSIMERALFHMDNAYKIPNIRGTGRICKTNLPSNTAFRGFGGPQGMLIAEYWMSEVAITCGLPAEEVRRKNMYKEGDLTHFNQKLEGFTLPRCWDECIASSQYLARKREVEKFNRENCWKKRGLCIIPTKFGISFTLPF
LNQGGALVHVYTDGSVLLTHGGTEMGQGLHTKMVQVASRALKIPTSKIHISETSTNTVPNTSPTAASASADLNGQGVYEACQTILKRLEPFKKKKPTGPWEAWVMDAYTSAVSLSATGFYKTPNLGYSFETNSGNPFHYFSYGVACSEVEIDCLTGDHKNLRTDIVMDVGSSLNPAIDIGQVEGAFVQGLGLFTMEELHYSPEGSLHTRGPSTYKIPAFGSIPIEFRVSLLRDCPNKRAIYASKAVGEPPLFLASSIFFAIKDAIRAARAQHGDNAKQLFQLDSPATPEKIRNACVDQFTTLCVTGVPENCKSWSVRI,"[(25, 25), (27, 27), (39, 42), (44, 47), (49, 50), (52, 53), (70, 72), (74, 76), (110, 111), (113, 114), (116, 119), (143, 146), (148, 148), (150, 152), (155, 155), (235, 235), (244, 244), (248, 248), (254, 255), (264, 270), (274, 274), (277, 281), (286, 286), (298, 304), (307, 307), (332, 335), (337, 338), (341, 342), (344, 345), (351, 355), (357, 358), (360, 364), (371, 373), (393, 393), (397, 397), (401, 402), (404, 406), (408, 408), (419, 420), (422, 423), (429, 431), (433, 434), (514, 514), (585, 585), (592, 592), (741, 744), (765, 766), (768, 769), (793, 797), (799, 802), (829, 829), (839, 839), (910, 911), (913, 914), (917, 917), (1007, 1009), (1036, 1036), (1038, 1039), (1075, 1078), (1080, 1081), (1193, 1194), (1197, 1198), (1201, 1201), (1224, 1224), (1229, 1230), (1260, 1261), (1264, 1264)]" -Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,O=C[C@H](O)[C@@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,MLDDRARMEAAKKEKVEQILAEFQLQEEDLKKVMRRMQKEMDRGLRLETHEEASVKMLPTYVRSTPEGSEVGDFLSLDLGGTNFRVMLVKVGEGEEGQWSVKTKHQMYSIPEDAMTGTAEMLFDYISECISDFLDKHQMKHKKLPLGFTFSFPVRHEDIDKGILLNWTKGFKASGAEGNNVVGLLRDAIKRRGDFEMDVVAMVNDTVATMISCYYEDHQCEVGMIVGTGCNACYMEEMQNVELVEGDEGRMCVNTEWGAFGDSGELDEFLLEYDRLVDESSANPGQQLYEKLIGGKYMGELVRLVLLRLVDENLLFHGEASEQLRTRGAFETRFVSQVESDTGDRKQIYNILSTLGLRPSTTDCDIVRRACESVSTRAAHMCSAGLAGVINRMRESRSEDVMRITVGVDGSVYKLHPSFKERFHASVRRLTPSCEITFIESEEGSGRGAALVSAVACKKACMLGQ,"[(76, 77), (84, 85), (107, 110), (115, 115), (122, 123), (126, 126), (148, 152), (167, 167), (169, 171), (205, 205), (224, 224), (226, 227), (229, 230), (290, 290), (293, 294), (297, 300), (302, 302), (329, 331), (337, 340), (351, 351), (355, 355), (371, 371), (375, 375), 
(408, 410), (416, 417), (419, 420), (423, 424), (438, 438), (440, 442), (445, 445)]" -NCCCC[C@H](N)C(=O)O,CC(C)(N)CO,MSHEELNDQLRVRREKLKKIEELGVDPFGKRFERTHKAEELFELYGDLSKEELEEQQIEVAVAGRIMTKRGMGKAGFAHIQDVTGQIQIYVRQDDVGEQQYELFKISDLGDIVGVRGTMFKTKVGELSIKVSSYEFLTKALRPLPEKYHGLKDIEQRYRQRYLDLIMNPESKKTFITRSLIIQSMRRYLDSHGYLEVETPMMHAVAGGAAARPFITHHNALDMTLYMRIAIELHLKRLIVGGLEKVYEIGRVFRNEGISTRHNPEFTMLELYEAYADFRDIMKLTENLIAHIATEVLGTTKIQYGEHLVDLTPEWRRLHMVDAIKEYVGVDFWRQMSDEEARELAKEHGVEVAPHMTFGHIVNEFFEQKVEDKLIQPTFIYGHPVEISPLAKKNPDDPRFTDRFELFIVGREHANAFTELNDPIDQRQRFEEQLKEREQGNDEAHEMDEDFLEALEYGMPPTGGLGIGVDRLVMLLTNSPSIRDVLLFPQMRHK,"[(284, 284), (288, 288), (314, 314), (376, 378), (403, 404), (406, 408), (410, 411), (413, 414), (465, 467), (470, 471), (474, 474)]" -N,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,MAKYTREDIEKLVKEENVKYIRLQFTDILGTIKNVEIPVSQLGKALDNKVMFDGSSIEGFVRIEESDMYLYPDLNTFVIFPWTAEKGKVARFICDIYNPDGTPFEGDPRNNLKRILKEMEDLGFSDFNLGPEPEFFLFKLDEKGEPTLELNDKGGYFDLAPTDLGENCRRDIVLELEEMGFEIEASHHEVAPGQHEIDFKYAGAVRSCDDIQTFKLVVKTIARKHGLHATFMPKPLFGVNGSGMHCNLSLFKNGVNAFFDENADLQLSETAKHFIAGIVKHATSFTAVTNPTVNSYKRLVPGYEAPCYVAWSAQNRSPLIRIPASRGISTRVEVRSVDPAANPYLALSVLLAAGLDGIKNKLEAPAPIDRNIYVMSKEERMENGIVDLPATLAEALEEFKSNEVMVKALGEHLFEHFIEAKEIEWDMFRTQVHPWEREQYMSQY,"[(126, 131), (133, 133), (135, 136), (154, 157), (169, 169), (182, 183), (185, 188), (190, 191), (194, 195), (197, 201), (214, 214), (230, 231), (233, 235), (238, 239), (242, 244), (246, 248), (250, 251), (256, 258), (290, 290), (294, 297), (299, 300), (302, 303), (305, 306), (311, 315), (317, 320), (322, 324), (328, 328), (330, 332), (334, 334), (336, 339), (373, 373)]" 
-Oc1ccccc1,C1=NC(=C2C(=N1)N(C=N2)[C@H]3[C@@H]([C@@H]([C@H](O3)COP(=O)(O)O)OP(=O)(O)O)O)N,MELIQDTSRPPLEYVKGVPLIKYFAEALGPLQSFQARPDDLLISTYPKSGTTWVSQILDMIYQGGDLEKCHRAPIFMRVPFLEFKAPGIPSGMETLKDTPAPRLLKTHLPLALLPQTLLDQKVKVVYVARNAKDVAVSYYHFYHMAKVHPEPGTWDSFLEKFMVGEVSYGSWYQHVQEWWELSRTHPVLYLFYEDMKENPKREIQKILEFVGRSLPEETVDFVVQHTSFKEMKKNPMTNYTTVPQEFMDHSISPFMRKGMAGDWKTTFTVAQNERFDADYAEKMAGCSLSFRSEL,"[(43, 43), (45, 47), (54, 57), (60, 60), (75, 75), (106, 106), (108, 108), (128, 129), (131, 132), (134, 137), (139, 142), (145, 145), (172, 172), (191, 192), (194, 197), (200, 200), (204, 204), (223, 226), (233, 235), (238, 240), (248, 248), (250, 250), (253, 254), (260, 261), (263, 263), (266, 266), (291, 291)]" -C1=C2C(=NC=N1)N(C=N2)[C@H]3[C@@H]([C@@H]([C@H](O3)CO)O)O,O=P([O-])([O-])O[C@H]1O[C@H](CO)[C@@H](O)[C@H]1O,MANGYTYEDYQDTAKWLLSHTEQRPQVAVICGSGLGGLVNKLTQAQTFDYSEIPNFPESTVPGHAGRLVFGILNGRACVMMQGRFHMYEGYPFWKVTFPVRVFRLLGVETLVVTNAAGGLNPNFEVGDIMLIRDHINLPGFSGENPLRGPNEERFGVRFPAMSDAYDRDMRQKAHSTWKQMGEQRELQEGTYVMLGGPNFETVAECRLLRNLGADAVGMSTVPEVIVARHCGLRVFGFSLITNKVIMDYESQGKANHEEVLEAGKQAAQKLEQFVSLLMASIPVSGHTG,"[(30, 32), (34, 35), (50, 50), (56, 57), (81, 83), (87, 87), (89, 91), (95, 96), (99, 99), (113, 115), (117, 119), (126, 126), (192, 192), (194, 200), (202, 203), (205, 206), (209, 209), (217, 218), (221, 224), (239, 242), (244, 245), (255, 256), (258, 262), (271, 271)]" -O,CCC(=O)SCCNC(=O)CCNC(=O)[C@@H](C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@@H]1[C@H]([C@H]([C@@H](O1)N2C=NC3=C(N=CN=C32)N)O)OP(=O)(O)O)O,MAEIRKLKNYINGEWVESKTDQYEDVVNPATKEVLCQVPISTKEDIDYAAQTAAEAFKTWSKVAVPRRARILFNFQQLLSQHKEELAHLITIENGKNTKEALGEVGRGIENVEFAAGAPSLMMGDSLASIATDVEAANYRYPIGVVGGIAPFNFPMMVPCWMFPMAIALGNTFILKPSERTPLLTEKLVELFEKAGLPKGVFNVVYGAHDVVNGILEHPEIKAISFVGSKPVGEYVYKKGSENLKRVQSLTGAKNHTIVLNDANLEDTVTNIVGAAFGSAGERCMACAVVTVEEGIADEFMAKLQEKVADIKIGNGLDDGVFLGPVIREDNKKRTLSYIEKGLEEGARLVCDGRENVSDDGYFVGPTIFDNVTTEMTIWKDEIFAPVLSVIRVKNLKEAIEIANKSEFANGACLFTSNSNAIRYFRENIDAGMLGINLGVPAPMAFFPFSGWKSSFFGTLHANGKDSVDFYTRKKVVTARYPAPDFN,"[(24, 24), 
(26, 26), (38, 38), (93, 93), (148, 152), (174, 175), (181, 182), (185, 185), (204, 208), (211, 212), (215, 215), (252, 254), (283, 284), (327, 328), (331, 331), (334, 334), (338, 338), (380, 381), (383, 384), (408, 408)]" -Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,CC(C)(COP(=O)(O)OP(=O)(O)OCC1C(C(C(O1)N2C=NC3=C(N=CN=C32)N)O)OP(=O)(O)O)C(C(=O)NCCC(=O)NCCSC(=O)CCCl)O,MPSTLTINGKAPIVAYAELIAARIVNALAPNSIAIKLVDDKKAPAAKLDDATEDVFNKITSKFAAIFDNGDKEQVAKWVNLAQKELVIKNFAKLSQSLETLDSQLNLRTFILGGLKYSAADVACWGALRSNGMCGSIIKNKVDVNVSRWYTLLEMDPIFGEAHDFLSKSLLELKKSANVGKKKETHKANFEIDLPDAKMGEVVTRFPPEPSGYLHIGHAKAALLNQYFAQAYKGKLIIRFDDTNPSKEKEEFQDSILEDLDLLGIKGDRITYSSDYFQEMYDYCVQMIKDGKAYCDDTPTEKMREERMDGVASARRDRSVEENLRIFTEEMKNGTEEGLKNCVRAKIDYKALNKTLRDPVIYRCNLTPHHRTGSTWKIYPTYDFCVPIVDAIEGVTHALRTIEYRDRNAQYDWMLQALRLRKVHIWDFARINFVRTLLSKRKLQWMVDKDLVGNWDDPRFPTVRGVRRRGMTVEGLRNFVLSQGPSRNVINLEWNLIWAFNKKVIDPIAPRHTAIVNPVKIHLEGSEAPQEPKIEMKPKHKKNPAVGEKKVIYYKDIVVDKDDADVINVDEEVTLMDWGNVIITKKNDDGSMVAKLNLEGDFKKTKHKLTWLADTKDVVPVDLVDFDHLITKDRLEEDESFEDFLTPQTEFHTDAIADLNVKDMKIGDIIQFERKGYYRLDALPKDGKPYVFFTIPDGKSVNKYGAKK,"[(87, 87), (132, 134)]" -C[C@]12CCCC(C1CCC34C2C[C@@H]5C(C3)C5(C4)C)(C)C,O,MKNRIPVVLLACGSFNPITNMHLRLFEVARDHLHQTGRYQVIEGIISPVNDSYGKKDLVASHHRVAMARLALQTSDWIRVDPWESEQAQWMETVKVLRHHHRELLRSSAQMDGPDPSKTPSASAALPELKLLCGADVLKTFQTPNLWKDTHIQEIVEKFGLVCVSRSGHDPERYISDSPILQQFQHNIHLAREPVLNEISATYVRKALGQGQSVKYLLPEAVITYIRDQGLYINDGSWKGKGKTG,"[(11, 13), (16, 21), (23, 26), (46, 50), (52, 55), (57, 58), (64, 64), (67, 67), (88, 89), (91, 92), (94, 97), (131, 133), (135, 135), (137, 138), (140, 145), (148, 149), (152, 152), (155, 155), (163, 165), (167, 169), (174, 174), (177, 180), (195, 196), (198, 201), (206, 209), (212, 212), (214, 214), (217, 218), (232, 232)]" -N#CC(O)c1ccccc1,N,MAPKAVLVGLPGSGKSTIGRRLAKALGVGLLDTDVAIEQRTGRSIADIFATDGEQEFRRIEEDVVRAALADHDGVLSLGGGAVTSPGVRAALAGHTVVYLEISAAEGVRRTGGNTVRPLLAGPDRAEKYRALMAKRAPLYRRVATMRVDTNRRNPGAVVRHILSRLQVPSPSEAAT,"[(7, 11), (18, 22), (30, 30), (77, 78), 
(80, 80), (100, 100), (102, 102), (107, 107), (110, 111), (115, 116), (118, 120), (122, 124), (127, 127), (129, 129), (148, 152), (154, 155), (157, 158), (161, 161)]" -C[C@@H](C(=O)N[C@H](CC(=O)[O-])C(=O)[O-])[NH3+],C[C@@H](C(=O)N[C@@H](CC(=O)O)C(=O)O)N,MKIIRIETSRIAVPLTKPFKTALRTVYTAESVIVRITYDSGAVGWGEAPPTLVITGDSMDSIESAIHHVLKPALLGKSLAGYEAILHDIQHLLTGNMSAKAAVEMALYDGWAQMCGLPLYQMLGGYRDTLETDYTVSVNSPEEMAADAENYLKQGFQTLKIKVGKDDIATDIARIQEIRKRVGSAVKLRLDANQGWRPKEAVTAIRKMEDAGLGIELVEQPVHKDDLAGLKKVTDATDTPIMADESVFTPRQAFEVLQTRSADLINIKLMKAGGISGAEKINAMAEACGVECMVGSMIETKLGITAAAHFAASKRNITRFDFDAPLMLKTDVFNGGITYSGSTISMPGKPGLGIIGAALLKGEKEQ,"[(160, 164), (189, 190), (192, 193), (217, 218), (220, 221), (242, 243), (245, 248), (266, 266), (268, 268), (271, 271), (293, 293), (321, 321)]" -C1=CN(C(=O)N=C1N)[C@H]2[C@@H]([C@@H]([C@H](O2)COP(=O)(O)OP(=O)(O)OP(=O)(O)O)O)O,O=C1CCCN1CCl,MNGDVQSVIRGYLERAQVAKTMSDAGRWNEAGDLLRQLMTDVKSCKISASNRDEHDARNTFLRALEANLKLVQQNVRDEDDLHEAMTRQSGSPEPPADPDVWSKPSPPLPSSSKFGATKKGVGAAGPRPREISKSTSSMSTNPADVKPANPTQGILPQNSAGDSFDASAYDAYIVQAVRGTMATNTENTMSLDDIIGMHDVKQVLHEAVTLPLLVPEFFQGLRSPWKAMVLAGPPGTGKTLIARAIASESSSTFFTVSSTDLSSKWRGDSEKIVRLLFELARFYAPSIIFIDEIDTLGGQRGNSGEHEASRRVKSEFLVQMDGSQNKFDSRRVFVLAATNIPWELDEALRRRFEKRIFIPLPDIDARKKLIEKSMEGTPKSDEINYDDLAARTEGFSGADVVSLCRTAAINVLRRYDTKSLRGGELTAAMESLKAELVRNIDFEAALQAVSPSAGPDTMLKCKEWCDSFGAM,"[(228, 228), (231, 232), (241, 245), (254, 254), (256, 256), (290, 290), (292, 292), (318, 318), (322, 322), (325, 325), (336, 336), (338, 340), (347, 350), (353, 354), (356, 356), (358, 361), (397, 399), (453, 454), (462, 462)]" 
-CC(C)c1ccc(CO)cc1,NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1,MDFTSLETTTFEEVVIALGSNVGNRMNNFKEALRLMKDYGISVTRHSCLYETEPVHVTDQPRFLNAAIRGVTKLKPHELLNVLKKIEKEMGREENGLRYGPRPLDLDILFYGKHKIISDKLIIPHERIWERPFVLAPLVDLLGTEDIDNDKIVAYWHSLSMHSGGIFQAWERLGGESLLGKDGIIQRVIPIGDHLWDFSKKTYVMGILNLTPDSFSDGGKFQSVDTAVSRVRSMISEGVDIIDIGAQSTRPMASRISSQEEIDRLIPVLKVVRGMAEMKGKLISVDTFNSEVALEAIRNGADILNDVSGGSLDENMHKVVADSDVPYMIMHMRGDPCTMQNKENLEYNEICKDVATELYERVREAELSGIPAWRIMIDPGIGFSKGIDHNLDIVMELPKIREEMAKKSIGLSHAPILIGPSRKRFLGDICGRPEASERDAATVACVTAGILKGANIIRVHNVRDNVDAARLCDAMMTKRFKNVD,"[(204, 208), (210, 212), (230, 230), (243, 246), (284, 285), (287, 288), (303, 304), (306, 307), (328, 331), (358, 358), (376, 377), (379, 383), (417, 422), (424, 427), (438, 439), (442, 442), (446, 446), (456, 457), (461, 462), (465, 465)]" -C([C@H]([C@@H]([C@H]([C@H](C(=O)O)O)O)O)O)O,O,MTLPKIKQVRAWFTGGATAEKGAGGGDYHDQGANHWIDDHIATPMSKYRDYEQSRQSFGINVLGTLVVEVEAENGQTGFAVSTAGEMGCFIVEKHLNRFIEGKCVSDIKLIHDQMLSATLYYSGSGGLVMNTISCVDLALWDLFGKVVGLPVYKLLGGAVRDEIQFYATGARPDLAKEMGFIGGKMPTHWGPHDGDAGIRKDAAMVADMREKCGEDFWLMLDCWMSQDVNYATKLAHACAPYNLKWIEECLPPQQYESYRELKRNAPVGMMVTSGEHHGTLQSFRTLSETGIDIMQPDVGWCGGLTTLVEIAAIAKSRGQLVVPHGSSVYSHHAVITFTNTPFSEFLMTSPDCSTMRPQFDPILLNEPVPVNGRIHKSVLDKPGFGVELNRDCNLKRPYSH,"[(171, 171), (186, 188), (190, 190), (210, 210), (214, 214), (220, 221), (223, 225), (243, 244), (246, 247), (249, 251), (263, 263), (266, 267), (270, 270), (274, 275), (277, 278), (296, 298), (347, 347)]" 
-O,O=P([O-])([O-])OP(=O)([O-])[O-],MTTNYIFVTGGVVSSLGKGIAAASLAAILEARGLNVTIMKLDPYINVDPGTMSPIQHGEVFVTEDGAETDLDLGHYERFIRTKMSRRNNFTTGRIYSDVLRKERRGDYLGATVQVIPHITNAIKERVLEGGEGHDVVLVEIGGTVGDIESLPFLEAIRQMAVEIGREHTLFMHLTLVPYMAASGEVKTKPTQHSVKELLSIGIQPDILICRSDRAVPANERAKIALFCNVPEKAVISLKDVDSIYKIPGLLKSQGLDDYICKRFSLNCPEANLSEWEQVIFEEANPVSEVTIGMVGKYIELPDAYKSVIEALKHGGLKNRVSVNIKLIDSQDVETRGVEILKGLDAILVPGGFGYRGVEGMITTARFARENNIPYLGICLGMQVALIDYARHVANMENANSTEFVPDCKYPVVALITEWRDENGNVEVRSEKSDLGGTMRLGAQQCQLVDDSLVRQLYNAPTIVERHRHRYEVNNMLLKQIEDAGLRVAGRSGDDQLVEIIEVPNHPWFVACQFHPEFTSTPRDGHPLFAGFVKAASEFQKRQAK,"[(8, 13), (21, 25), (39, 41), (43, 43), (70, 71), (73, 76), (79, 79), (92, 92), (138, 139), (141, 146), (150, 153), (174, 178), (180, 180), (183, 183), (185, 186), (193, 196), (208, 208), (210, 215), (218, 222), (224, 229), (237, 238), (242, 244), (246, 247), (250, 250), (297, 298), (349, 351), (353, 355), (358, 358), (361, 361), (375, 375), (377, 379), (384, 387), (401, 402), (404, 405), (413, 413), (415, 415), (438, 440), (468, 469), (471, 473), (498, 501), (511, 513)]" -CSCCC(=O)/C(=C/O)/O,O=C[O-],MVQAWYMDESTADPRKPHRAQPDRPVSLEQLRTLGVLYWKLDADKYENDPELEKIRKMRNYSWMDIITICKDTLPNYEEKIKMFFEEHLHLDEEIRYILEGSGYFDVRDKEDKWIRISMEKGDMITLPAGIYHRFTLDEKNYVKAMRLFVGEPVWTPYNRPADHFDARVQYMSFLEGTA,"[(83, 84), (86, 87), (89, 89), (91, 93), (95, 96), (105, 107), (114, 114), (125, 127), (129, 132), (134, 135), (155, 160), (167, 167)]" -CCCCCCCCCCCCCC(=O)O,O,MNAKPGFTDYIVKDIALADFGRKEISLAETEMPGLMATREEYGPKQPLKGARIAGSLHMTIQTAVLIETLAALGADIRWVSCNIYSTQDHAAAAIAAAGIPVFAVKGETLTEYWDYTAKLFDWHGGGTPNMILDDGGDATMLVHAGYRAEQGDTAFLDKPGSEEEEIFYALVKRLLKEKPKGWFAEIAKNIKGVSEETTTGVHRLYEMANKGTLLFPAINVNDSVTKSKFDNLYGCRESLVDGIRRGTDVMLSGKVAMVAGFGDVGKGSAASLRQAGCRVMVSEVDPICALQAAMEGYEVVTMEDAAPRADIFVTATGNKDIITIEHMRAMKDRAIVCNIGHFDNEIQIASLRNLKWTNIKPQVDEIEFPDKHRIIMLSEGRLVNLGNAMGHPSFVMSASFTNQTLAQIELFANNKDSKYAKKVYVLPKTLDEKVARLHLAKIGVKLTELRKDQADYIGVKQEGPYKSDHYRY,"[(58, 58), (82, 83), (135, 136), (196, 197), (201, 204), (222, 222), (227, 228), (230, 231), 
(233, 236), (239, 240), (243, 243), (261, 264), (266, 270), (282, 283), (285, 286), (289, 290), (300, 301), (314, 318), (320, 322), (338, 339), (343, 346), (383, 384), (386, 392), (429, 429), (433, 433), (460, 460), (465, 466), (468, 470), (472, 473)]" -[C@@H]([C@@H]([C@H](C(=O)O)O)O)([C@@H](C(=O)O)O)O,C([C@@H]([C@H](C(=O)[O-])O)O)C(=O)C(=O)[O-],MALSANSDAVTYAKAANTRTAAETGDRIEWVKLSLAFLPLATPVSDAKVLTGRQKPLTEVAIIIAEIRSRDGFEGVGFSYSKRAGGQGIYAHAKEIADNLLGEDPNDIDKIYTKLLWAGASVGRSGMAVQAISPIDIALWDMKAKRAGLPLAKLLGAHRDSVQCYNTSGGFLHTPLDQVLKNVVISRENGIGGIKLKVGQPNCAEDIRRLTAVREALGDEFPLMVDANQQWDRETAIRMGRKMEQFNLIWIEEPLDAYDIEGHAQLAAALDTPIATGEMLTSFREHEQLILGNASDFVQPDAPRVGGISPFLKIMDLAAKHGRKLAPHFAMEVHLHLSAAYPLEPWLEHFEWLNPLFNEQLELRDGRMWISDRHGLGFTLSEQARRWTQLTCEFGKRP,"[(165, 165), (195, 199), (224, 225), (227, 228), (250, 251), (253, 254), (275, 277), (279, 280), (299, 299), (301, 301), (304, 304), (328, 328)]" -S,O=P([O-])([O-])[O-],MALADISGYLDVLDSVRGFSYLENAREVLRSGEARCLGNPRSEPEYVKALYVIGASRIPVGDGCSHTLEELGVFDISVPGEMVFPSPLDFFERGKPTPLVRSRLQLPNGVRVWLKLEWYNPFSLSVKDRPAVEIISRLSRRVEKGSLVADATSSNFGVALSAVARLYGYRARVYLPGAAEEFGKLLPRLLGAQVIVDPEAPSTVHLLPRVMKDSKNEGFVHVNQFYNDANFEAHMRGTAREIFVQSRRGGLALRGVAGSLGTSGHMSAAAFYLQSVDPSIRAVLVQPAQGDSIPGIRRVETGMLWINMLDISYTLAEVTLEEAMEAVVEVARSDGLVIGPSGGAAVKALAKKAAEGDLEPGDYVVVVPDTGFKYLSLVQNALEGAGDSV,"[(123, 123), (125, 127), (152, 154), (156, 160), (182, 182), (224, 225), (230, 231), (234, 234), (238, 238), (259, 260), (266, 269), (285, 285), (293, 297), (303, 306), (339, 340), (342, 345), (366, 366), (368, 369), (373, 374)]" 
-C([C@@H]1[C@H]([C@@H]([C@H]([C@H](O1)OC[C@@H]2[C@H]([C@@H]([C@H]([C@H](O2)O[C@@H]3[C@H](O[C@@H]([C@@H]([C@H]3O)O)O)CO)O)O)O[C@@H]4[C@@H]([C@H]([C@@H]([C@H](O4)CO)O)O)O)O)O)O)O,O=P(O)(O)O[C@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O,MLDIVELSRLQFALTAMYHFLFVPLTLGMAFLLAIMETVYVLSGKQIYKDMTKFWGKLFGINFALGVATGLTMEFQFGTNWSYYSHYVGDIFGAPLAIEGLMAFFLESTFVGLFFFGWDRLGKVQHMCVTWLVALGSNLSALWILVANGWMQNPIASDFNFETMRMEMVSFSELVLNPVAQVKFVHTVASGYVTGAMFILGISAWYMLKGRDFAFAKRSFAIAASFGMAAVLSVIVLGDESGYEMGDVQKTKLAAIEAEWETQPAPAAFTLFGIPDQEEETNKFAIQIPYALGIIATRSVDTPVIGLKELMVQHEERIRNGMKAYSLLEQLRSGSTDQAVRDQFNSMKKDLGYGLLLKRYTPNVADATEAQIQQATKDSIPRVAPLYFAFRIMVACGFLLLAIIALSFWSVIRNRIGEKKWLLRAALYGIPLPWIAVEAGWFVAEYGRQPWAIGEVLPTAVANSSLTAGDLIFSMVLICGLYTLFLVAELFLMFKFARLGPSSLKTGRYHFEQSSTTTQPAR,"[(14, 18), (20, 23), (65, 66), (69, 70), (73, 73), (182, 185), (187, 190), (233, 234), (237, 239), (389, 392), (394, 397), (436, 436), (439, 440)]" -Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,MKVARFQKIPNGENETMIPVLTSKKASELPVSEVASILQADLQNGLNKCEVSHRRAFHGWNEFDISEDEPLWKKYISQFKNPLIMLLLASAVISVLMHQFDDAVSITVAILIVVTVAFVQEYRSEKSLEELSKLVPPECHCVREGKLEHTLARDLVPGDTVCLSVGDRVPADLRLFEAVDLSIDESSLTGETTPCSKVTAPQPAATNGDLASRSNIAFMGTLVRCGKAKGVVIGTGENSEFGEVFKMMQAEEAPKTPLQKSMDLLGKQLSFYSFGIIGIIMLVGWLLGKDILEMFTISVSLAVAAIPEGLPIVVTVTLALGVMRMVKKRAIVKKLPIVETLGCCNVICSDKTGTLTKNEMTVTHIFTSDGLHAEVTGVGYNQFGEVIVDGDVVHGFYNPAVSRIVEAGCVCNDAVIRNNTLMGKPTEGALIALAMKMGLDGLQQDYIRKAEYPFSSEQKWMAVKCVHRTQQDRPEICFMKGAYEQVIKYCTTYQSKGQTLTLTQQQRDVYQQEKARMGSAGLRVLALASGPELGQLTFLGLVGIIDPPRTGVKEAVTTLIASGVSIKMITGDSQETAVAIASRLGLYSKTSQSVSGEEIDAMDVQQLSQIVPKVAVFYRASPRHKMKIIKSLQKNGSVVAMTGDGVNDAVALKAADIGVAMGQTGTDVCKEAADMILVDDDFQTIMSAIEEGKGIYNNIKNFVRFQLSTSIAALTLISLATLMNFPNPLNAMQILWINIIMDGPPAQSLGVEPVDKDVIRKPPRNWKDSILTKNLILKILVSSIIIVCGTLFVFWRELRDNVITPRDTTMTFTCFVFFDMFNALSSRSQTKSVFEIGLCSNRMFCYAVLGSIMGQLLVIYFPPLQKVFQTESLSILDLLFLLGLTSSVCIVAEIIKKVERSREKIQKHVSSTSSSFLEV,"[(83, 84), (86, 87), (90, 90), (106, 
106), (109, 110), (113, 113), (266, 266), (270, 270), (273, 274), (298, 302), (305, 305), (307, 307), (309, 312), (350, 350), (352, 354), (358, 358), (456, 456), (622, 622), (625, 625), (641, 643), (645, 647), (649, 653), (661, 661), (664, 666), (669, 669), (702, 702), (706, 706), (710, 710), (713, 713), (733, 737), (739, 741), (743, 747)]" -CCC(=O)C(=O)[O-],CCC=O,MRSKRFEALAKRPVNQDGFVKEWIEEGFIAMESPNDPKPSIKIVNGAVTELDGKPVSDFDLIDHFIARYGINLNRAEEVMAMDSVKLANMLCDPNVKRSEIVPLTTAMTPAKIVEVVSHMNVVEMMMAMQKMRARRTPSQQAHVTNVKDNPVQIAADAAEGAWRGFDEQETTVAVARYAPFNAIALLVGSQVGRPGVLTQCSLEEATELKLGMLGHTCYAETISVYGTEPVFTDGDDTPWSKGFLASSYASRGLKMRFTSGSGSEVQMGYAEGKSMLYLEARCIYITKAAGVQGLQNGSVSCIGVPSAVPSGIRAVLAENLICSSLDLECASSNDQTFTHSDMRRTARLLMQFLPGTDFISSGYSAVPNYDNMFAGSNEDAEDFDDYNVIQRDLKVDGGLRPVREEDVIAIRNKAARALQAVFAGMGLPPITDEEVEAATYAHGSKDMPERNIVEDIKFAQEIINKNRNGLEVVKALAQGGFTDVAQDMLNIQKAKLTGDYLHTSAIIVGDGQVLSAVNDVNDYAGPATGYRLQGERWEEIKNIPGALDPNEID,"[(140, 145), (168, 169), (171, 172), (186, 186), (200, 200), (202, 202), (208, 208), (219, 220), (222, 223), (257, 261), (294, 295), (297, 298), (300, 300), (331, 335), (359, 361), (363, 364), (374, 374)]" -CC(=O)N[C@@H](CCC(N)=O)C(=O)O,NC(=O)CC[C@H](N)C(=O)O,MTSKGPEEEHPSVTLFRQYLRIRTVQPKPDYGAAVAFFEETARQLGLGCQKVEVAPGYVVTVLTWPGTNPTLSSILLNSHTDVVPVFKEHWSHDPFEAFKDSEGYIYARGAQDMKCVSIQYLEAVRRLKVEGHRFPRTIHMTFVPDEEVGGHQGMELFVQRPEFHALRAGFALDEGIANPTDAFTVFYSERSPWWVRVTSTGRPGHASRFMEDTAAEKLHKVVNSILAFREKEWQRLQSNPHLKEGSVTSVNLTKLEGGVAYNVIPATMSASFDFRVAPDVDFKAFEEQLQSWCQAAGEGVTLEFAQKWMHPQVTPTDDSNPWWAAFSRVCKDMNLTLEPEIMPAATDNRYIRAVGVPALGFSPMNRTPVLLHDHDERLHEAVFLRGVDIYTRLLPALASVPALPSDS,"[(19, 19), (26, 26), (78, 79), (81, 84), (110, 112), (114, 116), (118, 118), (121, 121), (144, 144), (146, 147), (149, 150), (173, 174), (176, 177), (187, 187)]" 
-[C-]#N,CC(C)(O)C#N,MASLPVSFAKPDKNGVITCKAIMLKEAKLPGMSYADTVQIIDIQVDPPQNVELRVKMLCASVCRTDILTIEGFMAPTQFPKINGHEGVGIIESMGPDTKNFKVGDVIVAPTLGECQVCSSCRSGRTNFCQNYGANESALEPDGTSRFSYIDSDGKKKLLYYKLGCSTWTQYMVVDSNYATKLNEIAPELPPPHGSILSCAFATGYGAVWLDAAVQEGDSVAIFGVGSVGISAVIAAKELKAKQIIVVDRNEYKLKMAMELGATHCINSEKLPEGVTPSQAVRKLTPKEVGVDASIESSGYDVFMNEAMKAAIHGKAKTVITGEGIYENDRIFFDFKDFLFGGNVVGNVTGRVRIHSDFPGLLRKAQEPVIRAGMDKILGYDAATMKCKYEVDIREGTPALLKALEEVENVDCVKLVIKLNDY,"[(61, 62), (64, 67), (83, 84), (86, 87), (109, 109), (113, 114), (116, 117), (119, 120), (122, 124), (126, 128), (130, 133), (162, 162), (167, 168), (195, 198), (200, 204), (349, 349), (353, 353), (414, 414)]" -C[N+](C)(C)CCOC(=O)C1=CC=CC=C1,C[N+](C)(C)CCO,MHSKVTIICIRFLFWFLLLCMLIGKSHTEDDIIIATKNGKVRGMNLTVFGGTVTAFLGIPYAQPPLGRLRFKKPQSLTKWSDIWNATKYANSCCQNIDQSFPGFHGSEMWNPNTDLSEDCLYLNVWIPAPKPKNATVLIWIYGGGFQTGTSSLHVYDGKFLARVERVIVVSMNYRVGALGFLALPGNPEAPGNMGLFDQQLALQWVQKNIAAFGGNPKSVTLFGESAGAASVSLHLLSPGSHSLFTRAILQSGSFNAPWAVTSLYEARNRTLNLAKLTGCSRENETEIIKCLRNKDPQEILLNEAFVVPYGTPLSVNFGPTVDGDFLTDMPDILLELGQFKKTQILVGVNKDEGTAFLVYGAPGFSKDNNSIITRKEFQEGLKIFFPGVSEFGKESILFHYTDWVDDQRPENYREALGDVVGDYNFICPALEFTKKFSEWGNNAFFYYFEHRSSKLPWPEWMGVMHGYEIEFVFGLPLERRDNYTKAEEILSRSIVKRWANFAKYGNPNETQNNSTSWPVFKSTEQKYLTLNTESTRIMTKLRAQQCRFWTSFFPKVLEMTGNIDEAEWEWKAGFHRWNNYMMDWKNQFNDYTSKKESCVGL,"[(108, 109), (111, 112), (133, 133), (139, 142), (193, 193), (195, 195), (461, 465), (467, 470), (475, 475), (482, 482)]" -O,CC(C)(N)CO,MVCKVCGQKAQVEMRSRGLALCREHYLDWFVKETERAIRRHRMLLPGERVLVAVSGGKDSLALWDVLSRLGYQAVGLHIELGIGEYSKRSLEVTQAFARERGLELLVVDLKEAYGFGVPELARLSGRVACSACGLSKRYIINQVAVEEGFRVVATGHNLDDEAAVLFGNLLNPQEETLSRQGPVLPEKPGLAARVKPFYRFSEREVLSYTLLRGIRYLHEECPNAKGAKSLLYKEALNLVERSMPGAKLRFLDGFLEKIRPRLDVGEEVALRECERCGYPTTGAVCAFCRMWDAVYRRAKKRKLLPEEVSFRPRVKPLRAG,"[(2, 2), (4, 5), (7, 11), (20, 21), (23, 24), (26, 29), (51, 52), (56, 58), (60, 64), (76, 78), (80, 81), (83, 83), (86, 86), (90, 90), (94, 94), (105, 105), (108, 108), (110, 110), (118, 118), (128, 129), (131, 132), (134, 137), (139, 
141), (154, 155), (157, 160), (162, 165), (198, 198), (203, 203), (206, 206), (210, 210), (212, 212), (218, 218), (220, 221), (234, 234), (272, 273), (275, 276), (278, 281), (284, 285), (287, 288), (290, 293), (311, 311), (313, 316)]" -O,Oc1ccccc1,MKIIRIETSRIAVPLTKPFKTALRTVYTAESVIVRITYDSGAVGWGEAPPTLVITGDSMDSIESAIHHVLKPALLGKSLAGYEAILHDIQHLLTGNMSAKAAVEMALYDGWAQMCGLPLYQMLGGYRDTLETDYTVSVNSPEEMAADAENYLKQGFQTLKIKVGKDDIATDIARIQEIRKRVGSAVKLRLDANQGWRPKEAVTAIRKMEDAGLGIELVEQPVHKDDLAGLKKVTDATDTPIMADESVFTPRQAFEVLQTRSADLINIKLMKAGGISGAEKINAMAEACGVECMVGSMIETKLGITAAAHFAASKRNITRFDFDAPLMLKTDVFNGGITYSGSTISMPGKPGLGIIGAALLKGEKEQ,"[(160, 164), (189, 190), (192, 193), (217, 218), (220, 221), (242, 243), (245, 248), (266, 266), (268, 268), (271, 271), (293, 293), (321, 321)]" -O,C[C@H](CCC(=O)O)[C@H]1CC[C@H]2[C@@H]3CC[C@@H]4C[C@H](O)CC[C@]4(C)[C@H]3C[C@H](O)[C@@]21C,MSSAEEKLFMKALKEKFEESPEEKYTKFYIFGGWKQSERKKEFKEWADKIVEERGVPHYNPDIGVPLGQRKLMSYQVSGTDVFVEGDDLHFVNNAAMQQMWDDIRRTVIVGMDTAHRVLERRLGKEVTPETINEYMETLNHALPGGAVVQEHMVEIHPGLTWDCYAKIITGDLELADEIDDKFLIDIEKLFPEEQAEQLIKAIGNRTYQVCRMPTIVGHVCDGATMYRWAAMQIAMSFICAYKIAAGEAAVSDFAFASKHAEVINMGEMLPARRARGENEPGGVPFGVLADCVQTMRKYPDDPAKVALEVIAAGAMLYDQIWLGSYMSGGVGFTQYATAVYTDNILDDYVYYGLEYVEDKYGIAEAEPSMDVVKDVATEVTLYGLEQYERYPAAMETHFGGSQRAAVCAAAAGCSTAFATGHAQAGLNGWYLSQILHKEGHGRLGFYGYALQDQCGAANSLSVRSDEGLPLELRGPNYPNYAMNVGHLGEYAGIVQAAHAARGDAFCVHPVIKVAFADENLVFDFTEPRKEFAKGALREFEPAGERDLIVPAE,"[(147, 149), (151, 153), (227, 228), (231, 232), (254, 258), (260, 261), (263, 263), (271, 272), (274, 275), (319, 319), (333, 335), (337, 339), (402, 403), (445, 446), (448, 450), (485, 485)]" 
-NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1,NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1,MDSKYQCVKLNDGHFMPVLGFGTYAPAEVPKSKALEAVKLAIEAGFHHIDSAHVYNNEEQVGLAIRSKIADGSVKREDIFYTSKLWSNSHRPELVRPALERSLKNLQLDYVDLYLIHFPVSVKPGEEVIPKDENGKILFDTVDLCATWEAMEKCKDAGLAKSIGVSNFNHRLLEMILNKPGLKYKPVCNQVECHPYFNQRKLLDFCKSKDIVLVAYSALGSHREEPWVDPNSPVLLEDPVLCALAKKHKRTPALIALRYQLQRGVVVLAKSYNEQRIRQNVQVFEFQLTSEEMKAIDGLNRNVRYLTLDIFAGPPNYPFSDEY,"[(17, 19), (25, 27), (29, 29), (33, 34), (36, 38), (40, 41), (44, 44), (46, 46), (48, 49), (51, 52), (55, 55), (57, 57), (61, 61), (82, 82), (84, 84), (115, 119), (164, 165), (168, 169), (172, 172), (188, 189), (191, 195), (214, 215), (223, 224), (227, 228), (235, 236), (251, 251), (253, 254), (256, 257), (260, 260), (267, 269), (281, 283), (305, 306), (308, 308), (318, 319)]" -Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1,MNFYSAYQHGFVRVAACTHHTTIGDPAANAASVLDMARACHDDGAALAVFPELTLSGYSIEDVLLQDSLLDAVEDALLDLVTESADLLPVLVVGAPLRHRHRIYNTAVVIHRGAVLGVVPKSYLPTYREFYERRQMAPGDGERGTIRIGGADVAFGTDLLFAASDLPGFVLHVEICEDMFVPMPPSAEAALAGATVLANLSGSPITIGRAEDRRLLARSASARCLAAYVYAAAGEGESTTDLAWDGQTMIWENGALLAESERFPKGVRRSVADVDTELLRSERLRMGTFDDNRRHHRELTESFRRIDFALDPPAGDIGLLREVERFPFVPADPQRLQQDCYEAYNIQVSGLEQRLRALDYPKVVIGVSGGLDSTHALIVATHAMDREGRPRSDILAFALPGFATGEHTKNNAIKLARALGVTFSEIDIGDTARLMLHTIGHPYSVGEKVYDVTFENVQAGLRTDYLFRIANQRGGIVLGTGDLSELALGWSTYGVGDQMSHYNVNAGVPKTLIQHLIRWVISAGEFGEKVGEVLQSVLDTEITPELIPTGEEELQSSEAKVGPFALQDFSLFQVLRYGFRPSKIAFLAWHAWNDAERGNWPPGFPKSERPSYSLAEIRHWLQIFVQRFYSFSQFKRSALPNGPKVSHGGALSPRGDWRAPSDMSARIWLDQIDREVPKG,"[(52, 52), (58, 58), (125, 126), (128, 130), (177, 177), (201, 202), (204, 208), (210, 214), (230, 230), (232, 232), (243, 244), (350, 353), (355, 358), (361, 
365), (374, 377), (396, 399), (410, 412), (423, 423), (452, 455), (457, 461), (466, 470), (472, 474), (476, 479), (481, 484), (486, 489), (494, 500), (502, 503), (505, 505), (510, 510), (513, 514), (517, 517), (537, 538), (541, 542), (557, 557), (560, 561), (564, 564), (627, 627), (630, 634), (636, 641), (659, 660), (662, 663)]" -CC(C)C[C@H](N)C(=O)O,CC(C)C[C@H](NC(=O)[C@@H](N)Cc1ccccc1)C(=O)O,MKSSAAKQTVLCLNRYAVVALPLAIASFAAFGASPASTLWAPTDTKAFVTPAQVEARSAAPLLELAAGETAHIVVSLKLRDEAQLKQLAQAVNQPGNAQFGKFLKRRQFLSQFAPTEAQVQAVVAHLRKNGFVNIHVVPNRLLISADGSAGAVKAAFNTPLVRYQLNGKAGYANTAPAQVPQDLGEIVGSVLGLQNVTRAHPMLKVGERSAAKTLAAGTAKGHNPTEFPTIYDASSAPTAANTTVGIITIGGVSQTLQDLQQFTSANGLASVNTQTIQTGSSNGDYSDDQQGQGEWDLDSQSIVGSAGGAVQQLLFYMADQSASGNTGLTQAFNQAVSDNVAKVINVSLGWCEADANADGTLQAEDRIFATAAAQGQTFSVSSGDEGVYECNNRGYPDGSTYSVSWPASSPNVIAVGGTTLYTTSAGAYSNETVWNEGLDSNGKLWATGGGYSVYESKPSWQSVVSGTPGRRLLPDISFDAAQGTGALIYNYGQLQQIGGTSLASPIFVGLWARLQSANSNSLGFPAASFYSAISSTPSLVHDVKSGNNGYGGYGYNAGTGWDYPTGWGSLDIAKLSAYIRSNGFGH,"[(51, 52), (63, 63), (69, 74), (106, 106), (111, 112), (115, 115), (146, 146), (149, 150), (220, 220), (222, 222), (231, 231), (268, 268)]" -C1=CC(=CC=C1[N+](=O)[O-])O[C@H]2[C@@H]([C@H]([C@@H]([C@H](O2)CO)O[C@H]3[C@@H]([C@H]([C@@H]([C@H](O3)CO)O)O)O)O)O,OC[C@H]1O[C@@H](O[C@@H]2[C@@H](CO)O[C@@H](O)[C@H](O)[C@H]2O)[C@H](O)[C@@H](O)[C@@H]1O,MEKDTKQVDIIFRSKLPDIYIPNHLPLHSYCFENISEFSSRPCLINGANKQIYTYADVELNSRKVAAGLHKQGIQPKDTIMILLPNSPEFVFAFIGASYLGAISTMANPLFTPAEVVKQAKASSAKIIVTQACHVNKVKDYAFENDVKIICIDSAPEGCLHFSVLTQANEHDIPEVEIQPDDVVALPYSSGTTGLPKGVMLTHKGLVTSVAQQVDGENPNLYIHSEDVMLCVLPLFHIYSLNSVLLCGLRVGAAILIMQKFDIVSFLELIQRYKVTIGPFVPPIVLAIAKSPMVDDYDLSSVRTVMSGAAPLGKELEDTVRAKFPNAKLGQGYGMTEAGPVLAMCLAFAKEPFEIKSGACGTVVRNAEMKIVDPKTGNSLPRNQSGEICIRGDQIMKGYLNDPEATARTIDKEGWLYTGDIGYIDDDDELFIVDRLKELIKYKGFQVAPAELEALLLNHPNISDAAVVPMKDEQAGEVPVAFVVRSNGSTITEDEVKDFISKQVIFYKRIKRVFFVDAIPKSPSGKILRKDLRAKLAAGLPN,"[(187, 188), (194, 196), (198, 199), (201, 201), (212, 213), (220, 221), (229, 229), (232, 233), (237, 238), (240, 242), (244, 
245), (247, 248), (258, 259), (261, 262), (280, 281), (283, 283), (306, 308), (310, 313), (317, 317), (329, 330), (333, 335), (337, 343), (345, 346), (348, 348), (358, 364), (386, 388), (399, 399), (402, 402), (405, 405), (408, 409), (417, 419), (421, 422), (432, 434), (436, 436), (438, 440), (442, 442), (445, 445), (447, 448), (474, 477), (507, 508), (520, 522), (524, 525), (527, 528), (533, 533)]" -O=C[C@H](O)[C@@H](O)[C@@H](O)CO,O=C(CO)[C@@H](O)[C@@H](O)CO,MEMKKSGLGTTAIHAGTLKNLYGTLAMPIYQTSTFIFDSAEQGGRRFALEEAGYIYTRLGNPTTTVLENKIAALEEGEAGIAMSSGMGAISSTLWTVLKAGDHVVTDKTLYGCTFALMNHGLTRFGVEVTFVDTSNLEEVKNAMKKNTRVVYLETPANPNLKIVDLEALSKIAHTNPNTLVIVDNTFATPYMQKPLKLGVDIVVHSATKYLNGHGDVIAGLVVTRQELADQIRFVGLKDMTGAVLGPQEAYYIIRGLKTFEIRMERHCKNARTIVDFLNKHPKVEKVYYPGLETHPGYEIAKKQMKDFGAMISFELKGGFEAGKTLLNNLKLCSLAVSLGDTETLIQHPASMTHSPYTKEEREVAGITDGLVRLSVGLENVEDIIADLEQGLEKI,"[(30, 30), (32, 35), (54, 55), (59, 61), (84, 85), (88, 92), (111, 111), (113, 114), (117, 117), (184, 184), (186, 186), (204, 205), (210, 213), (215, 215), (217, 221), (237, 239), (242, 242), (244, 245), (247, 247), (253, 253), (338, 339), (341, 341)]" -CC/C=C\C/C=C\C/C=C\CCCCCCCC(=O)SCCNC(=O)CCNC(=O)[C@@H](C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@@H]1[C@H]([C@H]([C@@H](O1)N2C=NC3=C(N=CN=C32)N)O)OP(=O)(O)O)O,CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS,MMTTSLIWGIAIAACCCLWLILGIRRRQTGEPPLENGLIPYLGCALQFGANPLEFLRANQRKHGHVFTCKLMGKYVHFITNPLSYHKVLCHGKYFDWKKFHFATSAKAFGHRSIDPMDGNTTENINDTFIKTLQGHALNSLTESMMENLQRIMRPPVSSNSKTAAWVTEGMYSFCYRVMFEAGYLTIFGRDLTRRDTQKAHILNNLDNFKQFDKVFPALVAGLPIHMFRTAHNAREKLAESLRHENLQKRESISELISLRMFLNDTLSTFDDLEKAKTHLVVLWASQANTIPATFWSLFQMIRNPEAMKAATEEVKRTLENAGQKVSLEGNPICLSQAELNDLPVLDSIIKESLRLSSASLNIRTAKEDFTLHLEDGSYNIRKDDIIALYPQLMHLDPEIYPDPLTFKYDRYLDENGKTKTTFYCNGLKLKYYYMPFGSGATICPGRLFAIHEIKQFLILMLSYFELELIEGQAKCPPLDQSRAGLGILPPLNDIEFKYKFKHL,"[(134, 134), (437, 438), (442, 443), (445, 447)]" 
-CC(=O)CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O,C[C@](CC(=O)O)(CC(=O)SCCNC(=O)CCNC(=O)[C@@H](C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@@H]1[C@H]([C@H]([C@@H](O1)N2C=NC3=C(N=CN=C32)N)O)OP(=O)(O)O)O)O,MARSRGERTPAARRITSRNARFQQWQALLGNRNKRTRAGEFLVMGVRPISLAVEHGWPVRTLLYDGQRELSKWARELLRTVRTEQIAMAPDLLMELGEKNEAPPEVVAVVEMPADDLDRIPVREDFLGVLFDRPTSPGNIGSIIRSADALGAHGLIVAGHAADVYDPKSVRSSTGSLFSLPAVRVPSPGEVMDWVEARRAAGTPIVLVGTDEHGDCDVFDFDFTQPTLLLIGNETAGLSNAWRTLCDYTVSIPMAGSASSLNAANAATAILYEAVRQRISGRTATTP,"[(131, 132), (208, 209), (212, 214), (217, 219), (229, 231), (233, 234), (237, 238), (243, 243), (249, 251), (255, 256), (259, 261), (263, 263), (266, 266), (269, 270)]" -N[C@@H](CC(=O)[O-])C(=O)[O-],O=P([O-])([O-])OP(=O)([O-])[O-],MPSASASRKSQEKPREIMDAAEDYAKERYGISSMIQSQEKPDRVLVRVRDLTIQKADEVVWVRARVHTSRAKGKQCFLVLRQQQFNVQALVAVGDHASKQMVKFAANINKESIVDVEGVVRKVNQKIGSCTQQDVELHVQKIYVISLAEPRLPLQLDDAVRPEAEGEEEGRATVNQDTRLDNRVIDLRTSTSQAVFRLQSGICHLFRETLINKGFVEIQTPKIISAASEGGANVFTVSYFKNNAYLAQSPQLYKQMCICADFEKVFSIGPVFRAEDSNTHRHLTEFVGLDIEMAFNYHYHEVMEEIADTMVQIFKGLQERFQTEIQTVNKQFPCEPFKFLEPTLRLEYCEALAMLREAGVEMGDEDDLSTPNEKLLGHLVKEKYDTDFYILDKYPLAVRPFYTMPDPRNPKQSNSYDMFMRGEEILSGAQRIHDPQLLTERALHHGIDLEKIKAYIDSFRFGAPPHAGGGIGLERVTMLFLGLHNVRQTSMFPRDPKRLTP,"[(199, 199), (202, 203), (251, 251), (254, 255), (272, 272), (284, 289), (291, 292), (373, 373), (399, 399), (402, 404), (416, 419), (422, 423), (425, 426), (428, 430), (432, 433), (466, 471), (476, 480), (486, 486), (489, 490)]" 
-C[C@@H](C(=O)N[C@@H](CCC(=O)N[C@@H](CCC(=O)O)C(=O)O)C(=O)O)OP(=O)(O)OC[C@H]([C@H]([C@H](CN1C2=CC(=O)C=CC2=CC3=C1NC(=O)NC3=O)O)O)O,NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1,MSTLRLLISDSYDPWFNLAVEECIFRQMPATQRVLFLWRNADTVVIGRAQNPWKECNTRRMEEDNVRLARRSSGGGAVFHDLGNTCFTFMAGKPEYDKTISTSIVLNALNALGVSAEASGRNDLVVKTVEGDRKVSGSAYRETKDRGFHHGTLLLNADLSRLANYLNPDKKKLAAKGITSVRSRVTNLTELLPGITHEQVCEAITEAFFAHYGERVEAEIISPNKTPDLPNFAETFARQSSWEWNFGQAPAFSHLLDERFTWGGVELHFDVEKGHITRAQVFTDSLNPAPLEALAGRLQGCLYRADMLQQECEALLVDFPEQEKELRELSAWMAGAVR,"[(21, 21), (39, 45), (47, 47), (49, 49), (54, 54), (65, 65), (69, 70), (72, 75), (80, 81), (83, 83), (85, 85), (123, 125), (132, 133), (135, 136), (140, 140), (147, 147), (151, 153), (157, 157), (160, 161), (164, 165), (179, 179), (185, 185), (187, 187), (243, 243)]" -O=[N+]([O-])c1ccc(Cl)c([N+](=O)[O-])c1,C1=CC(=C(C=C1[N+](=O)[O-])[N+](=O)[O-])SC[C@@H](C(=O)NCC(=O)O)NC(=O)CC[C@@H](C(=O)O)N,MPNYKLTYFNMRGRAEIIRYIFAYLDIQYEDHRIEQADWPEIKSTLPFGKIPILEVDGLTLHQSLAIARYLTKNTDLAGNTEMEQCHVDAIVDTLDDFMSCFPWAEKKQDVKEQMFNELLTYNAPHLMQDLDTYLGGREWLIGNSVTWADFYWEICSTTLLVFKPDLLDNHPRLVTLRKKVQAIPAVANWIKRRPQTKL,"[(6, 7), (9, 13), (15, 19), (32, 38), (40, 43), (46, 48), (52, 53), (61, 62), (65, 69), (96, 96), (99, 100), (104, 104), (152, 152)]" 
-O,O=P([O-])([O-])[O-],MSSSNVEVFIPVSQGNTNGFPATASNDLKAFTEGAVLSFHNICYRVKLKSGFLPCRKPVEKEILSNINGIMKPGLNAILGPTGGGKSSLLDVLAARKDPSGLSGDVLINGAPRPANFKCNSGYVVQDDVVMGTLTVRENLQFSAALRLATTMTNHEKNERINRVIQELGLDKVADSKVGTQFIRGVSGGERKRTSIGMELITDPSILFLDEPTTGLDSSTANAVLLLLKRMSKQGRTIIFSIHQPRYSIFKLFDSLTLLASGRLMFHGPAQEALGYFESAGYHCEAYNNPADFFLDIINGDSTAVALNREEDFKATEIIEPSKQDKPLIEKLAEIYVNSSFYKETKAELHQLSGGEKKKKITVFKEISYTTSFCHQLRWVSKRSFKNLLGNPQASIAQIIVTVVLGLVIGAIYFGLKNDSTGIQNRAGVLFFLTTNQCFSSVSAVELFVVEKKLFIHEYISGYYRVSSYFLGKLLSDLLPMRMLPSIIFTCIVYFMLGLKPKADAFFVMMFTLMMVAYSASSMALAIAAGQSVVSVATLLMTICFVFMMIFSGLLVNLTTIASWLSWLQYFSIPRYGFTALQHNEFLGQNFCPGLNATGNNPCNYATCTGEEYLVKQGIDLSPWGLWKNHVALACMIVIFLTIAYLKLLFLKKYS,"[(63, 64), (78, 79), (88, 91), (123, 123), (126, 126), (128, 128), (136, 136), (170, 170), (173, 173), (176, 180), (182, 183), (191, 194), (209, 210), (212, 216), (241, 242), (244, 245), (258, 263), (295, 295), (298, 299)]" -Cc1cn([C@H]2C[C@H](O)[C@@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)O2)c(=O)[nH]c1=O,Cc1cn([C@H]2C[C@H](O)[C@@H](COP(=O)(O)OP(=O)(O)O)O2)c(=O)[nH]c1=O,MKAFILAAGSGERLEPITHTRPKAFVPILSKPLIEYQIEYLRKCGIRDITVIVSSKNKEYFEKKLKEISIVTQKDDIKGTGAAILSAKFNDEALIIYGDLFFSNEKEICNIITLKENAIIGVKVSNPKDYGVLVLDNQNNLSKIIEKPEIPPSNLINAGIYKLNSDIFTYLDKISISERGELELTDAINLMAKDHRVKVIEYEGYWMDIGKPWNIIDVNKWALDNLVFSQNLGNVEDNVKIKGKVIIEEDAEIKSGTYIEGPVYIGKGSEIGPNSYLRPYTILVEKNKIGASVEVKESVIMEGSKIPHLSYVGDSVIAEDVNFGAGTLIANLRFDEKEVKVNVKGKRISSGRRKLGAFIGGHVRTGINVTILPGVKIGAYARIYPGAVVNRDVGYGEFFKV,"[(5, 7), (14, 15), (18, 18), (23, 23), (37, 37), (52, 57), (71, 72), (74, 75), (77, 78), (81, 84), (86, 86), (95, 96), (98, 100), (119, 122), (129, 130), (132, 133), (144, 145), (147, 148), (155, 156), (158, 161), (181, 185), (206, 206), (208, 212), (343, 344)]" 
-N[C@@H](Cc1ccncc1)C(=O)O,N,MKTLSQAQSKTSSQQFSFTGNSSANVIIGNQKLTINDVARVARNGTLVSLTNNTDILQGIQASCDYINNAVESGEPIYGVTSGFGGMANVAISREQASELQTNLVWFLKTGAGNKLPLADVRAAMLLRANSHMRGASGIRLELIKRMEIFLNAGVTPYVYEFGSIGASGDLVPLSYITGSLIGLDPSFKVDFNGKEMDAPTALRQLNLSPLTLLPKEGLAMMNGTSVMTGIAANCVYDTQILTAIAMGVHALDIQALNGTNQSFHPFIHNSKPHPGQLWAADQMISLLANSQLVRDELDGKHDYRDHELIQDRYSLRCLPQYLGPIVDGISQIAKQIEIEINSVTDNPLIDVDNQASYHGGNFLGQYVGMGMDHLRYYIGLLAKHLDVQIALLASPEFSNGLPPSLLGNRERKVNMGLKGLQICGNSIMPLLTFYGNSIADRFPTHAEQFNQNINSQGYTSATLARRSVDIFQNYVAIALMFGVQAVDLRTYKKTGHYDARACLSPATERLYSAVRHVVGQKPTSDRPYIWNDNEQGLDEHIARISADIAAGGVIVQAVQDILPCLH,"[(128, 128), (171, 171), (174, 174), (216, 216), (218, 222), (224, 225), (263, 263), (295, 295), (310, 310), (312, 316), (318, 321), (345, 346), (348, 349), (358, 361), (363, 363), (405, 405), (416, 418), (420, 423), (446, 447), (449, 450), (452, 453)]" -CSCC[C@H](N)C(=O)O,CS,MSVHKTNDAFKVLMNSAKEPIVEDIPKKYRKQSFRDNLKVYIESPESYKNVIYYDDDVVLVRDMFPKSKMHLLLMTRDPHLTHVHPLEIMMKHRSLVEKLVSYVQGDLSGLIFDEARNCLSQQLTNEALCNYIKVGFHAGPSMNNLHLHIMTLDHVSPSLKNSAHYISFTSPFFVKIDTPTSNLPTRGTLTSLFQEDLKCWRCGETFGRHFTKLKAHLQEEYDDWLDKSVSM,"[(166, 166), (198, 199), (201, 202), (204, 205), (207, 207), (213, 216), (218, 220), (222, 226)]" -CCCCCCCC/C=C\CCCCCCCC(=O)O,CC(C)(N)CO,MRRLSSWRKMATAEKQKHDGRVKIGHYILGDTLGVGTFGKVKVGKHELTGHKVAVKILNRQKIRSLDVVGKIRREIQNLKLFRHPHIIKLYQVISTPSDIFMVMEYVSGGELFDYICKNGRLDEKESRRLFQQILSGVDYCHRHMVVHRDLKPENVLLDAHMNAKIADFGLSNMMSDGEFLRTSCGSPNYAAPEVISGRLYAGPEVDIWSSGVILYALLCGTLPFDDDHVPTLFKKICDGIFYTPQYLNPSVISLLKHMLQVDPMKRAAIKDIREHEWFKQDLPKYLFPEDPSYSSTMIDDEALKEVCEKFECSEEEVLSCLYNRNHQDPLAVAYHLIIDNRRIMNEAKDFYLATSPPDSFLDDHHLTRPHPERVPFLVAETPRARHTLDELNPQKSKHQGVRKAKWHLGIRSQSRPNDIMAEVCRAIKQLDYEWKVVNPYYLRVRRKNPVTSTFSKMSLQLYQVDSRTYLLDFRSIDDEITEAKSGTATPQRSGSISNYRSCQRSDSDAEAQGKPSDVSLTSSVTSLDSSPVDVAPRPGSHTIEFFEMCANLIKILAQ,"[(12, 12), (15, 19), (31, 32), (42, 44), (52, 55), (57, 59), (80, 80), (95, 95)]" 
-[C@H](C(=O)O)(NC(=O)N)O,O=C=O,MESLKRFLCSIALLLISLLLPSSLAQQQQHESIRTMEDFSGYPIHEPGQFGSINLASSLSVDAPGLQNQIDELSSFSDAPSPSVTRVLYTDKDVSARRYVKNLMALAGLTVREDAVGNIFGKWDGLEPNLPAVATGSHIDAIPYSGKYDGVVGVLGAIEAINVLKRSGFKPKRSLEIILFTSEEPTRFGISCLGSRLLAGSKELAEALKTTVVDGQNVSFIEAARSAGYAEDKDDDLSSVFLKKGSYFAFLELHIEQGPILEDEGLDIGVVTAIAAPASLKVEFEGNGGHAGAVLMPYRNDAGLAAAELALAVEKHVLESESIDTVGTVGILELHPGAINSIPSKSHLEIDTRDIDEARRNTVIKKIQESANTIAKKRKVKLSEFKIVNQDPPALSDKLVIKKMAEAATELNLSHKMMISRAYHDSLFMARISPMGMIFIPCYKGYSHKPEEYSSPEDMANGVKVLSLTLAKLSLD,"[(136, 137), (139, 142), (144, 148), (150, 152), (154, 154), (179, 180), (182, 183), (185, 187), (190, 192), (252, 253), (255, 257), (423, 425), (437, 440), (446, 447), (449, 450), (452, 452)]" -Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,MVKDTYISSASKTPPMERTVRVTGMTCAMCVKSIETAVGSLEGVEEVRVNLATETAFIRFDEKRIDFETIKRVIEDLGYGVVDEQAAVSAEVEHLSRMKRKLYVAAFAGVLLLFLAHFISLPYEDFVQLLIALPAIFYSGSSIFKAAFSALRRRTLNMDVMYSMGVGAAFLASVLSTAGVLPREYSFYETSVLLLAFLLLGRTLEARAKSRTGEAIKKLVGLQAKTAVVIRDGKEIAVPVEEVAVGDIVIVRPGEKIPVDGVVVEGESYVDESMISGEPVPVLKSKGDEVFGATINNTGVLKIRATRVGGETLLAQIVKLVEDAMGSKPPIQRLADKVVAYFIPTVLLVAISAFIYWYFIAHAPLLFAFTTLIAVLVVACPCAFGLATPTALTVGMGKGAELGILIKNADALEVAEKVTAVIFDKTGTLTKGKPEVTDLVPLNGDERELLRLAAIAERRSEHPIAEAIVKKALEHGIELGEPEKVEVIAGEGVVADGILVGNKRLMEDFGVAVSNEVELALEKLEREAKTAVIVARNGRVEGIIAVSDTLKESAKPAVQELKRMGIKVGMITGDNWRSAEAISRELNLDLVIAEVLPHQKSEEVKKLQAKEVVAFVGDGINDAPALAQADLGIAVGSGSDVAVESGDIVLIRDDLRDVVAAIQLSRKTMSKIKQNIFWALIYNVILIPAAAGLLYPIFGVVFRPEFAGLAMAMSSVSVVANSLLLRNYVPPIRRGGDSVEKIVLELSGLSCHHCVARVKKALEEAGAKVEKVDLNEAVVAGNKEDVDKYIKAVEAAGYQAKLRS,"[(223, 223), (240, 240), (258, 260), (266, 267), (269, 269), (272, 274), (278, 278), (281, 289), (291, 293), (297, 298), (309, 309), (312, 312), (314, 315), (318, 318)]" 
-O=C(O)[C@@H](CO)OP(=O)(O)O,O=C(O)[C@H](O)COP(=O)(O)O,MSKKPVALIILDGFALRDETYGNAVAQANKPNFDRYWNEYPHTTLKACGEAVGLPEGQMGNSEVGHLNIGAGRIVYQSLTRINIAIREGEFDRNETFLAAMNHVKQHGTSLHLFGLLSDGGVHSHIHHLYALLRLAAKEGVKRVYIHGFLDGRDVGPQTAPQYIKELQEKIKEYGVGEIATLSGRYYSMDRDKRWDRVEKAYRAMVYGEGPTYRDPLECIEDSYKHGIYDEFVLPSVIVREDGRPVATIQDNDAIIFYNFRPDRAIQISNTFTNEDFREFDRGPKHPKHLFFVCLTHFSETVKGYVAFKPTNLDNTIGEVLSQHGLRQLRIAETEKYPHVTFFMSGGREEKFPGEDRILINSPKVPTYDLKPEMSAYEVTDALLKEIEADKYDAIILNYANPDMVGHSGKLEPTIKAVEAVDECLGKVVDAILAKGGIAIITADHGNADEVLTPDGKPQTAHTTNPVPVIVTKKGIKLRDGGILGDLAPTMLDLLGLPQPKEMTGKSLIVK,"[(10, 11), (13, 15), (47, 47), (59, 61), (63, 67), (69, 69), (122, 122), (153, 153), (231, 231), (261, 261), (336, 336), (339, 339), (343, 343), (398, 402), (404, 406), (408, 409), (442, 443), (446, 447), (459, 461), (463, 464), (467, 467), (484, 484)]" -Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,C[C@H](C(=O)OP(=O)([O-])OC[C@@H]1[C@H]([C@H]([C@@H](O1)N2C=NC3=C(N=CN=C32)N)O)O)[NH3+],MKLLEQIEKWAAETPDQTAFVWRDAKITYKQLKEDSDALAHWISSEYPDDRSPIMVYGHMQPEMIINFLGCVKAGHAYIPVDLSIPADRVQRIAENSGAKLLLSATAVTVTDLPVRIVSEDNLKDIFFTHKGNTPNPEHAVKGDENFYIIYTSGSTGNPKGVQITYNCLVSFTKWAVEDFNLQTGQVFLNQAPFSFDLSVMDIYPSLVTGGTLWAIDKDMIARPKDLFASLEQSDIQVWTSTPSFAEMCLMEASFSESMLPNMKTFLFCGEVLPNEVARKLIERFPKATIMNTYGPTEATVAVTGIHVTEEVLDQYKSLPVGYCKSDCRLLIMKEDGTIAPDGEKGEIVIVGPSVSVGYLGSPELTEKAFTMIDGERAYKTGDAGYVENGLLFYNGRLDFQIKLHGYRMELEEIEHHLRACSYVEGAVIVPIKKGEKYDYLLAVVVPGEHSFEKEFKLTSAIKKELNERLPNYMIPRKFMYQSSIPMTPNGKVDRKKLLSEVTA,"[(89, 89), (148, 148), (150, 151), (154, 154), (159, 160), (162, 162), (164, 164), (172, 172), (175, 175), (180, 180), (195, 196), (198, 201), (242, 242), (244, 244), (268, 273), (290, 291), (298, 300), (302, 306), (308, 308), (318, 321), (345, 348), (350, 350), (359, 359), (379, 379), (381, 382), (384, 386), (392, 393), (398, 401), (410, 410), (412, 413), (486, 488), (490, 491), (493, 494)]" 
-Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1,O=P([O-])([O-])[O-],MELSEGELSHTSSSSSFVPVDQRQLQDAIQIIDENKHFNTGILDYINKTSPADVGNNYHIISVFGSQSTGKSTLLNRLFNTNFDVMDESNRQQTTKGIWLAYSPVVSTTLGHTTSKSNILVMDVEGTDGRERGEDQDFERKAALFALSTSEVLIINIWETQVGLYQGANMGLLKTVFEVNLSLFGKSKLETHNDHKVLLLIVIRDHVGVTPVESLAKTFTSDLQNMWSSLAKPAELEHLQFADFFDVTFHALNHKVLQPKEFGEGINRLDDRLVVSNELFKPEYHHDVPIDGWTMYAERCWEQIETNKDLDLPTQQILVAQFKCDEIVESVFQEFLAKYQHHFKEVDAAPDFEELGALFADLRQDAFEDYDASASRYNKAVYEQKRKKLRWLINDKLKEVFDVHAKNLCNTLLEKFEKDLVALKGKDFAVNVKTLSTKLVEDVNFQVSLMSLQGDLSLDEIILALTKDIDAIVAKQQVIELNSIVNKSVKKLSASLSKSIQFELGDPNEETWDNVLQQFKGVYEKFGGDFGLGTSSTQNQQAIEKFKFKSWCQFYDVTHKLISREKLLALLQDRFDDKFRYDENGLPKLYLNEQDLEKTFAVAKQHALQVLPILTFAKLADGSEIVPDYDIFDSKLREQFLGGYDDSDDEEDHCFAEIITEQEKSEVLAKFKKEVDAKYIETKRSIVQHITQIPYYIYLIILVLGWNEFMAIIRNPLFFSLSIVLGATVYVLYYLGLLRPALVVAQRTMDEVIVMAKTKLREVLIDDHEVTGRQLNKMAGSKENIELDDM,"[(63, 64), (73, 77), (83, 86), (123, 128), (156, 156), (158, 158), (169, 169), (172, 172), (204, 204)]" -O=C1OC2(c3ccc(O)cc3Oc3cc(O)ccc32)c2ccccc21,C(C(=N)C(=O)O)C(=O)O,MIYIIGSGIAGLSAGVALRRAGKKVTLISKRIDGGSTPIAKGGVAASVGSDDSPELHAQDTIRVGDGLCDVKTVNYVTSEAKNVIETFESWGFEFEEDLRLEGGHTKRRVLHRTDETGREIFNFLLKLAREEGIPIIEDRLVEIRVKDGKVTGFVTEKRGLVEDVDKLVLATGGYSYLYEYSSTQSTNIGDGMAIAFKAGTILADMEFVQFHPTVTSLDGEVFLLTETLRGEGAQIINENGERFLFNYDKRGELAPRDILSRAIYIEMLKGHKVFIDLSKIEDFERKFPVVAKYLARHGHNYKVKIPIFPAAHFVDGGIRVNIRGESNIVNLYAIGEVSDSGLHGANRLASNSLLEGLVFGINLPRYVDSSWEGISTDDGIVHSVRISGNKTLSLKEIRRINWENVGIIRNEEKLVKAINTYSSSTQNEAIISYLTALAAEIRKESRGNHFREDYPYKDPNWEKRIYFKLVV,"[(5, 6), (11, 15), (27, 28), (30, 35), (38, 41), (44, 45), (102, 102), (110, 112), (117, 119), (121, 122), (125, 125), (137, 140), (171, 176), (184, 184), (187, 190), (192, 196), (313, 315), (317, 317), (335, 336), (338, 339), (344, 344), (348, 349), (351, 352), (355, 358), (361, 361), (428, 428)]" 
-O,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,MNSPGGRGKKKGSGGASNPVPPRPPPPCLAPAPPAAGPAPPPESPHKRNLYYFSYPLFVGFALLRLVAFHLGLLFVWLCQRFSRALMAAKRSSGAAPAPASASAPAPVPGGEAERVRVFHKQAFEYISIALRIDEDEKAGQKEQAVEWYKKGIEELEKGIAVIVTGQGEQCERARRLQAKMMTNLVMAKDRLQLLEKMQPVLPFSKSQTDVYNDSTNLACRNGHLQSESGAVPKRKDPLTHTSNSLPRSKTVMKTGSAGLSGHHRAPSYSGLSMVSGVKQGSGPAPTTHKGTPKTNRTNKPSTPTTATRKKKDLKNFRNVDSNLANLIMNEIVDNGTAVKFDDIAGQDLAKQALQEIVILPSLRPELFTGLRAPARGLLLFGPPGNGKTMLAKAVAAESNATFFNISAASLTSKYVGEGEKLVRALFAVARELQPSIIFIDEVDSLLCERREGEHDASRRLKTEFLIEFDGVQSAGDDRVLVMGATNRPQELDEAVLRRFIKRVYVSLPNEETRLLLLKNLLCKQGSPLTQKELAQLARMTDGYSGSDLTALAKDAALGPIRELKPEQVKNMSASEMRNIRLSDFTESLKKIKRSVSPQTLEAYIRWNKDFGDTTV,"[(114, 115), (118, 119), (121, 122), (124, 124), (150, 150), (153, 153), (155, 155), (157, 157), (189, 189), (192, 192)]" -O=P([O-])([O-])[O-],NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1,MALVRALVCCLLTAWHCRSGLGLPVAPAGGRNPPPAIGQFWHVTDLHLDPTYHITDDHTKVCASSKGANASNPGPFGDVLCDSPYQLILSAFDFIKNSGQEASFMIWTGDSPPHVPVPELSTDTVINVITNMTTTIQSLFPNLQVFPALGNHDYWPQDQLPVVTSKVYNAVANLWKPWLDEEAISTLRKGGFYSQKVTTNPNLRIISLNTNLYYGPNIMTLNKTDPANQFEWLESTLNNSQQNKEKVYIIAHVPVGYLPSSQNITAMREYYNEKLIDIFQKYSDVIAGQFYGHTHRDSIMVLSDKKGSPVNSLFVAPAVTPVKSVLEKQTNNPGIRLFQYDPRDYKLLDMLQYYLNLTEANLKGESIWKLEYILTQTYDIEDLQPESLYGLAKQFTILDSKQFIKYYNYFFVSYDSSVTCDKTCKAFQICAIMNLDNISYADCLKQLYIKHNY,"[(43, 44), (46, 46), (48, 49), (82, 83), (88, 88), (108, 109), (111, 114), (148, 150), (152, 154), (157, 160), (208, 208), (210, 211), (214, 214), (250, 251), (253, 254), (266, 266), (291, 292), (294, 294), (296, 297), (316, 320), (322, 322), (331, 331), (410, 410)]" 
-NCCC[C@H](N)C(=O)O,O=P([O-])([O-])[O-],MARTVVLITGCSSGIGLHLAVRLASDPSQSFKVYATLRDLKTQGRLWEAARALACPPGSLETLQLDVRDSKSVAAARERVTEGRVDVLVCNAGLGLLGPLEALGEDAVASVLDVNVVGTVRMLQAFLPDMKRRGSGRVLVTGSVGGLMGLPFNDVYCASKFALEGLCESLAVLLLPFGVHLSLIECGPVHTAFMEKVLGSPEEVLDRTDIHTFHRFYQYLAHSKQVFREAAQNPEEVAEVFLTALRAPKPTLRYFTTERFLPLLRMRLDDPSGSNYVTAMHREVFGDVPAKAEAGAEAGGGAGPGAEDEAGRGAVGDPELGDPPAAPQ,"[(1, 9), (39, 42), (44, 45), (48, 49), (52, 55), (57, 65), (67, 68), (72, 72), (78, 81), (88, 88), (90, 91), (112, 113), (116, 117), (120, 121), (140, 140), (144, 148), (155, 159), (161, 164), (185, 185), (188, 188), (190, 190), (233, 234), (236, 238), (240, 241), (244, 245), (266, 266)]" -O,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,MFLKVRAEKRLGNFRLNVDFEMGRDYCVLLGPTGAGKSVFLELIAGIVKPDRGEVRLNGADITPLPPERRGIGFVPQDYALFPHLSVYRNIAYGLRNVERVERDRRVREMAEKLGIAHLLDRKPARLSGGERQRVALARALVIQPRLLLLDEPLSAVDLKTKGVLMEELRFVQREFDVPILHVTHDLIEAAMLADEVAVMLNGRIVEKGKLKELFSAKNGEVAEFLSARNLLLKVSKILD,"[(14, 14), (16, 16), (29, 30), (39, 42), (77, 77), (151, 152), (183, 185), (200, 204), (225, 225)]" 
-Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,O=P([O-])([O-])[O-],MELEEDLKGRADKNFSKMGKKSKKEKKEKKPAVSVLTMFRYAGWLDRLYMLVGTLAAIIHGVALPLMMLIFGDMTDSFASVGNVSKNSTNMSEADKRAMFAKLEEEMTTYAYYYTGIGAGVLIVAYIQVSFWCLAAGRQIHKIRQKFFHAIMNQEIGWFDVHDVGELNTRLTDDVSKINEGIGDKIGMFFQAMATFFGGFIIGFTRGWKLTLVILAISPVLGLSAGIWAKILSSFTDKELHAYAKAGAVAEEVLAAIRTVIAFGGQKKELERYNNNLEEAKRLGIKKAITANISMGAAFLLIYASYALAFWYGTSLVISKEYSIGQVLTVFFSVLIGAFSVGQASPNIEAFANARGAAYEVFKIIDNKPSIDSFSKSGHKPDNIQGNLEFKNIHFSYPSRKEVQILKGLNLKVKSGQTVALVGNSGCGKSTTVQLMQRLYDPLDGMVSIDGQDIRTINVRYLREIIGVVSQEPVLFATTIAENIRYGREDVTMDEIEKAVKEANAYDFIMKLPHQFDTLVGERGAQLSGGQKQRIAIARALVRNPKILLLDEATSALDTESEAVVQAALDKAREGRTTIVIAHRLSTVRNADVIAGFDGGVIVEQGNHDELMREKGIYFKLVMTQTAGNEIELGNEACKSKDEIDNLDMSSKDSGSSLIRRRSTRKSICGPHDQDRKLSTKEALDEDVPPASFWRILKLNSTEWPYFVVGIFCAIINGGLQPAFSVIFSKVVGVFTNGGPPETQRQNSNLFSLLFLILGIISFITFFLQGFTFGKAGEILTKRLRYMVFKSMLRQDVSWFDDPKNTTGALTTRLANDAAQVKGATGSRLAVIFQNIANLGTGIIISLIYGWQLTLLLLAIVPIIAIAGVVEMKMLSGQALKDKKELEGSGKIATEAIENFRTVVSLTREQKFETMYAQSLQIPYRNAMKKAHVFGITFSFTQAMMYFSYAACFRFGAYLVTQQLMTFENVLLVFSAIVFGAMAVGQVSSFAPDYAKATVSASHIIRIIEKTPEIDSYSTQGLKPNMLEGNVQFSGVVFNYPTRPSIPVLQGLSLEVKKGQTLALVGSSGCGKSTVVQLLERFYDPMAGSVFLDGKEIKQLNVQWLRAQLGIVSQEPILFDCSIAENIAYGDNSRVVSYEEIVRAAKEANIHQFIDSLPDKYNTRVGDKGTQLSGGQKQRIAIARALVRQPHILLLDEATSALDTESEKVVQEALDKAREGRTCIVIAHRLSTIQNADLIVVIQNGKVKEHGTHQQLLAQKGIYFSMVSVQAGAKRS,"[(397, 397), (405, 406), (409, 409), (421, 422), (431, 435), (471, 471), (551, 552), (581, 583), (596, 601), (617, 617), (621, 621), (1040, 1040), (1048, 1049), (1052, 1052), (1064, 1065), (1074, 1078), (1196, 1197), (1226, 1228), (1241, 1245), (1262, 1262), (1266, 1266)]" 
-C1=CN(C(=O)NC1=O)[C@H]2[C@@H]([C@@H]([C@H](O2)COP(=O)(O)OP(=O)(O)OC3[C@@H]([C@H]([C@H]([C@H](O3)CO)O)O)O)O)O,O=c1ccn([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1,MIIDRLLQRSHSHLPILQATFGLERESLRIHQPTQRVAQTPHPKTLGSRNYHPYIQTDYSEPQLELITPIAKDSQEAIRFLKAISDVAGRSINHDEYLWPLSMPPKVREEDIQIAQLEDAFEYDYRKYLEKTYGKLIQSISGIHYNLGLGQELLTSLFELSQADNAIDFQNQLYMKLSQNFLRYRWLLTYLYGASPVAEEDFLDQKLNNPVRSLRNSHLGYVNHKDIRISYTSLKDYVNDLENAVKSGQLIAEKEFYSPVRLRGSKACRNYLEKGITYLEFRTFDLNPFSPIGITQETVDTVHLFLLALLWIDSSSHIDQDIKEANRLNDLIALSHPLEKLPNQAPVSDLVDAMQSVIQHFNLSPYYQDLLESVKRQIQSPELTVAGQLLEMIEGLSLETFGQRQGQIYHDYAWEAPYALKGYETMELSTQLLLFDVIQKGVNFEVLDEQDQFLKLWHNSHIEYVKNGNMTSKDNYIVPLAMANKVVTKKILDEKHFPTPFGDEFTDRKEALNYFSQIQDKPIVVKPKSTNFGLGISIFKTSANLASYEKAIDIAFTEDSAILVEEYIEGTEYRFFVLEGDCIAVLLRVAANVVGDGIHTISQLVKLKNQNPLRGYDHRSPLEVIELGEVEQLMLEQQGYTVNSIPPEGTKIELRRNSNISTGGDSIDVTNTMDPTYKQLAAEMAEAMGAWVCGVDLIIPNATQAYSKDKKNATCIELNFNPLMYMHTYCQEGPGQSITPRILAKLFPEL,"[(482, 482), (499, 508), (511, 515), (575, 576), (584, 588), (630, 630), (655, 657), (659, 659), (662, 662), (669, 669), (673, 673), (692, 695), (697, 700), (702, 702), (714, 716), (718, 718), (720, 723), (727, 727)]" -CC(C)[C@@H](N)C(=O)O,CC(C)[C@H](N)C(=O)O,MGKLDKASKLIDEENKYYARSARINYYNLVIDHAHGATLVDVDGNKYIDLLASASAINVGHTHEKVVKAIADQAQKLIHYTPAYFHHVPGMELSEKLAKIAPGNSPKMVSFGNSGSDANDAIIKFARAYTGRQYIVSYMGSYHGSTYGSQTLSGSSLNMTRKIGPMLPSVVHVPYPDSYRTYPGETEHDVSLRYFNEFKKPFESFLPADETACVLIEPIQGDGGIIKAPEEYMQLVYKFCHEHGILFAIDEVNQGLGRTGKMWAIQQFKDIEPDLMSVGKSLASGMPLSAVIGKKEVMQSLDAPAHLFTTAGNPVCSAASLATLDVIEYEGLVEKSATDGAYAKQRFLEMQQRHPMIGDVRMWGLNGGIELVKDPKTKEPDSDAATKVIYYAFAHGVVIITLAGNILRFQPPLVIPREQLDQALQVLDDAFTAVENGEVTIPKDTGKIGW,"[(54, 55), (79, 79), (81, 83), (113, 114), (117, 120), (140, 141), (143, 146), (149, 149), (153, 155), (159, 159), (215, 218), (221, 222), (248, 249), (254, 255), (258, 258), (262, 265), (276, 281), (289, 289), (307, 308), (310, 311), (366, 366), (408, 408), (410, 410)]" 
-N,N[C@@H](Cc1ccccc1)C(=O)O,MDKLRVAVVGYGNVGRYALEAVQAAPDMELVGVVRRKVLAATPPELTGVRVVTDISQLEGVQGALLCVPTRSVPEYAEAMLRRGIHTVDSYDIHGDLADLRRRLDPVAREHGAAAVISAGWDPGTDSIIRALLEFMAPKGITYTNFGPGMSMGHSVAVKAIPGVRDALSMTIPAGMGVHKRAVYVELEPGADFAEVERAIKTDPYFVRDETRVTQVESVSALMDVGHGVVMERKGVSGATHNQLFRFEMRINNPALTAQVMVAALRAAARQKPGCYTMIEIPVIDYLPGDREAWIRKLV,"[(8, 10), (15, 19), (33, 34), (38, 39), (43, 43), (45, 46), (51, 53), (65, 66), (72, 75), (77, 81), (88, 89), (93, 94), (96, 97), (100, 100), (116, 118), (124, 127), (130, 130), (144, 144), (146, 146), (155, 158), (160, 161), (164, 167), (225, 225), (227, 229), (249, 249), (251, 251), (253, 254), (256, 258), (260, 261), (264, 264), (278, 278)]" -O,O=P([O-])([O-])[O-],MSSLEDIKNETVDLEKIPIEEVFQQLKCSREGLTTQEGEDRIQIFGPNKLEEKKESKLLKFLGFMWNPLSWVMEMAAIMAIALANGDGRPPDWQDFVGIICLLVINSTISFIEENNAGNAAAALMAGLAPKTKVLRDGKWSEQEAAILVPGDIVSIKLGDIIPADARLLEGDPLKVDQSALTGESLPVTKHPGQEVFSGSTCKQGEIEAVVIATGVHTFFGKAAHLVDSTNQVGHFQKVLTAIGNFCICSIAIGMVIEIIVMYPIQRRKYRDGIDNLLVLLIGGIPIAMPTVLSVTMAIGSHRLSQQGAITKRMTAIEEMAGMDVLCSDKTGTLTLNKLSVDKNLVEVFCKGVEKDQVLLFAAMASRVENQDAIDAAMVGMLADPKEARAGIREVHFLPFNPVDKRTALTYIDGSGNWHRVSKGAPEQILELAKASNDLSKKVLSIIDKYAERGLRSLAVARQVVPEKTKESPGAPWEFVGLLPLFDPPRHDSAETIRRALNLGVNVKMITGDQLAIGKETGRRLGMGTNMYPSSALLGTHKDANLASIPVEELIEKADGFAGVFPEHKYEIVKKLQERKHIVGMTGDGVNDAPALKKADIGIAVADATDAARGASDIVLTEPGLSVIISAVLTSRAIFQRMKNYTIYAVSITIRIVFGFMLIALIWEFDFSAFMVLIIAILNDGTIMTISKDRVKPSPTPDSWKLKEIFATGVVLGGYQAIMTVIFFWAAHKTDFFSDTFGVRSIRDNNHELMGAVYLQVSIISQALIFVTRSRSWSFVERPGALLMIAFLIAQLIATLIAVYANWEFAKIRGIGWGWAGVIWLYSIVTYFPLDVFKFAIRYILSGKAWLNLFENKTAFTMKKDYGKEEREAQWALAQRTLHGLQPKEAVNIFPEKGSYRELSEIAEQAKRRAEIARLRELHTLKGHVESVVKLKGLDIETPSHYTV,"[(329, 329), (569, 569), (586, 587), (589, 591), (593, 597), (605, 605), (607, 609), (612, 612)]" 
-[Co+2],[H+],MVKSLQLAHQLKDKKILLIGGGEVGLTRLYKLIPTGCKLTLVSPDLHKSIIPKFGKFIQNEDQPDYREDAKRFINPNWDPTKNEIYEYIRSDFKDEYLDLEDENDAWYIIMTCIPDHPESARIYHLCKERFGKQQLVNVADKPDLCDFYFGANLEIGDRLQILISTNGLSPRFGALVRDEIRNLFTQMGDLALEDAVVKLGELRRGIRLLAPDDKDVKYRMDWARRCTDLFGIQHCHNIDVKRLLDLFKVMFQEQNCSLQFPPRERLLSEYCSS,"[(17, 17), (19, 22), (25, 28), (41, 42), (46, 47), (88, 92), (94, 95), (97, 98), (113, 113), (119, 119), (122, 123), (126, 126), (140, 141)]" -O,CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS,MDIYMSRYEEITQQLIFSPKTWLITGVAGFIGSNLLEKLLKLNQVVIGLDNFSTGHQYNLDEVKTLVSTEQWSRFCFIEGDIRDLTTCEQVMKGVDHVLHQAALGSVPRSIVDPITTNATNITGFLNILHAAKNAQVQSFTYAASSSTYGDHPALPKVEENIGNPLSPYAVTKYVNEIYAQVYARTYGFKTIGLRYFNVFGRRQDPNGAYAAVIPKWTAAMLKGDDVYINGDGETSRDFCYIDNVIQMNILSALAKDSAKDNIYNVAVGDRTTLNELSGYIYDELNLIHHIDKLSIKYREFRSGDVRHSQADVTKAIDLLKYRPNIKIREGLRLSMPWYVRFLKG,"[(21, 21), (23, 25), (32, 35), (47, 49), (56, 60), (74, 74), (78, 80), (83, 87), (98, 100), (104, 107), (113, 119), (121, 125), (128, 128), (139, 140), (165, 168), (170, 172), (174, 178), (193, 193), (196, 198), (200, 202), (207, 207), (238, 240), (242, 243), (246, 246), (332, 332), (335, 336), (339, 339)]" -N[C@@H](CCC(=O)N[C@@H](CS)C(=O)NCC(=O)O)C(=O)O,Cl,MASPPCTTEELSPPPGGSLVEYSGGSLRVPDNPVVAFIRGDGVGPEVVESALKVVDAAVKKVYGGSRRIVWWELLAGHLAREKCGELLPKATLEGIRLARVALKGPLETPVGTGYRSLNVAIRQALDLYANIRPVRYYGQPAPHKYADRVDMVIFRENTEDVYAGIEWPHDSPEAARIRRFLAEEFGISIREDAGIGVKPISRFATRRLMERALEWALRNGNTVVTIMHKGNIMKYTEGAFMRWAYEVALEKFREHVVTEQEVQEKYGGVRPEGKILVNDRIADNMLQQIITRPWDYQVIVAPNLNGDYISDAASALVGGIGMAAGMNMGDGIAVAEPVHGTAPKYAGKDLINPSAEILSASLLIGEFMGWREVKSIVEYAIRKAVQSKKVTQDLARHMPGVQPLRTSEYTETLIAYIDEADLNEVLAGKRG,"[(41, 47), (49, 51), (103, 106), (108, 108), (110, 112), (115, 116), (118, 118), (120, 122), (124, 128), (131, 132), (134, 138), (154, 155), (157, 162), (164, 167), (203, 205), (208, 208), (212, 213), (219, 219), (228, 229), (231, 231), (233, 235), (241, 242), (244, 245), (248, 249), (263, 263), (281, 283), 
(285, 287), (290, 293), (300, 304), (306, 307), (309, 313), (315, 315), (325, 333), (335, 336), (338, 339), (347, 352), (354, 356), (359, 360), (363, 363), (367, 367), (371, 371), (388, 389), (392, 393), (395, 396), (398, 402), (405, 411), (413, 413)]" -Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,C1=C(N(C=N1)C2[C@@H]([C@@H]([C@H](O2)COP(=O)(O)O)O)O)N,MASQSSVAVISSAAARGESFPDSKKPIGSVRFQQPLRLSFSYCKSGNMSSRICAMAKPNDAETLSSSVDMSLSPRVQSLKPSKTMVITDLAATLVQSGVPVIRLAAGEPDFDTPKVVAEAGINAIREGFTRYTLNAGITELREAICRKLKEENGLSYAPDQILVSNGAKQSLLQAVLAVCSPGDEVIIPAPYWVSYTEQARLADATPVVIPTKISNNFLLDPKDLESKLTEKSRLLILCSPSNPTGSVYPKSLLEEIARIIAKHPRLLVLSDEIYEHIIYAPATHTSFASLPDMYERTLTVNGFSKAFAMTGWRLGYLAGPKHIVAACSKLQGQVSSGASSIAQKAGVAALGLGKAGGETVAEMVKAYRERRDFLVKSLGDIKGVKISEPQGAFYLFIDFSAYYGSEAEGFGLINDSSSLALYFLDKFQVAMVPGDAFGDDSCIRISYATSLDVLQAAVEKIRKALEPLRATVSV,"[(84, 84), (104, 106), (108, 109), (169, 169), (172, 172), (191, 192), (194, 197), (199, 199), (239, 239), (241, 242), (244, 246), (275, 275), (306, 306), (394, 397), (432, 435), (438, 438), (443, 444), (446, 447), (449, 449)]" 
-CC(=CCC/C(=C/CC/C(=C/COP(=O)(O)OP(=O)(O)O)/C)/C)C,O=P([O-])([O-])OP(=O)([O-])[O-],MLEEYRKHVAERAAEGIAPKPLDANQMAALVELLKNPPAGEEEFLLDLLTNRVPPGVDEAAYVKAGFLAAIAKGEAKSPLLTPEKAIELLGTMQGGYNIHPLIDALDDAKLAPIAAKALSHTLLMFDNFYDVEEKAKAGNEYAKQVMQSWADAEWFLNRPALAEKLTVTVFKVTGETNTDDLSPAPDAWSRPDIPLHALAMLKNAREGIEPDQPGVVGPIKQIEALQQKGFPLAYVGDVVGTGSSRKSATNSVLWFMGDDIPHVPNKRGGGLCLGGKIAPIFFNTMEDAGALPIEVDVSNLNMGDVIDVYPYKGEVRNHETGELLATFELKTDVLIDEVRAGGRIPLIIGRGLTTKAREALGLPHSDVFRQAKDVAESDRGFSLAQKMVGRACGVKGIRPGAYCEPKMTSVGSQDTTGPMTRDELKDLACLGFSADLVMQSFCHTAAYPKPVDVNTHHTLPDFIMNRGGVSLRPGDGVIHSWLNRMLLPDTVGTGGDSHTRFPIGISFPAGSGLVAFAAATGVMPLDMPESVLVRFKGKMQPGITLRDLVHAIPLYAIKQGLLTVEKKGKKNIFSGRILEIEGLPDLKVEQAFELTDASAERSAAGCTIKLNKEPIIEYLNSNIVLLKWMIAEGYGDRRTLERRIQGMEKWLANPELLEADADAEYAAVIDIDLADIKEPILCAPNDPDDARPLSAVQGEKIDEVFIGSCMTNIGHFRAAGKLLDAHKGQLPTRLWVAPPTRMDAAQLTEEGYYSVFGKSGARIEIPGCSLCMGNQARVADGATVVSTSTRNFPNRLGTGANVFLASAELAAVAALIGKLPTPEEYQTYVAQVDKTAVDTYRYLNFNQLSQYTEKADGVIFQTAV,"[(443, 444), (478, 478), (480, 481), (499, 499), (593, 593), (597, 597), (706, 706), (708, 709), (711, 712), (740, 740), (767, 768), (770, 771), (773, 774), (789, 790), (796, 797)]" -O,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]1O,MSGKPVLHYFNARGRMECIRWLLAAAGVEFEEKLIQSPEDLEKLKKDGNLMFDQVPMVEIDGMKLAQTRAILNYIATKYDLYGKDMKERALIDMYSEGILDLTEMIIQLVICPPDQREAKTALAKDRTKNRYLPAFEKVLKSHGQDYLVGNRLTRVDIHLLELLLYVEEFDASLLTSFPLLKAFKSRISSLPNVKKFLQPGSQRKPAMDAKQIEEARKVFKF,"[(7, 8), (10, 16), (19, 20), (33, 35), (41, 44), (46, 50), (52, 53), (56, 57), (65, 66), (69, 73)]" 
-Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,MAKYTREDIEKLVKEENVKYIRLQFTDILGTIKNVEIPVSQLGKALDNKVMFDGSSIEGFVRIEESDMYLYPDLNTFVIFPWTAEKGKVARFICDIYNPDGTPFEGDPRNNLKRILKEMEDLGFSDFNLGPEPEFFLFKLDEKGEPTLELNDKGGYFDLAPTDLGENCRRDIVLELEEMGFEIEASHHEVAPGQHEIDFKYAGAVRSCDDIQTFKLVVKTIARKHGLHATFMPKPLFGVNGSGMHCNLSLFKNGVNAFFDENADLQLSETAKHFIAGIVKHATSFTAVTNPTVNSYKRLVPGYEAPCYVAWSAQNRSPLIRIPASRGISTRVEVRSVDPAANPYLALSVLLAAGLDGIKNKLEAPAPIDRNIYVMSKEERMENGIVDLPATLAEALEEFKSNEVMVKALGEHLFEHFIEAKEIEWDMFRTQVHPWEREQYMSQY,"[(126, 131), (133, 133), (135, 136), (154, 157), (169, 169), (182, 183), (185, 188), (190, 191), (194, 195), (197, 201), (214, 214), (230, 231), (233, 235), (238, 239), (242, 244), (246, 248), (250, 251), (256, 258), (290, 290), (294, 297), (299, 300), (302, 303), (305, 306), (311, 315), (317, 320), (322, 324), (328, 328), (330, 332), (334, 334), (336, 339), (373, 373)]" -N,N[C@@H](Cc1cccc(F)c1)C(=O)O,MENGNGATTNGHVNGNGMDFCMKTEDPLYWGIAAEAMTGSHLDEVKKMVAEYRKPVVKLGGETLTISQVAAISARDGSGVTVELSEAARAGVKASSDWVMDSMNKGTDSYGVTTGFGATSHRRTKQGGALQKELIRFLNAGIFGNGSDNTLPHSATRAAMLVRINTLLQGYSGIRFEILEAITKFLNQNITPCLPLRGTITASGDLVPLSYIAGLLTGRPNSKAVGPTGVILSPEEAFKLAGVEGGFFELQPKEGLALVNGTAVGSGMASMVLFEANILAVLAEVMSAIFAEVMQGKPEFTDHLTHKLKHHPGQIEAAAIMEHILDGSAYVKAAQKLHEMDPLQKPKQDRYALRTSPQWLGPQIEVIRSSTKMIEREINSVNDNPLIDVSRNKAIHGGNFQGTPIGVSMDNTRLAIAAIGKLMFAQFSELVNDFYNNGLPSNLSGGRNPSLDYGFKGAEIAMASYCSELQFLANPVTNHVQSAEQHNQDVNSLGLISSRKTSEAVEILKLMSTTFLVGLCQAIDLRHLEENLKSTVKNTVSSVAKRVLTMGVNGELHPSRFCEKDLLRVVDREYIFAYIDDPCSATYPLMQKLRQTLVEHALKNGDNERNLSTSIFQKIATFEDELKALLPKEVESARAALESGNPAIPNRIEECRSYPLYKFVRKELGTEYLTGEKVTSPGEEFEKVFIAMSKGEIIDPLLECLESWNGAPLPIC,"[(163, 163), (205, 206), (209, 209), (253, 253), (255, 259), (261, 262), (297, 297), (300, 300), (344, 347), (349, 353), (355, 358), (382, 383), (385, 386), (395, 397), (400, 400), (442, 442), (453, 455), (457, 460), (482, 483), (485, 486), (488, 489)]" 
-C(CC(=O)[O-])C(CC(=O)C(=O)[O-])O,O=CCCC(=O)[O-],MENSFKAALKAGRPQIGLWLGLSSSYSAELLAGAGFDWLLIDGEHAPNNVQTVLTQLQAIAPYPSQPVVRPSWNDPVQIKQLLDVGTQTLLVPMVQNADEAREAVRATRYPPAGIRGVGSALARASRWNRIPDYLQKANDQMCVLVQIETREAMKNLPQILDVEGVDGVFIGPADLSADMGYAGNPQHPEVQAAIEQAIVQIRESGKAPGILIANEQLAKRYLELGALFVAVGVDTTLLARAAEALAARFGAQATAVKPGVY,"[(93, 94), (96, 96), (147, 148), (150, 151), (153, 153), (172, 174), (176, 179)]" -O=[N+]([O-])c1ccc(O)cc1,C1=CC(=CC=C1[N+](=O)[O-])O[C@H]2[C@@H]([C@H]([C@@H]([C@H](O2)C(=O)O)O)O)O,MKQSHFFAHLSRLKLINRWPLMRNVRTENVSEHSLQVAMVAHALAAIKNRKFGGNVNAERIALLAMYHDASEVLTGDLPTPVKYFNSQIAQEYKAIEKIAQQKLVDMVPEELRDIFAPLIDEHAYSDEEKSLVKQADALCAYLKCLEELAAGNNEFLLAKTRLEATLEARRSQEMDYFMEIFVPSFHLSLDEISQDSPL,"[(13, 13), (18, 18), (28, 32), (34, 38), (64, 65), (67, 67), (70, 73), (119, 119), (122, 122), (125, 125), (133, 136), (138, 142)]" -C[C@@H](C(=O)N[C@@H](CC(=O)O)C(=O)O)N,C[C@@H](C(=O)N[C@H](CC(=O)[O-])C(=O)[O-])[NH3+],MKIIRIETSRIAVPLTKPFKTALRTVYTAESVIVRITYDSGAVGWGEAPPTLVITGDSMDSIESAIHHVLKPALLGKSLAGYEAILHDIQHLLTGNMSAKAAVEMALYDGWAQMCGLPLYQMLGGYRDTLETDYTVSVNSPEEMAADAENYLKQGFQTLKIKVGKDDIATDIARIQEIRKRVGSAVKLRLDANQGWRPKEAVTAIRKMEDAGLGIELVEQPVHKDDLAGLKKVTDATDTPIMADESVFTPRQAFEVLQTRSADLINIKLMKAGGISGAEKINAMAEACGVECMVGSMIETKLGITAAAHFAASKRNITRFDFDAPLMLKTDVFNGGITYSGSTISMPGKPGLGIIGAALLKGEKEQ,"[(160, 164), (189, 190), (192, 193), (217, 218), (220, 221), (242, 243), (245, 248), (266, 266), (268, 268), (271, 271), (293, 293), (321, 321)]" -Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,CC(C)(N)CO,MEFSEWYSDILEKAEIYDVRYPIKGCGVYLPYGFKIRRYTFEIIRNLLDESGHDEALFPMLIPEDLLAKEAEHIKGFEDEVYWVTHGGKTQLDVKLALRPTSETPIYYMMKLWVKVHTDLPIKIYQIVNTFRYETKHTRPLIRLREIMTFKEAHTAHSTKEEAENQVKEAISIYKKFFDTLGIPYLISKRPEWDKFPGAEYTMAFDTIFPDGRTMQIATVHNLGQNFSKTFEIIFETPTGDKDYAYQTCYGISDRVIASIIAIHGDEKGLILPPIVAPIQVVIVPLIFKGKEDIVMEKAKEIYEKLKGKFRVHIDDRDIRPGRKFNDWEIKGVPLRIEVGPKDIENKKITLFRRDTMEKFQVDETQLMEVVEKTLNNIMENIKNRAWEKFENFITILEDINPDEIKNILSEKRGVILVPFKEEIYNEELEEKVEATILGETEYKGNKYIAIAKTY,"[(41, 41), (70, 70), (77, 
77), (80, 80), (82, 82), (99, 100), (102, 102), (104, 108), (130, 131), (133, 133), (135, 137), (141, 145), (147, 147), (149, 150), (152, 152), (154, 154), (167, 167), (174, 174), (190, 190), (196, 197), (199, 199), (201, 207), (209, 209), (214, 215), (217, 218), (220, 220), (222, 223), (227, 227), (231, 231), (248, 252), (254, 254), (256, 260), (455, 455)]" -Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,O=P([O-])([O-])[O-],MVRIIVKNVSKVFKKGKVVALDNVNINIENGERFGILGPSGAGKTTFMRIIAGLDVPSTGELYFDDRLVASNGKLIVPPEDRKIGMVFQTWALYPNLTAFENIAFPLTNMKMSKEEIRKRVEEVAKILDIHHVLNHFPRELSGGQQQRVALARALVKDPSLLLLDEPFSNLDARMRDSARALVKEVQSRLGVTLLVVSHDPADIFAIADRVGVLVKGKLVQVGKPEDLYDNPVSIQVASLIGEINELEGKVTNEGVVIGSLRFPVSVSSDRAIIGIRPEDVKLSKDVIKDDSWILVGKGKVKVIGYQGGLFRITITPLDSEEEIFTYSDHPIHSGEEVLVYVRKDKIKVFEKN,"[(11, 11), (13, 13), (20, 21), (36, 39), (47, 50), (55, 55), (87, 88), (90, 92), (164, 165), (167, 170), (197, 200), (214, 214)]" -C1[C@H](C([C@@H](CC1(C(=O)O)O)O)O)O,O=C1C[C@@](O)(C(=O)O)C[C@@H](O)[C@@H]1O,MAGQHLPVPRLEGVSREQFMQHLYPQRKPLVLEGIDLGPCTSKWTVDYLSQVGGKKEVKIHVAAVAQMDFISKNFVYRTLPFDQLVQRAAEEKHKEFFVSEDEKYYLRSLGEDPRKDVADIRKQFPLLKGDIKFPEFFKEEQFFSSVFRISSPGLQLWTHYDVMDNLLIQVTGKKRVVLFSPRDAQYLYLKGTKSEVLNIDNPDLAKYPLFSKARRYECSLEAGDVLFIPALWFHNVISEEFGVGVNIFWKHLPSECYDKTDTYGNKDPTAASRAAQILDRALKTLAELPEEYRDFYARRMVLHIQDKAYSKNSE,"[(59, 61), (68, 68), (85, 85), (104, 105), (107, 108), (149, 151), (155, 155), (157, 159), (161, 161), (163, 165), (167, 168), (170, 170), (173, 174), (176, 180), (193, 194), (219, 221), (227, 229), (231, 234), (236, 240), (243, 245), (247, 250), (264, 266)]" 
-C[C@]12CC[C@@H]3c4ccc(O)cc4CC[C@H]3[C@@H]1CCC2=O,NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1,MARTVVLITGCSSGIGLHLAVRLASDPSQSFKVYATLRDLKTQGRLWEAARALACPPGSLETLQLDVRDSKSVAAARERVTEGRVDVLVCNAGLGLLGPLEALGEDAVASVLDVNVVGTVRMLQAFLPDMKRRGSGRVLVTGSVGGLMGLPFNDVYCASKFALEGLCESLAVLLLPFGVHLSLIECGPVHTAFMEKVLGSPEEVLDRTDIHTFHRFYQYLAHSKQVFREAAQNPEEVAEVFLTALRAPKPTLRYFTTERFLPLLRMRLDDPSGSNYVTAMHREVFGDVPAKAEAGAEAGGGAGPGAEDEAGRGAVGDPELGDPPAAPQ,"[(1, 9), (39, 42), (44, 45), (48, 49), (52, 55), (57, 65), (67, 68), (72, 72), (78, 81), (88, 88), (90, 91), (112, 113), (116, 117), (120, 121), (140, 140), (144, 148), (155, 159), (161, 164), (185, 185), (188, 188), (190, 190), (233, 234), (236, 238), (240, 241), (244, 245), (266, 266)]" -Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,MSPRVGVTLSGRYRLQRLIATGGMGQVWEAVDNRLGRRVAVKVLKSEFSSDPEFIERFRAEARTTAMLNHPGIASVHDYGESQMNGEGRTAYLVMELVNGEPLNSVLKRTGRLSLRHALDMLEQTGRALQIAHAAGLVHRDVKPGNILITPTGQVKITDFGIAKAVDAAPVTQTGMVMGTAQYIAPEQALGHDASPASDVYSLGVVGYEAVSGKRPFAGDGALTVAMKHIKEPPPPLPPDLPPNVRELIEITLVKNPAMRYRSGGPFADAVAAVRAGRRPPRPSQTPPPGRAAPAAIPSGTTARVAANSAGRTAASRRSRPATGGHRPPRRTFSSGQRALLWAAGVLGALAIIIAVLLVIKAPGDNSPQQAPTPTVTTTGNPPASNTGGTDASPRLNWTERGETRHSGLQSWVVPPTPHSRASLARYEIAQ,"[(3, 3), (17, 18), (28, 29), (40, 41), (43, 45), (48, 48), (54, 54), (58, 58), (91, 95), (97, 97), (159, 159), (162, 163), (166, 166)]" -O,C[C@H](N)C(=O)O,MDIMNEKVKKIIEFMDKNSIDAVLIAKNPNVYYISGASPLAGGYILITGESATLYVPELEYEMAKEESNIPVEKFKKMDEFYKALEGIKSLGIESSLPYGFIEELKKKANIKEFKKVDDVIRDMRIIKSEKEIKIIEKACEIADKAVMAAIEEITEGKKEREVAAKVEYLMKMNGAEKPAFDTIIASGYRSALPHGVASDKRIERGDLVVIDLGALYQHYNSDITRTIVVGSPNEKQKEIYEIVLEAQKKAVESAKPGITAKELDSIARNIIAEYGYGEYFNHSLGHGVGLEVHEWPRVSQYDETVLREGMVITIEPGIYIPKIGGVRIEDTILITKNGSKRLTKTERELI,"[(140, 140), (181, 184), (210, 211), (213, 214), (221, 222), (224, 226), (248, 248), (281, 281), (283, 286), (288, 
289), (293, 295), (297, 299), (312, 315), (317, 318), (320, 320), (328, 329), (331, 332), (344, 344)]" -Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1,O=P([O-])([O-])OP(=O)([O-])[O-],MRYDVVIAGAGPTGLMLACELRLAGARTLVLERLAEPVDFSKALGVHARTVELLDMRGLGEGFQAEAPKLRGGNFASLGVPLDFSSFDTRHPYALFVPQVRTEELLTGRALELGAELRRGHAVTALEQDADGVTVSVTGPEGPYEVECAYLVGCDGGGSTVRKLLGIDFPGQDPHMFAVIADARFREELPHGEGMGPMRPYGVMRHDLRAWFAAFPLEPDVYRATVAFFDRPYADRRAPVTEEDVRAALTEVAGSDFGMHDVRWLSRLTDTSRQAERYRDGRVLLAGDACHIHLPAGGQGLNLGFQDAVNLGWKLGATIAGTAPPELLDTYEAERRPIAAGVLRNTRAQAVLIDPDPRYEGLRELMIELLHVPETNRYLAGLISALDVRYPMAGEHPLLGRRVPDLPLVTEDGTRQLSTYFHAARGVLLTLGCDQPLADEAAAWKDRVDLVAAEGVADPGSAVDGLTALLVRPDGYICWTAAPETGTDGLTDALRTWFGPPAM,"[(8, 12), (14, 17), (30, 31), (34, 35), (40, 43), (45, 47), (50, 50), (95, 98), (100, 104), (117, 122), (124, 125), (135, 137), (151, 151), (154, 159), (161, 164), (169, 169), (215, 215), (217, 217), (223, 223), (286, 287), (289, 292), (298, 300), (303, 306), (308, 308), (335, 335)]" -O=O,O=Cc1ccccc1,MSFGALRQLLLIACLALPSLAATNLPTADFDYVVVGAGNAGNVVAARLTEDPDVSVLVLEAGVSDENVLGAEAPLLAPGLVPNSIFDWNYTTTAQAGYNGRSIAYPRGRMLGGSSSVHYMVMMRGSTEDFDRYAAVTGDEGWNWDNIQQFVRKNEMVVPPADNHNTSGEFIPAVHGTNGSVSISLPGFPTPLDDRVLATTQEQSEEFFFNPDMGTGHPLGISWSIASVGNGQRSSSSTAYLRPAQSRPNLSVLINAQVTKLVNSGTTNGLPAFRCVEYAEQEGAPTTTVCAKKEVVLSAGSVGTPILLQLSGIGDENDLSSVGIDTIVNNPSVGRNLSDHLLLPAAFFVNSNQTFDNIFRDSSEFNVDLDQWTNTRTGPLTALIANHLAWLRLPSNSSIFQTFPDPAAGPNSAHWETIFSNQWFHPAIPRPDTGSFMSVTNALISPVARGDIKLATSNPFDKPLINPQYLSTEFDIFTMIQAVKSNLRFLSGQAWADFVIRPFDPRLRDPTDDAAIESYIRDNANTIFHPVGTASMSPRGASWGVVDPDLKVKGVDGLRIVDGSILPFAPNAHTQGPIYLVGKQGADLIKADQ,"[(11, 12), (20, 21), (23, 23), (34, 34), (37, 38), (41, 42), (45, 45), (54, 59), (62, 63), (78, 82), (85, 87), (89, 91), (103, 104), (112, 113), (115, 117), (119, 119), (122, 125), (147, 147), (187, 187), (206, 206), (208, 210), (215, 215), (230, 230), (243, 244), (246, 246), (251, 253), (256, 257), (259, 260), (286, 286), (301, 301), (435, 435), (525, 527), (529, 531), 
(548, 548), (552, 553), (557, 562), (564, 566)]" -C([C@H](C(=O)[O-])O)O,[H+],MTWKNFGFEIFGEKYGQEELEKRIKDEHTPPPDSPVFGGLKLKLKKEKFKTLFTLGTTLKGFRRATHTVGTGGIGEITIVNDPKFPEHEFFTAGRTFPARLRHANLKYPDDAGADARSFSIKFADSDSDGPLDIVMNTGEANIFWNSPSLEDFVPVEEGDAAEEYVYKNPYYYYNLVEALRRAPDTFAHLYYYSQVTMPFKAKDGKVRYCRYRALPGDVDIKEEDESGRLTEEEQRKIWIFSRHENEKRPDDYLRKEYVERLQKGPVNYRLQIQIHEASPDDTATIFHAGILWDKETHPWFDLAKVSIKTPLSPDVLEKTAFNIANQPASLGLLEAKSPEDYNSIGELRVAVYTWVQHLRKLKIGSLVPAGQNAIYNVEVETGDREHAGTDATITIRITGAKGRTDYLKLDKWFHNDFEAGSKEQYTVQGFDVGDIQLIELHSDGGGYWSGDPDWFVNRVIIISSTQDRVYSFPCFRWVIKDMVLFPGEATLPFNEVPAIVSEQRQKELEQRKLTYQWDYVSDDMPGNIKAKTHDDLPRDVQFTDEKSRSYQESRKAALVNLGIGSLFTMFENWDSYDDYHILYRNWILGGTPNMADRWHEDRWFGYQFLNGANPVILTRCDALPSNFPVTNEHVNASLDRGKNLDEEIKDGHIYIVDFKVLVGAKSYGGPVLEDIGYKVPDHLKHDEADIRYCAAPLALFYVNKLGHLMPIAIQINQEPGPENPIWTPHEENEHDWMMAKFWLGVAESNFHQLNTHLLRTHLTTESFALSTWRNLASAHPVFKLLQPHIYGVLAIDTIGRKELIGSGGIVDQSLSLGGGGHVTFMEKCFKEVNLQDYHLPNALKKRGVDDPSKLPGFYYRDDGLALWEAIETFIGEIIAIFYKNDDDVKRDNEIQSWIYDVHKNGWRVNPGHQDHGVPASFESREQLKEVLTSLVFTFSCQHAAVNFSQKDHYGFTPNAPAVLRHPPPKKKGEATLQSILSTLPSKSQAAKAIATVYILTKFSEDERYLGNYSATAWEDKDALDAINRFQDKLEDISKKIKQRNENLEVPYIYLLPERIPNGTAI,"[(382, 383), (385, 386), (388, 388), (392, 394), (414, 414), (418, 418), (420, 422), (424, 424), (446, 446), (449, 451), (453, 453), (455, 456), (478, 480), (539, 539), (543, 543), (545, 545), (551, 551), (603, 603), (752, 756), (758, 761), (763, 766), (793, 793), (796, 797), (859, 859), (937, 942), (944, 946), (948, 951), (999, 1002), (1047, 1050), (1063, 1065)]" -C1=NC(=C2C(=N1)N(C=N2)[C@H]3[C@@H]([C@@H]([C@H](O3)COP(=O)(O)OS(=O)(=O)O)OP(=O)(O)O)O)N,C1=CC=C(C=C1)OS(=O)(=O)[O-],MALTSDLGKQIKLKEVEGTLLQPATVDNWSQIQSFEAKPDDLLICTYPKAGTTWIQEIVDMIEQNGDVEKCQRAIIQHRHPFIEWARPPQPSGVEKAKAMPSPRILKTHLSTQLLPPSFWENNCKFLYVARNAKDCMVSYYHFQRMNHMLPDPGTWEEYFETFINGKVVWGSWFDHVKGWWEMKDRHQILFLFYEDIKRDPKHEIRKVMQFMGKKVDETVLDKIVQETSFEKMKENPMTNRSTVSKSILDQSISSFMRKGTVGDWKNHFTVAQNERFDEIYRRKMEGTSINFCMEL,"[(44, 44), (46, 48), (55, 58), (61, 61), (107, 107), (109, 109), (129, 
130), (132, 133), (135, 138), (140, 143), (146, 146), (173, 173), (192, 193), (195, 198), (205, 205), (223, 227), (234, 236), (261, 262), (264, 264), (268, 268), (292, 292)]" -C(=O)(O)[O-],O=P([O-])([O-])[O-],MASSAQDGNNPLFSPYKMGKFNLSHRVVLAPMTRCRALNNIPQAALGEYYEQRATAGGFLITEGTMISPTSAGFPHVPGIFTKEQVREWKKIVDVVHAKGAVIFCQLWHVGRASHEVYQPAGAAPISSTEKPISNRWRILMPDGTHGIYPKPRAIGTYEISQVVEDYRRSALNAIEAGFDGIEIHGAHGYLIDQFLKDGINDRTDEYGGSLANRCKFITQVVQAVVSAIGADRVGVRVSPAIDHLDAMDSNPLSLGLAVVERLNKIQLHSGSKLAYLHVTQPRYVAYGQTEAGRLGSEEEEARLMRTLRNAYQGTFICSGGYTRELGIEAVAQGDADLVSYGRLFISNPDLVMRIKLNAPLNKYNRKTFYTQDPVVGYTDYPFLQGNGSNGPLSRL,"[(29, 30), (34, 35), (49, 50), (61, 63), (65, 66), (74, 74), (77, 78), (104, 105), (107, 108), (183, 188), (235, 236), (238, 239), (278, 280), (283, 284), (319, 320), (322, 323), (326, 326), (340, 341), (344, 347), (364, 364), (366, 366), (369, 370)]" -[H+],NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1,MAPKRSSDLFSQVVNSGPGSFLARQLGVPQPETLRRYRAGEPPLTGSLLIGGAGRVVEPLRAALEKDYDLVGNNLGGRWADSFGGLVFDATGITEPAGLKGLHEFFTPVLRNLGRCGRVVVVGGTPEAAASTNERIAQRALEGFTRSLGKELRRGATTALVYLSPDAKPAATGLESTMRFLLSAKSAYVDGQVFSVGADDSTPPADWEKPLDGKVAIVTGAARGIGATIAEVFARDGAHVVAIDVESAAENLAETASKVGGTALWLDVTADDAVDKISEHLRDHHGGKADILVNNAGITRDKLLANMDDARWDAVLAVNLLAPLRLTEGLVGNGSIGEGGRVIGLSSIAGIAGNRGQTNYATTKAGMIGITQALAPGLAAKGITINAVAPGFIETQMTAAIPLATREVGRRLNSLLQGGQPVDVAEAIAYFASPASNAVTGNVIRVCGQAMIGA,"[(188, 188), (218, 222), (224, 224), (226, 229), (242, 243), (245, 246), (248, 248), (251, 252), (255, 255), (264, 266), (269, 270), (273, 273), (293, 294), (296, 296), (298, 299), (301, 301), (315, 315), (318, 319), (322, 322), (326, 326), (345, 350), (357, 359), (361, 363), (365, 368), (388, 389), (391, 392), (394, 394), (419, 421), (424, 424), (446, 447)]" 
-O,O=P([O-])([O-])[O-],MQKSPLEKASFISKLFFSWTTPILRKGYRHHLELSDIYQAPSADSADHLSEKLEREWDREQASKKNPQLIHALRRCFFWRFLFYGILLYLGEVTKAVQPVLLGRIIASYDPENKVERSIAIYLGIGLCLLFIVRTLLLHPAIFGLHRIGMQMRTAMFSLIYKKTLKLSSRVLDKISIGQLVSLLSNNLNKFDEGLALAHFIWIAPLQVTLLMGLLWDLLQFSAFCGLGLLIILVIFQAILGKMMVKYRDQRAAKINERLVITSEIIDNIYSVKAYCWESAMEKMIENLREVELKMTRKAAYMRFFTSSAFFFSGFFVVFLSVLPYTVINGIVLRKIFTTISFCIVLRMSVTRQFPTAVQIWYDSFGMIRKIQDFLQKQEYKVLEYNLMTTGIIMENVTAFWEEGFGELLEKVQQSNGDRKHSSDENNVSFSHLCLVGNPVLKNINLNIEKGEMLAITGSTGSGKTSLLMLILGELEASEGIIKHSGRVSFCSQFSWIMPGTIKENIIFGVSYDEYRYKSVVKACQLQQDITKFAEQDNTVLGEGGVTLSGGQRARISLARAVYKDADLYLLDSPFGYLDVFTEEQVFESCVCKLMANKTRILVTSKMEHLRKADKILILHQGSSYFYGTFSELQSLRPDFSSKLMGYDTFDQFTEERRSSILTETLRRFSVDDSSAPWSKPKQSFRQTGEVGEKRKNSILNSFSSVRKISIVQKTPLCIDGESDDLQEKRLSLVPDSEQGEAALPRSNMIATGPTFPGRRRQSVLDLMTFTPNSGSSNLQRTRTSIRKISLVPQISLNEVDVYSRRLSQDSTLNITEEINEEDLKECFLDDVIKIPPVTTWNTYLRYFTLHKGLLLVLIWCVLVFLVEVAASLFVLWLLKNNPVNSGNNGTKISNSSYVVIITSTSFYYIFYIYVGVADTLLALSLFRGLPLVHTLITASKILHRKMLHSILHAPMSTISKLKAGGILNRFSKDIAILDDFLPLTIFDFIQLVFIVIGAIIVVSALQPYIFLATVPGLVVFILLRAYFLHTAQQLKQLESEGRSPIFTHLVTSLKGLWTLRAFRRQTYFETLFHKALNLHTANWFMYLATLRWFQMRIDMIFVLFFIVVTFISILTTGEGEGTAGIILTLAMNIMSTLQWAVNSSIDTDSLMRSVSRVFKFIDIQTEESMYTQIIKELPREGSSDVLVIKNEHVKKSDIWPSGGEMVVKDLTVKYMDDGNAVLENISFSISPGQRVGLLGRTGSGKSTLLSAFLRMLNIKGDIEIDGVSWNSVTLQEWRKAFGVITQKVFIFSGTFRQNLDPNGKWKDEEIWKVADEVGLKSVIEQFPGQLNFTLVDGGYVLSHGHKQLMCLARSVLSKAKIILLDEPSAHLDPITYQVIRRVLKQAFAGCTVILCEHRIEAMLDCQRFLVIEESNVWQYDSLQALLSEKSIFQQAISSSEKMRFFQGRHSSKHKPRTQITALKEETEEEVQETRL,"[(399, 400), (402, 403), (405, 406), (409, 409), (412, 413), (430, 430), (433, 434), (438, 441), (456, 457), (466, 469), (490, 492), (494, 495), (572, 573), (577, 577), (603, 604), (618, 623), (659, 659), (662, 663), (666, 666)]" 
-O,CC(C)(CO)[C@H](C(=O)NCCC(=O)O)O,MSTLANLTEVLFRLDFDPDTAVYHYRGQTLSRLQCRTYILSQASQLARLLKPGDRVVLALNDSPSLACLFLACIAVGAIPAVINPKSREQALADIAADCQASLVVREADAPSLSGPLAPLTLRAAAGRPLLDDFSLDALVGPADLDWSAFHRQDPAAACFLQYTSGSTGAPKGVMHSLRNTLGFCRAFATELLALQAGDRLYSIPKMFFGYGMGNSLFFPWFSGASALLDDTWPSPERVLENLVAFRPRVLFGVPAIYASLRPQARELLSSVRLAFSAGSPLPRGEFEFWAAHGLEICDGIGATEVGHVFLANRPGQARADSTGLPLPGYECRLVDREGHTIEEAGRQGVLLVRGPGLSPGYWRASEEQQARFAGGWYRTGDLFERDESGAYRHCGREDDLFKVNGRWVVPTQVEQAICRHLPEVSEAVLVPTCRLHDGLRPTLFVTLATPLDDNQILLAQRIDQHLAEQIPSHMLPSQLHVLPALPRNDNGKLARAELRHLADTLYHDNLPEERAC,"[(55, 55), (57, 57), (70, 70), (79, 83), (91, 91), (94, 95), (98, 99), (158, 160), (173, 176), (184, 184), (208, 210), (213, 214), (218, 218), (304, 305), (307, 307), (362, 365), (369, 369), (372, 372), (378, 378)]" -[C@H](C(=O)O)(N)NC(=O)N,N,MRSLYLIVFIVISLVKASKSDDGFCSAPSIVESDEKTNPIYWKATNPTLSPSHLQDLPGFTRSVYKRDHALITPESHVYSPLPDWTNTLGAYLITPATGSHFVMYLAKMKEMSSSGLPPQDIERLIFVVEGAVTLTNTSSSSKKLTVDSYAYLPPNFHHSLDCVESATLVVFERRYEYLGSHTTELIVGSTDKQPLLETPGEVFELRKLLPMSVAYDFNIHTMDFQPGEFLNVKEVHYNQHGLLLLEGQGIYRLGDNWYPVQAGDVIWMAPFVPQWYAALGKTRSRYLLYKDVNRNPL,"[(94, 94), (102, 102), (233, 234), (236, 236), (238, 240), (242, 243), (252, 254), (267, 274), (276, 277), (289, 291)]" -C([C@@H]1[C@H]([C@@H]([C@@H]([C@H](O1)OP(=O)(O)O)O)O)O)O,C([C@@H]1[C@H]([C@@H]([C@@H]([C@H](O1)O)O)O)O)OP(=O)(O)O,MAVTAQAARRKERVLCLFDVDGTLTPARQKIDPEVAAFLQKLRSRVQIGVVGGSDYCKIAEQLGDGDEVIEKFDYVFAENGTVQYKHGRLLSKQTIQNHLGEELLQDLINFCLSYMALLRLPKKRGTFIEFRNGMLNISPIGRSCTLEERIEFSELDKKEKIREKFVEALKTEFAGKGLRFSRGGMISFDVFPEGWDKRYCLDSLDQDSFDTIHFFGNETSPGGNDFEIFADPRTVGHSVVSPQDTVQRCREIFFPETAHEA,"[(17, 18), (20, 20), (22, 27), (29, 30), (51, 52), (54, 54), (58, 58), (62, 62), (79, 79), (96, 96), (126, 128), (130, 131), (133, 134), (136, 142), (144, 149), (151, 154), (157, 157), (163, 163), (180, 185), (187, 187), (189, 189), (191, 192), (198, 199), (202, 202), (213, 213), (215, 217), (219, 220), (225, 229), (231, 231), (233, 234), (236, 241)]" 
-O=C([O-])C(=O)C[C@]1(C(=O)[O-])C=C[C@@H](O)C=C1,O,MFDKHTHTLIAQRLDQAEKQREQIRAISLDYPEITIEDAYAVQREWVRLKIAEGRTLKGHKIGLTSKAMQASSQISEPDYGALLDDMFFHDGSDIPTDRFIVPRIEVELAFVLAKPLRGPNCTLFDVYNATDYVIPALELIDARCHNIDPETQRPRKVFDTISDNAANAGVILGGRPIKPDELDLRWISALMYRNGVIEETGVAAGVLNHPANGVAWLANKLAPYDVQLEAGQIILGGSFTRPVPARKGDTFHVDYGNMGSISCRFV,"[(61, 62), (83, 83), (104, 105), (107, 107), (109, 110), (135, 138), (140, 141), (168, 168), (170, 171), (236, 241), (243, 243), (256, 256)]" -[H+],O,MSKLLMIGTGPVAIQLANICYLKSDYEIDMVGRASTSEKSKRLYQAYKKEKQFEVKIQNEAHQHLEGKFEINRLYKDVKNVKGEYETVVMACTADAYYDTLQQLSLETLQSVKHVILISPTFGSQMIVEQFMSKFSQDIEVISFSTYLGDTRIVDKEAPNHVLTTGVKKKLYMGSTHSNSTMCQRISALAEQLKIQLEVVESPLHAETRNSSLYVHPPLFMNDFSLKAIFEGTDVPVYVYKLFPEGPITMTLIREMRLMWKEMMAILQAFRVPSVNLLQFMVKENYPVRPETLDEGDIEHFEILPDILQEYLLYVRYTAILIDPFSQPDENGHYFDFSAVPFKQVYKNEQDVVQIPRMPSEDYYRTAMIQHIGKMLGIKTPMIDQFLTRYEASCQAYKDMHQDQQLSSQFNTNLFEGDKALVTKFLEINRTLS,"[(7, 8), (13, 17), (30, 32), (34, 36), (41, 44), (74, 74), (76, 78), (91, 91), (96, 98), (100, 103), (118, 118), (147, 147), (151, 153), (162, 162), (349, 350)]" -C([C@H]([C@H]([C@H]([C@@H](C(=O)CO)O)O)O)O)OP(=O)(O)O,C([C@H]([C@@H]1[C@H]([C@@H]([C@@H](C(O1)O)O)O)O)O)OP(=O)(O)O,MENRELTYITNSIAEAQRVMAAMLADERLLATVRKVADACIASIAQGGKVLLAGNGGSAADAQHIAGEFVSRFAFDRPGLPAVALTTDTSILTAIGNDYGYEKLFSRQVQALGNEGDVLIGYSTSGKSPNILAAFREAKAKGMTCVGFTGNRGGEMRELCDLLLEVPSADTPKIQEGHLVLGHIVCGLVEHSIFGKQ,"[(57, 58), (60, 63), (65, 67), (69, 73), (77, 77), (124, 124), (170, 174), (176, 182), (184, 187)]" 
-Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,MDSSTATAMTAPFIDPTDHVNLKTDTDASENRRMGNYKPSIWNYDFLQSLATHHNIVEERHLKLAEKLKGQVKFMFGAPMEPLAKLELVDVVQRLGLNHLFETEIKEALFSIYKDGSNGWWFGHLHATSLRFRLLRQCGLFIPQDVFKTFQNKTGEFDMKLCDNVKGLLSLYEASYLGWKGENILDEAKAFTTKCLKSAWENISEKWLAKRVKHALALPLHWRVPRIEARWFIEAYEQEANMNPTLLKLAKLDFNMVQSIHQKEIGELARWWVTTGLDKLAFARNNLLQSYMWSCAIASDPKFKLARETIVEIGSVLTVVDDGYDVYGSIDELDLYTSSVERWSCVEIDKLPNTLKLIFMSMFNKTNEVGLRVQHERGYNSIPTFIKAWVEQCKSYQKEARWFHGGHTPPLEEYSLNGLVSIGFPLLLITGYVAIAENEAALDKVHPLPDLLHYSSLLSRLINDIGTSPDEMARGDNLKSIHCYMNETGASEEVAREHIKGVIEENWKILNQCCFDQSQFQEPFITFNLNSVRGSHFFYEFGDGFGVTDSWTKVDMKSVLIDPIPLGEE,"[(44, 44), (281, 283), (285, 286), (317, 320), (322, 324), (326, 327), (396, 396), (399, 399), (414, 414), (418, 419), (421, 422), (456, 459), (461, 462), (464, 466), (468, 468), (477, 477), (479, 479), (481, 481), (496, 496), (539, 539), (546, 546), (548, 548)]" -CCN(CC)C(=O)/C(C#N)=C/c1cc(O)c(O)c([N+](=O)[O-])c1,O=c1ccn([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1,MSVKWTSVILLIQLSFCFSSGNCGKVLVWAAEYSHWMNIKTILDELIQRGHEVTVLASSASILFDPNNSSALKIEIYPTSLTKTELENFIMQQIKRWSDLPKDTFWLYFSQVQEIMSIFGDITRKFCKDVVSNKKFMKKVQESRFDVIFADAIFPCSELLAELFNIPFVYSLSFSPGYTFEKHSGGFIFPPSYVPVVMSELTDQMTFMERVKNMIYVLYFDFWFEIFDMKKWDQFYSEVLGRPTTLSETMGKADVWLIRNSWNFQFPYPLLPNVDFVGGLHCKPAKPLPKEMEDFVQSSGENGVVVFSLGSMVSNMTEERANVIASALAQIPQKVLWRFDGNKPDTLGLNTRLYKWIPQNDLLGHPKTRAFITHGGANGIYEAIYHGIPMVGIPLFADQPDNIAHMKARGAAVRVDFNTMSSTDLLNALKRVINDPSYKENVMKLSRIQHDQPVKPLDRAVFWIEFVMRHKGAKHLRVAAHDLTWFQYHSLDVIGFLLVCVATVIFIVTKCCLFCFWKFARKAKKGKND,"[(308, 310), (359, 359), (363, 363), (371, 372), (380, 383), (391, 397), (399, 403)]" 
-CC(=O)Oc1ccc2ccccc2c1,Oc1ccc2ccccc2c1,MAKLIALTLLGMGLALFRNHQSSYQTRLNALREVQPVELPNCNLVKGIETGSEDLEILPNGLAFISSGLKYPGIKSFNPNSPGKILLMDLNEEDPTVLELGITGSKFDVSSFNPHGISTFTDEDNAMYLLVVNHPDAKSTVELFKFQEEEKSLLHLKTIRHKLLPNLNDIVAVGPEHFYGTNDHYFLDPYLQSWEMYLGLAWSYVVYYSPSEVRVVAEGFDFANGINISPDGKYVYIAELLAHKIHVYEKHANWTLTPLKSLDFNTLVDNISVDPETGDLWVGCHPNGMKIFFYDSENPPASEVLRIQNILTEEPKVTQVYAENGTVLQGSTVASVYKGKLLIGTVFHKALYCEL,"[(51, 52), (55, 56), (64, 66), (69, 69), (85, 85), (115, 116), (118, 119), (129, 132), (166, 167), (170, 171), (179, 183), (222, 223), (225, 226), (238, 238), (240, 240), (267, 268), (271, 272), (281, 285), (331, 333), (345, 346)]" -NC(=O)[C@@H](N)Cc1ccccc1,NC(=O)[C@H](N)Cc1ccccc1,MTKALYDRDGAAIGNLQKLRFFPLAISGGRGARLIEENGRELIDLSGAWGAASLGYGHPAIVAAVSAAAANPAGATILSASNAPAVTLAERLLASFPGEGTHKIWFGHSGSDANEAAYRAIVKATGRSGVIAFAGAYHGCTVGSMAFSGHSVQADAAKADGLILLPYPDPYRPYRNDPTGDAILTLLTEKLAAVPAGSIGAAFIEPIQSDGGLIVPPDGFLRKFADICRAHGILVVCDEVKVGLARSGRLHCFEHEGFVPDILVLGKGLGGGLPLSAVIAPAEILDCASAFAMQTLHGNPISAAAGLAVLETIDRDDLPAMAERKGRLLRDGLSELAKRHPLIGDIRGRGLACGMELVCDRQSREPARAETAKLIYRAYQLGLVVYYVGMNGNVLEFTPPLTITETDIHKALDLLDRAFSELSAVSNEEIAQFAGW,"[(49, 50), (75, 76), (78, 78), (108, 109), (112, 116), (135, 136), (138, 141), (144, 144), (148, 150), (152, 153), (204, 206), (209, 210), (236, 237), (242, 244), (246, 246), (250, 253), (263, 268), (276, 276), (293, 294), (296, 297), (396, 396), (398, 398)]" -CC(=O)NCCCCNCCCN,NCCCCNCCCN,MAIGYVWNTLYGWVDTGTGSLAAANLTARMQPISHHLAHPDTKRRFHELVCASGQIEHLTPIAAVAATDADILRAHSAAHLENMKRVSNLPTGGDTGDGITMMGNGGLEIARLSAGGAVELTRRVATGELSAGYALVNPPGHHAPHNAAMGFCIFNNTSVAAGYARAVLGMERVAILDWDVHHGNGTQDIWWNDPSVLTISLHQHLCFPPDSGYSTERGAGNGHGYNINVPLPPGSGNAAYLHAMDQVVLHALRAYRPQLIIVGSGFDASMLDPLARMMVTADGFRQMARRTIDCAADICDGRIVFVQEGGYSPHYLPFCGLAVIEELTGVRSLPDPYHEFLAGMGGNTLLDAERAAIEEIVPLLADIR,"[(142, 143), (178, 179), (181, 181), (183, 184), (201, 204), (207, 208), (264, 264), (266, 267), (269, 270), (273, 273), (275, 276), (285, 285), (309, 312)]" 
-CC[C@H](/C=C/[C@@H](C)[C@H]1CC[C@@H]2[C@@]1(CC[C@H]3[C@H]2CC=C4[C@@]3(CC[C@@H](C4)O)C)C)C(C)C,OO,MTAQQHLSRRRMLGMAAFGAAALAGGTTIAAPRAAAAAKSAADNGGYVPAVVIGTGYGAAVSALRLGEAGVQTLMLEMGQLWNQPGPDGNIFCGMLNPDKRSSWFKNRTEAPLGSFLWLDVVNRNIDPYAGVLDRVNYDQMSVYVGRGVGGGSLVNGGMAVEPKRSYFEEILPRVDSSEMYDRYFPRANSMLRVNHIDTKWFEDTEWYKFARVSREQAGKAGLGTVFVPNVYDFGYMQREAAGEVPKSALATEVIYGNNHGKQSLDKTYLAAALGTGKVTIQTLHQVKTIRQTKDGGYALTVEQKDTDGKLLATKEISCRYLFLGAGSLGSTELLVRARDTGTLPNLNSEVGAGWGPNGNIMTARANHMWNPTGAHQSSIPALGIDAWDNSDSSVFAEIAPMPAGLETWVSLYLAITKNPQRGTFVYDAATDRAKLNWTRDQNAPAVNAAKALFDRINKANGTIYRYDLFGTQLKAFADDFCYHPLGGCVLGKATDDYGRVAGYKNLYVTDGSLIPGSVGVNPFVTITALAERNVERIIKQDVTAS,"[(15, 16), (51, 51), (55, 56), (59, 61), (75, 76), (78, 82), (85, 85), (123, 123), (125, 125), (130, 130), (147, 151), (153, 155), (160, 161), (191, 198), (214, 220), (222, 222), (226, 226), (230, 231), (234, 234), (253, 253), (285, 286), (288, 289), (293, 295), (297, 298), (321, 321), (325, 325), (342, 342), (351, 351), (359, 359), (365, 366), (375, 375), (379, 380), (432, 433), (464, 464), (470, 474), (481, 482), (484, 485), (487, 488)]" -N[C@@H](Cc1ccc(O)c(O)c1)C(=O)O,NCCc1ccc(O)c(O)c1,MNASEFRRRGKEMVDYMANYMEGIEGRQVYPDVEPGYLRPLIPAAAPQEPDTFEDIINDVEKIIMPGVTHWHSPYFFAYFPTASSYPAMLADMLCGAIGCIGFSWAASPACTELETVMMDWLGKMLELPKAFLNEKAGEGGGVIQGSASEATLVALLAARTKVIHRLQAASPELTQAAIMEKLVAYSSDQAHSSVERAGLIGGVKLKAIPSDGNFAMRASALQEALERDKAAGLIPFFMVATLGTTTCCSFDNLLEVGPICNKEDIWLHVDAAYAGSAFICPEFRHLLNGVEFADSFNFNPHKWLLVNFDCSAMWVKKRTDLTGAFRLDPTYLKHSHQDSGLITDYRHWQIPLGRRFRSLKMWFVFRMYGVKGLQAYIRKHVQLSHEFESLVRQDPRFEICVEVILGLVCFRLKGSNKVNEALLQRINSAKKIHLVPCHLRDKFVLRFAICSRTVESAHVQRAWEHIKELAADVLRAERE,"[(146, 147), (150, 153), (190, 190), (192, 192), (194, 194), (197, 197), (244, 245), (247, 248), (271, 271), (273, 274), (277, 277), (298, 299), (301, 303), (311, 313), (447, 447)]" 
-O,O=C[C@H](O)[C@@H](O)[C@H](O)[C@H](O)CO,MSKLFSTVNSARHSVPLGGMRDYVHIKKLEMNTVLGPDSWNQLMPQKCLLSLDMGTDFSKSAATDDLKYSLNYAVISRDLTNFVSKKKNWGSVSNLAKSVSQFVMDKYSGVECLNLEVQADTTHIRSDHISCIIQQERGNPESQEFDVVRISELKMLTLIGVFTFERLKKQYVTLDIKLPWPKKAELPPPVQSIIDNVVKFVEESNFKTVEALVESVSAVIAHNEYFQKFPDSPLVVKVLKLNAITATEGVGVSCIREPREIAMVNIPYLSSIHESSDIKFQLSSSQNTPIEGKNTWKRAFLAFGSNIGDRFKHIQMALQLLSREKTVKLRNISSIFESEPMYFKDQTPFMNGCVEVETLLTPSELLKLCKKIEYEELQRVKHFDNGPRTIDLDIVMFLNSAGEDIIVNEPDLNIPHPRMLERTFVLEPLCELISPVHLHPVTAEPIVDHLKQLYDKQHDEDTLWKLVPLPYRSGVEPRFLKFKTATKLDEFTGETNRITVSPTYIMAIFNATPDSFSDGGEHFADIESQLNDIIKLCKDALYLHESVIIDVGGCSTRPNSIQASEEEEIRRSIPLIKAIRESTELPQDKVILSIDTYRSNVAKEAIKVGVDIINDISGGLFDSNMFAVIAENPEICYILSHTRGDISTMNRLAHYENFALGDSIQQEFVHNTDIQQLDDLKDKTVLIRNVGQEIGERYIKAIDNGVKRWQILIDPGLGFAKTWKQNLQIIRHIPILKNYSFTMNSNNSQVYVNLRNMPVLLGPSRKKFIGHITKDVDAKQRDFATGAVVASCIGFGSDMVRVHDVKNCSKSIKLADAIYKGLE,"[(509, 510), (512, 513), (519, 519), (553, 553), (555, 557), (559, 561), (567, 567), (594, 595), (597, 598), (601, 603), (610, 614), (616, 619), (651, 652), (675, 676), (687, 687), (708, 710), (713, 714), (716, 719), (721, 722), (729, 729), (755, 759), (762, 766), (768, 771), (774, 774), (800, 801), (805, 807), (809, 810), (820, 820), (826, 826), (829, 830), (833, 833), (838, 838), (840, 842)]" -CC(C)=CCC/C(C)=C/COP(=O)([O-])OP(=O)([O-])[O-],CC1=CC[C@@H]2[C@H](C1)C2(C)C,MSKILVFGHQNPDSDAIGSSVAFAYLAKEAWGLDTEAVALGTPNEETAYVLDYFGVQAPRVVESAKAEGVETVILTDHNEFQQSISDIKDVTVYGVVDHHRVANFETANPLYMRLEPVGSASSIVYRMFKENGVSVPKELAGLLLSGLISDTLLLKSPTTHASDIPVAKELAELAGVNLEEYGLEMLKAGTNLSSKTAAELIDIDAKTFELNGEAVRVAQVNTVDINDILARQEEIEVAIQEAIVTEGYSDFVLMITDIVNSNSEILALGSNMAKVEAAFEFTLENNHAFLAGAVSRKKQVVPQLTESYNA,"[(7, 8), (10, 12), (14, 14), (16, 20), (40, 40), (44, 44), (47, 47), (75, 76), (78, 79), (83, 83), (97, 98), (100, 101), (115, 122), (124, 124), (146, 150), (152, 153), (157, 157), (159, 159), (205, 205), (207, 207), (298, 298)]" 
-O,O=[N+]([O-])c1ccc(O)cc1,MDIMNEKVKKIIEFMDKNSIDAVLIAKNPNVYYISGASPLAGGYILITGESATLYVPELEYEMAKEESNIPVEKFKKMDEFYKALEGIKSLGIESSLPYGFIEELKKKANIKEFKKVDDVIRDMRIIKSEKEIKIIEKACEIADKAVMAAIEEITEGKKEREVAAKVEYLMKMNGAEKPAFDTIIASGYRSALPHGVASDKRIERGDLVVIDLGALYQHYNSDITRTIVVGSPNEKQKEIYEIVLEAQKKAVESAKPGITAKELDSIARNIIAEYGYGEYFNHSLGHGVGLEVHEWPRVSQYDETVLREGMVITIEPGIYIPKIGGVRIEDTILITKNGSKRLTKTERELI,"[(140, 140), (181, 184), (210, 211), (213, 214), (221, 222), (224, 226), (248, 248), (281, 281), (283, 286), (288, 289), (293, 295), (297, 299), (312, 315), (317, 318), (320, 320), (328, 329), (331, 332), (344, 344)]" -NC(=O)CC[C@H](N)C(=O)O,N[C@@H](CCC(=O)[O-])C(=O)[O-],MNFYSAYQHGFVRVAACTHHTTIGDPAANAASVLDMARACHDDGAALAVFPELTLSGYSIEDVLLQDSLLDAVEDALLDLVTESADLLPVLVVGAPLRHRHRIYNTAVVIHRGAVLGVVPKSYLPTYREFYERRQMAPGDGERGTIRIGGADVAFGTDLLFAASDLPGFVLHVEICEDMFVPMPPSAEAALAGATVLANLSGSPITIGRAEDRRLLARSASARCLAAYVYAAAGEGESTTDLAWDGQTMIWENGALLAESERFPKGVRRSVADVDTELLRSERLRMGTFDDNRRHHRELTESFRRIDFALDPPAGDIGLLREVERFPFVPADPQRLQQDCYEAYNIQVSGLEQRLRALDYPKVVIGVSGGLDSTHALIVATHAMDREGRPRSDILAFALPGFATGEHTKNNAIKLARALGVTFSEIDIGDTARLMLHTIGHPYSVGEKVYDVTFENVQAGLRTDYLFRIANQRGGIVLGTGDLSELALGWSTYGVGDQMSHYNVNAGVPKTLIQHLIRWVISAGEFGEKVGEVLQSVLDTEITPELIPTGEEELQSSEAKVGPFALQDFSLFQVLRYGFRPSKIAFLAWHAWNDAERGNWPPGFPKSERPSYSLAEIRHWLQIFVQRFYSFSQFKRSALPNGPKVSHGGALSPRGDWRAPSDMSARIWLDQIDREVPKG,"[(52, 52), (58, 58), (125, 126), (128, 130), (177, 177), (201, 202), (204, 208), (210, 214), (230, 230), (232, 232), (243, 244), (350, 353), (355, 358), (361, 365), (374, 377), (396, 399), (410, 412), (423, 423), (452, 455), (457, 461), (466, 470), (472, 474), (476, 479), (481, 484), (486, 489), (494, 500), (502, 503), (505, 505), (510, 510), (513, 514), (517, 517), (537, 538), (541, 542), (557, 557), (560, 561), (564, 564), (627, 627), (630, 634), (636, 641), (659, 660), (662, 663)]" 
-O=O,O=C[O-],MPQLEASLELDFQSESYKDAYSRINAIVIEGEQEAFDNYNRLAEMLPDQRDELHKLAKMEQRHMKGFMACGKNLSVTPDMGFAQKFFERLHENFKAAAAEGKVVTCLLIQSLIIECFAIAAYNIYIPVADAFARKITEGVVRDEYLHRNFGEEWLKANFDASKAELEEANRQNLPLVWLMLNEVADDARELGMERESLVEDFMIAYGEALENIGFTTREIMRMSAYGLAAV,"[(28, 31), (33, 36), (39, 39), (56, 59), (61, 62), (64, 68), (109, 114), (116, 120), (122, 122), (139, 140), (143, 146), (148, 151)]" -C[C@H](CCC(=O)NCC(=O)O)[C@H]1CC[C@@H]2[C@@]1([C@H](C[C@H]3[C@H]2[C@@H](C[C@H]4[C@@]3(CC[C@H](C4)O)C)O)O)C,NCC(=O)O,MCTGLALETKDGLHLFGRNMDIEYSFNQSIIFIPRNFKCVNKSNKKELTTKYAVLGMGTIFDDYPTFADGMNEKGLGCAGLNFPVYVSYSKEDIEGKTNIPVYNFLLWVLANFSSVEEVKEALKNANIVDIPISENIPNTTLHWMISDITGKSIVVEQTKEKLNVFDNNIGVLTNSPTFDWHVANLNQYVGLRYNQVPEFKLGDQSLTALGQGTGLVGLPGDFTPASRFIRVAFLRDAMIKNDKDSIDLIEFFHILNNVAMVRGSTRTVEEKSDLTQYTSCMCLEKGIYYYNTYENNQINAIDMNKENLDGNEIKTYKYNKTLSINHVN,"[(3, 5), (16, 17), (19, 21), (68, 69), (79, 81), (83, 84), (140, 143), (173, 176), (228, 228), (252, 252), (256, 256), (278, 280)]" -C1CCNC(=O)[C@H](C1)N,C1CCNC(=O)[C@@H](C1)N,MTKALYDRDGAAIGNLQKLRFFPLAISGGRGARLIEENGRELIDLSGAWGAASLGYGHPAIVAAVSAAAANPAGATILSASNAPAVTLAERLLASFPGEGTHKIWFGHSGSDANEAAYRAIVKATGRSGVIAFAGAYHGCTVGSMAFSGHSVQADAAKADGLILLPYPDPYRPYRNDPTGDAILTLLTEKLAAVPAGSIGAAFIEPIQSDGGLIVPPDGFLRKFADICRAHGILVVCDEVKVGLARSGRLHCFEHEGFVPDILVLGKGLGGGLPLSAVIAPAEILDCASAFAMQTLHGNPISAAAGLAVLETIDRDDLPAMAERKGRLLRDGLSELAKRHPLIGDIRGRGLACGMELVCDRQSREPARAETAKLIYRAYQLGLVVYYVGMNGNVLEFTPPLTITETDIHKALDLLDRAFSELSAVSNEEIAQFAGW,"[(49, 50), (75, 76), (78, 78), (108, 109), (112, 116), (135, 136), (138, 141), (144, 144), (148, 150), (152, 153), (204, 206), (209, 210), (236, 237), (242, 244), (246, 246), (250, 253), (263, 268), (276, 276), (293, 294), (296, 297), (396, 396), (398, 398)]" 
-O,C[S+](CCC(N)C(=O)O)CC1OC(n2cnc3c(N)ncnc32)C(O)C1O,IPAAPVAAQARKLLRDLAFRPPLLAARSQVVQLTPRRWLNLQEYQSKKLMSDNGVKVQRFFVADTANEALEAAKRLNAKEIVLKAQILAGGRGKGVFSSGLKGGVHLTKDPEVVGQLAKQMIGYNLATKQTPKEGVKVNKVMVAEALDISRETYLAILMDRSCNGPVLVGSPQGGVDIEEVAASNPELIFKEQIDIIEGIKDSQAQRMAENLGFLGPLQNQAADQIKKLYNLFLKIDATQVEVNPFGETPEGQVVCFDAKINFDDNAEFRQKDIFAMDDKSENEPIENEAAKYDLKYIGLDGNIACFVNGAGLAMATCDIIFLNGGKPANFLDLGGGVKESQVYQAFKLLTADPKVEAILVNIFGGIVNCAIIANGITKACRELELKVPLVVRLEGTNVHEAQNILTNSGLPITSAVDLEDAAKKAVASVTKK,"[(4, 4), (49, 53), (56, 57), (59, 60), (62, 65), (87, 90), (94, 98), (135, 135), (142, 146), (148, 151), (241, 243), (245, 246), (250, 250), (252, 252), (256, 257), (259, 260), (273, 273), (293, 298)]" diff --git a/examples/enzeptional/example_enzeptional.py b/examples/enzeptional/example_enzeptional.py index 05efbfc5a..6a06a9492 100644 --- a/examples/enzeptional/example_enzeptional.py +++ b/examples/enzeptional/example_enzeptional.py @@ -1,8 +1,16 @@ import logging import pandas as pd from typing import Tuple, List, Optional -from gt4sd.frameworks.enzeptional.processing import HFandTAPEModelUtility -from gt4sd.frameworks.enzeptional.core import SequenceMutator, EnzymeOptimizer +from gt4sd.frameworks.enzeptional import ( + EnzymeOptimizer, + SequenceMutator, + SequenceScorer, + CrossoverGenerator, + HuggingFaceEmbedder, + HuggingFaceModelLoader, + HuggingFaceTokenizerLoader, + SelectionGenerator, +) from gt4sd.configuration import GT4SDConfiguration, sync_algorithm_with_s3 @@ -17,23 +25,33 @@ def initialize_environment(model = "feasibility") -> Tuple[str, Optional[str]]: """ configuration = GT4SDConfiguration.get_instance() sync_algorithm_with_s3("proteins/enzeptional/scorers", module="properties") - return f"{configuration.gt4sd_local_cache_path}/properties/proteins/enzeptional/scorers/feasibility/model.pkl" - - -def load_experiment_parameters() -> Tuple[List, List, List, List]: + scorer = 
f"{configuration.gt4sd_local_cache_path}/properties/proteins/enzeptional/scorers/{model}/model.pkl" + if model == "feasibility": + return scorer, None + else: + scaler = f"{configuration.gt4sd_local_cache_path}/properties/proteins/enzeptional/scorers/{model}/scaler.pkl" + return scorer, scaler + +def load_experiment_parameters(model="feasibility") -> Tuple[List, List, List, List]: """Load experiment parameters from a CSV file.""" - df = pd.read_csv("data.csv").iloc[1] - return df["substrates"], df["products"], df["sequences"], eval(df["intervals"]) + substrate_smiles = "NC1=CC=C(N)C=C1" + product_smiles = "CNC1=CC=C(NC(=O)C2=CC=C(C=C2)C(C)=O)C=C1" + intervals = [(5, 10), (20, 25)] + sample_sequence = "MSKLLMIGTGPVAIDQFLTRYEASCQAYKDMHQDQQLSSQFNTNLFEGDKALVTKFLEINRTLS" + scorer_path, scaler_path = initialize_environment(model) + return substrate_smiles, product_smiles, sample_sequence, intervals, scorer_path, scaler_path def setup_optimizer( substrate_smiles: str, product_smiles: str, sample_sequence: str, - intervals: List[List[int]], scorer_path: str, scaler_path: str, + intervals: List[List[int]], concat_order: List[str], + top_k: int, + batch_size: int, use_xgboost_scorer: bool ): """Set up and return the optimizer with all necessary components configured @@ -44,48 +62,82 @@ def setup_optimizer( product_smiles (str): SMILES representation of the product. sample_sequence (str): The initial protein sequence. - intervals (List[List[int]]): Intervals for mutation. scorer_path (str): File path to the scoring model. scaler_path (str): Path to the scaller in case you are usinh the Kcat model. + intervals (List[List[int]]): Intervals for mutation. concat_order (List[str]): Order of concatenating embeddings. + top_k (int): Number of top amino acids to use to create mutants. + batch_size (int): Batch size. use_xgboost_scorer (bool): flag to specify if the fitness function is the Kcat. 
Returns: Initialized optmizer """ - model_tokenizer_paths = "facebook/esm2_t33_650M_UR50D" - chem_paths = "seyonec/ChemBERTa-zinc-base-v1" + language_model_path = "facebook/esm2_t33_650M_UR50D" + tokenizer_path = "facebook/esm2_t33_650M_UR50D" + chem_model_path = "seyonec/ChemBERTa-zinc-base-v1" + chem_tokenizer_path = "seyonec/ChemBERTa-zinc-base-v1" + + model_loader = HuggingFaceModelLoader() + tokenizer_loader = HuggingFaceTokenizerLoader() + + protein_model = HuggingFaceEmbedder( + model_loader=model_loader, + tokenizer_loader=tokenizer_loader, + model_path=language_model_path, + tokenizer_path=tokenizer_path, + cache_dir=None, + device="cpu", + ) - protein_model = HFandTAPEModelUtility( - embedding_model_path=model_tokenizer_paths, tokenizer_path=model_tokenizer_paths + chem_model = HuggingFaceEmbedder( + model_loader=model_loader, + tokenizer_loader=tokenizer_loader, + model_path=chem_model_path, + tokenizer_path=chem_tokenizer_path, + cache_dir=None, + device="cpu", ) + mutation_config = { "type": "language-modeling", - "embedding_model_path": model_tokenizer_paths, - "tokenizer_path": model_tokenizer_paths, - "unmasking_model_path": model_tokenizer_paths, + "embedding_model_path": language_model_path, + "tokenizer_path": tokenizer_path, + "unmasking_model_path": language_model_path, } mutator = SequenceMutator(sequence=sample_sequence, mutation_config=mutation_config) - optimizer_config = { - "sequence": sample_sequence, - "protein_model": protein_model, - "substrate_smiles": substrate_smiles, - "product_smiles": product_smiles, - "chem_model_path": chem_paths, - "chem_tokenizer_path": chem_paths, - "scorer_filepath": scorer_path, - "mutator": mutator, - "intervals": intervals, - "batch_size": 5, - "top_k": 3, - "selection_ratio": 0.25, - "perform_crossover": True, - "crossover_type": "single_point", - "concat_order": concat_order, - "scaler_filepath": scaler_path, - "use_xgboost_scorer": use_xgboost_scorer - } + mutator.set_top_k(top_k) + + scorer = 
SequenceScorer( + protein_model=protein_model, + scorer_filepath=scorer_path, + use_xgboost=use_xgboost_scorer, + scaler_filepath=scaler_path, + ) + + selection_generator = SelectionGenerator() + crossover_generator = CrossoverGenerator() + + optimizer_config = dict( + sequence=sample_sequence, + mutator=mutator, + scorer=scorer, + intervals=intervals, + substrate_smiles=substrate_smiles, + product_smiles=product_smiles, + chem_model=chem_model, + selection_generator=selection_generator, + crossover_generator=crossover_generator, + concat_order=concat_order, + batch_size=batch_size, + selection_ratio=0.25, + perform_crossover=True, + crossover_type="single_point", + pad_intervals=False, + minimum_interval_length=8, + seed=42, + ) return EnzymeOptimizer(**optimizer_config) @@ -106,17 +158,24 @@ def optimize_sequences(optimizer): def main_kcat(): """Optimization using Kcat model""" logging.basicConfig(level=logging.INFO) - scorer_path, scaler_path = initialize_environment(model="kcat") - concat_order, use_xgboost_scorer = ["substrate", "sequence"], True - ( + concat_order = ["substrate", "sequence"] + use_xgboost_scorer=True + top_k=2 + batch_size=2 + substrate_smiles, product_smiles, sample_sequence, intervals, scorer_path, scaler_path = load_experiment_parameters("kcat") + optimizer = setup_optimizer( substrate_smiles, product_smiles, sample_sequence, + scorer_path, + scaler_path, intervals, - ) = load_experiment_parameters() - optimizer = setup_optimizer( - substrate_smiles, product_smiles, sample_sequence, intervals, scorer_path, scaler_path, concat_order, use_xgboost_scorer + concat_order, + top_k, + batch_size, + use_xgboost_scorer ) + optimized_sequences, iteration_info = optimize_sequences(optimizer) logging.info("Optimization completed.") @@ -124,19 +183,27 @@ def main_kcat(): def main_feasibility(): """Optimization using Feasibility model""" logging.basicConfig(level=logging.INFO) - scorer_path, scaler_path = initialize_environment() - concat_order, 
use_xgboost_scorer = ["substrate", "sequence", "product"], False - ( + concat_order = ["substrate", "sequence", "product"] + use_xgboost_scorer=False + top_k=2 + batch_size=2 + substrate_smiles, product_smiles, sample_sequence, intervals, scorer_path, scaler_path = load_experiment_parameters("feasibility") + optimizer = setup_optimizer( substrate_smiles, product_smiles, sample_sequence, + scorer_path, + scaler_path, intervals, - ) = load_experiment_parameters() - optimizer = setup_optimizer( - substrate_smiles, product_smiles, sample_sequence, intervals, scorer_path, scaler_path, concat_order, use_xgboost_scorer + concat_order, + top_k, + batch_size, + use_xgboost_scorer ) + optimized_sequences, iteration_info = optimize_sequences(optimizer) logging.info("Optimization completed.") if __name__ == "__main__": - main() + main_feasibility() + main_kcat() diff --git a/requirements.txt b/requirements.txt index 30735523c..194d5b72b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ setuptools==69.5.1 accelerate>=0.12,<0.20.0 datasets>=1.11.0 diffusers<=0.6.0 +enzeptional>=1.0.0 importlib-metadata>=1.7.0,<5.0.0 # temporary: https://github.com/python/importlib_metadata/issues/409 importlib-resources>=5.10.0 ipaddress>=1.0.23 diff --git a/setup.cfg b/setup.cfg index 7bcdf6d75..578244202 100644 --- a/setup.cfg +++ b/setup.cfg @@ -18,6 +18,7 @@ install_requires = accelerate datasets diffusers + enzeptional importlib_metadata importlib_resources ipaddress @@ -293,4 +294,7 @@ ignore_missing_imports = True ignore_missing_imports = True [mypy-pydantic_settings.*] +ignore_missing_imports = True + +[mypy-enzeptional.*] ignore_missing_imports = True \ No newline at end of file diff --git a/src/gt4sd/frameworks/enzeptional/__init__.py b/src/gt4sd/frameworks/enzeptional/__init__.py index 48d3007ce..fa0958349 100644 --- a/src/gt4sd/frameworks/enzeptional/__init__.py +++ b/src/gt4sd/frameworks/enzeptional/__init__.py @@ -25,5 +25,17 @@ Module for enzyme optimization.
""" - -from .core import EnzymeOptimizer # noqa: F401 +from enzeptional import ( # noqa: F401 + EnzymeOptimizer, + SequenceMutator, + SequenceScorer, + CrossoverGenerator, + HuggingFaceEmbedder, + HuggingFaceModelLoader, + HuggingFaceTokenizerLoader, + SelectionGenerator, + mutate_sequence_with_variant, + round_up, + sanitize_intervals, + sanitize_intervals_with_padding, +) diff --git a/src/gt4sd/frameworks/enzeptional/core.py b/src/gt4sd/frameworks/enzeptional/core.py deleted file mode 100644 index f22fd1968..000000000 --- a/src/gt4sd/frameworks/enzeptional/core.py +++ /dev/null @@ -1,669 +0,0 @@ -# -# MIT License -# -# Copyright (c) 2024 GT4SD team -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
-# -import pandas as pd -import numpy as np -from abc import ABC, abstractmethod -from typing import Any, Dict, List, Optional, Tuple -import random -import logging -from itertools import product as iter_product -import time -from joblib import load -import xgboost as xgb -from .processing import ( - HFandTAPEModelUtility, - SelectionGenerator, - CrossoverGenerator, - sanitize_intervals, - sanitize_intervals_with_padding, -) - - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -class MutationModelManager: - """ - Manages and caches mutation models for efficient reuse. - """ - - _models_cache: Dict[Any, Any] = {} - - @staticmethod - def load_model(embedding_model_path, tokenizer_path, **kwargs): - """ - Loads or retrieves a model from the cache based on the given paths. - - Args: - embedding_model_path (str): Path to the embedding model. - tokenizer_path (str): Path to the tokenizer. - **kwargs: Additional arguments for model loading. - - Returns: - An instance of the loaded model. - """ - model_key = (embedding_model_path, tokenizer_path) - - if model_key in MutationModelManager._models_cache: - return MutationModelManager._models_cache[model_key] - - model = HFandTAPEModelUtility(embedding_model_path, tokenizer_path, **kwargs) - MutationModelManager._models_cache[model_key] = model - return model - - @staticmethod - def clear_cache(): - """ - Clears the cached models. - """ - MutationModelManager._models_cache.clear() - - -class MutationStrategy(ABC): - """ - Abstract base class for defining mutation strategies. - """ - - @abstractmethod - def mutate( - self, sequence: str, num_mutations: int, intervals: List[List[int]] - ) -> List[str]: - """Abstract method for mutating a sequence. - - Args: - sequence (str): The original sequence to be mutated. - num_mutations (int): The number of mutations to apply. - - Returns: - List[str]: The mutated sequence. 
- """ - pass - - -class LanguageModelMutationStrategy(MutationStrategy): - """ - Mutation strategy using a language model. - """ - - def __init__(self, mutation_model): - """Initializes the mutation strategy with a given model. - - Args: - mutation_model: The model to be used for mutation. - """ - self.mutation_model = mutation_model - self.top_k = 2 - - def set_top_k(self, top_k: int): - """Sets the top k mutations to consider during mutation. - - Args: - top_k (int): The number of top mutations to consider. - """ - self.top_k = top_k - - def mutate( - self, sequence: str, num_mutations: int, intervals: List[List[int]] - ) -> List[str]: - """Mutates a sequence within specified intervals using the model. - - Args: - sequence (str): The original sequence to be mutated. - num_mutations (int): The number of mutations to introduce. - intervals (List[List[int]]): Intervals within the sequence - where mutations are allowed. - - Returns: - List[str]: A list of mutated sequences. - """ - - flat_intervals = [ - i - for interval in intervals - for i in range(interval[0], interval[1] + 1) - if i < len(sequence) - ] - - num_mutations = random.randint(1, num_mutations) - - chosen_positions = random.sample( - flat_intervals, min(num_mutations, len(flat_intervals)) - ) - sequence_list = list(sequence) - - for pos in chosen_positions: - sequence_list[pos] = self.mutation_model.tokenizer.mask_token - - masked_sequence = " ".join(sequence_list) - - return self.mutation_model.unmask(masked_sequence, self.top_k) - - -class TransitionMatrixMutationStrategy(MutationStrategy): - """ - Mutation strategy based on a transition matrix. - """ - - def __init__(self, transition_matrix: str): - """Initializes the mutation strategy with a transition matrix. - - Args: - transition_matrix (str): Path to the CSV file containing - the transition matrix. 
- """ - logger.info(" USING TRNASITION MATRIX ") - self.transition_matrix = pd.read_csv( - transition_matrix, index_col=None, header=0 - ) - self.top_k = 2 - - def set_top_k(self, top_k: int): - """Sets the top k mutations to consider during mutation. - - Args: - top_k (int): The number of top mutations to consider. - """ - - self.top_k = top_k - - def mutate( - self, sequence: str, num_mutations: int, intervals: List[List[int]] - ) -> List[str]: - """Mutates a sequence based on the transition matrix within - specified intervals. - - Args: - sequence (str): The original sequence to be mutated. - num_mutations (int): The number of mutations to introduce. - intervals (List[List[int]]): Intervals within the sequence - where mutations are allowed. - - Returns: - List[str]: A list of mutated sequences. - """ - - flat_intervals = [ - i - for interval in intervals - for i in range(interval[0], interval[1] + 1) - if i < len(sequence) - ] - - num_mutations = random.randint(1, num_mutations) - - chosen_positions = random.sample( - flat_intervals, min(num_mutations, len(flat_intervals)) - ) - - mutated_sequences = [] - - mutation_options = [] - for pos in chosen_positions: - aa_probabilities = self.transition_matrix.iloc[pos] - top_mutations = aa_probabilities.nlargest(self.top_k).index.tolist() - mutation_options.append([(pos, aa) for aa in top_mutations]) - - for mutation_combination in iter_product(*mutation_options): - temp_sequence = list(sequence) - for pos, new_aa in mutation_combination: - temp_sequence[pos] = new_aa - mutated_sequences.append("".join(temp_sequence)) - - return mutated_sequences - - -class MutationFactory: - """ - Factory class for creating mutation strategies based on configuration. - """ - - @staticmethod - def get_mutation_strategy(mutation_config: Dict[str, Any]): - """Retrieves a mutation strategy based on the provided configuration. 
- - Args: - mutation_config (Dict[str, Any]): Configuration specifying - the type of mutation strategy and its parameters. - - Raises: - KeyError: If required configuration parameters are missing. - ValueError: If the mutation type is unsupported. - - Returns: - _type_: An instance of the specified mutation strategy - """ - if mutation_config["type"] == "language-modeling": - mutation_model = MutationModelManager.load_model( - embedding_model_path=mutation_config["embedding_model_path"], - tokenizer_path=mutation_config["tokenizer_path"], - unmasking_model_path=mutation_config.get("unmasking_model_path"), - ) - return LanguageModelMutationStrategy(mutation_model) - elif mutation_config["type"] == "transition-matrix": - transition_matrix = mutation_config.get("transition_matrix") - if transition_matrix is None: - raise KeyError( - "Transition matrix not provided in mutation configuration." - ) - return TransitionMatrixMutationStrategy(transition_matrix) - else: - raise ValueError("Unsupported mutation type") - - -class SequenceMutator: - """ - Class for mutating sequences using a specified strategy. - """ - - def __init__(self, sequence: str, mutation_config: Dict[str, Any]): - """Initializes the mutator with a sequence and a mutation strategy. - - Args: - sequence (str): The sequence to be mutated. - mutation_config (Dict[str, Any]): Configuration for - the mutation strategy. - """ - self.sequence = sequence - self.mutation_strategy = MutationFactory.get_mutation_strategy(mutation_config) - self.top_k = 2 - - def set_top_k(self, top_k: int): - """Sets the number of top mutations to consider in the mutation strategy. - - Args: - top_k (int): The number of top mutations to consider. 
- """ - self.top_k = top_k - if isinstance( - self.mutation_strategy, - (LanguageModelMutationStrategy, TransitionMatrixMutationStrategy), - ): - self.mutation_strategy.set_top_k(top_k) - - def get_mutations( - self, - num_sequences: int, - number_of_mutations: int, - intervals: List[Tuple[int, int]], - current_population: List[str], - already_evaluated_sequences: List[str], - ) -> List[str]: - """Generates a set of mutated sequences. - - Args: - num_sequences (int): Number of mutated sequences to generate. - number_of_mutations (int): Number of mutations to apply to - each sequence. - intervals (List[Tuple[int]]): Intervals within the sequence - where mutations are allowed. - already_evaluated_sequences (List[str]): List of sequences - that have already been evaluated. - - Returns: - List[str]: A list of mutated sequences. - """ - max_mutations = min(len(self.sequence), number_of_mutations) - if len(current_population) < 1: - current_population.append(self.sequence) - - random.shuffle(current_population) - mutated_sequences_set: List[str] = [] - - while len(mutated_sequences_set) < num_sequences: - for temp_sequence in current_population: - new_mutations = self.mutation_strategy.mutate( - temp_sequence, max_mutations, intervals - ) - mutated_sequences_set.extend(new_mutations) - if len(mutated_sequences_set) >= num_sequences: - break - return random.sample(mutated_sequences_set, num_sequences) - - -class EnzymeOptimizer: - """ - Optimizes protein sequences based on interaction with - substrates and products. 
- """ - - def __init__( - self, - sequence: str, - protein_model: HFandTAPEModelUtility, - substrate_smiles: str, - product_smiles: str, - chem_model_path: str, - chem_tokenizer_path: str, - scorer_filepath: str, - mutator: SequenceMutator, - intervals: List[Tuple[int, int]], - batch_size: int = 2, - seed: int = 123, - top_k: int = 2, - selection_ratio: float = 0.5, - perform_crossover: bool = False, - crossover_type: str = "uniform", - minimum_interval_length: int = 8, - pad_intervals: bool = False, - concat_order=["sequence", "substrate", "product"], - scaler_filepath: Optional[str] = None, - use_xgboost_scorer: Optional[bool] = False, - ): - """Initializes the optimizer with models, sequences, and - optimization parameters. - - - Args: - sequence (str): The initial protein sequence. - protein_model (HFandTAPEModelUtility): Model for protein embeddings. - substrate_smiles (str): SMILES representation of the substrate. - product_smiles (str): SMILES representation of the product. - chem_model_path (str): Path to the chemical model. - chem_tokenizer_path (str): Path to the chemical tokenizer. - scorer_filepath (str): File path to the scoring model. - mutator (SequenceMutator): The mutator for generating - sequence variants. - intervals (List[List[int]]): Intervals for mutation. - batch_size (int): The number of sequences to process in one batch. - top_k (int): Number of top mutations to consider. - selection_ratio (float): Ratio of sequences to select - after scoring. - perform_crossover (bool): Flag to perform crossover operation. - crossover_type (str): Type of crossover operation. - minimum_interval_length (int): Minimum length of - mutation intervals. - pad_intervals (bool): Flag to pad the intervals. - concat_order (list): Order of concatenating embeddings. - scaler_filepath (str): Path to the scaller in case you are usinh the Kcat model. - use_xgboost_scorer (bool): flag to specify if the fitness function is the Kcat. 
- """ - self.sequence = sequence - self.protein_model = protein_model - self.mutator = mutator - self.intervals = intervals - self.batch_size = batch_size - self.top_k = top_k - self.selection_ratio = selection_ratio - self.perform_crossover = perform_crossover - self.crossover_type = crossover_type - self.concat_order = concat_order - self.minimum_interval_length = minimum_interval_length - self.pad_intervals = pad_intervals - self.mutator.set_top_k(top_k) - self.concat_order = concat_order - self.scorer = load(scorer_filepath) - if scaler_filepath is not None: - self.scaler = load(scaler_filepath) - self.use_xgboost_scorer = use_xgboost_scorer - - self.chem_model = HFandTAPEModelUtility(chem_model_path, chem_tokenizer_path) - self.substrate_embedding = self.chem_model.embed([substrate_smiles])[0] - self.product_embedding = self.chem_model.embed([product_smiles])[0] - - self.selection_generator = SelectionGenerator() - self.crossover_generator = CrossoverGenerator() - - if intervals is None: - self.intervals = [(0, len(sequence))] - else: - self.intervals = sanitize_intervals(intervals) - if pad_intervals: - self.intervals = sanitize_intervals_with_padding( - self.intervals, minimum_interval_length, len(sequence) - ) - self.seed = seed - random.seed(self.seed) - - def optimize( - self, - num_iterations: int, - num_sequences: int, - num_mutations: int, - time_budget: Optional[int] = 360, - ): - """Runs the optimization process over a specified number - of iterations. - - Args: - num_iterations (int): Number of iterations to run - the optimization. - num_sequences (int): Number of sequences to generate - per iteration. - num_mutations (int): Max number of mutations to apply. - time_budget (Optional[int]): Time budget for - optimizer (in seconds). Defaults to 360. - - Returns: - A tuple containing the list of all sequences and - iteration information. 
- """ - - iteration_info = {} - - scored_original_sequence = self.score_sequence(self.sequence) - original_sequence_score_ = scored_original_sequence["score"] - - logger.info(f"Original sequence score: {original_sequence_score_}") - - all_mutated_sequences: List[str] = [scored_original_sequence["sequence"]] - current_best_score = original_sequence_score_ - - all_scored_sequences: List[Dict[str, Any]] = [] - - for iteration in range(num_iterations): - start_time = time.time() - - scored_sequences: List[Dict[str, Any]] = [scored_original_sequence] - - if iteration == 0: - current_population: List[str] = [self.sequence] - if len(current_population) < num_sequences: - while len(current_population) < num_sequences: - new_mutants = self.mutator.mutation_strategy.mutate( - self.sequence, num_mutations, self.intervals - ) - for mut in new_mutants: - if mut not in all_mutated_sequences: - current_population.append(mut) - else: - continue - if len(current_population) >= num_sequences: - break - - if len(current_population) >= num_sequences: - random.shuffle(current_population) - current_population = random.sample( - current_population, k=num_sequences - ) - - logger.info( - f"Number of sequences in current population: {len(current_population)}" - ) - - iteration_scored_sequences = [] - for _ in range(0, len(current_population), self.batch_size): - scored_sequences = self.score_sequences( - current_population[_ : _ + self.batch_size] - ) - all_mutated_sequences.extend( - current_population[_ : _ + self.batch_size] - ) - all_scored_sequences.extend(scored_sequences) - iteration_scored_sequences.extend(scored_sequences) - - if self.selection_ratio < 1.0: - - samples_with_higher_score = [ - i - for i in iteration_scored_sequences - if i["score"] > original_sequence_score_ - ] - selected_sequences = self.selection_generator.selection( - samples_with_higher_score, self.selection_ratio - ) - else: - selected_sequences = iteration_scored_sequences - - offspring_sequences = [] - if 
self.perform_crossover and len(selected_sequences) > 1: - for i in range(0, len(selected_sequences), 2): - if i + 1 < len(selected_sequences): - parent1 = selected_sequences[i]["sequence"] - parent2 = selected_sequences[i + 1]["sequence"] - if self.crossover_type == "single_point": - ( - offspring1, - offspring2, - ) = self.crossover_generator.sp_crossover(parent1, parent2) - else: - ( - offspring1, - offspring2, - ) = self.crossover_generator.uniform_crossover( - parent1, parent2 - ) - offspring_sequences.extend([offspring1, offspring2]) - - logger.info(f"Selected samples: {len(selected_sequences)}") - logger.info(f"Number Crossed-Over samples: {len(offspring_sequences)}") - - current_population = [ - seq["sequence"] for seq in selected_sequences - ] + offspring_sequences - - if len(current_population) < num_sequences: - while len(current_population) < num_sequences: - current_population.extend( - self.mutator.mutation_strategy.mutate( - self.sequence, num_mutations, self.intervals - ) - ) - if len(current_population) >= num_sequences: - break - - if len(current_population) >= num_sequences: - random.shuffle(current_population) - current_population = current_population[:num_sequences] - - higher_scoring_sequences = 0 - for temp_seq in iteration_scored_sequences: - if temp_seq["score"] > current_best_score: - current_best_score = temp_seq["score"] - higher_scoring_sequences += 1 - - end_time = time.time() - elapsed_time = end_time - start_time - iteration_info[iteration + 1] = { - "Iteration": iteration + 1, - "best_score": current_best_score, - "higher_scoring_sequences": higher_scoring_sequences, - "elapsed_time": elapsed_time, - } - logger.info( - f" Iteration {iteration + 1}: Best Score: {current_best_score}," - f" Higher Scoring Sequences: {higher_scoring_sequences}, " - f" Time: {elapsed_time} seconds," - f" Population length : {len(current_population)}" - ) - if time_budget is not None and elapsed_time > time_budget: - logger.warning(f"Used all the given 
time budget of {time_budget}s") - break - - all_scored_sequences = sorted( - all_scored_sequences, key=lambda x: x["score"], reverse=True - ) - - df = pd.DataFrame(all_scored_sequences) - df = df.drop_duplicates() - - all_scored_sequences = df.to_dict(orient="records") - - return all_scored_sequences, iteration_info - - def score_sequence(self, sequence: str) -> Dict[str, Any]: - """Scores a single protein sequence. - - Args: - sequence (str): The protein sequence to score. - - Returns: - Dict[str, Any]: The score of the sequence. - """ - sequence_embedding = self.protein_model.embed([sequence])[0] - embeddings = [ - sequence_embedding, - self.substrate_embedding, - self.product_embedding, - ] - ordered_embeddings = [ - embeddings[self.concat_order.index(item)] for item in self.concat_order - ] - combined_embedding = np.concatenate(ordered_embeddings) - combined_embedding = combined_embedding.reshape(1, -1) - - if self.use_xgboost_scorer: - if self.scaler is not None: - combined_embedding = self.scaler.transform(combined_embedding) - score = self.scorer.predict(xgb.DMatrix(combined_embedding))[0] - else: - score = self.scorer.predict_proba(combined_embedding)[0][1] - - return {"sequence": sequence, "score": score} - - def score_sequences(self, sequences: List[str]) -> List[Dict[str, float]]: - """Scores a list of protein sequences. - - Args: - sequences (List[str]): The list of protein sequences to score. - - Returns: - List[Dict[str, float]]: A list of dictionaries - containing sequences and their scores. 
- """ - sequence_embeddings = self.protein_model.embed(sequences) - - output = [] - for position in range(len(sequence_embeddings)): - sequence_embedding = sequence_embeddings[position] - embeddings = [ - sequence_embedding, - self.substrate_embedding, - self.product_embedding, - ] - ordered_embeddings = [ - embeddings[self.concat_order.index(item)] for item in self.concat_order - ] - combined_embedding = np.concatenate(ordered_embeddings) - combined_embedding = combined_embedding.reshape(1, -1) - - if self.use_xgboost_scorer: - if self.scaler is not None: - combined_embedding = self.scaler.transform(combined_embedding) - score = self.scorer.predict(xgb.DMatrix(combined_embedding))[0] - else: - score = self.scorer.predict_proba(combined_embedding)[0][1] - output.append({"sequence": sequences[position], "score": score}) - - return output diff --git a/src/gt4sd/frameworks/enzeptional/processing.py b/src/gt4sd/frameworks/enzeptional/processing.py deleted file mode 100644 index 600d26cc0..000000000 --- a/src/gt4sd/frameworks/enzeptional/processing.py +++ /dev/null @@ -1,586 +0,0 @@ -# -# MIT License -# -# Copyright (c) 2024 GT4SD team -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -# -from abc import ABC -import torch -import numpy as np -from typing import Any, Dict, List, Optional, Tuple, Union -from tape.datasets import pad_sequences -from tape.registry import registry -from tape.tokenizers import TAPETokenizer -from transformers import ( - AutoModel, - EsmForMaskedLM, - AutoTokenizer, - T5Tokenizer, -) -import math -import random -import logging -from itertools import product as iter_product -from gt4sd.frameworks.torch import get_device - - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -# os.environ["TRANSFORMERS_CACHE"] = "~/.cache/huggingface/" -# torch.hub.set_dir("/dccstor/yna/.cache/torch/hub") - - -class ModelCache: - """ - A simple cache mechanism for storing and retrieving models. - """ - - def __init__(self): - """ - Initializes the cache as an empty dictionary. - """ - self.cache = {} - - def get(self, key): - """ - Retrieves a model from the cache using the given key. - - Args: - key: The key used to store the model. - - Returns: - The model associated with the key, or None if not found. - """ - return self.cache.get(key) - - def add(self, key, model): - """ - Adds a model to the cache with the specified key. - - Args: - key: The key to associate with the model. - model: The model to be cached. - """ - self.cache[key] = model - - -ENZEPTIONAL_MODEL_CACHE = ModelCache() - - -class StringEmbedding(ABC): - """ - Abstract base class for embedding string data. - - Attributes: - model (Any): The embedding model. - """ - - model: Any - - def embed(self, samples: List[str]) -> np.ndarray: - """Abstract method for embedding a list of string samples. - - Args: - samples (List[str]): The list of strings to be embedded. 
- - Raises: - NotImplementedError: If the method is not implemented in the subclass. - """ - raise NotImplementedError - - -class HFandTAPEModelUtility(StringEmbedding): - """ - Utility class for handling both Hugging Face and TAPE models for embedding - and unmasking tasks. - """ - - def __init__( - self, - embedding_model_path: str, - tokenizer_path: str, - unmasking_model_path: Optional[str] = None, - is_tape_model: bool = False, - device: Optional[Union[torch.device, str]] = None, - cache_dir: Optional[str] = None, - ) -> None: - """Initializes the utility with specified model and tokenizer paths. - - Args: - embedding_model_path (str): Path to the embedding model. - tokenizer_path (str): Path to the tokenizer. - unmasking_model_path (Optional[str], optional): Path to the unmasking model, if applicable. Defaults to None. - is_tape_model (bool, optional): Flag to indicate if a TAPE model is being used. Defaults to False. - device (Optional[Union[torch.device, str]], optional): The compute device to use ('cpu' or 'cuda:0'). Defaults to None. - cache_dir (Optional[str], optional): Path to cache directory. Defaults to None. 
- """ - self.device = get_device() - self.is_tape_model = is_tape_model - - embedding_cache_key = f"embedding_{embedding_model_path}" - self.embedding_model = ENZEPTIONAL_MODEL_CACHE.get(embedding_cache_key) - if not self.embedding_model: - if is_tape_model: - self.embedding_model = registry.get_task_model( - embedding_model_path, - "embed", - load_dir=embedding_model_path, - ).to(self.device) - else: - if cache_dir: - self.embedding_model = ( - AutoModel.from_pretrained( - embedding_model_path, - cache_dir=cache_dir, - ) - .to(self.device) - .eval() - ) - else: - self.embedding_model = ( - AutoModel.from_pretrained( - embedding_model_path, - ) - .to(self.device) - .eval() - ) - - ENZEPTIONAL_MODEL_CACHE.add(embedding_cache_key, self.embedding_model) - - if unmasking_model_path is not None: - unmasking_cache_key = f"unmasking_{unmasking_model_path}" - self.unmasking_model = ENZEPTIONAL_MODEL_CACHE.get(unmasking_cache_key) - if not self.unmasking_model: - if cache_dir: - self.unmasking_model = ( - EsmForMaskedLM.from_pretrained( - unmasking_model_path, - cache_dir=cache_dir, - ) - .to(self.device) - .eval() - ) - else: - self.unmasking_model = ( - EsmForMaskedLM.from_pretrained( - unmasking_model_path, - ) - .to(self.device) - .eval() - ) - ENZEPTIONAL_MODEL_CACHE.add(unmasking_cache_key, self.unmasking_model) - else: - logger.error("No Unmasking model loaded. Check you model inputs") - - if is_tape_model: - self.tokenizer = TAPETokenizer(vocab="iupac") - else: - self.tokenizer = self._load_tokenizer(tokenizer_path) - - def _load_tokenizer(self, tokenizer_path: str): - """Loads a tokenizer based on the given path, caching it for future use. - - Args: - tokenizer_path (str): Path to the tokenizer. 
- - Returns: - The loaded tokenizer - """ - tokenizer_cache_key = f"tokenizer_{tokenizer_path}" - tokenizer = ENZEPTIONAL_MODEL_CACHE.get(tokenizer_cache_key) - if not tokenizer: - try: - tokenizer = AutoTokenizer.from_pretrained(tokenizer_path) - except Exception: - tokenizer = T5Tokenizer.from_pretrained(tokenizer_path) - ENZEPTIONAL_MODEL_CACHE.add(tokenizer_cache_key, tokenizer) - return tokenizer - - def embed(self, samples: List[str]) -> np.ndarray: - """Embeds a list of samples using either TAPE or Hugging Face models. - - Args: - samples (List[str]): List of strings to be embedded. - - Returns: - np.ndarray: The resulting embeddings. - """ - if self.is_tape_model: - return self._embed_tape(samples) - else: - return self._embed_huggingface(samples) - - def _embed_tape(self, samples: List[str]) -> np.ndarray: - """mbeds samples using a TAPE model. - - Args: - samples (List[str]): List of strings to be embedded. - - Returns: - np.ndarray: The resulting embeddings. - """ - token_ids: Dict[str, Any] = {"ids": [], "mask": []} - for sequence in samples: - encoded_sequence = self.tokenizer.encode(sequence) - token_ids["ids"].append(encoded_sequence) - token_ids["mask"].append(np.ones_like(encoded_sequence)) - - input_ids = torch.from_numpy(pad_sequences(token_ids["ids"])).to(self.device) - input_mask = torch.from_numpy(pad_sequences(token_ids["mask"])).to(self.device) - - inputs = {"input_ids": input_ids, "input_mask": input_mask} - - with torch.no_grad(): - sequence_embeddings = ( - self.embedding_model(**inputs)[0].cpu().detach().numpy() - ) - - sequence_lengths = input_mask.sum(1) - - return np.array( - [ - sequence_embedding[:sequence_length].mean(0) - for sequence_embedding, sequence_length in zip( - sequence_embeddings, sequence_lengths - ) - ] - ) - - def _embed_huggingface(self, samples: List[str]) -> np.ndarray: - """Embeds samples using a Hugging Face model. - - Args: - samples (List[str]): List of strings to be embedded. 
- - Returns: - np.ndarray: The resulting embeddings. - """ - inputs = self.tokenizer( - samples, - add_special_tokens=True, - padding=True, - return_tensors="pt", - ) - inputs = {k: v.to(self.device) for k, v in inputs.items()} - - with torch.no_grad(): - outputs = self.embedding_model(**inputs) - sequence_embeddings = outputs[0].cpu().detach().numpy() - - sequence_lengths = inputs["attention_mask"].sum(1) - - return np.array( - [ - sequence_embedding[:sequence_length].mean(0) - for sequence_embedding, sequence_length in zip( - sequence_embeddings, sequence_lengths - ) - ] - ) - - def unmask(self, sequence: str, top_k: int = 2) -> List[str]: - """Unmasks a given sequence using the model, retrieving top-k predictions. - - Args: - sequence (str): The sequence with masked tokens. - top_k (int, optional): Number of top predictions to retrieve. Defaults to 2. - - Raises: - NotImplementedError: If TAPE model is used. - KeyError: If the model used is not supported. - - Returns: - List[str]: List of top-k predicted sequences. - """ - if self.is_tape_model: - logger.error("Unmasking is not supported for TAPE models.") - raise NotImplementedError("Unmasking is not supported for TAPE models.") - - try: - return self._unmask_with_model(sequence, top_k) - except (KeyError, NotImplementedError) as e: - logger.warning(f"{e} Standard unmasking failed ") - raise KeyError("Check the unmasking model you want to use") - - def _unmask_with_model(self, sequence: str, top_k: int) -> List[str]: - """Unmasks a sequence using the model, providing top-k predictions. - - Args: - sequence (str): The sequence with masked tokens. - top_k (int): Number of top predictions to retrieve. - - Raises: - KeyError: If model used do not support unmasking. - - Returns: - List[str]: List of top-k predicted sequences. 
- """ - inputs = self.tokenizer( - sequence, - return_tensors="pt", - add_special_tokens=True, - padding=True, - ).to(self.device) - mask_token_index = torch.where( - inputs["input_ids"] == self.tokenizer.mask_token_id - )[1] - - with torch.no_grad(): - outputs = self.unmasking_model(inputs["input_ids"].to(self.device)) - - if "logits" in outputs: - logits = outputs.logits - else: - raise KeyError("Logits not available in the model's output.") - - mask_token_logits = logits[0, mask_token_index, :] - - top_tokens: List[Any] = [] - for i in range(len(mask_token_index)): - top_n_tokens = ( - torch.topk(mask_token_logits, top_k, dim=1).indices[i].tolist() - ) - top_tokens.append( - [self.tokenizer.decode([token]) for token in top_n_tokens] - ) - - mask_token_index = mask_token_index.cpu().numpy() - mutated_sequences = [] - tmp_top_tokens = [tuple(tokens) for tokens in top_tokens] - if len(set(tmp_top_tokens)) == 1: - for i in range(top_k): - temp_sequence = sequence.split(" ") - for mask_index in mask_token_index: - temp_sequence[mask_index - 1] = tmp_top_tokens[0][i] - mutated_sequences.append("".join(temp_sequence)) - else: - for combination in list(iter_product(*tmp_top_tokens)): - temp_sequence = sequence.split(" ") - for i, mask_index in enumerate(mask_token_index): - temp_sequence[mask_index - 1] = combination[i] - mutated_sequences.append("".join(temp_sequence)) - - return mutated_sequences - - -def mutate_sequence_with_variant(sequence: str, variant: str) -> str: - """Applies a specified variant mutation to an amino acid sequence. - - Args: - sequence (str): The original amino acid sequence. - variant (str): The variant to apply, formatted as a string. - - Returns: - str: The mutated amino acid sequence. 
- """ - mutated_sequence = list(sequence) - for variant_string in variant.split("/"): - index = int(variant_string[1:-1]) - 1 - mutated_sequence[index] = variant_string[-1] - return "".join(mutated_sequence) - - -def sanitize_intervals(intervals: List[Tuple[int, int]]) -> List[Tuple[int, int]]: - """Merges overlapping intervals into a single interval. - - Args: - intervals (List[Tuple[int, int]]): A list of - start and end points of intervals. - - Returns: - List[Tuple[int, int]]: A list of merged intervals. - """ - intervals.sort() - merged: List[Tuple[int, int]] = [] - for start, end in intervals: - if not merged or merged[-1][1] < start: - merged.append((start, end)) - else: - merged[-1] = (merged[-1][0], max(merged[-1][1], end)) - return merged - - -def round_up(number: float) -> int: - """Rounds up a floating-point number to the nearest integer. - - Args: - number (float): The number to round up. - - Returns: - int: The rounded-up integer. - """ - return math.ceil(number) - - -def sanitize_intervals_with_padding( - intervals: List[Tuple[int, int]], pad_value: int, max_value: int -) -> List[Tuple[int, int]]: - """Pads and sanitizes intervals within a given range. - - Args: - intervals (List[Tuple[int, int]]): A list of intervals. - pad_value (int): The value to pad intervals with. - max_value (int): The maximum value for the range of intervals. - - Returns: - List[Tuple[int, int]]: A list of padded and sanitized intervals. - """ - - def pad_interval( - interval: Tuple[int, int], pad: int, max_val: int - ) -> Tuple[int, int]: - """Pads an individual interval within the constraints of a maximum value. - - Args: - interval (Tuple[int, int]): The interval to pad. - pad (int): The padding value. - max_val (int): The maximum value for the interval. - - Returns: - Tuple[int, int]: The padded interval. 
- """ - start, end = interval - interval_length = end - start - padding_needed = max(0, pad - interval_length) // 2 - - padded_start = max(0, start - padding_needed) - padded_end = min(max_val, end + padding_needed) - - if padded_end > max_val: - padded_start = max(0, padded_start - (padded_end - max_val)) - return padded_start, padded_end - - padded_intervals = [ - pad_interval(interval, pad_value, max_value) for interval in intervals - ] - return sanitize_intervals(padded_intervals) - - -def reconstruct_sequence_with_mutation_range( - sequence: str, - mutated_sequence_range: str, - intervals: List[Tuple[int, int]], -) -> str: - """Reconstructs a sequence by inserting a mutated sequence - range at specific intervals. - - Args: - sequence (str): The original sequence. - mutated_sequence_range (str): The range of the sequence to be mutated. - intervals (List[Tuple[int, int]]): The intervals where - mutations are applied. - - Returns: - str: The reconstructed sequence with mutations. - """ - mutated_sequence = list(sequence) - range_index = 0 - for start, end in intervals: - size_fragment = end - start - mutated_sequence[start:end] = list( - mutated_sequence_range[range_index : range_index + size_fragment] - ) - range_index += size_fragment - return "".join(mutated_sequence) - - -class SelectionGenerator: - """ - A generator for selecting top sequences based on their scores. - """ - - def selection( - self, - pool_of_sequences: List[Dict[str, Any]], - k: float = 0.8, - ) -> List[Any]: - """Selects a subset of sequences from a pool based on their scores. - - Args: - pool_of_sequences (List[Dict[str, Any]]): A list of - dictionaries, each containing a sequence and its score. - k (float): A fraction representing the proportion - of top sequences to select. Defaults to 0.8. - - Returns: - List[Any]: A list of the top k sequences based on scores. 
- """ - n_samples_to_select = int(len(pool_of_sequences) * k) - return list(sorted(pool_of_sequences, key=lambda d: d["score"], reverse=True))[ - :n_samples_to_select - ] - - -class CrossoverGenerator: - """ - A generator for performing crossover operations between sequences. - """ - - def __init__(self, threshold_probability: float = 0.5) -> None: - """Initializes the CrossoverGenerator with a specified - threshold probability. - - Args: - threshold_probability (float, optional): The probability - threshold used in uniform crossover. Defaults to 0.5. - """ - self.threshold_probability = threshold_probability - - def sp_crossover(self, a_sequence: str, another_sequence: str) -> Tuple[str, str]: - """Performs a single point crossover between two sequences. - - Args: - a_sequence (str): The first sequence for crossover. - another_sequence (str): The second sequence for crossover. - - Returns: - Tuple[str, str]: A tuple of two new sequences resulting - from the crossover. - """ - random_point = random.randint(1, len(a_sequence) - 2) - return ( - a_sequence[:random_point] + another_sequence[random_point:], - another_sequence[:random_point] + a_sequence[random_point:], - ) - - def uniform_crossover( - self, a_sequence: str, another_sequence: str - ) -> Tuple[str, str]: - """Performs a uniform crossover between two sequences. - - Args: - a_sequence (str): The first sequence for crossover. - another_sequence (str): The second sequence for crossover. - - Returns: - Tuple[str, str]: A tuple of two new sequences resulting - from the crossover. 
- """ - return ( - "".join( - a if random.random() > self.threshold_probability else b - for a, b in zip(a_sequence, another_sequence) - ), - "".join( - b if random.random() > self.threshold_probability else a - for a, b in zip(a_sequence, another_sequence) - ), - ) diff --git a/src/gt4sd/frameworks/enzeptional/tests/test_core.py b/src/gt4sd/frameworks/enzeptional/tests/test_core.py index 2764b20a3..2b26a174f 100644 --- a/src/gt4sd/frameworks/enzeptional/tests/test_core.py +++ b/src/gt4sd/frameworks/enzeptional/tests/test_core.py @@ -22,13 +22,17 @@ # SOFTWARE. # import warnings -from gt4sd.frameworks.enzeptional.core import ( - SequenceMutator, +from gt4sd.frameworks.enzeptional import ( EnzymeOptimizer, + SequenceMutator, + SequenceScorer, + CrossoverGenerator, + HuggingFaceEmbedder, + HuggingFaceModelLoader, + HuggingFaceTokenizerLoader, + SelectionGenerator, ) -from gt4sd.frameworks.enzeptional.processing import HFandTAPEModelUtility - from gt4sd.configuration import sync_algorithm_with_s3 from gt4sd.configuration import GT4SDConfiguration @@ -45,46 +49,76 @@ def test_optimize(): language_model_path = "facebook/esm2_t33_650M_UR50D" tokenizer_path = "facebook/esm2_t33_650M_UR50D" - unmasking_model_path = "facebook/esm2_t33_650M_UR50D" chem_model_path = "seyonec/ChemBERTa-zinc-base-v1" chem_tokenizer_path = "seyonec/ChemBERTa-zinc-base-v1" - protein_model = HFandTAPEModelUtility( - embedding_model_path=language_model_path, tokenizer_path=tokenizer_path + model_loader = HuggingFaceModelLoader() + tokenizer_loader = HuggingFaceTokenizerLoader() + + protein_model = HuggingFaceEmbedder( + model_loader=model_loader, + tokenizer_loader=tokenizer_loader, + model_path=language_model_path, + tokenizer_path=tokenizer_path, + cache_dir=None, + device="cpu", + ) + + chem_model = HuggingFaceEmbedder( + model_loader=model_loader, + tokenizer_loader=tokenizer_loader, + model_path=chem_model_path, + tokenizer_path=chem_tokenizer_path, + cache_dir=None, + device="cpu", ) 
mutation_config = { "type": "language-modeling", "embedding_model_path": language_model_path, "tokenizer_path": tokenizer_path, - "unmasking_model_path": unmasking_model_path, + "unmasking_model_path": language_model_path, } intervals = [(5, 10), (20, 25)] - batch_size = 5 - top_k = 3 + batch_size = 2 + top_k = 1 substrate_smiles = "NC1=CC=C(N)C=C1" product_smiles = "CNC1=CC=C(NC(=O)C2=CC=C(C=C2)C(C)=O)C=C1" sample_sequence = "MSKLLMIGTGPVAIDQFLTRYEASCQAYKDMHQDQQLSSQFNTNLFEGDKALVTKFLEINRTLS" + mutator = SequenceMutator(sequence=sample_sequence, mutation_config=mutation_config) + mutator.set_top_k(top_k) - optimizer = EnzymeOptimizer( - sequence=sample_sequence, + scorer = SequenceScorer( protein_model=protein_model, - substrate_smiles=substrate_smiles, - product_smiles=product_smiles, - chem_model_path=chem_model_path, - chem_tokenizer_path=chem_tokenizer_path, scorer_filepath=scorer_filepath, + use_xgboost=False, + scaler_filepath=None, + ) + + selection_generator = SelectionGenerator() + crossover_generator = CrossoverGenerator() + + optimizer = EnzymeOptimizer( + sequence=sample_sequence, mutator=mutator, + scorer=scorer, intervals=intervals, + substrate_smiles=substrate_smiles, + product_smiles=product_smiles, + chem_model=chem_model, + selection_generator=selection_generator, + crossover_generator=crossover_generator, + concat_order=["substrate", "sequence", "product"], batch_size=batch_size, - top_k=top_k, selection_ratio=0.25, perform_crossover=True, crossover_type="single_point", - concat_order=["substrate", "sequence", "product"], + pad_intervals=False, + minimum_interval_length=8, + seed=123, ) num_iterations = 3 diff --git a/src/gt4sd/frameworks/enzeptional/tests/test_processing.py b/src/gt4sd/frameworks/enzeptional/tests/test_processing.py index efaf1c4df..64817952a 100644 --- a/src/gt4sd/frameworks/enzeptional/tests/test_processing.py +++ b/src/gt4sd/frameworks/enzeptional/tests/test_processing.py @@ -22,44 +22,85 @@ # SOFTWARE. 
# """Enzeptional processing tests.""" -from gt4sd.frameworks.enzeptional.processing import ( - ModelCache, - get_device, +import pytest +import numpy as np +from gt4sd.frameworks.enzeptional import ( + HuggingFaceModelLoader, + HuggingFaceTokenizerLoader, + HuggingFaceEmbedder, sanitize_intervals, + round_up, sanitize_intervals_with_padding, - reconstruct_sequence_with_mutation_range, + SelectionGenerator, + CrossoverGenerator, ) -import torch - - -def test_add_and_get_model(): - model_cache = ModelCache() - test_model = torch.nn.Module() - model_cache.add("test_model", test_model) - retrieved_model = model_cache.get("test_model") - assert test_model == retrieved_model - - -class TestUtilityFunctions: - def test_get_device(self): - expected_device = "cuda:0" if torch.cuda.is_available() else "cpu" - assert str(get_device()) == expected_device - - def test_sanitize_intervals(self): - intervals = [(1, 3), (2, 5), (6, 8)] - sanitized = sanitize_intervals(intervals) - assert sanitized == [(1, 5), (6, 8)] - - def test_sanitize_intervals_with_padding(self): - intervals = [(1, 3), (6, 8)] - padded_intervals = sanitize_intervals_with_padding(intervals, 8, 50) - assert padded_intervals == [(0, 11)] - - def test_reconstruct_sequence_with_mutation_range(self): - original_sequence = "AACCGGTT" - mutation_range = "NNNN" - intervals = [(2, 4), (6, 8)] - reconstructed = reconstruct_sequence_with_mutation_range( - original_sequence, mutation_range, intervals - ) - assert reconstructed == "AANNGGNN" + + +@pytest.fixture +def huggingface_embedder(): + model_loader = HuggingFaceModelLoader() + tokenizer_loader = HuggingFaceTokenizerLoader() + + language_model_path = "facebook/esm2_t33_650M_UR50D" + tokenizer_path = "facebook/esm2_t33_650M_UR50D" + cache_dir = None + device = "cpu" + + embedder = HuggingFaceEmbedder( + model_loader, + tokenizer_loader, + language_model_path, + tokenizer_path, + cache_dir, + device, + ) + return embedder + + +def 
test_huggingface_embedder(huggingface_embedder): + protein_sequences = ["MTEITAAMVKELRESTGAGMMDCKNALSETQHEEIAFLKRLME"] + embeddings = huggingface_embedder.embed(protein_sequences) + assert isinstance(embeddings, np.ndarray) + assert embeddings.shape[0] == len(protein_sequences) + + +def test_sanitize_intervals(): + intervals = [(1, 5), (3, 7), (8, 10)] + sanitized = sanitize_intervals(intervals) + assert sanitized == [(1, 7), (8, 10)] + + +def test_round_up(): + number = 3.14 + rounded = round_up(number) + assert rounded == 4 + + +def test_sanitize_intervals_with_padding(): + intervals = [(1, 3), (6, 8)] + padded_intervals = sanitize_intervals_with_padding( + intervals, pad_value=5, max_value=20 + ) + assert padded_intervals == [(0, 4), (5, 9)] + + +def test_selection_generator(): + pool_of_sequences = [ + {"sequence": "A", "score": 0.9}, + {"sequence": "B", "score": 0.8}, + {"sequence": "C", "score": 0.95}, + {"sequence": "D", "score": 0.7}, + ] + generator = SelectionGenerator() + selected = generator.selection(pool_of_sequences, k=0.5) + assert len(selected) == 2 + assert selected[0]["sequence"] == "C" + + +def test_crossover_generator(): + generator = CrossoverGenerator(threshold_probability=0.5) + seq_a = "AAAAAAAA" + seq_b = "BBBBBBBB" + offspring_a, offspring_b = generator.uniform_crossover(seq_a, seq_b) + assert len(offspring_a) == len(seq_a) + assert all(c in ["A", "B"] for c in offspring_a) diff --git a/src/gt4sd/frameworks/gflownet/ml/models/mxmnet.py b/src/gt4sd/frameworks/gflownet/ml/models/mxmnet.py index 0813b4e29..488f751b6 100644 --- a/src/gt4sd/frameworks/gflownet/ml/models/mxmnet.py +++ b/src/gt4sd/frameworks/gflownet/ml/models/mxmnet.py @@ -970,7 +970,7 @@ def forward(self, h, edge_attr, edge_index): return h - def message(self, x_i, x_j, edge_attr, edge_index, num_nodes): + def message(self, x_i, x_j, edge_attr, edge_index, num_nodes): # type:ignore num_edge = edge_attr.size()[0] x_edge = torch.cat((x_i[:num_edge], x_j[:num_edge], edge_attr), 
-1)