forked from lamalab-org/toolminutes
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreferences.bib
749 lines (689 loc) · 31.1 KB
/
references.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
@article{Ahneman_2018,
  author = {Ahneman, Derek T. and Estrada, Jesús G. and Lin, Shishi and Dreher, Spencer D. and Doyle, Abigail G.},
  title = {Predicting reaction performance in {C–N} cross-coupling using machine learning},
  journal = {Science},
  volume = {360},
  number = {6385},
  pages = {186--190},
  year = {2018},
  month = apr,
  publisher = {American Association for the Advancement of Science (AAAS)},
  issn = {1095-9203},
  doi = {10.1126/science.aar5169},
  url = {http://dx.doi.org/10.1126/science.aar5169},
}
% NOTE(review): no pages / article number is recorded for this entry -- confirm against the DOI.
@article{Chuang_2018,
  author = {Chuang, Kangway V. and Keiser, Michael J.},
  title = {Comment on “Predicting reaction performance in {C–N} cross-coupling using machine learning”},
  journal = {Science},
  volume = {362},
  number = {6416},
  year = {2018},
  month = nov,
  publisher = {American Association for the Advancement of Science (AAAS)},
  issn = {1095-9203},
  doi = {10.1126/science.aat8603},
  url = {http://dx.doi.org/10.1126/science.aat8603},
}
@misc{vinyals2016order,
  author = {Vinyals, Oriol and Bengio, Samy and Kudlur, Manjunath},
  title = {Order Matters: Sequence to sequence for sets},
  year = {2016},
  eprint = {1511.06391},
  archiveprefix = {arXiv},
  primaryclass = {stat.ML},
}
@misc{shazeer2017outrageously,
  author = {Shazeer, Noam and Mirhoseini, Azalia and Maziarz, Krzysztof and Davis, Andy and Le, Quoc and Hinton, Geoffrey and Dean, Jeff},
  title = {Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer},
  year = {2017},
  eprint = {1701.06538},
  archiveprefix = {arXiv},
  primaryclass = {cs.LG},
}
% Authors are given in "Last, First" form so that compound surnames
% (Bou Hanna, Renard Lavaud, Le Scao, El Sayed, de las Casas) are not split.
@misc{jiang2024mixtral,
  author = {Jiang, Albert Q. and Sablayrolles, Alexandre and Roux, Antoine and Mensch, Arthur and Savary, Blanche and Bamford, Chris and Chaplot, Devendra Singh and de las Casas, Diego and Bou Hanna, Emma and Bressand, Florian and Lengyel, Gianna and Bour, Guillaume and Lample, Guillaume and Renard Lavaud, Lélio and Saulnier, Lucile and Lachaux, Marie-Anne and Stock, Pierre and Subramanian, Sandeep and Yang, Sophia and Antoniak, Szymon and Le Scao, Teven and Gervet, Théophile and Lavril, Thibaut and Wang, Thomas and Lacroix, Timothée and El Sayed, William},
  title = {Mixtral of Experts},
  year = {2024},
  eprint = {2401.04088},
  archiveprefix = {arXiv},
  primaryclass = {cs.LG},
}
@misc{kingma2015variational,
  author = {Kingma, Diederik P. and Salimans, Tim and Welling, Max},
  title = {Variational Dropout and the Local Reparameterization Trick},
  year = {2015},
  eprint = {1506.02557},
  archiveprefix = {arXiv},
  primaryclass = {stat.ML},
}
@article{Chen_2024,
  author = {Chen, Jiayuan and Guo, Kehan and Liu, Zhen and Isayev, Olexandr and Zhang, Xiangliang},
  title = {Uncertainty-Aware Yield Prediction with Multimodal Molecular Features},
  journal = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume = {38},
  number = {8},
  pages = {8274--8282},
  year = {2024},
  month = mar,
  publisher = {Association for the Advancement of Artificial Intelligence (AAAI)},
  issn = {2159-5399},
  doi = {10.1609/aaai.v38i8.28668},
  url = {http://dx.doi.org/10.1609/aaai.v38i8.28668},
}
% NOTE(review): preprint without DOI/URL in this file -- add the identifier once confirmed.
@article{schwaller2020data,
  author = {Schwaller, Philippe and Vaucher, Alain C and Laino, Teodoro and Reymond, Jean-Louis},
  title = {Data augmentation strategies to improve reaction yield predictions and estimate uncertainty},
  journal = {ChemRxiv},
  year = {2020},
  note = {Preprint},
}
@article{schwaller2021prediction,
  author = {Schwaller, Philippe and Vaucher, Alain C and Laino, Teodoro and Reymond, Jean-Louis},
  title = {Prediction of chemical reaction yields using deep learning},
  journal = {Machine Learning: Science and Technology},
  volume = {2},
  number = {1},
  pages = {015016},
  year = {2021},
  publisher = {IOP Publishing},
}
@article{Kwon_2022,
  author = {Kwon, Youngchun and Lee, Dongseon and Choi, Youn-Suk and Kang, Seokho},
  title = {Uncertainty-aware prediction of chemical reaction yields with graph neural networks},
  journal = {Journal of Cheminformatics},
  volume = {14},
  number = {1},
  year = {2022},
  month = jan,
  publisher = {Springer Science and Business Media LLC},
  issn = {1758-2946},
  doi = {10.1186/s13321-021-00579-z},
  url = {http://dx.doi.org/10.1186/s13321-021-00579-z},
}
@inproceedings{gal2016dropout,
  author = {Gal, Yarin and Ghahramani, Zoubin},
  title = {Dropout as a {Bayesian} approximation: Representing model uncertainty in deep learning},
  booktitle = {International Conference on Machine Learning},
  pages = {1050--1059},
  year = {2016},
  publisher = {PMLR},
}
% Reference-manager export cleaned up: the machine-local `file` attachment path
% was dropped and the publisher moved from `note` into `publisher`.
@article{dagdelen_structured_2024,
  author = {Dagdelen, John and Dunn, Alexander and Lee, Sanghoon and Walker, Nicholas and Rosen, Andrew S. and Ceder, Gerbrand and Persson, Kristin A. and Jain, Anubhav},
  title = {Structured information extraction from scientific text with large language models},
  journal = {Nature Communications},
  volume = {15},
  number = {1},
  pages = {1418},
  year = {2024},
  month = feb,
  publisher = {Nature Publishing Group},
  issn = {2041-1723},
  doi = {10.1038/s41467-024-45563-x},
  url = {https://www.nature.com/articles/s41467-024-45563-x},
  urldate = {2024-04-13},
  language = {en},
  copyright = {2024 The Author(s)},
  keywords = {Materials science, Theory and computation, Databases, Scientific data},
  abstract = {Extracting structured knowledge from scientific text remains a challenging task for machine learning models. Here, we present a simple approach to joint named entity recognition and relation extraction and demonstrate how pretrained large language models (GPT-3, Llama-2) can be fine-tuned to extract useful records of complex scientific knowledge. We test three representative tasks in materials chemistry: linking dopants and host materials, cataloging metal-organic frameworks, and general composition/phase/morphology/application information extraction. Records are extracted from single sentences or entire paragraphs, and the output can be returned as simple English sentences or a more structured format such as a list of JSON objects. This approach represents a simple, accessible, and highly flexible route to obtaining large databases of structured specialized scientific knowledge extracted from research papers.},
}
@article{Trewartha2022,
  author = {Trewartha, Amalie and Walker, Nicholas and Huo, Haoyan and Lee, Sanghoon and Cruse, Kevin and Dagdelen, John and Dunn, Alexander and Persson, Kristin A. and Ceder, Gerbrand and Jain, Anubhav},
  title = {Quantifying the advantage of domain-specific pre-training on named entity recognition tasks in materials science},
  journal = {Patterns},
  volume = {3},
  number = {4},
  pages = {100488},
  year = {2022},
  month = apr,
  publisher = {Elsevier BV},
  issn = {2666-3899},
  doi = {10.1016/j.patter.2022.100488},
  url = {http://dx.doi.org/10.1016/j.patter.2022.100488},
}
% NOTE(review): other entries in this file pair this journal's volume 61 with
% 2021 and volume 60 with 2020, so volume 62 with year 2021 looks like an
% online-first date -- confirm the issue year.
@article{Guo2021,
  author = {Guo, Jiang and Ibanez-Lopez, A. Santiago and Gao, Hanyu and Quach, Victor and Coley, Connor W. and Jensen, Klavs F. and Barzilay, Regina},
  title = {Automated Chemical Reaction Extraction from Scientific Literature},
  journal = {Journal of Chemical Information and Modeling},
  volume = {62},
  number = {9},
  pages = {2035--2045},
  year = {2021},
  month = jun,
  publisher = {American Chemical Society (ACS)},
  issn = {1549-960X},
  doi = {10.1021/acs.jcim.1c00284},
  url = {http://dx.doi.org/10.1021/acs.jcim.1c00284},
}
@article{Kim2017,
  author = {Kim, Edward and Huang, Kevin and Saunders, Adam and McCallum, Andrew and Ceder, Gerbrand and Olivetti, Elsa},
  title = {Materials Synthesis Insights from Scientific Literature via Text Extraction and Machine Learning},
  journal = {Chemistry of Materials},
  volume = {29},
  number = {21},
  pages = {9436--9444},
  year = {2017},
  month = oct,
  publisher = {American Chemical Society (ACS)},
  issn = {1520-5002},
  doi = {10.1021/acs.chemmater.7b03500},
  url = {http://dx.doi.org/10.1021/acs.chemmater.7b03500},
}
@misc{mysore2019materials,
  author = {Mysore, Sheshera and Jensen, Zach and Kim, Edward and Huang, Kevin and Chang, Haw-Shiuan and Strubell, Emma and Flanigan, Jeffrey and McCallum, Andrew and Olivetti, Elsa},
  title = {The Materials Science Procedural Text Corpus: Annotating Materials Synthesis Procedures with Shallow Semantic Structures},
  year = {2019},
  eprint = {1905.06939},
  archiveprefix = {arXiv},
  primaryclass = {cs.CL},
}
@article{Kim2020,
  author = {Kim, Edward and Jensen, Zach and van Grootel, Alexander and Huang, Kevin and Staib, Matthew and Mysore, Sheshera and Chang, Haw-Shiuan and Strubell, Emma and McCallum, Andrew and Jegelka, Stefanie and Olivetti, Elsa},
  title = {Inorganic Materials Synthesis Planning with Literature-Trained Neural Networks},
  journal = {Journal of Chemical Information and Modeling},
  volume = {60},
  number = {3},
  pages = {1194--1201},
  year = {2020},
  month = jan,
  publisher = {American Chemical Society (ACS)},
  issn = {1549-960X},
  doi = {10.1021/acs.jcim.9b00995},
  url = {http://dx.doi.org/10.1021/acs.jcim.9b00995},
}
@article{Kononova2019,
  author = {Kononova, Olga and Huo, Haoyan and He, Tanjin and Rong, Ziqin and Botari, Tiago and Sun, Wenhao and Tshitoyan, Vahe and Ceder, Gerbrand},
  title = {Text-mined dataset of inorganic materials synthesis recipes},
  journal = {Scientific Data},
  volume = {6},
  number = {1},
  year = {2019},
  month = oct,
  publisher = {Springer Science and Business Media LLC},
  issn = {2052-4463},
  doi = {10.1038/s41597-019-0224-1},
  url = {http://dx.doi.org/10.1038/s41597-019-0224-1},
}
@article{Huo2019,
  author = {Huo, Haoyan and Rong, Ziqin and Kononova, Olga and Sun, Wenhao and Botari, Tiago and He, Tanjin and Tshitoyan, Vahe and Ceder, Gerbrand},
  title = {Semi-supervised machine-learning classification of materials synthesis procedures},
  journal = {npj Computational Materials},
  volume = {5},
  number = {1},
  year = {2019},
  month = jul,
  publisher = {Springer Science and Business Media LLC},
  issn = {2057-3960},
  doi = {10.1038/s41524-019-0204-1},
  url = {http://dx.doi.org/10.1038/s41524-019-0204-1},
}
@article{Swain2016,
  author = {Swain, Matthew C. and Cole, Jacqueline M.},
  title = {{ChemDataExtractor}: A Toolkit for Automated Extraction of Chemical Information from the Scientific Literature},
  journal = {Journal of Chemical Information and Modeling},
  volume = {56},
  number = {10},
  pages = {1894--1904},
  year = {2016},
  month = oct,
  publisher = {American Chemical Society (ACS)},
  issn = {1549-960X},
  doi = {10.1021/acs.jcim.6b00207},
  url = {http://dx.doi.org/10.1021/acs.jcim.6b00207},
}
@article{Mavrai2021,
  author = {Mavračić, Juraj and Court, Callum J. and Isazawa, Taketomo and Elliott, Stephen R. and Cole, Jacqueline M.},
  title = {{ChemDataExtractor} 2.0: Autopopulated Ontologies for Materials Science},
  journal = {Journal of Chemical Information and Modeling},
  volume = {61},
  number = {9},
  pages = {4280--4289},
  year = {2021},
  month = sep,
  publisher = {American Chemical Society (ACS)},
  issn = {1549-960X},
  doi = {10.1021/acs.jcim.1c00446},
  url = {http://dx.doi.org/10.1021/acs.jcim.1c00446},
}
@article{Nugmanov2024,
  author = {Nugmanov, Ramil},
  title = {{PaCh} (Packed Chemicals): Computationally Effective Binary Format for Chemical Structure Encoding},
  journal = {Journal of Chemical Information and Modeling},
  volume = {64},
  number = {8},
  pages = {3173--3179},
  year = {2024},
  month = mar,
  publisher = {American Chemical Society (ACS)},
  issn = {1549-9596},
  doi = {10.1021/acs.jcim.3c01720},
  url = {https://doi.org/10.1021/acs.jcim.3c01720},
}
% The former `eprint` field held a publisher PDF URL rather than an archive
% identifier, so it was dropped; `doi` and `url` still locate the article.
@article{Tran2017,
  author = {Tran, Ngoc Hieu and Zhang, Xianglilan and Xin, Lei and Shan, Baozhen and Li, Ming},
  title = {De novo peptide sequencing by deep learning},
  journal = {Proceedings of the National Academy of Sciences},
  volume = {114},
  number = {31},
  pages = {8247--8252},
  year = {2017},
  doi = {10.1073/pnas.1705691114},
  url = {https://www.pnas.org/doi/abs/10.1073/pnas.1705691114},
  abstract = {De novo peptide sequencing from tandem MS data is the key technology in proteomics for the characterization of proteins, especially for new sequences, such as mAbs. In this study, we propose a deep neural network model, DeepNovo, for de novo peptide sequencing. DeepNovo architecture combines recent advances in convolutional neural networks and recurrent neural networks to learn features of tandem mass spectra, fragment ions, and sequence patterns of peptides. The networks are further integrated with local dynamic programming to solve the complex optimization task of de novo sequencing. We evaluated the method on a wide variety of species and found that DeepNovo considerably outperformed state of the art methods, achieving 7.7–22.9\% higher accuracy at the amino acid level and 38.1–64.0\% higher accuracy at the peptide level. We further used DeepNovo to automatically reconstruct the complete sequences of antibody light and heavy chains of mouse, achieving 97.5–100\% coverage and 97.2–99.5\% accuracy, without assisting databases. Moreover, DeepNovo is retrainable to adapt to any sources of data and provides a complete end-to-end training and prediction solution to the de novo sequencing problem. Not only does our study extend the deep learning revolution to a new field, but it also shows an innovative approach in solving optimization problems by using deep learning and dynamic programming.},
}
@article{wang2022molecular,
  author = {Wang, Yuyang and Wang, Jianren and Cao, Zhonglin and Barati Farimani, Amir},
  title = {Molecular contrastive learning of representations via graph neural networks},
  journal = {Nature Machine Intelligence},
  volume = {4},
  number = {3},
  pages = {279--287},
  year = {2022},
  publisher = {Nature Publishing Group UK},
  address = {London},
}
@article{le2020contrastive,
  author = {Le-Khac, Phuc H and Healy, Graham and Smeaton, Alan F},
  title = {Contrastive representation learning: A framework and review},
  journal = {{IEEE} Access},
  volume = {8},
  pages = {193907--193934},
  year = {2020},
  publisher = {IEEE},
}
@article{zhou2020graph,
  author = {Zhou, Jie and Cui, Ganqu and Hu, Shengding and Zhang, Zhengyan and Yang, Cheng and Liu, Zhiyuan and Wang, Lifeng and Li, Changcheng and Sun, Maosong},
  title = {Graph neural networks: A review of methods and applications},
  journal = {{AI} Open},
  volume = {1},
  pages = {57--81},
  year = {2020},
  publisher = {Elsevier},
}
@article{Allen2016,
  author = {Allen, Felicity and Pon, Allison and Greiner, Russ and Wishart, David},
  title = {Computational Prediction of Electron Ionization Mass Spectra to Assist in {GC/MS} Compound Identification},
  journal = {Analytical Chemistry},
  volume = {88},
  number = {15},
  pages = {7689--7697},
  year = {2016},
  doi = {10.1021/acs.analchem.6b01622},
  note = {PMID: 27381172},
}
% DOI is stored bare (no resolver prefix).
% NOTE(review): this DOI appears to belong to a later edition than the 2011
% one implied by `year` -- confirm which edition is actually cited.
@book{Gross2011,
  author = {Gross, J. H.},
  title = {Mass Spectrometry—A Textbook},
  publisher = {Springer},
  year = {2011},
  doi = {10.1007/978-3-319-54398-7},
}
% A chapter with its own authors inside an edited book, hence incollection
% (classic inbook does not accept author and editor together).
% NOTE(review): `title` is a placeholder ("Chapter 1 in ..."); replace it with
% the chapter's real title once confirmed. The DOI looks like the whole-book DOI.
@incollection{Niessen2015,
  author = {Niessen, W. M. A. and Falck, D.},
  title = {Chapter 1 in Analyzing Biomolecular Interactions by Mass Spectrometry},
  booktitle = {Analyzing Biomolecular Interactions by Mass Spectrometry},
  editor = {Kool, J. and Niessen, W. M. A.},
  chapter = {1},
  publisher = {Wiley},
  year = {2015},
  doi = {10.1002/9783527673391},
}
@article{Aebersold2016,
  title = {Mass-spectrometric exploration of proteome structure and function},
  author = {Aebersold, R. and Mann, M.},
  year = {2016},
  journal = {Nature},
  volume = {537},
  pages = {347--355},
}
@article{Gowda2014,
  title = {Overview of mass spectrometry-based metabolomics: opportunities and challenges},
  author = {Gowda, G. A. N. and Djukovic, D.},
  year = {2014},
  journal = {Methods Mol. Biol.},
  volume = {1198},
  pages = {3--12},
}
@article{DeVijlder2018,
  title = {A tutorial in small molecule identification via electrospray ionization-mass spectrometry: the practical art of structural elucidation},
  author = {De Vijlder, T. and Cuyckens, F.},
  year = {2018},
  journal = {Mass Spectrom. Rev.},
  volume = {37},
  pages = {607--629},
}
@article{Peters2011,
  title = {Recent advances of liquid chromatography-(tandem) mass spectrometry in clinical and forensic toxicology},
  author = {Peters, F. T.},
  year = {2011},
  journal = {Clin. Biochem.},
  volume = {44},
  pages = {54--65},
}
% The truncated author list uses the `and others` token so the style renders
% "et al." itself instead of treating it as part of the first author's name.
@article{VanBocxlaer2000,
  author = {Van Bocxlaer, J. F. and others},
  title = {Liquid chromatography-mass spectrometry in forensic toxicology},
  journal = {Mass Spectrom. Rev.},
  volume = {19},
  pages = {165--214},
  year = {2000},
}
@article{Lebedev2013,
  title = {Environmental mass spectrometry},
  author = {Lebedev, A. T.},
  year = {2013},
  journal = {Ann. Rev. Anal. Chem.},
  volume = {6},
  pages = {163--189},
}
% The truncated author list uses the `and others` token so the style renders
% "et al." itself instead of treating it as part of the first author's name.
@article{Ghiandoni2020,
  author = {Ghiandoni, G. M. and others},
  title = {Enhancing reaction-based de novo design using a multi-label reaction class recommender},
  journal = {Journal of Computer-Aided Molecular Design},
  volume = {34},
  pages = {783--803},
  year = {2020},
}
@article{pernaa2023open,
  author = {Pernaa, Johannes and Takala, Aleksi and Ciftci, Veysel and Hern{\'a}ndez-Ramos, Jos{\'e} and C{\'a}ceres-Jensen, Lizethly and Rodr{\'\i}guez-Becerra, Jorge},
  title = {Open-Source Software Development in Cheminformatics: A Qualitative Analysis of Rationales},
  journal = {Applied Sciences},
  volume = {13},
  number = {17},
  pages = {9516},
  year = {2023},
  publisher = {MDPI},
}
@article{chen2006chemoinformatics,
  author = {Chen, William Lingran},
  title = {Chemoinformatics: past, present, and future},
  journal = {Journal of Chemical Information and Modeling},
  volume = {46},
  number = {6},
  pages = {2230--2255},
  year = {2006},
  publisher = {ACS Publications},
}
@article{king1946asymmetric,
  author = {King, Gilbert W and Cross, Paul C and Thomas, George B},
  title = {The Asymmetric Rotor {III}. Punched-Card Methods of Constructing Band Spectra},
  journal = {The Journal of Chemical Physics},
  volume = {14},
  number = {1},
  pages = {35--42},
  year = {1946},
  publisher = {American Institute of Physics},
}
@article{ray1957finding,
  author = {Ray, Louis C and Kirsch, Russell A},
  title = {Finding chemical records by digital computers},
  journal = {Science},
  volume = {126},
  number = {3278},
  pages = {814--819},
  year = {1957},
  publisher = {American Association for the Advancement of Science},
}
@article{willett2011chemoinformatics,
  author = {Willett, Peter},
  title = {Chemoinformatics: a history},
  journal = {Wiley Interdisciplinary Reviews: Computational Molecular Science},
  volume = {1},
  number = {1},
  pages = {46--56},
  year = {2011},
  publisher = {Wiley Online Library},
}
@article{peironcely2012omg,
  author = {Peironcely, Julio E and Rojas-Chert{\'o}, Miguel and Fichera, Davide and Reijmers, Theo and Coulier, Leon and Faulon, Jean-Loup and Hankemeier, Thomas},
  title = {{OMG}: open molecule generator},
  journal = {Journal of Cheminformatics},
  volume = {4},
  pages = {1--13},
  year = {2012},
  publisher = {Springer},
}
@article{cao2013chemopy,
  author = {Cao, Dong-Sheng and Xu, Qing-Song and Hu, Qian-Nan and Liang, Yi-Zeng},
  title = {{ChemoPy}: freely available {Python} package for computational biology and chemoinformatics},
  journal = {Bioinformatics},
  volume = {29},
  number = {8},
  pages = {1092--1094},
  year = {2013},
  publisher = {Oxford University Press},
}
% NOTE(review): the first author's surname may be the compound
% "Garcia Satorras" -- confirm before changing how the name is split.
@inproceedings{satorras2021n,
  author = {Satorras, V{\'\i}ctor Garcia and Hoogeboom, Emiel and Welling, Max},
  title = {{E(n)} Equivariant Graph Neural Networks},
  booktitle = {International Conference on Machine Learning},
  pages = {9323--9332},
  year = {2021},
  publisher = {PMLR},
}
@article{orsi2024one,
  author = {Orsi, Markus and Reymond, Jean-Louis},
  title = {One chiral fingerprint to find them all},
  journal = {Journal of Cheminformatics},
  volume = {16},
  number = {1},
  pages = {53},
  year = {2024},
  publisher = {Springer},
}
@article{capecchi2020one,
  author = {Capecchi, Alice and Probst, Daniel and Reymond, Jean-Louis},
  title = {One molecular fingerprint to rule them all: drugs, biomolecules, and the metabolome},
  journal = {Journal of Cheminformatics},
  volume = {12},
  pages = {1--15},
  year = {2020},
  publisher = {Springer},
}
@article{rogers2010extended,
  author = {Rogers, David and Hahn, Mathew},
  title = {Extended-connectivity fingerprints},
  journal = {Journal of Chemical Information and Modeling},
  volume = {50},
  number = {5},
  pages = {742--754},
  year = {2010},
  publisher = {ACS Publications},
}
@article{carhart1985atom,
  author = {Carhart, Raymond E and Smith, Dennis H and Venkataraghavan, Rengachari},
  title = {Atom pairs as molecular features in structure-activity studies: definition and applications},
  journal = {Journal of Chemical Information and Computer Sciences},
  volume = {25},
  number = {2},
  pages = {64--73},
  year = {1985},
  publisher = {ACS Publications},
}
@article{probst2018probabilistic,
  author = {Probst, Daniel and Reymond, Jean-Louis},
  title = {A probabilistic molecular fingerprint for big data settings},
  journal = {Journal of Cheminformatics},
  volume = {10},
  pages = {1--12},
  year = {2018},
  publisher = {Springer},
}
@article{landrum_lwreg_2024,
  author = {Landrum, Gregory A. and Braun, Jessica and Katzberger, Paul and Lehner, Marc T. and Riniker, Sereina},
  title = {lwreg: {A} {Lightweight} {System} for {Chemical} {Registration} and {Data} {Storage}},
  shorttitle = {lwreg},
  journal = {Journal of Chemical Information and Modeling},
  volume = {64},
  number = {16},
  pages = {6247--6252},
  year = {2024},
  month = aug,
  issn = {1549-9596, 1549-960X},
  doi = {10.1021/acs.jcim.4c01133},
  url = {https://pubs.acs.org/doi/10.1021/acs.jcim.4c01133},
  urldate = {2024-10-21},
  language = {en},
  copyright = {https://creativecommons.org/licenses/by/4.0/},
}
@article{bento_open_2020,
  author = {Bento, A. Patrícia and Hersey, Anne and Félix, Eloy and Landrum, Greg and Gaulton, Anna and Atkinson, Francis and Bellis, Louisa J. and De Veij, Marleen and Leach, Andrew R.},
  title = {An open source chemical structure curation pipeline using {RDKit}},
  journal = {Journal of Cheminformatics},
  volume = {12},
  number = {1},
  pages = {51},
  year = {2020},
  month = dec,
  issn = {1758-2946},
  doi = {10.1186/s13321-020-00456-1},
  url = {https://jcheminf.biomedcentral.com/articles/10.1186/s13321-020-00456-1},
  urldate = {2023-01-12},
  language = {en},
}
% Reference-manager export cleaned up: the machine-local `file` attachment
% path was dropped and the glued initials "C.G." were spaced.
@article{lehner_dash_2023,
  author = {Lehner, Marc T. and Katzberger, Paul and Maeder, Niels and Schiebroek, Carl C. G. and Teetz, Jakob and Landrum, Gregory A. and Riniker, Sereina},
  title = {{DASH}: {Dynamic} {Attention}-{Based} {Substructure} {Hierarchy} for {Partial} {Charge} {Assignment}},
  shorttitle = {{DASH}},
  journal = {Journal of Chemical Information and Modeling},
  volume = {63},
  number = {19},
  pages = {6014--6028},
  year = {2023},
  month = oct,
  issn = {1549-9596, 1549-960X},
  doi = {10.1021/acs.jcim.3c00800},
  url = {https://pubs.acs.org/doi/10.1021/acs.jcim.3c00800},
  urldate = {2024-10-22},
  language = {en},
  copyright = {https://creativecommons.org/licenses/by-nc-nd/4.0/},
}
% arXiv preprint; the citation key is kept as exported so existing citations
% still resolve. `eprint`/`archiveprefix` are taken from the arXiv URL below.
@misc{https://doi.org/10.48550/arxiv.2410.11527,
  author = {Guo, Jeff and Schwaller, Philippe},
  title = {It Takes Two to Tango: Directly Optimizing for Constrained Synthesizability in Generative Molecular Design},
  year = {2024},
  publisher = {arXiv},
  eprint = {2410.11527},
  archiveprefix = {arXiv},
  doi = {10.48550/ARXIV.2410.11527},
  url = {https://arxiv.org/abs/2410.11527},
  keywords = {Biomolecules (q-bio.BM), Machine Learning (cs.LG), FOS: Biological sciences, FOS: Computer and information sciences},
  copyright = {Creative Commons Attribution 4.0 International},
}
% arXiv preprint; the citation key is kept as exported so existing citations
% still resolve. `eprint`/`archiveprefix` are taken from the arXiv URL below.
@misc{https://doi.org/10.48550/arxiv.2110.06389,
  author = {Gao, Wenhao and Mercado, Rocío and Coley, Connor W.},
  title = {Amortized Tree Generation for Bottom-up Synthesis Planning and Synthesizable Molecular Design},
  year = {2021},
  publisher = {arXiv},
  eprint = {2110.06389},
  archiveprefix = {arXiv},
  doi = {10.48550/ARXIV.2110.06389},
  url = {https://arxiv.org/abs/2110.06389},
  keywords = {Machine Learning (cs.LG), Quantitative Methods (q-bio.QM), FOS: Computer and information sciences, FOS: Biological sciences},
  copyright = {Creative Commons Attribution 4.0 International},
}
% arXiv preprint; the citation key is kept as exported so existing citations
% still resolve. `eprint`/`archiveprefix` are taken from the arXiv URL below.
@misc{https://doi.org/10.48550/arxiv.2410.03494,
  author = {Gao, Wenhao and Luo, Shitong and Coley, Connor W.},
  title = {Generative Artificial Intelligence for Navigating Synthesizable Chemical Space},
  year = {2024},
  publisher = {arXiv},
  eprint = {2410.03494},
  archiveprefix = {arXiv},
  doi = {10.48550/ARXIV.2410.03494},
  url = {https://arxiv.org/abs/2410.03494},
  keywords = {Machine Learning (cs.LG), Artificial Intelligence (cs.AI), Chemical Physics (physics.chem-ph), Biomolecules (q-bio.BM), FOS: Computer and information sciences, FOS: Physical sciences, FOS: Biological sciences},
  copyright = {Creative Commons Attribution 4.0 International},
}
% The third author's surname was spelled "Nagdir" here but "Nadgir" in the
% kapoor2024aiagentsmatter entry of this file; unified on "Nadgir".
@misc{siegel2024corebenchfosteringcredibilitypublished,
  author = {Siegel, Zachary S. and Kapoor, Sayash and Nadgir, Nitya and Stroebl, Benedikt and Narayanan, Arvind},
  title = {{CORE-Bench}: Fostering the Credibility of Published Research Through a Computational Reproducibility Agent Benchmark},
  year = {2024},
  eprint = {2409.11363},
  archiveprefix = {arXiv},
  primaryclass = {cs.CL},
  url = {https://arxiv.org/abs/2409.11363},
}
@misc{kapoor2024aiagentsmatter,
  author = {Kapoor, Sayash and Stroebl, Benedikt and Siegel, Zachary S. and Nadgir, Nitya and Narayanan, Arvind},
  title = {{AI} Agents That Matter},
  year = {2024},
  eprint = {2407.01502},
  archiveprefix = {arXiv},
  primaryclass = {cs.LG},
  url = {https://arxiv.org/abs/2407.01502},
}
@misc{jimenez2024swebenchlanguagemodelsresolve,
  author = {Jimenez, Carlos E. and Yang, John and Wettig, Alexander and Yao, Shunyu and Pei, Kexin and Press, Ofir and Narasimhan, Karthik},
  title = {{SWE-bench}: Can Language Models Resolve Real-World {GitHub} Issues?},
  year = {2024},
  eprint = {2310.06770},
  archiveprefix = {arXiv},
  primaryclass = {cs.CL},
  url = {https://arxiv.org/abs/2310.06770},
}
@misc{laurent2024labbenchmeasuringcapabilitieslanguage,
  author = {Laurent, Jon M. and Janizek, Joseph D. and Ruzo, Michael and Hinks, Michaela M. and Hammerling, Michael J. and Narayanan, Siddharth and Ponnapati, Manvitha and White, Andrew D. and Rodriques, Samuel G.},
  title = {{LAB-Bench}: Measuring Capabilities of Language Models for Biology Research},
  year = {2024},
  eprint = {2407.10362},
  archiveprefix = {arXiv},
  primaryclass = {cs.AI},
  url = {https://arxiv.org/abs/2407.10362},
}
@misc{chan2024mlebenchevaluatingmachinelearning,
  author = {Chan, Jun Shern and Chowdhury, Neil and Jaffe, Oliver and Aung, James and Sherburn, Dane and Mays, Evan and Starace, Giulio and Liu, Kevin and Maksin, Leon and Patwardhan, Tejal and Weng, Lilian and Mądry, Aleksander},
  title = {{MLE-bench}: Evaluating Machine Learning Agents on Machine Learning Engineering},
  year = {2024},
  eprint = {2410.07095},
  archiveprefix = {arXiv},
  primaryclass = {cs.CL},
  url = {https://arxiv.org/abs/2410.07095},
}
@misc{huang2024mlagentbenchevaluatinglanguageagents,
  author = {Huang, Qian and Vora, Jian and Liang, Percy and Leskovec, Jure},
  title = {{MLAgentBench}: Evaluating Language Agents on Machine Learning Experimentation},
  year = {2024},
  eprint = {2310.03302},
  archiveprefix = {arXiv},
  primaryclass = {cs.LG},
  url = {https://arxiv.org/abs/2310.03302},
}
% Conference proceedings paper: the venue belongs in `booktitle` of an
% inproceedings entry rather than in `journal` of an article entry.
@inproceedings{wei2022chain,
  author = {Wei, Jason and Wang, Xuezhi and Schuurmans, Dale and Bosma, Maarten and Xia, Fei and Chi, Ed and Le, Quoc V and Zhou, Denny and others},
  title = {Chain-of-thought prompting elicits reasoning in large language models},
  booktitle = {Advances in Neural Information Processing Systems},
  volume = {35},
  pages = {24824--24837},
  year = {2022},
}
% Conference proceedings paper: the venue belongs in `booktitle` of an
% inproceedings entry rather than in `journal` of an article entry.
@inproceedings{kojima2022large,
  author = {Kojima, Takeshi and Gu, Shixiang Shane and Reid, Machel and Matsuo, Yutaka and Iwasawa, Yusuke},
  title = {Large language models are zero-shot reasoners},
  booktitle = {Advances in Neural Information Processing Systems},
  volume = {35},
  pages = {22199--22213},
  year = {2022},
}
@misc{fu2023complexitybasedpromptingmultistepreasoning,
  author = {Fu, Yao and Peng, Hao and Sabharwal, Ashish and Clark, Peter and Khot, Tushar},
  title = {Complexity-Based Prompting for Multi-Step Reasoning},
  year = {2023},
  eprint = {2210.00720},
  archivePrefix = {arXiv},
  primaryClass = {cs.CL},
  url = {https://arxiv.org/abs/2210.00720},
}
@misc{yao2023treethoughtsdeliberateproblem,
  author = {Yao, Shunyu and Yu, Dian and Zhao, Jeffrey and Shafran, Izhak and Griffiths, Thomas L. and Cao, Yuan and Narasimhan, Karthik},
  title = {Tree of Thoughts: Deliberate Problem Solving with Large Language Models},
  year = {2023},
  eprint = {2305.10601},
  archivePrefix = {arXiv},
  primaryClass = {cs.CL},
  url = {https://arxiv.org/abs/2305.10601},
}
@article{Fedorenko_2024,
  author = {Fedorenko, Evelina and Piantadosi, Steven T. and Gibson, Edward A. F.},
  title = {Language is primarily a tool for communication rather than thought},
  journal = {Nature},
  volume = {630},
  number = {8017},
  pages = {575--586},
  year = {2024},
  month = jun,
  publisher = {Springer Science and Business Media LLC},
  issn = {1476-4687},
  doi = {10.1038/s41586-024-07522-w},
  url = {http://dx.doi.org/10.1038/s41586-024-07522-w},
}
@misc{qin2024o1replicationjourneystrategic,
  author = {Qin, Yiwei and Li, Xuefeng and Zou, Haoyang and Liu, Yixiu and Xia, Shijie and Huang, Zhen and Ye, Yixin and Yuan, Weizhe and Liu, Hector and Li, Yuanzhi and Liu, Pengfei},
  title = {O1 Replication Journey: A Strategic Progress Report -- Part 1},
  year = {2024},
  eprint = {2410.18982},
  archivePrefix = {arXiv},
  primaryClass = {cs.AI},
  url = {https://arxiv.org/abs/2410.18982},
}