references.bib

@article{Ahneman_2018,
  author    = {Ahneman, Derek T. and Estrada, Jesús G. and Lin, Shishi and Dreher, Spencer D. and Doyle, Abigail G.},
  title     = {Predicting reaction performance in {C--N} cross-coupling using machine learning},
  journal   = {Science},
  volume    = {360},
  number    = {6385},
  pages     = {186--190},
  year      = {2018},
  month     = apr,
  publisher = {American Association for the Advancement of Science (AAAS)},
  issn      = {1095-9203},
  doi       = {10.1126/science.aar5169},
  url       = {http://dx.doi.org/10.1126/science.aar5169}
}
@article{Chuang_2018,
  author    = {Chuang, Kangway V. and Keiser, Michael J.},
  title     = {Comment on ``Predicting reaction performance in {C--N} cross-coupling using machine learning''},
  journal   = {Science},
  volume    = {362},
  number    = {6416},
  year      = {2018},
  month     = nov,
  publisher = {American Association for the Advancement of Science (AAAS)},
  issn      = {1095-9203},
  doi       = {10.1126/science.aat8603},
  url       = {http://dx.doi.org/10.1126/science.aat8603}
}
@misc{vinyals2016order,
  author        = {Vinyals, Oriol and Bengio, Samy and Kudlur, Manjunath},
  title         = {Order Matters: Sequence to sequence for sets},
  eprint        = {1511.06391},
  archiveprefix = {arXiv},
  primaryclass  = {stat.ML},
  year          = {2016}
}
@misc{shazeer2017outrageously,
  author        = {Shazeer, Noam and Mirhoseini, Azalia and Maziarz, Krzysztof and Davis, Andy and Le, Quoc and Hinton, Geoffrey and Dean, Jeff},
  title         = {Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer},
  eprint        = {1701.06538},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  year          = {2017}
}
@misc{jiang2024mixtral,
  author        = {Jiang, Albert Q. and Sablayrolles, Alexandre and Roux, Antoine and Mensch, Arthur and Savary, Blanche and Bamford, Chris and Chaplot, Devendra Singh and de las Casas, Diego and Bou Hanna, Emma and Bressand, Florian and Lengyel, Gianna and Bour, Guillaume and Lample, Guillaume and Renard Lavaud, Lélio and Saulnier, Lucile and Lachaux, Marie-Anne and Stock, Pierre and Subramanian, Sandeep and Yang, Sophia and Antoniak, Szymon and Le Scao, Teven and Gervet, Théophile and Lavril, Thibaut and Wang, Thomas and Lacroix, Timothée and El Sayed, William},
  title         = {Mixtral of Experts},
  year          = {2024},
  eprint        = {2401.04088},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG}
}
@misc{kingma2015variational,
  author        = {Kingma, Diederik P. and Salimans, Tim and Welling, Max},
  title         = {Variational Dropout and the Local Reparameterization Trick},
  eprint        = {1506.02557},
  archiveprefix = {arXiv},
  primaryclass  = {stat.ML},
  year          = {2015}
}
@article{Chen_2024,
  author    = {Chen, Jiayuan and Guo, Kehan and Liu, Zhen and Isayev, Olexandr and Zhang, Xiangliang},
  title     = {Uncertainty-Aware Yield Prediction with Multimodal Molecular Features},
  journal   = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {38},
  number    = {8},
  pages     = {8274--8282},
  year      = {2024},
  month     = mar,
  publisher = {Association for the Advancement of Artificial Intelligence (AAAI)},
  issn      = {2159-5399},
  doi       = {10.1609/aaai.v38i8.28668},
  url       = {http://dx.doi.org/10.1609/aaai.v38i8.28668}
}

@article{schwaller2020data,
  author  = {Schwaller, Philippe and Vaucher, Alain C and Laino, Teodoro and Reymond, Jean-Louis},
  title   = {Data augmentation strategies to improve reaction yield predictions and estimate uncertainty},
  journal = {ChemRxiv preprint},
  year    = {2020}
}
@article{schwaller2021prediction,
  author    = {Schwaller, Philippe and Vaucher, Alain C and Laino, Teodoro and Reymond, Jean-Louis},
  title     = {Prediction of chemical reaction yields using deep learning},
  journal   = {Machine Learning: Science and Technology},
  volume    = {2},
  number    = {1},
  pages     = {015016},
  year      = {2021},
  publisher = {IOP Publishing}
}

@article{Kwon_2022,
  author    = {Kwon, Youngchun and Lee, Dongseon and Choi, Youn-Suk and Kang, Seokho},
  title     = {Uncertainty-aware prediction of chemical reaction yields with graph neural networks},
  journal   = {Journal of Cheminformatics},
  volume    = {14},
  number    = {1},
  year      = {2022},
  month     = jan,
  publisher = {Springer Science and Business Media LLC},
  issn      = {1758-2946},
  doi       = {10.1186/s13321-021-00579-z},
  url       = {http://dx.doi.org/10.1186/s13321-021-00579-z}
}
@inproceedings{gal2016dropout,
  author       = {Gal, Yarin and Ghahramani, Zoubin},
  title        = {Dropout as a {Bayesian} approximation: Representing model uncertainty in deep learning},
  booktitle    = {International Conference on Machine Learning},
  pages        = {1050--1059},
  year         = {2016},
  organization = {PMLR}
}

@article{dagdelen_structured_2024,
  author    = {Dagdelen, John and Dunn, Alexander and Lee, Sanghoon and Walker, Nicholas and Rosen, Andrew S. and Ceder, Gerbrand and Persson, Kristin A. and Jain, Anubhav},
  title     = {Structured information extraction from scientific text with large language models},
  journal   = {Nature Communications},
  volume    = {15},
  number    = {1},
  pages     = {1418},
  year      = {2024},
  month     = feb,
  publisher = {Nature Publishing Group},
  issn      = {2041-1723},
  doi       = {10.1038/s41467-024-45563-x},
  url       = {https://www.nature.com/articles/s41467-024-45563-x},
  urldate   = {2024-04-13},
  language  = {en},
  copyright = {2024 The Author(s)},
  keywords  = {Materials science, Theory and computation, Databases, Scientific data},
  abstract  = {Extracting structured knowledge from scientific text remains a challenging task for machine learning models. Here, we present a simple approach to joint named entity recognition and relation extraction and demonstrate how pretrained large language models (GPT-3, Llama-2) can be fine-tuned to extract useful records of complex scientific knowledge. We test three representative tasks in materials chemistry: linking dopants and host materials, cataloging metal-organic frameworks, and general composition/phase/morphology/application information extraction. Records are extracted from single sentences or entire paragraphs, and the output can be returned as simple English sentences or a more structured format such as a list of JSON objects. This approach represents a simple, accessible, and highly flexible route to obtaining large databases of structured specialized scientific knowledge extracted from research papers.}
}

@article{Trewartha2022,
  author    = {Trewartha, Amalie and Walker, Nicholas and Huo, Haoyan and Lee, Sanghoon and Cruse, Kevin and Dagdelen, John and Dunn, Alexander and Persson, Kristin A. and Ceder, Gerbrand and Jain, Anubhav},
  title     = {Quantifying the advantage of domain-specific pre-training on named entity recognition tasks in materials science},
  journal   = {Patterns},
  volume    = {3},
  number    = {4},
  pages     = {100488},
  year      = {2022},
  month     = apr,
  publisher = {Elsevier BV},
  issn      = {2666-3899},
  doi       = {10.1016/j.patter.2022.100488},
  url       = {http://dx.doi.org/10.1016/j.patter.2022.100488}
}

@article{Guo2021,
  author    = {Guo, Jiang and Ibanez-Lopez, A. Santiago and Gao, Hanyu and Quach, Victor and Coley, Connor W. and Jensen, Klavs F. and Barzilay, Regina},
  title     = {Automated Chemical Reaction Extraction from Scientific Literature},
  journal   = {Journal of Chemical Information and Modeling},
  volume    = {62},
  number    = {9},
  pages     = {2035--2045},
  year      = {2021},
  month     = jun,
  publisher = {American Chemical Society (ACS)},
  issn      = {1549-960X},
  doi       = {10.1021/acs.jcim.1c00284},
  url       = {http://dx.doi.org/10.1021/acs.jcim.1c00284}
}

@article{Kim2017,
  author    = {Kim, Edward and Huang, Kevin and Saunders, Adam and McCallum, Andrew and Ceder, Gerbrand and Olivetti, Elsa},
  title     = {Materials Synthesis Insights from Scientific Literature via Text Extraction and Machine Learning},
  journal   = {Chemistry of Materials},
  volume    = {29},
  number    = {21},
  pages     = {9436--9444},
  year      = {2017},
  month     = oct,
  publisher = {American Chemical Society (ACS)},
  issn      = {1520-5002},
  doi       = {10.1021/acs.chemmater.7b03500},
  url       = {http://dx.doi.org/10.1021/acs.chemmater.7b03500}
}

@misc{mysore2019materials,
  author        = {Mysore, Sheshera and Jensen, Zach and Kim, Edward and Huang, Kevin and Chang, Haw-Shiuan and Strubell, Emma and Flanigan, Jeffrey and McCallum, Andrew and Olivetti, Elsa},
  title         = {The Materials Science Procedural Text Corpus: Annotating Materials Synthesis Procedures with Shallow Semantic Structures},
  eprint        = {1905.06939},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  year          = {2019}
}

@article{Kim2020,
  author    = {Kim, Edward and Jensen, Zach and van Grootel, Alexander and Huang, Kevin and Staib, Matthew and Mysore, Sheshera and Chang, Haw-Shiuan and Strubell, Emma and McCallum, Andrew and Jegelka, Stefanie and Olivetti, Elsa},
  title     = {Inorganic Materials Synthesis Planning with Literature-Trained Neural Networks},
  journal   = {Journal of Chemical Information and Modeling},
  volume    = {60},
  number    = {3},
  pages     = {1194--1201},
  year      = {2020},
  month     = jan,
  publisher = {American Chemical Society (ACS)},
  issn      = {1549-960X},
  doi       = {10.1021/acs.jcim.9b00995},
  url       = {http://dx.doi.org/10.1021/acs.jcim.9b00995}
}

@article{Kononova2019,
  author    = {Kononova, Olga and Huo, Haoyan and He, Tanjin and Rong, Ziqin and Botari, Tiago and Sun, Wenhao and Tshitoyan, Vahe and Ceder, Gerbrand},
  title     = {Text-mined dataset of inorganic materials synthesis recipes},
  journal   = {Scientific Data},
  volume    = {6},
  number    = {1},
  year      = {2019},
  month     = oct,
  publisher = {Springer Science and Business Media LLC},
  issn      = {2052-4463},
  doi       = {10.1038/s41597-019-0224-1},
  url       = {http://dx.doi.org/10.1038/s41597-019-0224-1}
}

@article{Huo2019,
  author    = {Huo, Haoyan and Rong, Ziqin and Kononova, Olga and Sun, Wenhao and Botari, Tiago and He, Tanjin and Tshitoyan, Vahe and Ceder, Gerbrand},
  title     = {Semi-supervised machine-learning classification of materials synthesis procedures},
  journal   = {npj Computational Materials},
  volume    = {5},
  number    = {1},
  year      = {2019},
  month     = jul,
  publisher = {Springer Science and Business Media LLC},
  issn      = {2057-3960},
  doi       = {10.1038/s41524-019-0204-1},
  url       = {http://dx.doi.org/10.1038/s41524-019-0204-1}
}

@article{Swain2016,
  author    = {Swain, Matthew C. and Cole, Jacqueline M.},
  title     = {{ChemDataExtractor}: A Toolkit for Automated Extraction of Chemical Information from the Scientific Literature},
  journal   = {Journal of Chemical Information and Modeling},
  volume    = {56},
  number    = {10},
  pages     = {1894--1904},
  year      = {2016},
  month     = oct,
  publisher = {American Chemical Society (ACS)},
  issn      = {1549-960X},
  doi       = {10.1021/acs.jcim.6b00207},
  url       = {http://dx.doi.org/10.1021/acs.jcim.6b00207}
}

@article{Mavrai2021,
  author    = {Mavračić, Juraj and Court, Callum J. and Isazawa, Taketomo and Elliott, Stephen R. and Cole, Jacqueline M.},
  title     = {{ChemDataExtractor} 2.0: Autopopulated Ontologies for Materials Science},
  journal   = {Journal of Chemical Information and Modeling},
  volume    = {61},
  number    = {9},
  pages     = {4280--4289},
  year      = {2021},
  month     = sep,
  publisher = {American Chemical Society (ACS)},
  issn      = {1549-960X},
  doi       = {10.1021/acs.jcim.1c00446},
  url       = {http://dx.doi.org/10.1021/acs.jcim.1c00446}
}

@article{Nugmanov2024,
  author    = {Nugmanov, Ramil},
  title     = {{PaCh} ({Packed Chemicals}): Computationally Effective Binary Format for Chemical Structure Encoding},
  journal   = {Journal of Chemical Information and Modeling},
  volume    = {64},
  number    = {8},
  pages     = {3173--3179},
  year      = {2024},
  month     = mar,
  publisher = {American Chemical Society (ACS)},
  issn      = {1549-9596},
  doi       = {10.1021/acs.jcim.3c01720},
  url       = {https://doi.org/10.1021/acs.jcim.3c01720}
}

@article{Tran2017,
  author   = {Tran, Ngoc Hieu and Zhang, Xianglilan and Xin, Lei and Shan, Baozhen and Li, Ming},
  title    = {De novo peptide sequencing by deep learning},
  journal  = {Proceedings of the National Academy of Sciences},
  volume   = {114},
  number   = {31},
  pages    = {8247--8252},
  year     = {2017},
  doi      = {10.1073/pnas.1705691114},
  url      = {https://www.pnas.org/doi/abs/10.1073/pnas.1705691114},
  abstract = {De novo peptide sequencing from tandem MS data is the key technology in proteomics for the characterization of proteins, especially for new sequences, such as mAbs. In this study, we propose a deep neural network model, DeepNovo, for de novo peptide sequencing. DeepNovo architecture combines recent advances in convolutional neural networks and recurrent neural networks to learn features of tandem mass spectra, fragment ions, and sequence patterns of peptides. The networks are further integrated with local dynamic programming to solve the complex optimization task of de novo sequencing. We evaluated the method on a wide variety of species and found that DeepNovo considerably outperformed state of the art methods, achieving 7.7–22.9\% higher accuracy at the amino acid level and 38.1–64.0\% higher accuracy at the peptide level. We further used DeepNovo to automatically reconstruct the complete sequences of antibody light and heavy chains of mouse, achieving 97.5–100\% coverage and 97.2–99.5\% accuracy, without assisting databases. Moreover, DeepNovo is retrainable to adapt to any sources of data and provides a complete end-to-end training and prediction solution to the de novo sequencing problem. Not only does our study extend the deep learning revolution to a new field, but it also shows an innovative approach in solving optimization problems by using deep learning and dynamic programming.}
}

@article{wang2022molecular,
  author    = {Wang, Yuyang and Wang, Jianren and Cao, Zhonglin and Barati Farimani, Amir},
  title     = {Molecular contrastive learning of representations via graph neural networks},
  journal   = {Nature Machine Intelligence},
  volume    = {4},
  number    = {3},
  pages     = {279--287},
  year      = {2022},
  publisher = {Nature Publishing Group},
  address   = {London, UK}
}

@article{le2020contrastive,
  author    = {Le-Khac, Phuc H and Healy, Graham and Smeaton, Alan F},
  title     = {Contrastive representation learning: A framework and review},
  journal   = {IEEE Access},
  volume    = {8},
  pages     = {193907--193934},
  year      = {2020},
  publisher = {IEEE}
}

@article{zhou2020graph,
  author    = {Zhou, Jie and Cui, Ganqu and Hu, Shengding and Zhang, Zhengyan and Yang, Cheng and Liu, Zhiyuan and Wang, Lifeng and Li, Changcheng and Sun, Maosong},
  title     = {Graph neural networks: A review of methods and applications},
  journal   = {AI Open},
  volume    = {1},
  pages     = {57--81},
  year      = {2020},
  publisher = {Elsevier}
}

@article{Allen2016,
  author  = {Allen, Felicity and Pon, Allison and Greiner, Russ and Wishart, David},
  title   = {Computational Prediction of Electron Ionization Mass Spectra to Assist in {GC/MS} Compound Identification},
  journal = {Analytical Chemistry},
  volume  = {88},
  number  = {15},
  pages   = {7689--7697},
  year    = {2016},
  doi     = {10.1021/acs.analchem.6b01622},
  note    = {PMID: 27381172}
}
% NOTE(review): this DOI looks like it belongs to a later edition than year = 2011 -- confirm which edition is cited.
@book{Gross2011,
  author    = {Gross, J. H.},
  title     = {Mass Spectrometry—A Textbook},
  publisher = {Springer},
  year      = {2011},
  doi       = {10.1007/978-3-319-54398-7}
}

% NOTE(review): title is a placeholder ("Chapter 1 in ...") -- replace with the real chapter title once confirmed.
@incollection{Niessen2015,
  author    = {Niessen, W. M. A. and Falck, D.},
  title     = {Chapter 1 in Analyzing Biomolecular Interactions by Mass Spectrometry},
  booktitle = {Analyzing Biomolecular Interactions by Mass Spectrometry},
  editor    = {Kool, J. and Niessen, W. M. A.},
  publisher = {Wiley},
  year      = {2015},
  doi       = {10.1002/9783527673391}
}

@article{Aebersold2016,
  title   = {Mass-spectrometric exploration of proteome structure and function},
  author  = {Aebersold, R. and Mann, M.},
  year    = {2016},
  journal = {Nature},
  volume  = {537},
  pages   = {347--355}
}

@article{Gowda2014,
  title   = {Overview of mass spectrometry-based metabolomics: opportunities and challenges},
  author  = {Gowda, G. A. N. and Djukovic, D.},
  year    = {2014},
  journal = {Methods Mol. Biol.},
  volume  = {1198},
  pages   = {3--12}
}

@article{DeVijlder2018,
  title   = {A tutorial in small molecule identification via electrospray ionization-mass spectrometry: the practical art of structural elucidation},
  author  = {De Vijlder, T. and Cuyckens, F.},
  year    = {2018},
  journal = {Mass Spectrom. Rev.},
  volume  = {37},
  pages   = {607--629}
}

@article{Peters2011,
  title   = {Recent advances of liquid chromatography-(tandem) mass spectrometry in clinical and forensic toxicology},
  author  = {Peters, F. T.},
  year    = {2011},
  journal = {Clin. Biochem.},
  volume  = {44},
  pages   = {54--65}
}

@article{VanBocxlaer2000,
  author  = {Van Bocxlaer, J. F. and others},
  title   = {Liquid chromatography-mass spectrometry in forensic toxicology},
  journal = {Mass Spectrom. Rev.},
  volume  = {19},
  pages   = {165--214},
  year    = {2000}
}

@article{Lebedev2013,
  title   = {Environmental mass spectrometry},
  author  = {Lebedev, A. T.},
  year    = {2013},
  journal = {Ann. Rev. Anal. Chem.},
  volume  = {6},
  pages   = {163--189}
}

@article{Ghiandoni2020,
  author  = {Ghiandoni, G. M. and others},
  title   = {Enhancing reaction-based de novo design using a multi-label reaction class recommender},
  journal = {Journal of Computer-Aided Molecular Design},
  volume  = {34},
  pages   = {783--803},
  year    = {2020}
}
@article{pernaa2023open,
  author    = {Pernaa, Johannes and Takala, Aleksi and Ciftci, Veysel and Hern{\'a}ndez-Ramos, Jos{\'e} and C{\'a}ceres-Jensen, Lizethly and Rodr{\'\i}guez-Becerra, Jorge},
  title     = {Open-Source Software Development in Cheminformatics: A Qualitative Analysis of Rationales},
  journal   = {Applied Sciences},
  publisher = {MDPI},
  year      = {2023},
  volume    = {13},
  number    = {17},
  pages     = {9516}
}

@article{chen2006chemoinformatics,
  author    = {Chen, William Lingran},
  title     = {Chemoinformatics: past, present, and future},
  journal   = {Journal of Chemical Information and Modeling},
  publisher = {ACS Publications},
  year      = {2006},
  volume    = {46},
  number    = {6},
  pages     = {2230--2255}
}

@article{king1946asymmetric,
  author    = {King, Gilbert W and Cross, Paul C and Thomas, George B},
  title     = {The Asymmetric Rotor {III}. Punched-Card Methods of Constructing Band Spectra},
  journal   = {The Journal of Chemical Physics},
  volume    = {14},
  number    = {1},
  pages     = {35--42},
  year      = {1946},
  publisher = {American Institute of Physics}
}

@article{ray1957finding,
  author    = {Ray, Louis C and Kirsch, Russell A},
  title     = {Finding chemical records by digital computers},
  journal   = {Science},
  publisher = {American Association for the Advancement of Science},
  year      = {1957},
  volume    = {126},
  number    = {3278},
  pages     = {814--819}
}

@article{willett2011chemoinformatics,
  author    = {Willett, Peter},
  title     = {Chemoinformatics: a history},
  journal   = {Wiley Interdisciplinary Reviews: Computational Molecular Science},
  publisher = {Wiley Online Library},
  year      = {2011},
  volume    = {1},
  number    = {1},
  pages     = {46--56}
}

@article{peironcely2012omg,
  author    = {Peironcely, Julio E and Rojas-Chert{\'o}, Miguel and Fichera, Davide and Reijmers, Theo and Coulier, Leon and Faulon, Jean-Loup and Hankemeier, Thomas},
  title     = {{OMG}: open molecule generator},
  journal   = {Journal of Cheminformatics},
  volume    = {4},
  pages     = {1--13},
  year      = {2012},
  publisher = {Springer}
}

@article{cao2013chemopy,
  author    = {Cao, Dong-Sheng and Xu, Qing-Song and Hu, Qian-Nan and Liang, Yi-Zeng},
  title     = {{ChemoPy}: freely available {Python} package for computational biology and chemoinformatics},
  journal   = {Bioinformatics},
  volume    = {29},
  number    = {8},
  pages     = {1092--1094},
  year      = {2013},
  publisher = {Oxford University Press}
}


@inproceedings{satorras2021n,
  author       = {Satorras, V{\'\i}ctor Garcia and Hoogeboom, Emiel and Welling, Max},
  title        = {{E(n)} equivariant graph neural networks},
  booktitle    = {International Conference on Machine Learning},
  pages        = {9323--9332},
  year         = {2021},
  organization = {PMLR}
}

@article{orsi2024one,
  author    = {Orsi, Markus and Reymond, Jean-Louis},
  title     = {One chiral fingerprint to find them all},
  journal   = {Journal of Cheminformatics},
  volume    = {16},
  number    = {1},
  pages     = {53},
  year      = {2024},
  publisher = {Springer}
}

@article{capecchi2020one,
  author    = {Capecchi, Alice and Probst, Daniel and Reymond, Jean-Louis},
  title     = {One molecular fingerprint to rule them all: drugs, biomolecules, and the metabolome},
  journal   = {Journal of Cheminformatics},
  volume    = {12},
  pages     = {1--15},
  year      = {2020},
  publisher = {Springer}
}

@article{rogers2010extended,
  author    = {Rogers, David and Hahn, Mathew},
  title     = {Extended-connectivity fingerprints},
  journal   = {Journal of Chemical Information and Modeling},
  volume    = {50},
  number    = {5},
  pages     = {742--754},
  year      = {2010},
  publisher = {ACS Publications}
}

@article{carhart1985atom,
  author    = {Carhart, Raymond E and Smith, Dennis H and Venkataraghavan, Rengachari},
  title     = {Atom pairs as molecular features in structure-activity studies: definition and applications},
  journal   = {Journal of Chemical Information and Computer Sciences},
  volume    = {25},
  number    = {2},
  pages     = {64--73},
  year      = {1985},
  publisher = {ACS Publications}
}

@article{probst2018probabilistic,
  author    = {Probst, Daniel and Reymond, Jean-Louis},
  title     = {A probabilistic molecular fingerprint for big data settings},
  journal   = {Journal of Cheminformatics},
  volume    = {10},
  pages     = {1--12},
  year      = {2018},
  publisher = {Springer}
}

@article{landrum_lwreg_2024,
  author     = {Landrum, Gregory A. and Braun, Jessica and Katzberger, Paul and Lehner, Marc T. and Riniker, Sereina},
  title      = {lwreg: {A} {Lightweight} {System} for {Chemical} {Registration} and {Data} {Storage}},
  shorttitle = {lwreg},
  journal    = {Journal of Chemical Information and Modeling},
  volume     = {64},
  number     = {16},
  pages      = {6247--6252},
  year       = {2024},
  month      = aug,
  issn       = {1549-9596, 1549-960X},
  doi        = {10.1021/acs.jcim.4c01133},
  url        = {https://pubs.acs.org/doi/10.1021/acs.jcim.4c01133},
  urldate    = {2024-10-21},
  language   = {en},
  copyright  = {https://creativecommons.org/licenses/by/4.0/}
}

@article{bento_open_2020,
  author   = {Bento, A. Patrícia and Hersey, Anne and Félix, Eloy and Landrum, Greg and Gaulton, Anna and Atkinson, Francis and Bellis, Louisa J. and De Veij, Marleen and Leach, Andrew R.},
  title    = {An open source chemical structure curation pipeline using {RDKit}},
  journal  = {Journal of Cheminformatics},
  volume   = {12},
  number   = {1},
  pages    = {51},
  year     = {2020},
  month    = dec,
  issn     = {1758-2946},
  doi      = {10.1186/s13321-020-00456-1},
  url      = {https://jcheminf.biomedcentral.com/articles/10.1186/s13321-020-00456-1},
  urldate  = {2023-01-12},
  language = {en}
}

@article{lehner_dash_2023,
  author     = {Lehner, Marc T. and Katzberger, Paul and Maeder, Niels and Schiebroek, Carl C. G. and Teetz, Jakob and Landrum, Gregory A. and Riniker, Sereina},
  title      = {{DASH}: {Dynamic} {Attention}-{Based} {Substructure} {Hierarchy} for {Partial} {Charge} {Assignment}},
  shorttitle = {{DASH}},
  journal    = {Journal of Chemical Information and Modeling},
  volume     = {63},
  number     = {19},
  pages      = {6014--6028},
  year       = {2023},
  month      = oct,
  issn       = {1549-9596, 1549-960X},
  doi        = {10.1021/acs.jcim.3c00800},
  url        = {https://pubs.acs.org/doi/10.1021/acs.jcim.3c00800},
  urldate    = {2024-10-22},
  language   = {en},
  copyright  = {https://creativecommons.org/licenses/by-nc-nd/4.0/}
}

@misc{https://doi.org/10.48550/arxiv.2410.11527,
  author        = {Guo, Jeff and Schwaller, Philippe},
  title         = {It Takes Two to Tango: Directly Optimizing for Constrained Synthesizability in Generative Molecular Design},
  year          = {2024},
  eprint        = {2410.11527},
  archiveprefix = {arXiv},
  publisher     = {arXiv},
  doi           = {10.48550/ARXIV.2410.11527},
  url           = {https://arxiv.org/abs/2410.11527},
  keywords      = {Biomolecules (q-bio.BM), Machine Learning (cs.LG), FOS: Biological sciences, FOS: Computer and information sciences},
  copyright     = {Creative Commons Attribution 4.0 International}
}

@misc{https://doi.org/10.48550/arxiv.2110.06389,
  author        = {Gao, Wenhao and Mercado, Rocío and Coley, Connor W.},
  title         = {Amortized Tree Generation for Bottom-up Synthesis Planning and Synthesizable Molecular Design},
  year          = {2021},
  eprint        = {2110.06389},
  archiveprefix = {arXiv},
  publisher     = {arXiv},
  doi           = {10.48550/ARXIV.2110.06389},
  url           = {https://arxiv.org/abs/2110.06389},
  keywords      = {Machine Learning (cs.LG), Quantitative Methods (q-bio.QM), FOS: Computer and information sciences, FOS: Biological sciences},
  copyright     = {Creative Commons Attribution 4.0 International}
}

@misc{https://doi.org/10.48550/arxiv.2410.03494,
  author        = {Gao, Wenhao and Luo, Shitong and Coley, Connor W.},
  title         = {Generative Artificial Intelligence for Navigating Synthesizable Chemical Space},
  year          = {2024},
  eprint        = {2410.03494},
  archiveprefix = {arXiv},
  publisher     = {arXiv},
  doi           = {10.48550/ARXIV.2410.03494},
  url           = {https://arxiv.org/abs/2410.03494},
  keywords      = {Machine Learning (cs.LG), Artificial Intelligence (cs.AI), Chemical Physics (physics.chem-ph), Biomolecules (q-bio.BM), FOS: Computer and information sciences, FOS: Physical sciences, FOS: Biological sciences},
  copyright     = {Creative Commons Attribution 4.0 International}
}

@misc{siegel2024corebenchfosteringcredibilitypublished,
  author        = {Siegel, Zachary S. and Kapoor, Sayash and Nadgir, Nitya and Stroebl, Benedikt and Narayanan, Arvind},
  title         = {{CORE-Bench}: Fostering the Credibility of Published Research Through a Computational Reproducibility Agent Benchmark},
  year          = {2024},
  eprint        = {2409.11363},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2409.11363}
}

@misc{kapoor2024aiagentsmatter,
  author        = {Kapoor, Sayash and Stroebl, Benedikt and Siegel, Zachary S. and Nadgir, Nitya and Narayanan, Arvind},
  title         = {{AI} Agents That Matter},
  year          = {2024},
  eprint        = {2407.01502},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  url           = {https://arxiv.org/abs/2407.01502}
}

@misc{jimenez2024swebenchlanguagemodelsresolve,
  author        = {Jimenez, Carlos E. and Yang, John and Wettig, Alexander and Yao, Shunyu and Pei, Kexin and Press, Ofir and Narasimhan, Karthik},
  title         = {{SWE-bench}: Can Language Models Resolve Real-World {GitHub} Issues?},
  year          = {2024},
  eprint        = {2310.06770},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2310.06770}
}

@misc{laurent2024labbenchmeasuringcapabilitieslanguage,
  author        = {Laurent, Jon M. and Janizek, Joseph D. and Ruzo, Michael and Hinks, Michaela M. and Hammerling, Michael J. and Narayanan, Siddharth and Ponnapati, Manvitha and White, Andrew D. and Rodriques, Samuel G.},
  title         = {{LAB-Bench}: Measuring Capabilities of Language Models for Biology Research},
  year          = {2024},
  eprint        = {2407.10362},
  archiveprefix = {arXiv},
  primaryclass  = {cs.AI},
  url           = {https://arxiv.org/abs/2407.10362}
}

@misc{chan2024mlebenchevaluatingmachinelearning,
  author        = {Chan, Jun Shern and Chowdhury, Neil and Jaffe, Oliver and Aung, James and Sherburn, Dane and Mays, Evan and Starace, Giulio and Liu, Kevin and Maksin, Leon and Patwardhan, Tejal and Weng, Lilian and Mądry, Aleksander},
  title         = {{MLE-bench}: Evaluating Machine Learning Agents on Machine Learning Engineering},
  year          = {2024},
  eprint        = {2410.07095},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2410.07095}
}

@misc{huang2024mlagentbenchevaluatinglanguageagents,
  author        = {Huang, Qian and Vora, Jian and Liang, Percy and Leskovec, Jure},
  title         = {{MLAgentBench}: Evaluating Language Agents on Machine Learning Experimentation},
  year          = {2024},
  eprint        = {2310.03302},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  url           = {https://arxiv.org/abs/2310.03302}
}


@article{wei2022chain,
  author  = {Wei, Jason and Wang, Xuezhi and Schuurmans, Dale and Bosma, Maarten and Xia, Fei and Chi, Ed and Le, Quoc V and Zhou, Denny and others},
  title   = {Chain-of-thought prompting elicits reasoning in large language models},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {35},
  pages   = {24824--24837},
  year    = {2022}
}

@article{kojima2022large,
  author  = {Kojima, Takeshi and Gu, Shixiang Shane and Reid, Machel and Matsuo, Yutaka and Iwasawa, Yusuke},
  title   = {Large language models are zero-shot reasoners},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {35},
  pages   = {22199--22213},
  year    = {2022}
}

@misc{fu2023complexitybasedpromptingmultistepreasoning,
  author        = {Fu, Yao and Peng, Hao and Sabharwal, Ashish and Clark, Peter and Khot, Tushar},
  title         = {Complexity-Based Prompting for Multi-Step Reasoning},
  eprint        = {2210.00720},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2210.00720},
  year          = {2023}
}


@misc{yao2023treethoughtsdeliberateproblem,
  author        = {Yao, Shunyu and Yu, Dian and Zhao, Jeffrey and Shafran, Izhak and Griffiths, Thomas L. and Cao, Yuan and Narasimhan, Karthik},
  title         = {Tree of Thoughts: Deliberate Problem Solving with Large Language Models},
  eprint        = {2305.10601},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2305.10601},
  year          = {2023}
}

@article{Fedorenko_2024,
  author    = {Fedorenko, Evelina and Piantadosi, Steven T. and Gibson, Edward A. F.},
  title     = {Language is primarily a tool for communication rather than thought},
  journal   = {Nature},
  volume    = {630},
  number    = {8017},
  pages     = {575--586},
  year      = {2024},
  month     = jun,
  publisher = {Springer Science and Business Media LLC},
  issn      = {1476-4687},
  doi       = {10.1038/s41586-024-07522-w},
  url       = {http://dx.doi.org/10.1038/s41586-024-07522-w}
}


@misc{qin2024o1replicationjourneystrategic,
  author        = {Qin, Yiwei and Li, Xuefeng and Zou, Haoyang and Liu, Yixiu and Xia, Shijie and Huang, Zhen and Ye, Yixin and Yuan, Weizhe and Liu, Hector and Li, Yuanzhi and Liu, Pengfei},
  title         = {O1 Replication Journey: A Strategic Progress Report -- Part 1},
  eprint        = {2410.18982},
  archiveprefix = {arXiv},
  primaryclass  = {cs.AI},
  url           = {https://arxiv.org/abs/2410.18982},
  year          = {2024}
}