Skip to content

Commit

Permalink
Joss paper edits (#51)
Browse files Browse the repository at this point in the history
* resolve #50

* delete comments and resolve #48 , #49

* Create .zenodo.json
  • Loading branch information
pstjohn authored Aug 28, 2022
1 parent 5a4ce42 commit 0a3aefd
Show file tree
Hide file tree
Showing 3 changed files with 161 additions and 139 deletions.
43 changes: 43 additions & 0 deletions .zenodo.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
"creators": [
{
"orcid": "0000-0001-6140-1957",
"email": "[email protected]",
"name": "Dave Biagioni",
"affiliation": "Computational Sciences Center, National Renewable Energy Laboratory, Golden CO 80401, USA"
},
{
"orcid": "0000-0002-5867-3561",
"email": "[email protected]",
"name": "Charles Tripp",
"affiliation": "Computational Sciences Center, National Renewable Energy Laboratory, Golden CO 80401, USA"
},
{
"orcid": "0000-0003-0078-6560",
"email": "[email protected]",
"name": "Struan Clark",
"affiliation": "Computational Sciences Center, National Renewable Energy Laboratory, Golden CO 80401, USA"
},
{
"orcid": "0000-0001-5132-0168",
"email": "[email protected]",
"name": "Dmitry Duplyakin",
"affiliation": "Computational Sciences Center, National Renewable Energy Laboratory, Golden CO 80401, USA"
},
{
"orcid": "0000-0003-2828-1273",
"email": "[email protected]",
"name": "Jeffrey Law",
"affiliation": "Biosciences Center, National Renewable Energy Laboratory, Golden CO 80401, USA"
},
{
"orcid": "0000-0002-7928-3722",
"email": "[email protected]",
"name": "Peter St. John",
"affiliation": "Biosciences Center, National Renewable Energy Laboratory, Golden CO 80401, USA"
}
],
"keywords": ["Python", "reinforcement learning", "graph search", "combinatorial optimization"],
"license": "BSD-3-Clause",
"title": "graphenv: a Python library for reinforcement learning on graph search spaces"
}
34 changes: 23 additions & 11 deletions joss/paper.bib
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ @Article{ battaglia2018relational
and Malinowski, Mateusz and Tacchetti, Andrea and Raposo,
David and Santoro, Adam and Faulkner, Ryan and others},
year = 2018,
journal = {arXiv preprint arXiv:1806.01261}
journal = {arXiv preprint arXiv:1806.01261},
doi = {10.48550/arXiv.1806.01261},
url = {https://arxiv.org/abs/1806.01261}
}

@TechReport{ biagioni2020rlmolecule,
Expand All @@ -19,7 +21,9 @@ @TechReport{ biagioni2020rlmolecule
others},
year = 2020,
institution = {National Renewable Energy Lab.(NREL), Golden, CO (United
States)}
States)},
doi = {10.11578/dc.20201221.3},
url = {https://www.osti.gov/biblio/1737555}
}

@Article{ brockman2016openai,
Expand All @@ -28,7 +32,9 @@ @Article{ brockman2016openai
and Schneider, Jonas and Schulman, John and Tang, Jie and
Zaremba, Wojciech},
year = 2016,
journal = {arXiv preprint arXiv:1606.01540}
journal = {arXiv preprint arXiv:1606.01540},
url = {https://arxiv.org/abs/1606.01540},
doi = {10.48550/arXiv.1606.01540}
}

@InProceedings{ liang2018rllib,
Expand All @@ -40,7 +46,9 @@ @InProceedings{ liang2018rllib
year = 2018,
booktitle = {International Conference on Machine Learning},
pages = {3053--3062},
organization = {PMLR}
organization = {PMLR},
url = {https://arxiv.org/abs/1712.09381},
doi = {10.48550/arXiv.1712.09381}
}

@Article{ Pandey_2021,
Expand Down Expand Up @@ -102,13 +110,16 @@ @Article{ St_John_2020_b
}

@Article{ sv2021multi,
title = {Multi-objective goal-directed optimization of de novo stable organic radicals for aqueous redox flow batteries},
author = {S.V., Shree S. and Law, Jeffrey and Tripp, Charles
and Duplyakin, Dmitry and Skordilis, Erotokritos and
Biagioni, David and Paton, Robert and {St. John}, Peter},
title = {Multi-objective goal-directed optimization of de novo
stable organic radicals for aqueous redox flow batteries},
author = {Shree Sowndarya S. V. and Jeffrey N. Law and Charles E.
Tripp and Dmitry Duplyakin and Erotokritos Skordilis and
David Biagioni and Robert S. Paton and Peter C. St. John},
year = 2022,
journal = {Nature Machine Intelligence},
doi = {10.1038/s42256-022-00506-3}
month = {aug},
journal = {Nature Machine Intelligence},
publisher = {Springer Science and Business Media {LLC}},
doi = {10.1038/s42256-022-00506-3}
}

@InCollection{ Zheng_2020,
Expand All @@ -128,7 +139,8 @@ @inproceedings{
author={Antoine Prouvost and Justin Dumouchelle and Lara Scavuzzo and Maxime Gasse and Didier Ch{\'e}telat and Andrea Lodi},
booktitle={Learning Meets Combinatorial Algorithms at NeurIPS2020},
year={2020},
url={https://openreview.net/forum?id=IVc9hqgibyB}
url={https://openreview.net/forum?id=IVc9hqgibyB},
doi={10.48550/arXiv.2011.06069}
}

@Article{ Zhou_2019,
Expand Down
223 changes: 95 additions & 128 deletions joss/paper.md
Original file line number Diff line number Diff line change
@@ -1,18 +1,12 @@
---
title: 'graphenv: a Python library for reinforcement learning on graph search spaces'
title:
'graphenv: a Python library for reinforcement learning on graph search spaces'
tags:
- Python
- reinforcement learning
- graph search
- combinatorial optimization
authors:
# - name: Adrian M. Price-Whelan^[Co-first author] # note this makes a footnote saying 'Co-first author'
# orcid: 0000-0000-0000-0000
# affiliation: "1, 2" # (Multiple affiliations must be quoted)
# - name: Author Without ORCID^[Co-first author] # note this makes a footnote saying 'Co-first author'
# affiliation: 2
# - name: Author with no affiliation^[Corresponding author]
# affiliation: 3
- name: David Biagioni
orcid: 0000-0001-6140-1957
affiliation: 2
Expand All @@ -21,146 +15,119 @@ authors:
affiliation: 2
- name: Struan Clark
affiliation: 2
orcid: 0000-0003-0078-6560
orcid: 0000-0003-0078-6560
- name: Dmitry Duplyakin
affiliation: 2
orcid: 0000-0001-5132-0168
orcid: 0000-0001-5132-0168
- name: Jeffrey Law
affiliation: 1
orcid: 0000-0003-2828-1273
- name: Peter C. St. John #^[Corresponding author]
orcid: 0000-0003-2828-1273
- name: Peter C. St. John
orcid: 0000-0002-7928-3722
corresponding: true # (This is how to denote the corresponding author)
affiliation: 1
corresponding: true
affiliation: 1
affiliations:
- name: Biosciences Center, National Renewable Energy Laboratory, Golden CO 80401, USA
index: 1
- name: Computational Sciences Center, National Renewable Energy Laboratory, Golden CO 80401, USA
index: 2

# - name: Institution Name, Country
# index: 2
# - name: Independent Researcher, Country
# index: 3
- name:
Biosciences Center, National Renewable Energy Laboratory, Golden CO 80401,
USA
index: 1
- name:
Computational Sciences Center, National Renewable Energy Laboratory,
Golden CO 80401, USA
index: 2
date: 17 May 2022
bibliography: paper.bib

# Optional fields if submitting to a AAS journal too, see this blog post:
# https://blog.joss.theoj.org/2018/12/a-new-collaboration-with-aas-publishing
# aas-doi: 10.3847/xxxxx <- update this with the DOI from AAS once you know it.
# aas-journal: Astrophysical Journal <- The name of the AAS journal.
---

# Summary

Many important and challenging problems in combinatorial optimization (CO) can be
expressed as graph search problems, in which graph vertices represent full or partial
solutions and edges represent decisions that connect them.
Graph structure not only introduces strong _relational inductive biases_
for learning [@battaglia2018relational] -- in this context, by providing a way to
explicitly model the value of transitioning (along edges) between one search state (vertex)
and the next -- but lends itself to problems both with and without clearly
defined algebraic structure. For example, classic CO problems on graphs such as the Traveling Salesman Problem (TSP) can be expressed as either pure graph search _or_ integer programs. Other problems, however, such as molecular optimization, do not have concise algebraic formulations and yet are readily
implemented as a graph search [@Zhou_2019;@sv2021multi]. Such "model-free" problems constitute
a large fraction of modern reinforcement learning (RL) research owing to the fact that it is often much easier to write a forward simulation that expresses all of the state transitions and rewards, than to write down the precise mathematical expression of the full optimization problem. In the case of
molecular optimization, for example, one can use domain knowledge alongside existing software
libraries to model the effect of adding a single bond or atom to an existing but incomplete
molecule, and let the RL algorithm build a model of how good a given decision is by "experiencing"
the simulated environment many times through. In contrast, a model-based mathematical
formulation that fully expresses all the chemical and physical constraints is intractable.

In recent years, RL has emerged as an effective paradigm for optimizing searches over graphs
and led to state-of-the-art heuristics for games like Go and chess, as well as for classical CO
problems such as the Traveling Salesman Problem (TSP). This combination of graph search
and RL, while powerful, requires non-trivial software to execute, especially when
combining advanced state representations such as Graph Neural Networks (GNN) with
scalable RL algorithms.
Many important and challenging problems in combinatorial optimization (CO) can
be expressed as graph search problems, in which graph vertices represent full or
partial solutions and edges represent decisions that connect them. Graph
structure not only introduces strong _relational inductive biases_ for learning
[@battaglia2018relational] -- in this context, by providing a way to explicitly
model the value of transitioning (along edges) between one search state (vertex)
and the next -- but lends itself to problems both with and without clearly
defined algebraic structure. For example, classic CO problems on graphs such as
the Traveling Salesman Problem (TSP) can be expressed as either pure graph
search _or_ integer programs. Other problems, however, such as molecular
optimization, do not have concise algebraic formulations and yet are readily
implemented as a graph search [@Zhou_2019;@sv2021multi]. Such "model-free"
problems constitute a large fraction of modern reinforcement learning (RL)
research owing to the fact that it is often much easier to write a forward
simulation that expresses all of the state transitions and rewards, than to
write down the precise mathematical expression of the full optimization problem.
In the case of molecular optimization, for example, one can use domain knowledge
alongside existing software libraries to model the effect of adding a single
bond or atom to an existing but incomplete molecule, and let the RL algorithm
build a model of how good a given decision is by "experiencing" the simulated
environment many times through. In contrast, a model-based mathematical
formulation that fully expresses all the chemical and physical constraints is
intractable.

In recent years, RL has emerged as an effective paradigm for optimizing searches
over graphs and led to state-of-the-art heuristics for games like Go and chess,
as well as for classical CO problems such as the TSP. This combination of graph
search and RL, while powerful, requires non-trivial software to execute,
especially when combining advanced state representations such as Graph Neural
Networks (GNN) with scalable RL algorithms.

# Statement of need

The `graphenv` Python library is designed to 1) make graph search problems more readily
expressible as RL problems via an extension of the OpenAI gym API [@brockman2016openai]
while 2) enabling their solution via scalable learning algorithms in the popular RLlib
library [@liang2018rllib]. The intended audience consists of researchers working on graph search problems that are amenable to a reinforcement learning formulation, broadly described as "learning to optimize". This includes those working on classical combinatorial optimization problems such as the Traveling Salesperson Problem, as well as problems that do not have a clear algebraic expression but where the environment dynamics can be simulated, for instance, molecular design.

RLlib provides convenient, out-of-the-box support for several features that enable the application
of RL to complex search problems (e.g., parametrically-defined actions and invalid action masking).
However, native support for action spaces where the action
_choices_ change for each state is challenging to implement in a computationally
efficient fashion. The `graphenv` library provides utility classes that simplify the
flattening and masking of action observations for choosing from a set of successor
states at every node in a graph search.

Related software efforts have addressed parts of the above need. OpenGraphGym [@Zheng_2020] implements RL-based strategies for common graph optimization challenges such as minimum vertex cover or maximum cut, but does not interface with external RL libraries and has minimal documentation. Ecole [@prouvost2020ecole] provides an OpenAI-like gym environment for combinatorial optimization, but intends to operate in concert with traditional mixed integer solvers rather than directly exposing the environment to an RL agent.

The `graphenv` Python library is designed to 1) make graph search problems more
readily expressible as RL problems via an extension of the OpenAI gym API
[@brockman2016openai] while 2) enabling their solution via scalable learning
algorithms in the popular RLlib library [@liang2018rllib]. The intended audience
consists of researchers working on graph search problems that are amenable to a
reinforcement learning formulation, broadly described as "learning to optimize".
This includes those working on classical combinatorial optimization problems
such as the TSP, as well as problems that do not have a clear algebraic
expression but where the environment dynamics can be simulated, for instance,
molecular design.

RLlib provides convenient, out-of-the-box support for several features that
enable the application of RL to complex search problems (e.g.,
parametrically-defined actions and invalid action masking). However, native
support for action spaces where the action _choices_ change for each state is
challenging to implement in a computationally efficient fashion. The `graphenv`
library provides utility classes that simplify the flattening and masking of
action observations for choosing from a set of successor states at every node in
a graph search.

Related software efforts have addressed parts of the above need. OpenGraphGym
[@Zheng_2020] implements RL-based strategies for common graph optimization
challenges such as minimum vertex cover or maximum cut, but does not interface
with external RL libraries and has minimal documentation. Ecole
[@prouvost2020ecole] provides an OpenAI-like gym environment for combinatorial
optimization, but intends to operate in concert with traditional mixed integer
solvers rather than directly exposing the environment to an RL agent.

# Examples of usage

This package is a generalization of methods employed in the optimization of molecular
structure for energy storage applications, funded by US Department of Energy (DOE)'s Advanced Research Projects
Agency - Energy [@sv2021multi]. Specifically, this package enables optimization against
a surrogate objective function based on high-throughput density functional theory
calculations [@Sowndarya_S_V_2021; @St_John_2020_a; @St_John_2020_b] by considering
molecule selection as an iterative process of adding atoms and bonds, transforming the
optimization into a rooted search over a directed, acyclic graph. Ongoing work is
leveraging this library to enable similar optimization for inorganic crystal structures,
again using a surrogate objective function based on high-throughput quantum mechanical
calculations [@Pandey_2021].

This package is a generalization of methods employed in the optimization of
molecular structure for energy storage applications, funded by US Department of
Energy (DOE)'s Advanced Research Projects Agency - Energy [@sv2021multi].
Specifically, this package enables optimization against a surrogate objective
function based on high-throughput density functional theory calculations
[@Sowndarya_S_V_2021; @St_John_2020_a; @St_John_2020_b] by considering molecule
selection as an iterative process of adding atoms and bonds, transforming the
optimization into a rooted search over a directed, acyclic graph. Ongoing work
is leveraging this library to enable similar optimization for inorganic crystal
structures, again using a surrogate objective function based on high-throughput
quantum mechanical calculations [@Pandey_2021].

# Acknowledgements

This work was authored by the National Renewable Energy Laboratory, operated by Alliance
for Sustainable Energy, LLC, for the US Department of Energy (DOE) under Contract No.
DE-AC36-08GO28308. The information, data, or work presented herein was funded in part by
the Advanced Research Projects Agency-Energy (ARPA-E), U.S. Department of Energy, under
Award Number DE-AR0001205. The views and opinions of authors expressed herein do not
necessarily state or reflect those of the United States Government or any agency
thereof. The US Government retains and the publisher, by accepting the article for
publication, acknowledges that the US Government retains a nonexclusive, paid-up,
irrevocable, worldwide license to publish or reproduce the published form of this work
or allow others to do so, for US Government purposes.
This work was authored by the National Renewable Energy Laboratory, operated by
Alliance for Sustainable Energy, LLC, for the US Department of Energy (DOE)
under Contract No. DE-AC36-08GO28308. The information, data, or work presented
herein was funded in part by the Advanced Research Projects Agency-Energy
(ARPA-E), U.S. Department of Energy, under Award Number DE-AR0001205. The views
and opinions of authors expressed herein do not necessarily state or reflect
those of the United States Government or any agency thereof. The US Government
retains and the publisher, by accepting the article for publication,
acknowledges that the US Government retains a nonexclusive, paid-up,
irrevocable, worldwide license to publish or reproduce the published form of
this work or allow others to do so, for US Government purposes.

# References



<!-- # Mathematics
Single dollars ($) are required for inline mathematics e.g. $f(x) = e^{\pi/x}$
Double dollars make self-standing equations:
$$\Theta(x) = \left\{\begin{array}{l}
0\textrm{ if } x < 0\cr
1\textrm{ else}
\end{array}\right.$$
You can also use plain \LaTeX for equations
\begin{equation}\label{eq:fourier}
\hat f(\omega) = \int_{-\infty}^{\infty} f(x) e^{i\omega x} dx
\end{equation}
and refer to \autoref{eq:fourier} from text. -->

<!-- # Citations
Citations to entries in paper.bib should be in
[rMarkdown](http://rmarkdown.rstudio.com/authoring_bibliographies_and_citations.html)
format.
If you want to cite a software repository URL (e.g. something on GitHub without a preferred
citation) then you can do it with the example BibTeX entry below for @fidgit.
For a quick reference, the following citation commands can be used:
- `@biagioni2020rlmolecule` -> "Author et al. (2001)"
- `[@biagioni2020rlmolecule]` -> "(Author et al., 2001)"
- `[@biagioni2020rlmolecule; @sv2021multi]` -> "(Author1 et al., 2001; Author2 et al., 2002)"
# Figures -->
<!--
Figures can be included like this:
![Caption for example figure.\label{fig:example}](figure.png)
and referenced from text using \autoref{fig:example}.
Figure sizes can be customized by adding an optional second parameter:
![Caption for example figure.](figure.png){ width=20% } -->

0 comments on commit 0a3aefd

Please sign in to comment.