Merge branch 'main' into output_all_failed_edges

OpenFreeEnergy · Jan 30, 2025 · 78ba28e · 78ba28e
2 parents 6bffeb4 + 870463b
commit 78ba28e
Show file tree

Hide file tree

Showing 15 changed files with 152 additions and 45 deletions.
diff --git a/docs/guide/protocols/absolutesolvation.rst b/docs/guide/protocols/absolutesolvation.rst
@@ -11,7 +11,7 @@ associate with transferring a molecule from vacuum into a solvent.
    Currently, water is the only supported solvent, however, more solvents might be possible in the future.
 
 The absolute solvation free energy is calculated through a thermodynamic cycle. 
-In this cycle, the interactions of the molecule are decoupled, meaning turned off, using a partial annhilation scheme (see below) both in the solvent and in the vacuum phases.
+In this cycle, the interactions of the molecule are decoupled, meaning turned off, using a partial annihilation scheme (see below) both in the solvent and in the vacuum phases.
 The absolute solvation free energy is then obtained via summation of free energy differences along the thermodynamic cycle.
 
 .. figure:: img/ahfe_thermocycle.png
@@ -22,8 +22,8 @@ The absolute solvation free energy is then obtained via summation of free energy
 Scientific Details
 ------------------
 
-Partial annhilation scheme
-~~~~~~~~~~~~~~~~~~~~~~~~~~
+Partial annihilation scheme
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 In the :class:`.AbsoluteSolvationProtocol` the coulombic interactions of the molecule are fully turned off (annihilated). 
 The Lennard-Jones interactions are instead decoupled, meaning the intermolecular interactions turned off, keeping the intramolecular Lennard-Jones interactions.
@@ -41,7 +41,7 @@ Simulation overview
 ~~~~~~~~~~~~~~~~~~~
 
 The :class:`.ProtocolDAG` of the :class:`.AbsoluteSolvationProtocol` contains :class:`.ProtocolUnit`\ s from both the vacuum and solvent transformations.
-This means that both legs of the thermodynamic cycle are constructured and run concurrently in the same :class:`.ProtocolDAG`. This is different from the :class:`.RelativeHybridTopologyProtocol` where the :class:`.ProtocolDAG` only runs a single leg of a thermodynamic cycle.
+This means that both legs of the thermodynamic cycle are constructed and run concurrently in the same :class:`.ProtocolDAG`. This is different from the :class:`.RelativeHybridTopologyProtocol` where the :class:`.ProtocolDAG` only runs a single leg of a thermodynamic cycle.
 If multiple ``protocol_repeats`` are run (default: ``protocol_repeats=3``), the :class:`.ProtocolDAG` contains multiple :class:`.ProtocolUnit`\ s of both vacuum and solvent transformations.
 
 Simulation steps
@@ -52,7 +52,7 @@ Each :class:`.ProtocolUnit` (whether vacuum or solvent) carries out the followin
 1. Parameterize the system using `OpenMMForceFields <https://github.com/openmm/openmmforcefields>`_ and `Open Force Field <https://github.com/openforcefield/openff-forcefields>`_.
 2. Equilibrate the fully interacting system using a short MD simulation using the same approach as the :class:`.PlainMDProtocol` (in the solvent leg this will include rounds of NVT and NPT equilibration).
 3. Create an alchemical system.
-4. Minimize the alchemical sysem.
+4. Minimize the alchemical system.
 5. Equilibrate and production simulate the alchemical system using the chosen multistate sampling method (under NPT conditions if solvent is present).
 6. Analyze results for the transformation.
 

diff --git a/docs/guide/protocols/relativehybridtopology.rst b/docs/guide/protocols/relativehybridtopology.rst
@@ -93,6 +93,9 @@ In addition to the MBAR estimates of the two legs of the thermodynamic cycle and
 the protocol also returns some metrics to help assess convergence of the results,
 these are detailed in the :ref:`multistate analysis section <multistate_analysis>`.
 
+.. note:: The MBAR uncertainty of each individual transformation is estimated using bootstrapping with 1000 iterations.
+          This leads to larger error estimates than the previous error estimation method.
+
 .. todo: issue 792, consolidate this page into its own analysis page and link both RBFE and AFE pages to it
 .. _multistate_analysis:
 

diff --git a/environment.yml b/environment.yml
@@ -29,15 +29,15 @@ dependencies:
   - openmm >=8.0.0,!=8.1.0,<8.2.0
   - openmmtools
   - openmmforcefields
-  - perses
+#  - perses>=0.10.3
   - pooch
   - py3dmol
   - plugcli
   - tqdm
   - pygraphviz
   - zstandard
   # Issue #443
-  - pymbar<4.0
+  - pymbar>4.0
   # docs
   - autodoc-pydantic<2.0
   - pydata-sphinx-theme

diff --git a/news/add_n_protocol_repeats.rst b/news/add_n_protocol_repeats.rst
@@ -0,0 +1,23 @@
+**Added:**
+
+* Added the ``--n-protocol-repeats`` CLI option to allow a user-defined number of repeats per quickrun execution. This allows for parallelizing the execution of repeats by setting ``--n-protocol-repeats=1`` and calling ``quickrun`` on the same input file multiple times.
+
+**Changed:**
+
+* <news item>
+
+**Deprecated:**
+
+* <news item>
+
+**Removed:**
+
+* <news item>
+
+**Fixed:**
+
+* <news item>
+
+**Security:**
+
+* <news item>
diff --git a/news/pymbar_bootstrap.rst b/news/pymbar_bootstrap.rst
@@ -0,0 +1,23 @@
+**Added:**
+
+* <news item>
+
+**Changed:**
+
+* The MBAR bootstrap (1000 iterations) error is now used to estimate protocol uncertainty instead of the statistical uncertainty (one standard deviation), and pymbar3 is no longer supported (`PR#1077 <https://github.com/OpenFreeEnergy/openfe/pull/1077>`_).
+
+**Deprecated:**
+
+* <news item>
+
+**Removed:**
+
+* <news item>
+
+**Fixed:**
+
+* <news item>
+
+**Security:**
+
+* <news item>
diff --git a/openfe/__init__.py b/openfe/__init__.py
@@ -1,3 +1,14 @@
+# silence pymbar logging warnings
+import logging
+def _mute_timeseries(record):
+    return not "Warning on use of the timeseries module:" in record.msg
+def _mute_jax(record):
+    return not "****** PyMBAR will use 64-bit JAX! *******" in record.msg
+_mbar_log = logging.getLogger("pymbar.timeseries")
+_mbar_log.addFilter(_mute_timeseries)
+_mbar_log = logging.getLogger("pymbar.mbar_solvers")
+_mbar_log.addFilter(_mute_jax)
+
 from gufe import (
     ChemicalSystem,
     Component,

diff --git a/openfe/protocols/openmm_utils/multistate_analysis.py b/openfe/protocols/openmm_utils/multistate_analysis.py
@@ -10,6 +10,7 @@
 import numpy.typing as npt
 from openmmtools import multistate
 from openff.units import unit, ensure_quantity
+from pymbar import MBAR
 from pymbar.utils import ParameterError
 from openfe.analysis import plotting
 from typing import Optional, Union
@@ -68,7 +69,7 @@ class MultistateEquilFEAnalysis:
     result_units : unit.Quantity
       Units to report results in.
     forward_reverse_samples : int
-      The number of samples to use in the foward and reverse analysis
+      The number of samples to use in the forward and reverse analysis
       of the free energies. Default 10.
     """
     def __init__(self, reporter: multistate.MultiStateReporter,
@@ -198,37 +199,32 @@ def _get_free_energy(
         Parameters
         ----------
         analyzer : multistate.MultiStateSamplerAnalyzer
-          MultiStateSamplerAnalyzer to extract free eneriges from.
+          MultiStateSamplerAnalyzer to extract free energies from.
         u_ln : npt.NDArray
           A n_states x (n_sampled_states * n_iterations)
           array of energies (in kT).
         N_l : npt.NDArray
           An array containing the total number of samples drawn from each
           state.
-        unit_type : unit.Quantity
-          What units to return the free energies in.
 
         Returns
         -------
         DG : unit.Quantity
           The free energy difference between the end states.
         dDG : unit.Quantity
-          The MBAR error for the free energy difference estimate.
+          The MBAR bootstrap (1000 iterations) error estimate for the free energy difference.
 
         TODO
         ----
         * Allow folks to pass in extra options for bootstrapping etc..
         * Add standard test against analyzer.get_free_energy()
         """
-        mbar = analyzer._create_mbar(u_ln, N_l)
 
-        try:
-            # pymbar 3
-            DF_ij, dDF_ij = mbar.getFreeEnergyDifferences()
-        except AttributeError:
-            r = mbar.compute_free_energy_differences()
-            DF_ij = r['Delta_f']
-            dDF_ij = r['dDelta_f']
+        # pymbar 4
+        mbar = MBAR(u_ln, N_l, solver_protocol="robust", n_bootstraps=1000, bootstrap_solver_protocol="robust")
+        r = mbar.compute_free_energy_differences(compute_uncertainty=True, uncertainty_method="bootstrap")
+        DF_ij = r['Delta_f']
+        dDF_ij = r['dDelta_f']
 
         DG = DF_ij[0, -1] * analyzer.kT
         dDG = dDF_ij[0, -1] * analyzer.kT
@@ -352,15 +348,8 @@ def get_overlap_matrix(self) -> dict[str, npt.NDArray]:
             * ``matrix``: Estimated overlap matrix of observing a sample from
               state i in state j
         """
-        try:
-            # pymbar 3
-            overlap_matrix = self.analyzer.mbar.computeOverlap()
-            # convert matrix to np array
-            overlap_matrix['matrix'] = np.array(overlap_matrix['matrix'])
-        except AttributeError:
-            overlap_matrix = self.analyzer.mbar.compute_overlap()
+        return self.analyzer.mbar.compute_overlap()
 
-        return overlap_matrix
 
     def get_exchanges(self) -> dict[str, npt.NDArray]:
         """
@@ -373,7 +362,7 @@ def get_exchanges(self) -> dict[str, npt.NDArray]:
           A dictionary containing the following:
             * ``eigenvalues``: The sorted (descending) eigenvalues of the
               lambda state transition matrix
-            * ``matrix``: The transition matrix estimate of a replica switchin
+            * ``matrix``: The transition matrix estimate of a replica switching
               from state i to state j.
         """
         # Get replica mixing statistics

diff --git a/openfe/setup/alchemical_network_planner/relative_alchemical_network_planner.py b/openfe/setup/alchemical_network_planner/relative_alchemical_network_planner.py
@@ -303,7 +303,7 @@ class RBFEAlchemicalNetworkPlanner(RelativeAlchemicalNetworkPlanner):
     """
     def __init__(
         self,
-        name: str = "easy_rbfe",
+        name: str = "easy_rbfe",  # TODO: change this default to "" in 2.0
         mappers: Optional[Iterable[LigandAtomMapper]] = None,
         mapping_scorer: Callable[[LigandAtomMapping], float]  = default_lomap_score,
         ligand_network_planner: Callable = generate_minimal_spanning_network,

diff --git a/openfe/tests/protocols/test_openmmutils.py b/openfe/tests/protocols/test_openmmutils.py
@@ -255,7 +255,8 @@ def test_free_energies(self, analyzer):
         ret_dict = analyzer.unit_results_dict
         assert len(ret_dict.items()) == 7
         assert pytest.approx(ret_dict['unit_estimate'].m) == -47.9606
-        assert pytest.approx(ret_dict['unit_estimate_error'].m) == 0.02396789
+        # more variation when using bootstrap errors so we need a looser tolerance
+        assert pytest.approx(ret_dict['unit_estimate_error'].m, rel=1e4) == 0.0251
         # forward and reverse (since we do this ourselves)
         assert_allclose(
             ret_dict['forward_and_reverse_energies']['fractions'],
@@ -269,23 +270,25 @@ def test_free_energies(self, analyzer):
                       -48.025258, -48.006349, -47.986304, -47.972138, -47.960623]),
             rtol=1e-04,
         )
+        # results generated using pymbar3 with 1000 bootstrap iterations
         assert_allclose(
             ret_dict['forward_and_reverse_energies']['forward_dDGs'].m,
-            np.array([0.07471 , 0.052914, 0.041508, 0.036613, 0.032827, 0.030489,
-                      0.028154, 0.026529, 0.025284, 0.023968]),
-            rtol=1e-04,
+            np.array([0.077645, 0.054695, 0.044680, 0.03947, 0.034822,
+                      0.033443, 0.030793, 0.028777, 0.026683, 0.026199]),
+            rtol=1e-01,
         )
         assert_allclose(
             ret_dict['forward_and_reverse_energies']['reverse_DGs'].m,
             np.array([-47.823839, -47.833107, -47.845866, -47.858173, -47.883887,
                       -47.915963, -47.93319, -47.939125, -47.949016, -47.960623]),
             rtol=1e-04,
         )
+        # results generated using pymbar3 with 1000 bootstrap iterations
         assert_allclose(
             ret_dict['forward_and_reverse_energies']['reverse_dDGs'].m,
-            np.array([0.081209, 0.055975, 0.044693, 0.038691, 0.034603, 0.031894,
-                      0.029417, 0.027082, 0.025316, 0.023968]),
-            rtol=1e-04,
+            np.array([0.088335, 0.059483, 0.046254, 0.041504, 0.03877,
+                      0.035495, 0.031981, 0.029707, 0.027095, 0.026296]),
+            rtol=1e-01,
         )
 
     def test_plots(self, analyzer, tmpdir):

diff --git a/openfecli/commands/plan_rbfe_network.py b/openfecli/commands/plan_rbfe_network.py
@@ -5,7 +5,7 @@
 from openfecli.utils import write, print_duration
 from openfecli import OFECommandPlugin
 from openfecli.parameters import (
-    MOL_DIR, PROTEIN, OUTPUT_DIR, COFACTORS, YAML_OPTIONS,
+    MOL_DIR, PROTEIN, OUTPUT_DIR, COFACTORS, YAML_OPTIONS, N_PROTOCOL_REPEATS
 )
 
 def plan_rbfe_network_main(
@@ -16,6 +16,7 @@ def plan_rbfe_network_main(
     solvent,
     protein,
     cofactors,
+    n_protocol_repeats,
 ):
     """Utility method to plan a relative binding free energy network.
 
@@ -34,7 +35,9 @@ def plan_rbfe_network_main(
     protein : ProteinComponent
         protein component for complex simulations, to which the ligands are bound
     cofactors : Iterable[SmallMoleculeComponent]
-        any cofactors alongisde the protein, can be empty list
+        any cofactors alongside the protein, can be empty list
+    n_protocol_repeats : int
+        number of completely independent repeats of the entire sampling process
 
     Returns
     -------
@@ -46,11 +49,17 @@ def plan_rbfe_network_main(
     from openfe.setup.alchemical_network_planner.relative_alchemical_network_planner import (
         RBFEAlchemicalNetworkPlanner,
     )
+    from openfe.setup.alchemical_network_planner.relative_alchemical_network_planner import RelativeHybridTopologyProtocol
+
+    protocol_settings = RelativeHybridTopologyProtocol.default_settings()
+    protocol_settings.protocol_repeats = n_protocol_repeats
+    protocol = RelativeHybridTopologyProtocol(protocol_settings)
 
     network_planner = RBFEAlchemicalNetworkPlanner(
         mappers=mapper,
         mapping_scorer=mapping_scorer,
         ligand_network_planner=ligand_network_planner,
+        protocol=protocol
     )
     alchemical_network = network_planner(
         ligands=small_molecules, solvent=solvent, protein=protein,
@@ -83,11 +92,13 @@ def plan_rbfe_network_main(
     help=OUTPUT_DIR.kwargs["help"] + " Defaults to `./alchemicalNetwork`.",
     default="alchemicalNetwork",
 )
+@N_PROTOCOL_REPEATS.parameter(multiple=False, required=False, default=3, help=N_PROTOCOL_REPEATS.kwargs["help"])
 @print_duration
 def plan_rbfe_network(
         molecules: list[str], protein: str, cofactors: tuple[str],
         yaml_settings: str,
         output_dir: str,
+        n_protocol_repeats: int,
 ):
     """
     Plan a relative binding free energy network, saved as JSON files for
@@ -169,6 +180,7 @@ def plan_rbfe_network(
         solvent=solvent,
         protein=protein,
         cofactors=cofactors,
+        n_protocol_repeats=n_protocol_repeats,
     )
     write("\tDone")
     write("")

diff --git a/openfecli/commands/plan_rhfe_network.py b/openfecli/commands/plan_rhfe_network.py
@@ -8,12 +8,12 @@
 from openfecli.utils import write, print_duration
 from openfecli import OFECommandPlugin
 from openfecli.parameters import (
-    MOL_DIR, MAPPER, OUTPUT_DIR, YAML_OPTIONS,
+    MOL_DIR, MAPPER, OUTPUT_DIR, YAML_OPTIONS, N_PROTOCOL_REPEATS
 )
 
 def plan_rhfe_network_main(
     mapper, mapping_scorer, ligand_network_planner, small_molecules,
-    solvent,
+    solvent, n_protocol_repeats,
 ):
     """Utility method to plan a relative hydration free energy network.
 
@@ -29,6 +29,8 @@ def plan_rhfe_network_main(
         molecules of the system
     solvent : SolventComponent
         Solvent component used for solvation
+    n_protocol_repeats : int
+        number of completely independent repeats of the entire sampling process
 
     Returns
     -------
@@ -39,11 +41,18 @@ def plan_rhfe_network_main(
     from openfe.setup.alchemical_network_planner.relative_alchemical_network_planner import (
         RHFEAlchemicalNetworkPlanner
     )
+    from openfe.setup.alchemical_network_planner.relative_alchemical_network_planner import RelativeHybridTopologyProtocol
+
+
+    protocol_settings = RelativeHybridTopologyProtocol.default_settings()
+    protocol_settings.protocol_repeats = n_protocol_repeats
+    protocol = RelativeHybridTopologyProtocol(protocol_settings)
 
     network_planner = RHFEAlchemicalNetworkPlanner(
         mappers=mapper,
         mapping_scorer=mapping_scorer,
         ligand_network_planner=ligand_network_planner,
+        protocol=protocol
     )
     alchemical_network = network_planner(
         ligands=small_molecules, solvent=solvent
@@ -70,8 +79,10 @@ def plan_rhfe_network_main(
     help=OUTPUT_DIR.kwargs["help"] + " Defaults to `./alchemicalNetwork`.",
     default="alchemicalNetwork",
 )
+@N_PROTOCOL_REPEATS.parameter(multiple=False, required=False, default=3, help=N_PROTOCOL_REPEATS.kwargs["help"])
+
 @print_duration
-def plan_rhfe_network(molecules: List[str], yaml_settings: str, output_dir: str):
+def plan_rhfe_network(molecules: List[str], yaml_settings: str, output_dir: str, n_protocol_repeats:int):
     """
     Plan a relative hydration free energy network, saved as JSON files for
     the quickrun command.
@@ -143,6 +154,7 @@ def plan_rhfe_network(molecules: List[str], yaml_settings: str, output_dir: str)
         ligand_network_planner=ligand_network_planner,
         small_molecules=small_molecules,
         solvent=solvent,
+        n_protocol_repeats=n_protocol_repeats,
     )
     write("\tDone")
     write("")