-
Notifications
You must be signed in to change notification settings - Fork 23
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Testing out what partial charges could look over the CLI #1068
Changes from 7 commits
b716571
a5075eb
020ba71
45b8025
c5f0371
283d9e9
8383718
aa53762
c5e5900
e192f95
43efb18
8602a39
c350c19
b3ef278
c2fc143
d16bc3e
99a891f
73fa615
37add05
c46e7c1
6ab8e9f
4c9a1f0
1da6c5a
80b632c
ea1589a
fe5de58
4bb2461
1c2d40d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
import sys | ||
import warnings | ||
import numpy as np | ||
from gufe import SmallMoleculeComponent | ||
from openff.units import unit | ||
from openff.toolkit import Molecule as OFFMol | ||
from openff.toolkit.utils.base_wrapper import ToolkitWrapper | ||
|
@@ -17,6 +18,7 @@ | |
RDKitToolkitWrapper | ||
) | ||
from openff.toolkit.utils.toolkit_registry import ToolkitRegistry | ||
from openfe.utils.context_managers import temp_env | ||
|
||
try: | ||
import openeye | ||
|
@@ -285,7 +287,7 @@ def assign_offmol_partial_charges( | |
toolkit_backend: Literal['ambertools', 'openeye', 'rdkit'], | ||
generate_n_conformers: Optional[int], | ||
nagl_model: Optional[str], | ||
) -> None: | ||
) -> OFFMol: | ||
""" | ||
Assign partial charges to an OpenFF Molecule based on a selected method. | ||
|
||
|
@@ -297,7 +299,7 @@ def assign_offmol_partial_charges( | |
Whether or not to overwrite any existing non-zero partial charges. | ||
Note that zeroed charges will always be overwritten. | ||
method : Literal['am1bcc', 'am1bccelf10', 'nagl', 'espaloma'] | ||
Partial charge assignement method. | ||
Partial charge assignment method. | ||
Supported methods include; am1bcc, am1bccelf10, nagl, and espaloma. | ||
toolkit_backend : Literal['ambertools', 'openeye', 'rdkit'] | ||
OpenFF toolkit backend employed for charge generation. | ||
|
@@ -318,17 +320,21 @@ def assign_offmol_partial_charges( | |
Raises | ||
------ | ||
ValueError | ||
If the ``toolkit_backend`` is not suported by the selected ``method``. | ||
If the ``toolkit_backend`` is not supported by the selected ``method``. | ||
If ``generate_n_conformers`` is ``None``, but the input ``offmol`` | ||
has no associated conformers. | ||
If the number of conformers passed or generated exceeds the number | ||
of conformers selected by the partial charge ``method``. | ||
|
||
Returns | ||
------- | ||
The Molecule with partial charges assigned. | ||
""" | ||
|
||
# If you have non-zero charges and not overwriting, just return | ||
if (offmol.partial_charges is not None and np.any(offmol.partial_charges)): | ||
if not overwrite: | ||
return | ||
return offmol | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is probably going to silently break a bunch of things (particlularly protocols). Would it be worth doing an There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think that as we still assign the charges to the input molecule and not a copy this should still be fine. I tried to make a small example of this here, which should hopefully mean we don't break anything!
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah sorry - I think I hallucinated that you had removed the offmol_copy reassignment step below. So just to confirm, this now does both an inplace asssignment and returns the offmol itself? I guess, do we want to have the option to not do the inplace assignment? (i.e. could there be cases where this would be useful?) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah it still does in place but I need the return for the multiprocessing to work, we can add an option for not in place but maybe that should be in another PR? |
||
|
||
# Dictionary for each available charge method | ||
# The idea of this pattern is to allow for maximum flexibility by | ||
|
@@ -408,12 +414,106 @@ def assign_offmol_partial_charges( | |
generate_n_conformers=generate_n_conformers, | ||
) # type: ignore | ||
|
||
# Call selected method to assign partial charges | ||
CHARGE_METHODS[method.lower()]['charge_func']( | ||
offmol=offmol_copy, | ||
toolkit_registry=toolkits, | ||
**CHARGE_METHODS[method.lower()]['charge_extra_kwargs'], | ||
) # type: ignore | ||
# limit the number of threads used by SQM | ||
# <https://github.com/openforcefield/openff-toolkit/issues/1831> | ||
with temp_env({"OMP_NUM_THREADS": "2"}): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There's some more pythonic tools for thread limiting like this, let me have a dig in the morning. i.e. we'll need them going forward, so maybe we should bite the bullet and go for it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is what we use in MDAnalysis-land: https://github.com/joblib/threadpoolctl It might be particularly helpful here because I'm not 100% sure all BLAS/LAPACK libraries respect OMP_NUM_THREADS (I'm thinking of you MKL library). There might be better ones too! |
||
# Call selected method to assign partial charges | ||
CHARGE_METHODS[method.lower()]['charge_func']( | ||
offmol=offmol_copy, | ||
toolkit_registry=toolkits, | ||
**CHARGE_METHODS[method.lower()]['charge_extra_kwargs'], | ||
) # type: ignore | ||
|
||
# Copy partial charges back | ||
offmol.partial_charges = offmol_copy.partial_charges | ||
return offmol | ||
|
||
|
||
def bulk_assign_partial_charges( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This looks great! Have you tested if it scales as intended? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes we get a good speed up in local testing, charging 16 malt1 ligands (~42 atoms) with am1bcc (ambertools) takes:
|
||
molecules: list[SmallMoleculeComponent], | ||
overwrite: bool, | ||
method: Literal['am1bcc', 'am1bccelf10', 'nagl', 'espaloma'], | ||
toolkit_backend: Literal['ambertools', 'openeye', 'rdkit'], | ||
generate_n_conformers: Optional[int], | ||
nagl_model: Optional[str], | ||
processors: int = 1, | ||
) -> list[SmallMoleculeComponent]: | ||
""" | ||
Assign partial charges to a list of SmallMoleculeComponents using multiprocessing. | ||
|
||
Parameters | ||
---------- | ||
molecules : list[gufe.SmallMoleculeComponent] | ||
The list of molecules who should have partial charges assigned. | ||
overwrite : bool | ||
Whether or not to overwrite any existing non-zero partial charges. | ||
Note that zeroed charges will always be overwritten. | ||
method : Literal['am1bcc', 'am1bccelf10', 'nagl', 'espaloma'] | ||
Partial charge assignment method. | ||
Supported methods include; am1bcc, am1bccelf10, nagl, and espaloma. | ||
toolkit_backend : Literal['ambertools', 'openeye', 'rdkit'] | ||
OpenFF toolkit backend employed for charge generation. | ||
Supported options: | ||
* ``ambertools``: selects both the AmberTools and RDKit Toolkit Wrapper | ||
* ``openeye``: selects the OpenEye toolkit Wrapper | ||
* ``rdkit``: selects the RDKit toolkit Wrapper | ||
Note that the ``rdkit`` backend cannot be used for `am1bcc` or | ||
``am1bccelf10`` partial charge methods. | ||
generate_n_conformers : Optional[int] | ||
Number of conformers to generate for partial charge generation. | ||
If ``None`` (default), the input conformer will be used. | ||
Values greater than 1 can only be used alongside ``am1bccelf10``. | ||
nagl_model : Optional[str] | ||
The NAGL model to use for charge assignment if method is ``nagl``. | ||
If ``None``, the latest am1bcc NAGL charge model is used. | ||
processors: int, default 1 | ||
The number of processors which should be used to generate the charges. | ||
|
||
Raises | ||
------ | ||
ValueError | ||
If the ``toolkit_backend`` is not supported by the selected ``method``. | ||
If ``generate_n_conformers`` is ``None``, but the input ``offmol`` | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This might actually be the case where we want to not inplace modify charges - what if things fails half way through a charge assignment loop, do we end up with have half our ligands charged and half our ligands uncharged? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If charging fails for one or more molecules the whole loop will break, so we might want to more gracefully handle failing though I am not sure what the end result would be one file with charged ligands and another with fails. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Might be worth testing - could be in a follow-up (for the sake of the CLI if this fails the whole thing fails, but the API is a bit different). |
||
has no associated conformers. | ||
If the number of conformers passed or generated exceeds the number | ||
of conformers selected by the partial charge ``method``. | ||
|
||
Returns | ||
------- | ||
A list of SmallMoleculeComponents with the charges assigned. | ||
""" | ||
import tqdm | ||
|
||
charge_keywords = { | ||
"overwrite": overwrite, | ||
"method": method, | ||
"toolkit_backend": toolkit_backend, | ||
"generate_n_conformers": generate_n_conformers, | ||
"nagl_model": nagl_model | ||
} | ||
charged_ligands = [] | ||
|
||
if processors > 1: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just a reminder to add a test for this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done in 4c9a1f0 |
||
from concurrent.futures import ProcessPoolExecutor, as_completed | ||
from multiprocessing import get_context | ||
|
||
with ProcessPoolExecutor(max_workers=processors, mp_context=get_context("spawn")) as pool: | ||
|
||
work_list = [ | ||
pool.submit( | ||
assign_offmol_partial_charges, | ||
m.to_openff(), | ||
**charge_keywords, | ||
) | ||
for m in molecules | ||
] | ||
|
||
for work in tqdm.tqdm(as_completed(work_list), desc="Generating charges", ncols=80, total=len(molecules)): | ||
charged_ligands.append(SmallMoleculeComponent.from_openff(work.result())) | ||
|
||
else: | ||
for m in tqdm.tqdm(molecules, desc="Generating charges", ncols=80, total=len(molecules)): | ||
mol_with_charge = assign_offmol_partial_charges(m.to_openff(), **charge_keywords) | ||
charged_ligands.append(SmallMoleculeComponent.from_openff(mol_with_charge)) | ||
|
||
return charged_ligands |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
from contextlib import contextmanager | ||
|
||
@contextmanager | ||
def temp_env(environment_variables: dict[str, str]): | ||
""" | ||
Temporarily update the global environment variables using the supplied values. | ||
|
||
Inspired by bespokefit | ||
<https://github.com/openforcefield/openff-bespokefit/blob/1bd79e9a9e4cea982153aed8e9cc6f8a37d65bd8/openff/bespokefit/utilities/_settings.py#L133> | ||
|
||
Parameters | ||
---------- | ||
environment_variables: dict[str, str] | ||
The variables which should be changed while the manager is active. | ||
""" | ||
import os | ||
|
||
old_env = dict(os.environ) | ||
os.environ.update(environment_variables) | ||
|
||
try: | ||
yield | ||
finally: | ||
os.environ.clear() | ||
os.environ.update(old_env) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
import click | ||
from openfecli import OFECommandPlugin | ||
from openfecli.parameters import MOL_DIR, YAML_OPTIONS, OUTPUT_FILE_AND_EXT, WORKERS | ||
|
||
|
||
@click.command( | ||
"charge-molecules", | ||
short_help="Generate partial charges for a set of molecules." | ||
) | ||
@MOL_DIR.parameter( | ||
required=True, help=MOL_DIR.kwargs["help"] + " Any number of sdf paths." | ||
) | ||
@YAML_OPTIONS.parameter( | ||
multiple=False, required=False, default=None, | ||
help=YAML_OPTIONS.kwargs["help"], | ||
) | ||
@OUTPUT_FILE_AND_EXT.parameter( | ||
help="The name of the SDF file the charged ligands should be writen to." | ||
jthorton marked this conversation as resolved.
Show resolved
Hide resolved
|
||
) | ||
@WORKERS.parameter( | ||
help=WORKERS.kwargs["help"], | ||
default=1, | ||
) | ||
@click.option( | ||
"--overwrite-charges", | ||
is_flag=True, | ||
default=False, | ||
help="If we should overwrite the charges already present in the molecules." | ||
jthorton marked this conversation as resolved.
Show resolved
Hide resolved
|
||
) | ||
def charge_molecules( | ||
molecules, | ||
yaml_settings, | ||
output, | ||
workers, | ||
overwrite_charges | ||
): | ||
""" | ||
Generate partial charges for the set of input molecules and write them to file. | ||
""" | ||
from openfecli.utils import write | ||
from openfe.protocols.openmm_utils.charge_generation import bulk_assign_partial_charges | ||
|
||
write("SMALL MOLECULE PARTIAL CHARGE GENERATOR") | ||
write("_________________________________________") | ||
write("") | ||
|
||
write("Parsing in Files: ") | ||
|
||
# INPUT | ||
write("\tGot input: ") | ||
|
||
small_molecules = MOL_DIR.get(molecules) | ||
write( | ||
"\t\tSmall Molecules: " | ||
+ " ".join([str(sm) for sm in small_molecules]) | ||
) | ||
|
||
yaml_options = YAML_OPTIONS.get(yaml_settings) | ||
partial_charge = yaml_options.partial_charge | ||
|
||
write("Using Options:") | ||
write("\tPartial Charge Generation: " + str(partial_charge.partial_charge_method)) | ||
write("") | ||
|
||
charged_molecules = bulk_assign_partial_charges( | ||
molecules=small_molecules, | ||
overwrite=overwrite_charges, | ||
method=partial_charge.partial_charge_method, | ||
toolkit_backend=partial_charge.off_toolkit_backend, | ||
generate_n_conformers=partial_charge.number_of_conformers, | ||
nagl_model=partial_charge.nagl_model, | ||
processors=workers | ||
) | ||
|
||
write("\tDone") | ||
write("") | ||
|
||
# OUTPUT | ||
file, _ = OUTPUT_FILE_AND_EXT.get(output) | ||
write("Output:") | ||
write("\tSaving to: " + file.name) | ||
|
||
# default is write bytes | ||
file.mode = "w" | ||
with file.open() as output: | ||
for mol in charged_molecules: | ||
output.write(mol.to_sdf()) | ||
|
||
|
||
PLUGIN = OFECommandPlugin( | ||
command=charge_molecules, section="Miscellaneous", requires_ofe=(0, 3) | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably another issue/PR - do we want to move this module somewhere else within the repo? It feels like it's more than just "openmm_utils".
Long term I think I want to move it to "openfe-utils" or similar, that way upstream protocols can use them without depending on OpenFE.