Skip to content

Commit

Permalink
cli: gather raw option (#620)
Browse files Browse the repository at this point in the history
* fixes issue #618

openfe gather will not correctly report DDG hydration values

* cli: add --report raw option to gather

this reports the unit estimates individually

* add doc to cli for gather raw option

* cli:  add test for gather raw

* cli:  gather, remove dg-raw option

* Update openfecli/commands/gather.py

Co-authored-by: Irfan Alibay <[email protected]>

* Update environment.yml

* change tyk2 regression to use gather --report ddg

* fixup raw gather test to include MBAR uncertainty label

---------

Co-authored-by: Irfan Alibay <[email protected]>
  • Loading branch information
richardjgowers and IAlibay authored Nov 17, 2023
1 parent 2af8dcd commit 94929f5
Show file tree
Hide file tree
Showing 4 changed files with 140 additions and 37 deletions.
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ dependencies:
- openmmtools
- openmmforcefields>=0.11.2
- perses
- pooch
- py3dmol
- plugcli
- tqdm
Expand Down
55 changes: 40 additions & 15 deletions openfecli/commands/gather.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,15 @@ def _generate_bad_legs_error_message(set_vals, ligpair):
return msg


def _parse_raw_units(results: dict) -> list[tuple]:
# grab individual unit results from master results dict
# returns list of (estimate, uncertainty) tuples
pus = list(results['unit_results'].values())
return [(pu['outputs']['unit_estimate'],
pu['outputs']['unit_estimate_error'])
for pu in pus]


def _get_ddgs(legs, error_on_missing=True):
import numpy as np
DDGs = []
Expand Down Expand Up @@ -171,24 +180,35 @@ def _get_ddgs(legs, error_on_missing=True):
def _write_ddg(legs, writer, allow_partial):
DDGs = _get_ddgs(legs, error_on_missing=not allow_partial)
writer.writerow(["ligand_i", "ligand_j", "DDG(i->j) (kcal/mol)",
"uncertainty (kcal/mol)"])
"uncertainty (kcal/mol)"])
for ligA, ligB, DDGbind, bind_unc, DDGhyd, hyd_unc in DDGs:
name = f"{ligB}, {ligA}"
if DDGbind is not None:
DDGbind, bind_unc = format_estimate_uncertainty(DDGbind,
bind_unc)
DDGbind, bind_unc = format_estimate_uncertainty(DDGbind, bind_unc)
writer.writerow([ligA, ligB, DDGbind, bind_unc])
if DDGhyd is not None:
DDGhyd, hyd_unc = format_estimate_uncertainty(DDGbind,
bind_unc)
DDGhyd, hyd_unc = format_estimate_uncertainty(DDGhyd, hyd_unc)
writer.writerow([ligA, ligB, DDGhyd, hyd_unc])


def _write_dg_raw(legs, writer, allow_partial):
def _write_raw(legs, writer, allow_partial=True):
writer.writerow(["leg", "ligand_i", "ligand_j", "DG(i->j) (kcal/mol)",
"MBAR uncertainty (kcal/mol)"])

for ligpair, vals in sorted(legs.items()):
for simtype, repeats in sorted(vals.items()):
for m, u in repeats:
if m is None:
m, u = 'NaN', 'NaN'
else:
m, u = format_estimate_uncertainty(m.m, u.m)

writer.writerow([simtype, *ligpair, m, u])


def _write_dg_raw(legs, writer, allow_partial): # pragma: no-cover
writer.writerow(["leg", "ligand_i", "ligand_j", "DG(i->j) (kcal/mol)",
"uncertainty (kcal/mol)"])
for ligpair, vals in sorted(legs.items()):
name = ', '.join(ligpair)
for simtype, (m, u) in sorted(vals.items()):
if m is None:
m, u = 'NaN', 'NaN'
Expand Down Expand Up @@ -255,11 +275,12 @@ def _write_dg_mle(legs, writer, allow_partial):
)
@click.argument('rootdir',
type=click.Path(dir_okay=True, file_okay=False,
path_type=pathlib.Path),
path_type=pathlib.Path),
required=True)
@click.option(
'--report',
type=HyphenAwareChoice(['dg', 'ddg', 'dg-raw'], case_sensitive=False),
type=HyphenAwareChoice(['dg', 'ddg', 'raw'],
case_sensitive=False),
default="dg", show_default=True,
help=(
"What data to report. 'dg' gives maximum-likelihood estimate of "
Expand Down Expand Up @@ -293,8 +314,8 @@ def gather(rootdir, output, report, allow_partial):
from DDG replica averages and standard deviations.
* 'ddg' reports pairs of ligand_i and ligand_j, the calculated
relative free energy DDG(i->j) = DG(j) - DG(i) and its uncertainty.
* 'dg-raw' reports the raw results, giving the leg (vacuum, solvent, or
complex), ligand_i, ligand_j, the raw DG(i->j) associated with it.
* 'raw' reports the raw results, which each repeat simulation given
separately (i.e. no combining of redundant simulations is performed)
The output is a table of **tab** separated values. By default, this
outputs to stdout, use the -o option to choose an output file.
Expand All @@ -317,7 +338,7 @@ def gather(rootdir, output, report, allow_partial):
if result is None:
continue
elif result['estimate'] is None or result['uncertainty'] is None:
click.echo(f"WARNING: Calculations for {result_fn} did not finish succesfully!",
click.echo(f"WARNING: Calculations for {result_fn} did not finish successfully!",
err=True)

try:
Expand All @@ -329,7 +350,10 @@ def gather(rootdir, output, report, allow_partial):
except KeyError:
simtype = legacy_get_type(result_fn)

legs[names][simtype] = result['estimate'], result['uncertainty']
if report.lower() == 'raw':
legs[names][simtype] = _parse_raw_units(result)
else:
legs[names][simtype] = result['estimate'], result['uncertainty']

writer = csv.writer(
output,
Expand All @@ -343,7 +367,8 @@ def gather(rootdir, output, report, allow_partial):
writing_func = {
'dg': _write_dg_mle,
'ddg': _write_ddg,
'dg-raw': _write_dg_raw,
# 'dg-raw': _write_dg_raw,
'raw': _write_raw,
}[report.lower()]
writing_func(legs, writer, allow_partial)

Expand Down
90 changes: 88 additions & 2 deletions openfecli/tests/commands/test_gather.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from click.testing import CliRunner
import glob
from importlib import resources
import tarfile
import os
import pathlib
import pytest
import pooch

from openfecli.commands.gather import (
gather, format_estimate_uncertainty, _get_column,
Expand Down Expand Up @@ -85,7 +86,66 @@ def results_dir(tmpdir):
"""


@pytest.mark.parametrize('report', ["", "dg", "ddg", "dg-raw"])
_EXPECTED_RAW = b"""\
leg ligand_i ligand_j DG(i->j) (kcal/mol) MBAR uncertainty (kcal/mol)
complex lig_ejm_31 lig_ejm_42 -14.77 0.04
complex lig_ejm_31 lig_ejm_42 -14.74 0.04
complex lig_ejm_31 lig_ejm_42 -14.94 0.04
solvent lig_ejm_31 lig_ejm_42 -15.68 0.03
solvent lig_ejm_31 lig_ejm_42 -15.69 0.03
solvent lig_ejm_31 lig_ejm_42 -15.64 0.03
complex lig_ejm_31 lig_ejm_46 -40.56 0.06
complex lig_ejm_31 lig_ejm_46 -40.76 0.05
complex lig_ejm_31 lig_ejm_46 -40.90 0.04
solvent lig_ejm_31 lig_ejm_46 -39.92 0.04
solvent lig_ejm_31 lig_ejm_46 -39.94 0.04
solvent lig_ejm_31 lig_ejm_46 -39.95 0.04
complex lig_ejm_31 lig_ejm_47 -27.68 0.08
complex lig_ejm_31 lig_ejm_47 -27.80 0.06
complex lig_ejm_31 lig_ejm_47 -27.51 0.07
solvent lig_ejm_31 lig_ejm_47 -27.83 0.05
solvent lig_ejm_31 lig_ejm_47 -27.84 0.05
solvent lig_ejm_31 lig_ejm_47 -27.88 0.05
complex lig_ejm_31 lig_ejm_48 -16.15 0.08
complex lig_ejm_31 lig_ejm_48 -15.96 0.07
complex lig_ejm_31 lig_ejm_48 -16.01 0.08
solvent lig_ejm_31 lig_ejm_48 -16.83 0.06
solvent lig_ejm_31 lig_ejm_48 -16.65 0.07
solvent lig_ejm_31 lig_ejm_48 -16.77 0.06
complex lig_ejm_31 lig_ejm_50 -57.31 0.04
complex lig_ejm_31 lig_ejm_50 -57.45 0.04
complex lig_ejm_31 lig_ejm_50 -57.37 0.04
solvent lig_ejm_31 lig_ejm_50 -58.33 0.04
solvent lig_ejm_31 lig_ejm_50 -58.42 0.04
solvent lig_ejm_31 lig_ejm_50 -58.19 0.04
complex lig_ejm_42 lig_ejm_43 -19.24 0.04
complex lig_ejm_42 lig_ejm_43 -18.72 0.05
complex lig_ejm_42 lig_ejm_43 -18.94 0.04
solvent lig_ejm_42 lig_ejm_43 -20.17 0.03
solvent lig_ejm_42 lig_ejm_43 -20.28 0.03
solvent lig_ejm_42 lig_ejm_43 -20.23 0.03
complex lig_ejm_46 lig_jmc_23 17.31 0.02
complex lig_ejm_46 lig_jmc_23 17.37 0.02
complex lig_ejm_46 lig_jmc_23 17.35 0.02
solvent lig_ejm_46 lig_jmc_23 17.20 0.02
solvent lig_ejm_46 lig_jmc_23 17.40 0.02
solvent lig_ejm_46 lig_jmc_23 17.30 0.02
complex lig_ejm_46 lig_jmc_27 15.84 0.03
complex lig_ejm_46 lig_jmc_27 15.79 0.03
complex lig_ejm_46 lig_jmc_27 15.80 0.03
solvent lig_ejm_46 lig_jmc_27 16.16 0.03
solvent lig_ejm_46 lig_jmc_27 16.01 0.03
solvent lig_ejm_46 lig_jmc_27 16.07 0.03
complex lig_ejm_46 lig_jmc_28 23.43 0.04
complex lig_ejm_46 lig_jmc_28 23.29 0.04
complex lig_ejm_46 lig_jmc_28 23.17 0.04
solvent lig_ejm_46 lig_jmc_28 23.67 0.03
solvent lig_ejm_46 lig_jmc_28 23.61 0.03
solvent lig_ejm_46 lig_jmc_28 23.65 0.03
"""


@pytest.mark.parametrize('report', ["", "dg", "ddg"])
def test_gather(results_dir, report):
expected = {
"": _EXPECTED_DG,
Expand Down Expand Up @@ -145,3 +205,29 @@ def test_missing_leg_allow_partial(results_dir):
result = runner.invoke(gather,
['results'] + ['--allow-partial', '-o', '-'])
assert result.exit_code == 0


RBFE_RESULTS = pooch.create(
pooch.os_cache('openfe'),
base_url="doi:10.6084/m9.figshare.24542059",
registry={"results.tar.gz": None},
)


@pytest.fixture
def rbfe_results():
# fetches rbfe results from online
# untars into local directory and returns path to this
d = RBFE_RESULTS.fetch('results.tar.gz', processor=pooch.Untar())

return os.path.join(pooch.os_cache('openfe'), 'results.tar.gz.untar', 'results')


@pytest.mark.download
def test_rbfe_results(rbfe_results):
runner = CliRunner()

result = runner.invoke(gather, ['--report', 'raw', rbfe_results])

assert result.exit_code == 0
assert result.stdout_bytes == _EXPECTED_RAW
31 changes: 11 additions & 20 deletions openfecli/tests/test_rbfe_tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,25 +87,16 @@ def fake_execute(*args, **kwargs):
@pytest.fixture
def ref_gather():
return """\
leg ligand_i ligand_j DG(i->j) (kcal/mol) uncertainty (kcal/mol)
complex lig_ejm_31 lig_ejm_42 4.2 0.0
solvent lig_ejm_31 lig_ejm_42 4.2 0.0
complex lig_ejm_31 lig_ejm_46 4.2 0.0
solvent lig_ejm_31 lig_ejm_46 4.2 0.0
complex lig_ejm_31 lig_ejm_47 4.2 0.0
solvent lig_ejm_31 lig_ejm_47 4.2 0.0
complex lig_ejm_31 lig_ejm_48 4.2 0.0
solvent lig_ejm_31 lig_ejm_48 4.2 0.0
complex lig_ejm_31 lig_ejm_50 4.2 0.0
solvent lig_ejm_31 lig_ejm_50 4.2 0.0
complex lig_ejm_42 lig_ejm_43 4.2 0.0
solvent lig_ejm_42 lig_ejm_43 4.2 0.0
complex lig_ejm_46 lig_jmc_23 4.2 0.0
solvent lig_ejm_46 lig_jmc_23 4.2 0.0
complex lig_ejm_46 lig_jmc_27 4.2 0.0
solvent lig_ejm_46 lig_jmc_27 4.2 0.0
complex lig_ejm_46 lig_jmc_28 4.2 0.0
solvent lig_ejm_46 lig_jmc_28 4.2 0.0
ligand_i\tligand_j\tDDG(i->j) (kcal/mol)\tuncertainty (kcal/mol)
lig_ejm_31\tlig_ejm_42\t0.0\t0.0
lig_ejm_31\tlig_ejm_46\t0.0\t0.0
lig_ejm_31\tlig_ejm_47\t0.0\t0.0
lig_ejm_31\tlig_ejm_48\t0.0\t0.0
lig_ejm_31\tlig_ejm_50\t0.0\t0.0
lig_ejm_42\tlig_ejm_43\t0.0\t0.0
lig_ejm_46\tlig_jmc_23\t0.0\t0.0
lig_ejm_46\tlig_jmc_27\t0.0\t0.0
lig_ejm_46\tlig_jmc_28\t0.0\t0.0
"""


Expand All @@ -123,7 +114,7 @@ def test_run_tyk2(tyk2_ligands, tyk2_protein, expected_transformations,
result2 = runner.invoke(quickrun, [fn])
assert result2.exit_code == 0

gather_result = runner.invoke(gather, ["--report", "dg-raw", '.'])
gather_result = runner.invoke(gather, ["--report", "ddg", '.'])

assert gather_result.exit_code == 0
assert gather_result.stdout == ref_gather

0 comments on commit 94929f5

Please sign in to comment.