diff --git a/environment.yml b/environment.yml index 4dbf00eb8..7de2f35c1 100644 --- a/environment.yml +++ b/environment.yml @@ -26,6 +26,7 @@ dependencies: - openmmtools - openmmforcefields>=0.11.2 - perses + - pooch - py3dmol - plugcli - tqdm diff --git a/openfecli/commands/gather.py b/openfecli/commands/gather.py index ab6a879e4..9e6aa42f3 100644 --- a/openfecli/commands/gather.py +++ b/openfecli/commands/gather.py @@ -128,6 +128,15 @@ def _generate_bad_legs_error_message(set_vals, ligpair): return msg +def _parse_raw_units(results: dict) -> list[tuple]: + # grab individual unit results from master results dict + # returns list of (estimate, uncertainty) tuples + pus = list(results['unit_results'].values()) + return [(pu['outputs']['unit_estimate'], + pu['outputs']['unit_estimate_error']) + for pu in pus] + + def _get_ddgs(legs, error_on_missing=True): import numpy as np DDGs = [] @@ -171,24 +180,35 @@ def _get_ddgs(legs, error_on_missing=True): def _write_ddg(legs, writer, allow_partial): DDGs = _get_ddgs(legs, error_on_missing=not allow_partial) writer.writerow(["ligand_i", "ligand_j", "DDG(i->j) (kcal/mol)", - "uncertainty (kcal/mol)"]) + "uncertainty (kcal/mol)"]) for ligA, ligB, DDGbind, bind_unc, DDGhyd, hyd_unc in DDGs: - name = f"{ligB}, {ligA}" if DDGbind is not None: - DDGbind, bind_unc = format_estimate_uncertainty(DDGbind, - bind_unc) + DDGbind, bind_unc = format_estimate_uncertainty(DDGbind, bind_unc) writer.writerow([ligA, ligB, DDGbind, bind_unc]) if DDGhyd is not None: - DDGhyd, hyd_unc = format_estimate_uncertainty(DDGbind, - bind_unc) + DDGhyd, hyd_unc = format_estimate_uncertainty(DDGhyd, hyd_unc) writer.writerow([ligA, ligB, DDGhyd, hyd_unc]) -def _write_dg_raw(legs, writer, allow_partial): +def _write_raw(legs, writer, allow_partial=True): + writer.writerow(["leg", "ligand_i", "ligand_j", "DG(i->j) (kcal/mol)", + "MBAR uncertainty (kcal/mol)"]) + + for ligpair, vals in sorted(legs.items()): + for simtype, repeats in sorted(vals.items()): + for m, u in repeats: + if m is None: + m, u = 'NaN', 'NaN' + else: + m, u = format_estimate_uncertainty(m.m, u.m) + + writer.writerow([simtype, *ligpair, m, u]) + + +def _write_dg_raw(legs, writer, allow_partial): # pragma: no-cover writer.writerow(["leg", "ligand_i", "ligand_j", "DG(i->j) (kcal/mol)", "uncertainty (kcal/mol)"]) for ligpair, vals in sorted(legs.items()): - name = ', '.join(ligpair) for simtype, (m, u) in sorted(vals.items()): if m is None: m, u = 'NaN', 'NaN' @@ -255,11 +275,12 @@ def _write_dg_mle(legs, writer, allow_partial): ) @click.argument('rootdir', type=click.Path(dir_okay=True, file_okay=False, - path_type=pathlib.Path), + path_type=pathlib.Path), required=True) @click.option( '--report', - type=HyphenAwareChoice(['dg', 'ddg', 'dg-raw'], case_sensitive=False), + type=HyphenAwareChoice(['dg', 'ddg', 'raw'], + case_sensitive=False), default="dg", show_default=True, help=( "What data to report. 'dg' gives maximum-likelihood estimate of " @@ -293,8 +314,8 @@ def gather(rootdir, output, report, allow_partial): from DDG replica averages and standard deviations. * 'ddg' reports pairs of ligand_i and ligand_j, the calculated relative free energy DDG(i->j) = DG(j) - DG(i) and its uncertainty. - * 'dg-raw' reports the raw results, giving the leg (vacuum, solvent, or - complex), ligand_i, ligand_j, the raw DG(i->j) associated with it. + * 'raw' reports the raw results, which each repeat simulation given + separately (i.e. no combining of redundant simulations is performed) The output is a table of **tab** separated values. By default, this outputs to stdout, use the -o option to choose an output file. @@ -317,7 +338,7 @@ def gather(rootdir, output, report, allow_partial): if result is None: continue elif result['estimate'] is None or result['uncertainty'] is None: - click.echo(f"WARNING: Calculations for {result_fn} did not finish succesfully!", + click.echo(f"WARNING: Calculations for {result_fn} did not finish successfully!", err=True) try: @@ -329,7 +350,10 @@ def gather(rootdir, output, report, allow_partial): except KeyError: simtype = legacy_get_type(result_fn) - legs[names][simtype] = result['estimate'], result['uncertainty'] + if report.lower() == 'raw': + legs[names][simtype] = _parse_raw_units(result) + else: + legs[names][simtype] = result['estimate'], result['uncertainty'] writer = csv.writer( output, @@ -343,7 +367,8 @@ def gather(rootdir, output, report, allow_partial): writing_func = { 'dg': _write_dg_mle, 'ddg': _write_ddg, - 'dg-raw': _write_dg_raw, + # 'dg-raw': _write_dg_raw, + 'raw': _write_raw, }[report.lower()] writing_func(legs, writer, allow_partial) diff --git a/openfecli/tests/commands/test_gather.py b/openfecli/tests/commands/test_gather.py index 3849e75b9..2eac0c460 100644 --- a/openfecli/tests/commands/test_gather.py +++ b/openfecli/tests/commands/test_gather.py @@ -1,9 +1,10 @@ from click.testing import CliRunner -import glob from importlib import resources import tarfile +import os import pathlib import pytest +import pooch from openfecli.commands.gather import ( gather, format_estimate_uncertainty, _get_column, @@ -85,7 +86,66 @@ def results_dir(tmpdir): """ -@pytest.mark.parametrize('report', ["", "dg", "ddg", "dg-raw"]) +_EXPECTED_RAW = b"""\ +leg ligand_i ligand_j DG(i->j) (kcal/mol) MBAR uncertainty (kcal/mol) +complex lig_ejm_31 lig_ejm_42 -14.77 0.04 +complex lig_ejm_31 lig_ejm_42 -14.74 0.04 +complex lig_ejm_31 lig_ejm_42 -14.94 0.04 +solvent lig_ejm_31 lig_ejm_42 -15.68 0.03 +solvent lig_ejm_31 lig_ejm_42 -15.69 0.03 +solvent lig_ejm_31 lig_ejm_42 -15.64 0.03 +complex lig_ejm_31 lig_ejm_46 -40.56 0.06 +complex lig_ejm_31 lig_ejm_46 -40.76 0.05 +complex lig_ejm_31 lig_ejm_46 -40.90 0.04 +solvent lig_ejm_31 lig_ejm_46 -39.92 0.04 +solvent lig_ejm_31 lig_ejm_46 -39.94 0.04 +solvent lig_ejm_31 lig_ejm_46 -39.95 0.04 +complex lig_ejm_31 lig_ejm_47 -27.68 0.08 +complex lig_ejm_31 lig_ejm_47 -27.80 0.06 +complex lig_ejm_31 lig_ejm_47 -27.51 0.07 +solvent lig_ejm_31 lig_ejm_47 -27.83 0.05 +solvent lig_ejm_31 lig_ejm_47 -27.84 0.05 +solvent lig_ejm_31 lig_ejm_47 -27.88 0.05 +complex lig_ejm_31 lig_ejm_48 -16.15 0.08 +complex lig_ejm_31 lig_ejm_48 -15.96 0.07 +complex lig_ejm_31 lig_ejm_48 -16.01 0.08 +solvent lig_ejm_31 lig_ejm_48 -16.83 0.06 +solvent lig_ejm_31 lig_ejm_48 -16.65 0.07 +solvent lig_ejm_31 lig_ejm_48 -16.77 0.06 +complex lig_ejm_31 lig_ejm_50 -57.31 0.04 +complex lig_ejm_31 lig_ejm_50 -57.45 0.04 +complex lig_ejm_31 lig_ejm_50 -57.37 0.04 +solvent lig_ejm_31 lig_ejm_50 -58.33 0.04 +solvent lig_ejm_31 lig_ejm_50 -58.42 0.04 +solvent lig_ejm_31 lig_ejm_50 -58.19 0.04 +complex lig_ejm_42 lig_ejm_43 -19.24 0.04 +complex lig_ejm_42 lig_ejm_43 -18.72 0.05 +complex lig_ejm_42 lig_ejm_43 -18.94 0.04 +solvent lig_ejm_42 lig_ejm_43 -20.17 0.03 +solvent lig_ejm_42 lig_ejm_43 -20.28 0.03 +solvent lig_ejm_42 lig_ejm_43 -20.23 0.03 +complex lig_ejm_46 lig_jmc_23 17.31 0.02 +complex lig_ejm_46 lig_jmc_23 17.37 0.02 +complex lig_ejm_46 lig_jmc_23 17.35 0.02 +solvent lig_ejm_46 lig_jmc_23 17.20 0.02 +solvent lig_ejm_46 lig_jmc_23 17.40 0.02 +solvent lig_ejm_46 lig_jmc_23 17.30 0.02 +complex lig_ejm_46 lig_jmc_27 15.84 0.03 +complex lig_ejm_46 lig_jmc_27 15.79 0.03 +complex lig_ejm_46 lig_jmc_27 15.80 0.03 +solvent lig_ejm_46 lig_jmc_27 16.16 0.03 +solvent lig_ejm_46 lig_jmc_27 16.01 0.03 +solvent lig_ejm_46 lig_jmc_27 16.07 0.03 +complex lig_ejm_46 lig_jmc_28 23.43 0.04 +complex lig_ejm_46 lig_jmc_28 23.29 0.04 +complex lig_ejm_46 lig_jmc_28 23.17 0.04 +solvent lig_ejm_46 lig_jmc_28 23.67 0.03 +solvent lig_ejm_46 lig_jmc_28 23.61 0.03 +solvent lig_ejm_46 lig_jmc_28 23.65 0.03 +""" + + +@pytest.mark.parametrize('report', ["", "dg", "ddg"]) def test_gather(results_dir, report): expected = { "": _EXPECTED_DG, @@ -145,3 +205,29 @@ def test_missing_leg_allow_partial(results_dir): result = runner.invoke(gather, ['results'] + ['--allow-partial', '-o', '-']) assert result.exit_code == 0 + + +RBFE_RESULTS = pooch.create( + pooch.os_cache('openfe'), + base_url="doi:10.6084/m9.figshare.24542059", + registry={"results.tar.gz": None}, +) + + +@pytest.fixture +def rbfe_results(): + # fetches rbfe results from online + # untars into local directory and returns path to this + d = RBFE_RESULTS.fetch('results.tar.gz', processor=pooch.Untar()) + + return os.path.join(pooch.os_cache('openfe'), 'results.tar.gz.untar', 'results') + + +@pytest.mark.download +def test_rbfe_results(rbfe_results): + runner = CliRunner() + + result = runner.invoke(gather, ['--report', 'raw', rbfe_results]) + + assert result.exit_code == 0 + assert result.stdout_bytes == _EXPECTED_RAW diff --git a/openfecli/tests/test_rbfe_tutorial.py b/openfecli/tests/test_rbfe_tutorial.py index 2b2f102d1..89c55829c 100644 --- a/openfecli/tests/test_rbfe_tutorial.py +++ b/openfecli/tests/test_rbfe_tutorial.py @@ -87,25 +87,16 @@ def fake_execute(*args, **kwargs): @pytest.fixture def ref_gather(): return """\ -leg ligand_i ligand_j DG(i->j) (kcal/mol) uncertainty (kcal/mol) -complex lig_ejm_31 lig_ejm_42 4.2 0.0 -solvent lig_ejm_31 lig_ejm_42 4.2 0.0 -complex lig_ejm_31 lig_ejm_46 4.2 0.0 -solvent lig_ejm_31 lig_ejm_46 4.2 0.0 -complex lig_ejm_31 lig_ejm_47 4.2 0.0 -solvent lig_ejm_31 lig_ejm_47 4.2 0.0 -complex lig_ejm_31 lig_ejm_48 4.2 0.0 -solvent lig_ejm_31 lig_ejm_48 4.2 0.0 -complex lig_ejm_31 lig_ejm_50 4.2 0.0 -solvent lig_ejm_31 lig_ejm_50 4.2 0.0 -complex lig_ejm_42 lig_ejm_43 4.2 0.0 -solvent lig_ejm_42 lig_ejm_43 4.2 0.0 -complex lig_ejm_46 lig_jmc_23 4.2 0.0 -solvent lig_ejm_46 lig_jmc_23 4.2 0.0 -complex lig_ejm_46 lig_jmc_27 4.2 0.0 -solvent lig_ejm_46 lig_jmc_27 4.2 0.0 -complex lig_ejm_46 lig_jmc_28 4.2 0.0 -solvent lig_ejm_46 lig_jmc_28 4.2 0.0 +ligand_i\tligand_j\tDDG(i->j) (kcal/mol)\tuncertainty (kcal/mol) +lig_ejm_31\tlig_ejm_42\t0.0\t0.0 +lig_ejm_31\tlig_ejm_46\t0.0\t0.0 +lig_ejm_31\tlig_ejm_47\t0.0\t0.0 +lig_ejm_31\tlig_ejm_48\t0.0\t0.0 +lig_ejm_31\tlig_ejm_50\t0.0\t0.0 +lig_ejm_42\tlig_ejm_43\t0.0\t0.0 +lig_ejm_46\tlig_jmc_23\t0.0\t0.0 +lig_ejm_46\tlig_jmc_27\t0.0\t0.0 +lig_ejm_46\tlig_jmc_28\t0.0\t0.0 """ @@ -123,7 +114,7 @@ def test_run_tyk2(tyk2_ligands, tyk2_protein, expected_transformations, result2 = runner.invoke(quickrun, [fn]) assert result2.exit_code == 0 - gather_result = runner.invoke(gather, ["--report", "dg-raw", '.']) + gather_result = runner.invoke(gather, ["--report", "ddg", '.']) assert gather_result.exit_code == 0 assert gather_result.stdout == ref_gather