Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix to bootstrap of free energy surfaces, affecting timing and quantitative results #535

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pymbar/confidenceintervals.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ def qq_plot(replicates, K, title="Generic Q-Q plot", filename="qq.pdf"):
N = len(replicates)
dim = len(np.shape(replicates[0]["error"]))
xvals = scipy.stats.norm.ppf((np.arange(0, N) + 0.5) / N) # inverse pdf
nplots = None # keep lint happy when variables are defined in conditionals

if dim == 0:
nplots = 1
Expand Down
37 changes: 23 additions & 14 deletions pymbar/fes.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
##############################################################################
# pymbar: A Python Library for MBAR (FES module)
#
# Copyright 2019 University of Colorado Boulder
# Copyright 2019-2024 University of Colorado Boulder
#
# Authors: Michael Shirts
#
Expand Down Expand Up @@ -399,11 +399,11 @@ def generate_fes(
0, N_k[k], size=N_k[k]
)
index += N_k[k]
# recompute MBAR.
mbar = pymbar.MBAR(
self.u_kn[:, bootstrap_indices], self.N_k, initial_f_k=self.mbar.f_k
)
x_nb = x_n[bootstrap_indices]
# recompute MBAR.
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was unnecessary - it was running MBAR too many times. This saves approximately 2X time.

mbar = pymbar.MBAR(
self.u_kn[:, bootstrap_indices], self.N_k, initial_f_k=self.mbar.f_k
)
x_nb = x_n[bootstrap_indices]

# Compute unnormalized log weights for the given reduced potential
# u_n, needed for all methods.
Expand Down Expand Up @@ -692,8 +692,9 @@ def _generate_fes_kde(self, b, x_n, w_n):
params = self.kde.get_params() # pylint: disable=access-member-before-definition
kde.set_params(**params)
else:
kde = self.kde
kde.fit(x_n, sample_weight=self.w_n)
kde = self.kde # use these new weights for the KDE
w_n = self.w_n
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I actually can't remember if this was 100% necessary to get updated weights . . .

kde.fit(x_n, sample_weight=w_n)

if b > 0:
self.kdes.append(kde)
Expand Down Expand Up @@ -1358,6 +1359,7 @@ def _get_fes_histogram(
raise ParameterError("Specified reference point for FES not given")

if reference_point in ["from-lowest", "from-specified", "all-differences"]:
j = None # keep lint happy
if reference_point == "from-lowest":
# Determine free energy with lowest free energy to serve as reference point
j = histogram_data["f"].argmin()
Expand Down Expand Up @@ -1415,8 +1417,8 @@ def _get_fes_histogram(
fall = np.zeros([len(histogram_data["f"]), n_bootstraps])
for b in range(n_bootstraps):
h = histogram_datas[b] # just to make this shorter
fall[:, b] = h["f"] - h["f"][j]
df_i = np.std(fall, axis=1)
fall[:, b] = h["f"] - h["f"][j] # subtract out the reference bin
df_i = np.std(fall, ddof=1, axis=1)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixing the std definition.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fix bootstrap std definition


elif reference_point == "from-normalization":
# Determine uncertainties from normalization that \sum_i p_i = 1.
Expand Down Expand Up @@ -1510,7 +1512,7 @@ def _get_fes_histogram(
fall[i, j, b] = (
histogram_datas[b]["f"] - histogram_datas[b]["f"].transpose()
)
dfxij_vals = np.std(fall, axis=2)
dfxij_vals = np.std(fall, ddof=1, axis=2)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixing std definition

if uncertainty_method is not None:
# Return dimensionless free energy and uncertainty.
result_vals["df_ij"] = dfxij_vals
Expand Down Expand Up @@ -1565,9 +1567,15 @@ def _get_fes_kde(
if reference_point == "from-lowest":
fmin = np.min(f_i)
f_i = f_i - fmin
wheremin = np.argmin(
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need to find the location that this is zeroed at for the actual computation of the std.

f_i
) # needed or uncertainties, as we zero by location, not values
elif reference_point == "from-specified":
fmin = -self.kde.score_samples(np.array(fes_reference).reshape(1, -1))
f_i = f_i - fmin
wheremin = np.argmin(
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need to find the location that this is zeroed at for the actual computation of the std.

f_i
) # needed for uncertainties, as we zero by location, not values
elif reference_point == "from-normalization":
# uncertainites "from normalization" reference is already applied, since
# the density is normalized.
Expand All @@ -1594,8 +1602,9 @@ def _get_fes_kde(

fall = np.zeros([len(x), n_bootstraps])
for b in range(n_bootstraps):
fall[:, b] = -self.kdes[b].score_samples(x) - fmin
df_i = np.std(fall, axis=1)
fall[:, b] = -self.kdes[b].score_samples(x)
fall[:, b] -= fall[wheremin, b]
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Zero out at the correct location.

df_i = np.std(fall, ddof=1, axis=1)
else:
raise ParameterError(
f"Uncertainty method {uncertainty_method} for kde is not implemented"
Expand Down Expand Up @@ -1681,7 +1690,7 @@ def _get_fes_spline(
fall = np.zeros(dim_breakdown)
for b in range(n_bootstraps):
fall[:, b] = self.fes_functions[b](x) - fmin
df_i = np.std(fall, axis=-1)
df_i = np.std(fall, ddof=1, axis=-1)

# uncertainites "from normalization" reference is applied, since
# the density is normalized.
Expand Down
1 change: 1 addition & 0 deletions pymbar/mbar.py
Original file line number Diff line number Diff line change
Expand Up @@ -851,6 +851,7 @@ def compute_expectations_inner(
K = self.K
N = self.N # N is total number of samples
result_vals = dict() # dictionary we will store uncertainties in
Theta_ij = None # keep lint happy when variables are defined in conditionals.

# make observables all positive, allowing us to take the logarithm, which is
# required to prevent overflow in some examples.
Expand Down
Loading