tests fixed
tjlane committed Aug 22, 2024
1 parent f7b551d commit a676ce8
Showing 8 changed files with 104 additions and 134 deletions.
7 changes: 2 additions & 5 deletions meteor/scale.py
@@ -1,11 +1,8 @@
import reciprocalspaceship as rs


def scale_structure_factors(
reference: rs.DataSet, dataset_to_scale: rs.DataSet
) -> rs.DataSet:
"""
Apply an anisotropic scaling so that `dataset_to_scale` is on the same scale as `reference`.
def scale_structure_factors(reference: rs.DataSet, dataset_to_scale: rs.DataSet) -> rs.DataSet:
"""Apply an anisotropic scaling so that `dataset_to_scale` is on the same scale as `reference`.
C * exp{ -(h**2 B11 + k**2 B22 + l**2 B33 +
2hk B12 + 2hl B13 + 2kl B23) }
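
The exponent in that docstring is the standard anisotropic scale factor evaluated per reflection (h, k, l). Below is a minimal numpy sketch of applying it; the function and parameter names (c, b11 ... b23) are illustrative, not meteor's fitting code, which determines the parameters from the reference dataset:

import numpy as np

def apply_anisotropic_scale(h, k, l, amplitudes, c, b11, b22, b33, b12, b13, b23):
    # exponent of the anisotropic term quoted in the docstring
    exponent = -(
        h**2 * b11 + k**2 * b22 + l**2 * b33
        + 2 * h * k * b12 + 2 * h * l * b13 + 2 * k * l * b23
    )
    # scale each amplitude by C * exp{...}
    return c * np.exp(exponent) * amplitudes
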
25 changes: 8 additions & 17 deletions meteor/tv.py
@@ -28,9 +28,7 @@ class TvDenoiseResult:


def _tv_denoise_array(*, map_as_array: np.ndarray, weight: float) -> np.ndarray:
"""
Closure convenience function to generate more readable code.
"""
"""Closure convienence function to generate more readable code."""
denoised_map = denoise_tv_chambolle(
map_as_array,
weight=weight,
@@ -62,13 +60,12 @@ def tv_denoise_difference_map(

def tv_denoise_difference_map(
difference_map_coefficients: rs.DataSet,
full_output: bool = True,
full_output: bool = False,
difference_map_amplitude_column: str = "DF",
difference_map_phase_column: str = "PHIC",
lambda_values_to_scan: Sequence[float] | None = None,
) -> rs.DataSet | tuple[rs.DataSet, TvDenoiseResult]:
"""
Single-pass TV denoising of a difference map.
"""Single-pass TV denoising of a difference map.
Automatically selects the optimal level of regularization (the TV lambda parameter) by maximizing the negentropy of the denoised map. Two modes can be used to dictate which candidate values of lambda are assessed:

Check failure on line 70 in meteor/tv.py (GitHub Actions / build (3.11)), Ruff E501: meteor/tv.py:70:101 Line too long (217 > 100)
@@ -83,7 +80,7 @@ def tv_denoise_difference_map(
difference map.
full_output : bool, optional
If `True`, the function returns both the denoised map coefficients and a `TvDenoiseResult` object containing the optimal

Check failure on line 82 in meteor/tv.py (GitHub Actions / build (3.11)), Ruff E501: meteor/tv.py:82:101 Line too long (128 > 100)
lambda and the associated negentropy. If `False`, only the denoised map coefficients are returned. Default is `True`.
lambda and the associated negentropy. If `False`, only the denoised map coefficients are returned. Default is `False`.

Check failure on line 83 in meteor/tv.py (GitHub Actions / build (3.11)), Ruff E501: meteor/tv.py:83:101 Line too long (126 > 100)
difference_map_amplitude_column : str, optional
The column name in `difference_map_coefficients` that contains the amplitude values for the difference map. Default is "DF".

Check failure on line 85 in meteor/tv.py (GitHub Actions / build (3.11)), Ruff E501: meteor/tv.py:85:101 Line too long (132 > 100)
difference_map_phase_column : str, optional
@@ -117,8 +114,8 @@ def tv_denoise_difference_map(
>>> coefficients = rs.read_mtz("./path/to/difference_map.mtz") # load dataset
>>> denoised_map, result = tv_denoise_difference_map(coefficients, full_output=True)
>>> print(f"Optimal Lambda: {result.optimal_lambda}, Negentropy: {result.optimal_negentropy}")
"""
"""
difference_map = compute_map_from_coefficients(
map_coefficients=difference_map_coefficients,
amplitude_label=difference_map_amplitude_column,
@@ -128,9 +125,7 @@ def tv_denoise_difference_map(
difference_map_as_array = np.array(difference_map.grid)

def negentropy_objective(tv_lambda: float):
denoised_map = _tv_denoise_array(
map_as_array=difference_map_as_array, weight=tv_lambda
)
denoised_map = _tv_denoise_array(map_as_array=difference_map_as_array, weight=tv_lambda)
return -1.0 * negentropy(denoised_map.flatten())

optimal_lambda: float
@@ -152,16 +147,12 @@ def negentropy_objective(tv_lambda: float):
optimizer_result = minimize_scalar(
negentropy_objective, bracket=TV_LAMBDA_RANGE, method="golden"
)
assert (
optimizer_result.success
), "Golden minimization failed to find optimal TV lambda"
assert optimizer_result.success, "Golden minimization failed to find optimal TV lambda"
optimal_lambda = optimizer_result.x
highest_negentropy = negentropy_objective(optimal_lambda)

# denoise using the optimized parameters and convert to an rs.DataSet
final_map = _tv_denoise_array(
map_as_array=difference_map_as_array, weight=optimal_lambda
)
final_map = _tv_denoise_array(map_as_array=difference_map_as_array, weight=optimal_lambda)
final_map_as_ccp4 = numpy_array_to_map(
final_map,
spacegroup=difference_map_coefficients.spacegroup,
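
A usage sketch of the two modes the docstring describes: an explicit lambda scan, and the golden-section search over TV_LAMBDA_RANGE. The MTZ path here is hypothetical and the amplitude/phase columns are left at their defaults ("DF", "PHIC"):

import numpy as np
import reciprocalspaceship as rs
from meteor.tv import tv_denoise_difference_map

coefficients = rs.read_mtz("difference_map.mtz")  # hypothetical input file

# mode 1: evaluate an explicit set of candidate lambdas
denoised = tv_denoise_difference_map(
    difference_map_coefficients=coefficients,
    lambda_values_to_scan=np.logspace(-3, 2, 25),
)

# mode 2: golden-section search; full_output=True also returns diagnostics
denoised, result = tv_denoise_difference_map(
    difference_map_coefficients=coefficients,
    full_output=True,
)
print(result.optimal_lambda, result.optimal_negentropy)
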
4 changes: 3 additions & 1 deletion meteor/utils.py
@@ -51,14 +51,16 @@ def canonicalize_amplitudes(
phase_label: str,
inplace: bool = False,
) -> rs.DataSet | None:
dataset.canonicalize_phases(inplace=inplace)

if not inplace:
dataset = dataset.copy(deep=True)

negative_amplitude_indices = dataset[amplitude_label] < 0.0
dataset[amplitude_label] = np.abs(dataset[amplitude_label])
dataset.loc[negative_amplitude_indices, phase_label] += 180.0

dataset.canonicalize_phases(inplace=True)

if not inplace:
return dataset
else:
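
The reordering above runs the phase canonicalization after the 180-degree shift, so the shifted phases are wrapped back into the canonical range. A small illustration (not the library code) of the convention being enforced: a negative amplitude becomes positive and its phase rotates by 180 degrees, leaving the complex structure factor unchanged.

import numpy as np

amplitude, phase_deg = -2.0, 30.0
canonical_amplitude = abs(amplitude)            # 2.0
canonical_phase = (phase_deg + 180.0) % 360.0   # 210.0

original = amplitude * np.exp(1j * np.deg2rad(phase_deg))
canonical = canonical_amplitude * np.exp(1j * np.deg2rad(canonical_phase))
assert np.isclose(original, canonical)  # same complex structure factor
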
16 changes: 8 additions & 8 deletions meteor/validate.py
@@ -3,44 +3,44 @@


def negentropy(samples: np.ndarray, tolerance: float = 0.1) -> float:
"""
Computes the negentropy of a given sample array.
"""Computes the negentropy of a given sample array.
Negentropy is defined as the difference between the entropy of a given
distribution and the entropy of a Gaussian distribution with the same variance.
It is a measure of non-Gaussianity, with higher values indicating greater deviation
from Gaussianity.
Args:
----
samples (np.ndarray): A numpy array of sample data for which to calculate the negentropy.
tolerance (float): A tolerance level for checking if the negentropy is suspiciously negative.
Defaults to 0.1.
Returns:
-------
float: The computed negentropy of the sample data.
Raises:
------
ValueError: If the computed negentropy is less than the negative tolerance,
indicating potential issues with the computation.
References:
----------
http://gregorygundersen.com/blog/2020/09/01/gaussian-entropy/
Example:
-------
>>> samples = np.random.normal(size=1000)
>>> negentropy(samples)
0.0012 # Example output, varies with input samples.
"""
"""
std = np.std(samples.flatten())
if std <= 0.0:
return np.inf

neg_e = (
0.5 * np.log(2.0 * np.pi * std**2)
+ 0.5
- differential_entropy(samples.flatten())
)
neg_e = 0.5 * np.log(2.0 * np.pi * std**2) + 0.5 - differential_entropy(samples.flatten())
if not neg_e >= -tolerance:
raise ValueError(
f"negentropy is a relatively big negative number {neg_e} that exceeds the tolerance {tolerance} -- something may have gone wrong"
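
Since 0.5 * ln(2 * pi * sigma**2) + 0.5 is exactly the differential entropy of a Gaussian with standard deviation sigma, the negentropy of Gaussian samples should be near zero, while strongly non-Gaussian samples score well above it. A quick sanity check, illustrative rather than part of the commit:

import numpy as np
from meteor.validate import negentropy

rng = np.random.default_rng(0)
gaussian = rng.normal(size=100_000)
# strongly non-Gaussian: two narrow, well-separated modes
bimodal = np.concatenate([rng.normal(-3, 0.1, 50_000), rng.normal(3, 0.1, 50_000)])

print(negentropy(gaussian))  # close to zero
print(negentropy(bimodal))   # clearly positive
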
24 changes: 24 additions & 0 deletions pyproject.toml
@@ -22,3 +22,27 @@ dev = [
[build-system]
requires = ["setuptools >= 61.0"]
build-backend = "setuptools.build_meta"

[tool.ruff]
line-length = 100

lint.select = [
"E", # pycodestyle (PEP 8) rules
"F", # pyflakes rules
"W", # warnings like trailing whitespace
"C90", # specific rules for use of commas, e.g., avoid trailing commas
"I", # isort rules for import sorting
"N", # flake8 naming conventions
"Q", # quote rules (e.g., enforcing consistent quote usage)
"PT", # flake8-pytest-style rules for pytest
]

exclude = [
"build/",
"dist/",
"migrations/",
".venv/",
".git/",
"__pycache__/",
"*.pyc",
]
61 changes: 16 additions & 45 deletions test/conftest.py
@@ -6,54 +6,25 @@
from meteor.utils import canonicalize_amplitudes


@fixture
def random_intensities() -> rs.DataSet:
"""
A simple 10x10x10 P1 dataset, with random intensities
"""

params = (10.0, 10.0, 10.0, 90.0, 90.0, 90.0)
cell = gemmi.UnitCell(*params)
sg_1 = gemmi.SpaceGroup(1)
Hall = rs.utils.generate_reciprocal_asu(cell, sg_1, 1.0, anomalous=False)

H, K, L = Hall.T
ds = rs.DataSet(
{
"H": H,
"K": K,
"L": L,
"IMEAN": np.abs(np.random.randn(len(H))),
},
spacegroup=sg_1,
cell=cell,
).infer_mtz_dtypes()
ds.set_index(["H", "K", "L"], inplace=True)

return ds


@fixture
def flat_difference_map() -> rs.DataSet:
"""
A simple 3x3x3 P1 map, random
"""

params = (10.0, 10.0, 10.0, 90.0, 90.0, 90.0)
cell = gemmi.UnitCell(*params)
sg_1 = gemmi.SpaceGroup(1)
Hall = rs.utils.generate_reciprocal_asu(cell, sg_1, 5.0, anomalous=False)

H, K, L = Hall.T
@fixture()
def random_difference_map() -> rs.DataSet:
resolution = 1.0
cell = gemmi.UnitCell(10.0, 10.0, 10.0, 90.0, 90.0, 90.0)
space_group = gemmi.SpaceGroup(1)
Hall = rs.utils.generate_reciprocal_asu(cell, space_group, resolution, anomalous=False)

h, k, l = Hall.T
number_of_reflections = len(h)

ds = rs.DataSet(
{
"H": H,
"K": K,
"L": L,
"DF": np.random.randn(len(H)),
"PHIC": np.zeros(len(H)),
"H": h,
"K": k,
"L": l,
"DF": np.random.randn(number_of_reflections),
"PHIC": np.random.uniform(-180, 180, size=number_of_reflections),
},
spacegroup=sg_1,
spacegroup=space_group,
cell=cell,
).infer_mtz_dtypes()

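
A sketch of how a test would consume the renamed fixture (hypothetical, not part of the commit); pytest injects it by parameter name:

import reciprocalspaceship as rs

def test_difference_map_has_expected_columns(random_difference_map: rs.DataSet) -> None:
    # the fixture provides "DF" amplitudes and "PHIC" phases
    assert {"DF", "PHIC"}.issubset(random_difference_map.columns)
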
52 changes: 18 additions & 34 deletions test/unit/test_tv.py
@@ -1,7 +1,8 @@
import gemmi
import numpy as np
import reciprocalspaceship as rs
from pytest import fixture
import pytest
from typing import Sequence

from meteor import tv
from meteor.utils import compute_coefficients_from_map, compute_map_from_coefficients
@@ -17,7 +18,6 @@ def _generate_single_carbon_density(
unit_cell: gemmi.UnitCell,
d_min: float,
) -> gemmi.FloatGrid:
# Create a model with a single carbon atom
model = gemmi.Model("single_atom")
chain = gemmi.Chain("A")

@@ -58,22 +58,14 @@ def displaced_single_atom_difference_map_coefficients(
carbon_position1 = (5.0, 5.0, 5.0)
carbon_position2 = (5.1, 5.0, 5.0)

density1 = _generate_single_carbon_density(
carbon_position1, space_group, unit_cell, d_min
)
density2 = _generate_single_carbon_density(
carbon_position2, space_group, unit_cell, d_min
)
density1 = _generate_single_carbon_density(carbon_position1, space_group, unit_cell, d_min)
density2 = _generate_single_carbon_density(carbon_position2, space_group, unit_cell, d_min)

ccp4_map = gemmi.Ccp4Map()
grid_values = (
np.array(density2)
- np.array(density1)
+ noise_sigma * np.random.randn(*density2.shape)
)
ccp4_map.grid = gemmi.FloatGrid(
grid_values.astype(np.float32), unit_cell, space_group
np.array(density2) - np.array(density1) + noise_sigma * np.random.randn(*density2.shape)
)
ccp4_map.grid = gemmi.FloatGrid(grid_values.astype(np.float32), unit_cell, space_group)
ccp4_map.update_ccp4_header()

difference_map_coefficients = compute_coefficients_from_map(
@@ -112,39 +104,42 @@ def rms_between_coefficients(ds1: rs.DataSet, ds2: rs.DataSet) -> float:
return rms


@fixture
@pytest.fixture()
def noise_free_map() -> rs.DataSet:
return displaced_single_atom_difference_map_coefficients(noise_sigma=0.0)


@fixture
@pytest.fixture()
def noisy_map() -> rs.DataSet:
return displaced_single_atom_difference_map_coefficients(noise_sigma=0.03)


def test_tv_denoise_difference_map_smoke(flat_difference_map: rs.DataSet) -> None:
def test_tv_denoise_difference_map_smoke(random_difference_map: rs.DataSet) -> None:
# test a sequence of specified lambda values
tv.tv_denoise_difference_map(
difference_map_coefficients=flat_difference_map,
difference_map_coefficients=random_difference_map,
lambda_values_to_scan=[1.0, 2.0],
)
# test golden optimizer
tv.TV_LAMBDA_RANGE = (1.0, 2.0)
tv.tv_denoise_difference_map(
difference_map_coefficients=flat_difference_map,
difference_map_coefficients=random_difference_map,
)


def test_tv_denoise_difference_map_golden(
noise_free_map: rs.DataSet, noisy_map: rs.DataSet
@pytest.mark.parametrize("lambda_values_to_scan", [None, np.logspace(-3, 2, 100)])
def test_tv_denoise_difference_map(
lambda_values_to_scan: None | Sequence[float], noise_free_map: rs.DataSet, noisy_map: rs.DataSet
) -> None:
rms_before_denoising = rms_between_coefficients(noise_free_map, noisy_map)
denoised_map = tv.tv_denoise_difference_map(
denoised_map, result = tv.tv_denoise_difference_map(
difference_map_coefficients=noisy_map,
lambda_values_to_scan=lambda_values_to_scan,
full_output=True
)
rms_after_denoising = rms_between_coefficients(noise_free_map, denoised_map)
# assert rms_after_denoising < rms_before_denoising
print("xyz", denoised_map.optimal_lambda, rms_before_denoising, rms_after_denoising)
print("xyz", result.optimal_lambda, rms_before_denoising, rms_after_denoising)

testmap = compute_map_from_coefficients(
map_coefficients=noise_free_map,
Expand All @@ -169,14 +164,3 @@ def test_tv_denoise_difference_map_golden(
testmap.write_ccp4_map("denoised.ccp4")


def test_tv_denoise_difference_map_specific_lambdas(
noise_free_map: rs.DataSet, noisy_map: rs.DataSet
) -> None:
rms_before_denoising = rms_between_coefficients(noise_free_map, noisy_map)
denoised_map = tv.tv_denoise_difference_map(
difference_map_coefficients=noisy_map,
lambda_values_to_scan=np.logspace(-3, 2, 100),
)
rms_after_denoising = rms_between_coefficients(noise_free_map, denoised_map)
assert rms_after_denoising < rms_before_denoising
print("xyz", denoised_map.optimal_lambda, rms_before_denoising, rms_after_denoising)