Merge branch 'develop' into fix_data_scaling
RandomDefaultUser authored Nov 22, 2024
2 parents 608ba39 + a402f79 commit 1525529
Showing 29 changed files with 481 additions and 331 deletions.
6 changes: 4 additions & 2 deletions docs/source/advanced_usage/predictions.rst
@@ -81,11 +81,13 @@ Gaussian representation of atomic positions. In this algorithm, most of the
computational overhead of the total energy calculation is offloaded to the
computation of this Gaussian representation. This calculation is realized via
LAMMPS and can therefore be GPU accelerated (parallelized) in the same fashion
as the bispectrum descriptor calculation. Simply activate this option via
as the bispectrum descriptor calculation. If a GPU is activated (and LAMMPS
is available), this option will be used by default. It can also manually be
activated via

.. code-block:: python

      parameters.descriptors.use_atomic_density_energy_formula = True
      parameters.use_atomic_density_formula = True
The Gaussian representation algorithm is described in
the publication `Predicting electronic structures at any length scale with machine learning <doi.org/10.1038/s41524-023-01070-z>`_.
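
For orientation, a minimal usage sketch of the renamed switch as introduced in this changeset (the automatic default mirrors the setter logic added to mala/common/parameters.py further down; whether the formula is actually enabled still depends on a GPU being found):

import mala

parameters = mala.Parameters()

# Explicit opt-in, independent of the available hardware:
parameters.use_atomic_density_formula = True

# Or rely on the new default: with LAMMPS enabled (the default) and a GPU
# requested, the setters switch the formula on automatically.
parameters.use_lammps = True
parameters.use_gpu = True
print(parameters.use_atomic_density_formula)  # True once a GPU was found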
1 change: 0 additions & 1 deletion docs/source/conf.py
@@ -72,7 +72,6 @@
"scipy",
"oapackage",
"matplotlib",
"horovod",
"lammps",
"total_energy",
"pqkmeans",
100 changes: 100 additions & 0 deletions examples/advanced/ex10_convert_numpy_openpmd.py
@@ -0,0 +1,100 @@
import mala

from mala.datahandling.data_repo import data_path
import os

parameters = mala.Parameters()
parameters.descriptors.descriptors_contain_xyz = False

# First, convert from Numpy files to openPMD.

data_converter = mala.DataConverter(parameters)

for snapshot in range(2):
    data_converter.add_snapshot(
        descriptor_input_type="numpy",
        descriptor_input_path=os.path.join(
            data_path, "Be_snapshot{}.in.npy".format(snapshot)
        ),
        target_input_type="numpy",
        target_input_path=os.path.join(
            data_path, "Be_snapshot{}.out.npy".format(snapshot)
        ),
        additional_info_input_type=None,
        additional_info_input_path=None,
        target_units=None,
    )

data_converter.convert_snapshots(
    descriptor_save_path="./",
    target_save_path="./",
    additional_info_save_path="./",
    naming_scheme="converted_from_numpy_*.bp5",
    descriptor_calculation_kwargs={"working_directory": "./"},
)

# Convert those files back to Numpy to verify the data stays the same.

data_converter = mala.DataConverter(parameters)

for snapshot in range(2):
    data_converter.add_snapshot(
        descriptor_input_type="openpmd",
        descriptor_input_path="converted_from_numpy_{}.in.bp5".format(
            snapshot
        ),
        target_input_type="openpmd",
        target_input_path="converted_from_numpy_{}.out.bp5".format(snapshot),
        additional_info_input_type=None,
        additional_info_input_path=None,
        target_units=None,
    )

data_converter.convert_snapshots(
    descriptor_save_path="./",
    target_save_path="./",
    additional_info_save_path="./",
    naming_scheme="verify_against_original_numpy_data_*.npy",
    descriptor_calculation_kwargs={"working_directory": "./"},
)

for snapshot in range(2):
    for i_o in ["in", "out"]:
        original = os.path.join(
            data_path, "Be_snapshot{}.{}.npy".format(snapshot, i_o)
        )
        roundtrip = "verify_against_original_numpy_data_{}.{}.npy".format(
            snapshot, i_o
        )
        import numpy as np

        original_a = np.load(original)
        roundtrip_a = np.load(roundtrip)
        np.testing.assert_allclose(original_a, roundtrip_a)

# Now, convert some openPMD data back to Numpy.

data_converter = mala.DataConverter(parameters)

for snapshot in range(2):
    data_converter.add_snapshot(
        descriptor_input_type="openpmd",
        descriptor_input_path=os.path.join(
            data_path, "Be_snapshot{}.in.h5".format(snapshot)
        ),
        target_input_type="openpmd",
        target_input_path=os.path.join(
            data_path, "Be_snapshot{}.out.h5".format(snapshot)
        ),
        additional_info_input_type=None,
        additional_info_input_path=None,
        target_units=None,
    )

data_converter.convert_snapshots(
    descriptor_save_path="./",
    target_save_path="./",
    additional_info_save_path="./",
    naming_scheme="converted_from_openpmd_*.npy",
    descriptor_calculation_kwargs={"working_directory": "./"},
)
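
As a quick, MALA-independent sanity check, the generated openPMD series can also be opened directly; this is a sketch assuming the openpmd-api Python bindings are installed, with the file name taken from the naming scheme used above:

import openpmd_api as io

# Open the first converted descriptor series in read-only mode.
series = io.Series("converted_from_numpy_0.in.bp5", io.Access.read_only)
for index in series.iterations:
    iteration = series.iterations[index]
    # List the mesh records that were written into this iteration.
    print("Iteration", index, "contains meshes:", list(iteration.meshes))
del series  # drop the handle so the series is closed and flushed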
5 changes: 3 additions & 2 deletions external_modules/total_energy_module/total_energy.f90
@@ -11,7 +11,8 @@ SUBROUTINE initialize(file_name, y_planes_in, calculate_eigts_in)
  USE mp_global, ONLY : mp_startup
  USE mp, ONLY : mp_size
  USE read_input, ONLY : read_input_file
  USE command_line_options, ONLY: input_file_, command_line, ndiag_, nyfft_
  USE command_line_options, ONLY: input_file_, command_line, ndiag_, nyfft_, &
                                  pencil_decomposition_
  !
  IMPLICIT NONE
  CHARACTER(len=256) :: srvaddress
@@ -37,9 +38,9 @@ SUBROUTINE initialize(file_name, y_planes_in, calculate_eigts_in)
  IF (PRESENT(y_planes_in)) THEN
    IF (y_planes_in > 1) THEN
      nyfft_ = y_planes_in
      pencil_decomposition_ = .true.
    ENDIF
  ENDIF

  !! checks if first string is contained in the second
  !
  CALL mp_startup ( start_images=.true., images_only=.true.)
108 changes: 87 additions & 21 deletions mala/common/parameters.py
Expand Up @@ -40,6 +40,7 @@ def __init__(
"openpmd_configuration": {},
"openpmd_granularity": 1,
"lammps": True,
"atomic_density_formula": False,
}
pass

@@ -88,6 +89,11 @@ def _update_openpmd_granularity(self, new_granularity):
    def _update_lammps(self, new_lammps):
        self._configuration["lammps"] = new_lammps

    def _update_atomic_density_formula(self, new_atomic_density_formula):
        self._configuration["atomic_density_formula"] = (
            new_atomic_density_formula
        )

    @staticmethod
    def _member_to_json(member):
        if isinstance(member, (int, float, type(None), str)):
@@ -306,9 +312,9 @@ class ParametersDescriptors(ParametersBase):
        descriptors.
    bispectrum_twojmax : int
        Bispectrum calculation: 2*jmax-parameter used for calculation of SNAP
        descriptors. Default value for jmax is 5, so default value for
        twojmax is 10.
        Bispectrum calculation: 2*jmax-parameter used for calculation of
        bispectrum descriptors. Default value for jmax is 5, so default value
        for twojmax is 10.
    lammps_compute_file : string
        Bispectrum calculation: LAMMPS input file that is used to calculate the
@@ -322,11 +328,6 @@ class ParametersDescriptors(ParametersBase):
    atomic_density_sigma : float
        Sigma used for the calculation of the Gaussian descriptors.
    use_atomic_density_energy_formula : bool
        If True, Gaussian descriptors will be calculated for the
        calculation of the Ewald sum as part of the total energy module.
        Default is False.
    """

    def __init__(self):
@@ -356,7 +357,6 @@ def __init__(self):
        # atomic density may be used at the same time, if e.g. bispectrum
        # descriptors are used for a full inference, which then uses the atomic
        # density for the calculation of the Ewald sum.
        self.use_atomic_density_energy_formula = False
        self.atomic_density_sigma = None
        self.atomic_density_cutoff = None

@@ -556,11 +556,6 @@ class ParametersData(ParametersBase):
    Attributes
    ----------
    descriptors_contain_xyz : bool
        Legacy option. If True, it is assumed that the first three entries of
        the descriptor vector are the xyz coordinates and they are cut from the
        descriptor vector. If False, no such cutting is performed.
    snapshot_directories_list : list
        A list of all added snapshots.
@@ -1204,9 +1199,6 @@ class Parameters:
    hyperparameters : ParametersHyperparameterOptimization
        Parameters used for hyperparameter optimization.
    debug : ParametersDebug
        Container for all debugging parameters.
    manual_seed: int
        If not none, this value is used as manual seed for the neural networks.
        Can be used to make experiments comparable. Default: None.
@@ -1238,6 +1230,7 @@ def __init__(self):
        # different.
        self.openpmd_granularity = 1
        self.use_lammps = True
        self.use_atomic_density_formula = False

    @property
    def openpmd_granularity(self):
@@ -1289,7 +1282,7 @@ def verbosity(self, value):

    @property
    def use_gpu(self):
        """Control whether or not a GPU is used (provided there is one)."""
        """Control whether a GPU is used (provided there is one)."""
        return self._use_gpu

    @use_gpu.setter
@@ -1304,6 +1297,12 @@ def use_gpu(self, value):
"GPU requested, but no GPU found. MALA will "
"operate with CPU only."
)
if self._use_gpu and self.use_lammps:
printout(
"Enabling atomic density formula because LAMMPS and GPU "
"are used."
)
self.use_atomic_density_formula = True

        # Invalidate, will be updated in setter.
        self.device = None
@@ -1316,7 +1315,7 @@

    @property
    def use_ddp(self):
        """Control whether or not dd is used for parallel training."""
        """Control whether ddp is used for parallel training."""
        return self._use_ddp

    @use_ddp.setter
@@ -1367,7 +1366,7 @@ def device(self, value):

    @property
    def use_mpi(self):
        """Control whether or not MPI is used for parallel inference."""
        """Control whether MPI is used for parallel inference."""
        return self._use_mpi

    @use_mpi.setter
@@ -1411,19 +1410,67 @@ def openpmd_configuration(self, value):

    @property
    def use_lammps(self):
        """Control whether or not to use LAMMPS for descriptor calculation."""
        """Control whether to use LAMMPS for descriptor calculation."""
        return self._use_lammps

    @use_lammps.setter
    def use_lammps(self, value):
        self._use_lammps = value
        if self.use_gpu and value:
            printout(
                "Enabling atomic density formula because LAMMPS and GPU "
                "are used."
            )
            self.use_atomic_density_formula = True
        self.network._update_lammps(self.use_lammps)
        self.descriptors._update_lammps(self.use_lammps)
        self.targets._update_lammps(self.use_lammps)
        self.data._update_lammps(self.use_lammps)
        self.running._update_lammps(self.use_lammps)
        self.hyperparameters._update_lammps(self.use_lammps)

    @property
    def use_atomic_density_formula(self):
        """Control whether to use the atomic density formula.

        This formula uses a Gaussian representation of the atomic density
        to calculate the structure factor and with it, the Ewald energy
        and parts of the exchange-correlation energy. By using it, one can
        go from N^2 to NlogN scaling, and offloads most of the computational
        overhead of energy calculation from QE to LAMMPS. This is beneficial
        since LAMMPS can benefit from GPU acceleration (QE GPU acceleration
        is not used in the portion of the QE code MALA employs). If set
        to True, this means MALA will perform another LAMMPS calculation
        during inference. The hyperparameters for this atomic density
        calculation are set via the parameters.descriptors object.
        Default is False, except for when both use_gpu and use_lammps
        are True, in which case this value will be set to True as well.
        """
        return self._use_atomic_density_formula

    @use_atomic_density_formula.setter
    def use_atomic_density_formula(self, value):
        self._use_atomic_density_formula = value

        self.network._update_atomic_density_formula(
            self.use_atomic_density_formula
        )
        self.descriptors._update_atomic_density_formula(
            self.use_atomic_density_formula
        )
        self.targets._update_atomic_density_formula(
            self.use_atomic_density_formula
        )
        self.data._update_atomic_density_formula(
            self.use_atomic_density_formula
        )
        self.running._update_atomic_density_formula(
            self.use_atomic_density_formula
        )
        self.hyperparameters._update_atomic_density_formula(
            self.use_atomic_density_formula
        )

    def show(self):
        """Print name and values of all attributes of this object."""
        printout(
@@ -1616,6 +1663,18 @@ def load_from_file(
                    ].from_json(json_dict[key])
                    setattr(loaded_parameters, key, sub_parameters)

                    # Backwards compatibility:
                    if key == "descriptors":
                        if (
                            "use_atomic_density_energy_formula"
                            in json_dict[key]
                        ):
                            loaded_parameters.use_atomic_density_formula = (
                                json_dict[key][
                                    "use_atomic_density_energy_formula"
                                ]
                            )

            # We iterate a second time, to set global values, so that they
            # are properly forwarded.
            for key in json_dict:
@@ -1629,6 +1688,13 @@
                    setattr(loaded_parameters, key, json_dict[key])
            if no_snapshots is True:
                loaded_parameters.data.snapshot_directories_list = []
            # Backwards compatibility: since the transfer of old property
            # to new property happens _before_ all children descriptor classes
            # are instantiated, it is not properly propagated. Thus, we
            # simply have to set it to its own value again.
            loaded_parameters.use_atomic_density_formula = (
                loaded_parameters.use_atomic_density_formula
            )
        else:
            raise Exception("Unsupported parameter save format.")
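
The backwards-compatibility branch above can be exercised roughly as follows; the file name is hypothetical, and the call assumes the JSON save format handled in this hunk:

import mala

# "old_parameters.json" stands for a parameters file written before this
# change, i.e. one that still stores the per-descriptor flag
# use_atomic_density_energy_formula.
parameters = mala.Parameters.load_from_file("old_parameters.json")

# After loading, the legacy flag has been transferred to the new global
# switch and re-propagated to all parameter sub-objects.
print(parameters.use_atomic_density_formula)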

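As noted in the use_atomic_density_formula docstring above, the hyperparameters of the underlying Gaussian atomic-density calculation still live on the descriptor parameters; a short sketch with placeholder values:

import mala

parameters = mala.Parameters()
parameters.use_atomic_density_formula = True

# Both attributes appear in ParametersDescriptors above; the numbers here
# are placeholders, not recommended settings.
parameters.descriptors.atomic_density_sigma = 0.2
parameters.descriptors.atomic_density_cutoff = 4.0
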
13 changes: 11 additions & 2 deletions mala/common/physical_data.py
@@ -555,6 +555,11 @@ def write_to_openpmd_iteration(
            atoms_openpmd["position"][str(atom)].unit_SI = 1.0e-10
            atoms_openpmd["positionOffset"][str(atom)].unit_SI = 1.0e-10

        if any(i == 0 for i in self.grid_dimensions) and not isinstance(
            array, self.SkipArrayWriting
        ):
            self.grid_dimensions = array.shape[0:-1]

        dataset = (
            array.dataset
            if isinstance(array, self.SkipArrayWriting)
@@ -564,8 +569,12 @@
        # Global feature sizes:
        feature_global_from = 0
        feature_global_to = self.feature_size
        if feature_global_to == 0 and isinstance(array, self.SkipArrayWriting):
            feature_global_to = array.feature_size
        if feature_global_to == 0:
            feature_global_to = (
                array.feature_size
                if isinstance(array, self.SkipArrayWriting)
                else array.shape[-1]
            )

        # First loop: Only metadata, write metadata equivalently across ranks
        for current_feature in range(feature_global_from, feature_global_to):