support sharing fitting network parameters (#2211)
Add `layer_name` parameter to share network parameters among different
fitting network layers.

Signed-off-by: Jinzhe Zeng <[email protected]>
njzjz authored Jan 10, 2023
1 parent 6bc1f2e commit 6154494
Showing 6 changed files with 294 additions and 11 deletions.
7 changes: 7 additions & 0 deletions deepmd/env.py
@@ -117,6 +117,13 @@ def dlopen_library(module: str, filename: str):
r"final_layer_type_\d+/matrix|"
r"final_layer/bias|"
r"final_layer_type_\d+/bias|"
# layer_name
r"share_.+_type_\d/matrix|"
r"share_.+_type_\d/bias|"
r"share_.+_type_\d/idt|"
r"share_.+/matrix|"
r"share_.+/bias|"
r"share_.+/idt|"
)

TYPE_EMBEDDING_PATTERN = str(
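The alternatives added above extend the fitting-network variable pattern in `deepmd/env.py` so that variables created under the new `share_...` scopes are still recognized when fitting-net variables are collected from a graph. A minimal standalone sketch of what the added branches match (only the `share_` alternatives are reproduced; the full pattern contains additional branches, and the pattern's exact usage in deepmd is assumed here rather than shown in this diff):

```python
# Sketch only: reproduce the newly added "share_" alternatives and check which
# variable names they match.  The real pattern in deepmd/env.py has further
# branches (layer_*, final_layer*, ...) that are not repeated here.
import re

shared_branches = (
    r"share_.+_type_\d/matrix|"
    r"share_.+_type_\d/bias|"
    r"share_.+_type_\d/idt|"
    r"share_.+/matrix|"
    r"share_.+/bias|"
    r"share_.+/idt"
)

for name in ("share_l0_type_0/matrix", "share_l2/bias", "layer_0/matrix"):
    print(name, bool(re.fullmatch(shared_branches, name)))
# share_l0_type_0/matrix True
# share_l2/bias True
# layer_0/matrix False
```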
51 changes: 41 additions & 10 deletions deepmd/fit/ener.py
@@ -85,6 +85,9 @@ class EnerFitting (Fitting):
The precision of the embedding net parameters. Supported options are |PRECISION|
uniform_seed
Only for the purpose of backward compatibility, retrieves the old behavior of using the random seed
layer_name : list[Optional[str]], optional
The name of each layer. If two layers, either in the same fitting or different fittings,
have the same name, they will share the same neural network parameters.
"""
def __init__ (self,
descrpt : tf.Tensor,
@@ -99,7 +102,8 @@ def __init__ (self,
atom_ener : List[float] = [],
activation_function : str = 'tanh',
precision : str = 'default',
uniform_seed: bool = False
uniform_seed: bool = False,
layer_name: Optional[List[Optional[str]]] = None,
) -> None:
"""
Constructor
@@ -133,7 +137,7 @@ def __init__ (self,
self.trainable = trainable
if self.trainable is None:
self.trainable = [True for ii in range(len(self.n_neuron) + 1)]
if type(self.trainable) is bool:
if isinstance(self.trainable, bool):
self.trainable = [self.trainable] * (len(self.n_neuron)+1)
assert(len(self.trainable) == len(self.n_neuron) + 1), 'length of trainable should be that of n_neuron + 1'
self.atom_ener = []
@@ -159,6 +163,10 @@ def __init__ (self,

self.fitting_net_variables = None
self.mixed_prec = None
self.layer_name = layer_name
if self.layer_name is not None:
assert isinstance(self.layer_name, list), 'layer_name should be a list'
assert len(self.layer_name) == len(self.n_neuron) + 1, 'length of layer_name should be that of n_neuron + 1'

def get_numb_fparam(self) -> int:
"""
@@ -295,6 +303,7 @@ def _build_lower(
fparam = None,
aparam = None,
bias_atom_e = 0.0,
type_suffix = '',
suffix = '',
reuse = None
):
@@ -322,12 +331,18 @@ def _build_lower(
else:
one_layer = one_layer_deepmd
for ii in range(0,len(self.n_neuron)) :
if self.layer_name is not None and self.layer_name[ii] is not None:
layer_suffix = 'share_' + self.layer_name[ii] + type_suffix
layer_reuse = tf.AUTO_REUSE
else:
layer_suffix = 'layer_' + str(ii) + type_suffix + suffix
layer_reuse = reuse
if ii >= 1 and self.n_neuron[ii] == self.n_neuron[ii-1] and (not nvnmd_cfg.enable):
layer+= one_layer(
layer,
self.n_neuron[ii],
name='layer_'+str(ii)+suffix,
reuse=reuse,
name=layer_suffix,
reuse=layer_reuse,
seed = self.seed,
use_timestep = self.resnet_dt,
activation_fn = self.fitting_activation_fn,
@@ -340,8 +355,8 @@
layer = one_layer(
layer,
self.n_neuron[ii],
name='layer_'+str(ii)+suffix,
reuse=reuse,
name=layer_suffix,
reuse=layer_reuse,
seed = self.seed,
activation_fn = self.fitting_activation_fn,
precision = self.fitting_precision,
@@ -350,13 +365,19 @@
initial_variables = self.fitting_net_variables,
mixed_prec = self.mixed_prec)
if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift
if self.layer_name is not None and self.layer_name[-1] is not None:
layer_suffix = 'share_' + self.layer_name[-1] + type_suffix
layer_reuse = tf.AUTO_REUSE
else:
layer_suffix = 'final_layer' + type_suffix + suffix
layer_reuse = reuse
final_layer = one_layer(
layer,
1,
activation_fn = None,
bavg = bias_atom_e,
name='final_layer'+suffix,
reuse=reuse,
name=layer_suffix,
reuse=layer_reuse,
seed = self.seed,
precision = self.fitting_precision,
trainable = self.trainable[-1],
@@ -495,14 +516,20 @@ def build (self,
final_layer = self._build_lower(
start_index, natoms[2+type_i],
inputs, fparam, aparam,
bias_atom_e=0., suffix='_type_'+str(type_i)+suffix, reuse=reuse
bias_atom_e=0.,
type_suffix='_type_' + str(type_i),
suffix=suffix,
reuse=reuse,
)
# concat the results
if type_i < len(self.atom_ener) and self.atom_ener[type_i] is not None:
zero_layer = self._build_lower(
start_index, natoms[2+type_i],
inputs_zero, fparam, aparam,
bias_atom_e=0., suffix='_type_'+str(type_i)+suffix, reuse=True
bias_atom_e=0.,
type_suffix='_type_' + str(type_i),
suffix=suffix,
reuse=True,
)
final_layer -= zero_layer
final_layer = tf.reshape(final_layer, [tf.shape(inputs)[0], natoms[2+type_i]])
@@ -578,6 +605,10 @@ def init_variables(self,
suffix to name scope
"""
self.fitting_net_variables = get_fitting_net_variables_from_graph_def(graph_def, suffix=suffix)
if self.layer_name is not None:
# shared variables have no suffix
shared_variables = get_fitting_net_variables_from_graph_def(graph_def, suffix="")
self.fitting_net_variables.update(shared_variables)
if self.numb_fparam > 0:
self.fparam_avg = get_tensor_by_name_from_graph(graph, 'fitting_attr%s/t_fparam_avg' % suffix)
self.fparam_inv_std = get_tensor_by_name_from_graph(graph, 'fitting_attr%s/t_fparam_istd' % suffix)
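The sharing logic added to `_build_lower` above relies on TensorFlow 1.x variable scopes: a layer whose `layer_name` entry is set is built under a scope named `share_<name>` (plus a type suffix where type-specific sub-networks exist) with `reuse=tf.AUTO_REUSE`, so every fitting that passes the same name binds to the same weights. A minimal sketch of that mechanism, using a plain dense layer as a stand-in for deepmd's `one_layer` (the scope names below are illustrative only):

```python
# Sketch only: identical scope names plus AUTO_REUSE make two "fitting
# networks" share the same parameters, while a normally named layer does not.
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()


def dense(x, units, name, reuse):
    # stand-in for deepmd's one_layer; only the naming/reuse behavior matters
    with tf.variable_scope(name, reuse=reuse):
        w = tf.get_variable("matrix", [x.shape[-1], units], tf.float64)
        b = tf.get_variable("bias", [units], tf.float64)
    return tf.matmul(x, w) + b


x = tf.placeholder(tf.float64, [None, 4])
y1 = dense(x, 8, name="share_l0", reuse=tf.AUTO_REUSE)  # first fitting
y2 = dense(x, 8, name="share_l0", reuse=tf.AUTO_REUSE)  # second fitting, same weights
y3 = dense(x, 8, name="layer_0_other", reuse=None)      # unshared layer, own weights

print(sorted(v.name for v in tf.trainable_variables()))
# ['layer_0_other/bias:0', 'layer_0_other/matrix:0',
#  'share_l0/bias:0', 'share_l0/matrix:0']
```

This is also why `init_variables` above queries the graph a second time with `suffix=""`: shared variables live outside the per-model suffix, so they would otherwise be missed when restoring parameters from a frozen graph.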
10 changes: 9 additions & 1 deletion deepmd/utils/argcheck.py
@@ -318,6 +318,13 @@ def fitting_ener():
doc_rcond = 'The condition number used to determine the inital energy shift for each type of atoms.'
doc_seed = 'Random seed for parameter initialization of the fitting net'
doc_atom_ener = 'Specify the atomic energy in vacuum for each type'
doc_layer_name = (
"The name of the each layer. The length of this list should be equal to n_neuron + 1. "
"If two layers, either in the same fitting or different fittings, "
"have the same name, they will share the same neural network parameters. "
"The shape of these layers should be the same. "
"If null is given for a layer, parameters will not be shared."
)

return [
Argument("numb_fparam", int, optional = True, default = 0, doc = doc_numb_fparam),
@@ -329,7 +336,8 @@ def fitting_ener():
Argument("trainable", [list,bool], optional = True, default = True, doc = doc_trainable),
Argument("rcond", float, optional = True, default = 1e-3, doc = doc_rcond),
Argument("seed", [int,None], optional = True, doc = doc_seed),
Argument("atom_ener", list, optional = True, default = [], doc = doc_atom_ener)
Argument("atom_ener", list, optional = True, default = [], doc = doc_atom_ener),
Argument("layer_name", list, optional = True, doc = doc_layer_name),
]


28 changes: 28 additions & 0 deletions doc/train/multi-task-training.md
@@ -93,3 +93,31 @@ Finally, you can perform the modified multi-task training from the frozen model
```bash
$ dp train input.json --init_frz_model graph.pb
```

## Share layers among energy fitting networks

The multi-task training can be used to train multiple levels of energies (e.g. DFT and CCSD(T)) at the same time.
In this situation, one can set {ref}`model/fitting_net[ener]/layer_name` to share some of the layers among fitting networks.
The architecture of the layers with the same name should be the same.

For example, if one wants to share the first and the third hidden layers between two three-hidden-layer fitting networks, the following parameters should be set. Each `layer_name` list has four entries: one for each of the three hidden layers and one for the final output layer.
```json
"fitting_net_dict": {
"ccsd": {
"neuron": [
240,
240,
240
],
"layer_name": ["l0", null, "l2", null]
},
"wb97m": {
"neuron": [
240,
240,
240
],
"layer_name": ["l0", null, "l2", null]
}
}
```
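Because layers that share a name must have identical shapes, it can help to write out the (input, output) dimension of every layer, including the final single-output layer, and check that each name always maps to the same pair. A minimal sketch of that bookkeeping, assuming a hypothetical descriptor output size of 128 (in practice the first layer's input dimension is set by the descriptor and any frame/atomic parameters):

```python
# Sketch only: verify that a layer_name configuration is consistent, i.e. every
# shared name corresponds to layers with the same (in_dim, out_dim).

def layer_shapes(neuron, input_dim):
    """(in_dim, out_dim) for each hidden layer plus the final 1-D output layer."""
    dims = [input_dim] + list(neuron) + [1]
    return list(zip(dims[:-1], dims[1:]))

fittings = {
    "ccsd":  {"neuron": [240, 240, 240], "layer_name": ["l0", None, "l2", None]},
    "wb97m": {"neuron": [240, 240, 240], "layer_name": ["l0", None, "l2", None]},
}

shared = {}
for key, cfg in fittings.items():
    assert len(cfg["layer_name"]) == len(cfg["neuron"]) + 1
    for name, shape in zip(cfg["layer_name"], layer_shapes(cfg["neuron"], input_dim=128)):
        if name is None:
            continue  # unnamed layers are never shared
        assert shared.setdefault(name, shape) == shape, (key, name, shape)

print(shared)  # {'l0': (128, 240), 'l2': (240, 240)}
```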
121 changes: 121 additions & 0 deletions source/tests/test_layer_name.py
@@ -0,0 +1,121 @@
import numpy as np

from deepmd.env import tf
from common import gen_data, del_data, j_loader
from common import DataSystem
from deepmd.descriptor import DescrptSeA
from deepmd.fit import EnerFitting, DipoleFittingSeA
from deepmd.model import MultiModel
from deepmd.common import j_must_have

GLOBAL_ENER_FLOAT_PRECISION = tf.float64
GLOBAL_TF_FLOAT_PRECISION = tf.float64
GLOBAL_NP_FLOAT_PRECISION = np.float64


class TestModel(tf.test.TestCase):
def setUp(self):
gen_data()

def tearDown(self):
del_data()

def test_model(self):
"""Two fittings which share the same parameters should give the same result"""
jfile = 'water_layer_name.json'
jdata = j_loader(jfile)

systems = j_must_have(jdata, 'systems')
set_pfx = j_must_have(jdata, 'set_prefix')
batch_size = j_must_have(jdata, 'batch_size')
test_size = j_must_have(jdata, 'numb_test')
batch_size = 1
test_size = 1
rcut = j_must_have(jdata['model']['descriptor'], 'rcut')

data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)

test_data = data.get_test()
numb_test = 1

jdata['model']['descriptor'].pop('type', None)
jdata['model']['descriptor']['multi_task'] = True
descrpt = DescrptSeA(**jdata['model']['descriptor'], uniform_seed=True)
fitting_dict = {}
fitting_type_dict = {}
for fitting_key in jdata['model']['fitting_net_dict']:
item_fitting_param = jdata['model']['fitting_net_dict'][fitting_key]
item_fitting_type = item_fitting_param.get('type', 'ener')
fitting_type_dict[fitting_key] = item_fitting_type
item_fitting_param.pop('type', None)
item_fitting_param.pop('fit_diag', None)
item_fitting_param['descrpt'] = descrpt
if item_fitting_type == 'ener':
fitting_dict[fitting_key] = EnerFitting(**item_fitting_param, uniform_seed=True)
elif item_fitting_type == 'dipole':
fitting_dict[fitting_key] = DipoleFittingSeA(**item_fitting_param, uniform_seed=True)
else:
raise RuntimeError('Test should not be here!')
model = MultiModel(descrpt, fitting_dict, fitting_type_dict)

input_data = {'coord': [test_data['coord']],
'box': [test_data['box']],
'type': [test_data['type']],
'natoms_vec': [test_data['natoms_vec']],
'default_mesh': [test_data['default_mesh']]
}

for fitting_key in jdata['model']['fitting_net_dict']:
model._compute_input_stat(input_data, fitting_key=fitting_key)
model.descrpt.merge_input_stats(model.descrpt.stat_dict)
model.descrpt.bias_atom_e = data.compute_energy_shift()

t_prop_c = tf.placeholder(tf.float32, [5], name='t_prop_c')
t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name='t_energy')
t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_force')
t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_virial')
t_atom_ener = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_atom_ener')
t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
t_type = tf.placeholder(tf.int32, [None], name='i_type')
t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name='i_natoms')
t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
t_mesh = tf.placeholder(tf.int32, [None], name='i_mesh')
is_training = tf.placeholder(tf.bool)
t_fparam = None

model_pred \
= model.build(t_coord,
t_type,
t_natoms,
t_box,
t_mesh,
t_fparam,
suffix="_layer_name",
reuse=False)

e_energy1 = model_pred['water_ener']['energy']
e_force1 = model_pred['water_ener']['force']
e_virial1 = model_pred['water_ener']['virial']
e_energy2 = model_pred['water_ener2']['energy']
e_force2 = model_pred['water_ener2']['force']
e_virial2 = model_pred['water_ener2']['virial']
feed_dict_test = {t_prop_c: test_data['prop_c'],
t_energy: test_data['energy'][:numb_test],
t_force: np.reshape(test_data['force'][:numb_test, :], [-1]),
t_virial: np.reshape(test_data['virial'][:numb_test, :], [-1]),
t_atom_ener: np.reshape(test_data['atom_ener'][:numb_test, :], [-1]),
t_coord: np.reshape(test_data['coord'][:numb_test, :], [-1]),
t_box: test_data['box'][:numb_test, :],
t_type: np.reshape(test_data['type'][:numb_test, :], [-1]),
t_natoms: test_data['natoms_vec'],
t_mesh: test_data['default_mesh'],
is_training: False}

with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
[e1, f1, v1, e2, f2, v2] = sess.run(
[e_energy1, e_force1, e_virial1, e_energy2, e_force2, e_virial2],
feed_dict=feed_dict_test)
np.testing.assert_allclose(e1, e2, rtol=1e-5, atol=1e-5)
np.testing.assert_allclose(f1, f2, rtol=1e-5, atol=1e-5)
np.testing.assert_allclose(v1, v2, rtol=1e-5, atol=1e-5)
