support sharing fitting network parameters (#2211)
Add `layer_name` parameter to share network parameters among different
fitting network layers.

Signed-off-by: Jinzhe Zeng <[email protected]>
njzjz authored Jan 10, 2023
1 parent 6bc1f2e commit 6154494
Showing 6 changed files with 294 additions and 11 deletions.
7 changes: 7 additions & 0 deletions deepmd/env.py
@@ -117,6 +117,13 @@ def dlopen_library(module: str, filename: str):
r"final_layer_type_\d+/matrix|"
r"final_layer/bias|"
r"final_layer_type_\d+/bias|"
# layer_name
r"share_.+_type_\d/matrix|"
r"share_.+_type_\d/bias|"
r"share_.+_type_\d/idt|"
r"share_.+/matrix|"
r"share_.+/bias|"
r"share_.+/idt|"
)

TYPE_EMBEDDING_PATTERN = str(
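The alternatives added above extend the fitting-network variable pattern in `deepmd/env.py` so that variables created under the new `share_...` scopes are still recognized when fitting-net variables are collected from a graph. A minimal standalone sketch of what the added branches match (only the `share_` alternatives are reproduced; the full pattern contains additional branches, and the pattern's exact usage in deepmd is assumed here rather than shown in this diff):

```python
# Sketch only: reproduce the newly added "share_" alternatives and check which
# variable names they match.  The real pattern in deepmd/env.py has further
# branches (layer_*, final_layer*, ...) that are not repeated here.
import re

shared_branches = (
    r"share_.+_type_\d/matrix|"
    r"share_.+_type_\d/bias|"
    r"share_.+_type_\d/idt|"
    r"share_.+/matrix|"
    r"share_.+/bias|"
    r"share_.+/idt"
)

for name in ("share_l0_type_0/matrix", "share_l2/bias", "layer_0/matrix"):
    print(name, bool(re.fullmatch(shared_branches, name)))
# share_l0_type_0/matrix True
# share_l2/bias True
# layer_0/matrix False
```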
51 changes: 41 additions & 10 deletions deepmd/fit/ener.py
@@ -85,6 +85,9 @@ class EnerFitting (Fitting):
The precision of the embedding net parameters. Supported options are |PRECISION|
uniform_seed
Only for the purpose of backward compatibility, retrieves the old behavior of using the random seed
layer_name : list[Optional[str]], optional
The name of each layer. If two layers, either in the same fitting or different fittings,
have the same name, they will share the same neural network parameters.
"""
def __init__ (self,
descrpt : tf.Tensor,
@@ -99,7 +102,8 @@ def __init__ (self,
atom_ener : List[float] = [],
activation_function : str = 'tanh',
precision : str = 'default',
uniform_seed: bool = False
uniform_seed: bool = False,
layer_name: Optional[List[Optional[str]]] = None,
) -> None:
"""
Constructor
@@ -133,7 +137,7 @@ def __init__ (self,
self.trainable = trainable
if self.trainable is None:
self.trainable = [True for ii in range(len(self.n_neuron) + 1)]
if type(self.trainable) is bool:
if isinstance(self.trainable, bool):
self.trainable = [self.trainable] * (len(self.n_neuron)+1)
assert(len(self.trainable) == len(self.n_neuron) + 1), 'length of trainable should be that of n_neuron + 1'
self.atom_ener = []
@@ -159,6 +163,10 @@ def __init__ (self,

self.fitting_net_variables = None
self.mixed_prec = None
self.layer_name = layer_name
if self.layer_name is not None:
assert isinstance(self.layer_name, list), 'layer_name should be a list'
assert len(self.layer_name) == len(self.n_neuron) + 1, 'length of layer_name should be that of n_neuron + 1'

def get_numb_fparam(self) -> int:
"""
@@ -295,6 +303,7 @@ def _build_lower(
fparam = None,
aparam = None,
bias_atom_e = 0.0,
type_suffix = '',
suffix = '',
reuse = None
):
@@ -322,12 +331,18 @@ def _build_lower(
else:
one_layer = one_layer_deepmd
for ii in range(0,len(self.n_neuron)) :
if self.layer_name is not None and self.layer_name[ii] is not None:
layer_suffix = 'share_' + self.layer_name[ii] + type_suffix
layer_reuse = tf.AUTO_REUSE
else:
layer_suffix = 'layer_' + str(ii) + type_suffix + suffix
layer_reuse = reuse
if ii >= 1 and self.n_neuron[ii] == self.n_neuron[ii-1] and (not nvnmd_cfg.enable):
layer+= one_layer(
layer,
self.n_neuron[ii],
name='layer_'+str(ii)+suffix,
reuse=reuse,
name=layer_suffix,
reuse=layer_reuse,
seed = self.seed,
use_timestep = self.resnet_dt,
activation_fn = self.fitting_activation_fn,
@@ -340,8 +355,8 @@
layer = one_layer(
layer,
self.n_neuron[ii],
name='layer_'+str(ii)+suffix,
reuse=reuse,
name=layer_suffix,
reuse=layer_reuse,
seed = self.seed,
activation_fn = self.fitting_activation_fn,
precision = self.fitting_precision,
@@ -350,13 +365,19 @@
initial_variables = self.fitting_net_variables,
mixed_prec = self.mixed_prec)
if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift
if self.layer_name is not None and self.layer_name[-1] is not None:
layer_suffix = 'share_' + self.layer_name[-1] + type_suffix
layer_reuse = tf.AUTO_REUSE
else:
layer_suffix = 'final_layer' + type_suffix + suffix
layer_reuse = reuse
final_layer = one_layer(
layer,
1,
activation_fn = None,
bavg = bias_atom_e,
name='final_layer'+suffix,
reuse=reuse,
name=layer_suffix,
reuse=layer_reuse,
seed = self.seed,
precision = self.fitting_precision,
trainable = self.trainable[-1],
@@ -495,14 +516,20 @@ def build (self,
final_layer = self._build_lower(
start_index, natoms[2+type_i],
inputs, fparam, aparam,
bias_atom_e=0., suffix='_type_'+str(type_i)+suffix, reuse=reuse
bias_atom_e=0.,
type_suffix='_type_' + str(type_i),
suffix=suffix,
reuse=reuse,
)
# concat the results
if type_i < len(self.atom_ener) and self.atom_ener[type_i] is not None:
zero_layer = self._build_lower(
start_index, natoms[2+type_i],
inputs_zero, fparam, aparam,
bias_atom_e=0., suffix='_type_'+str(type_i)+suffix, reuse=True
bias_atom_e=0.,
type_suffix='_type_' + str(type_i),
suffix=suffix,
reuse=True,
)
final_layer -= zero_layer
final_layer = tf.reshape(final_layer, [tf.shape(inputs)[0], natoms[2+type_i]])
@@ -578,6 +605,10 @@ def init_variables(self,
suffix to name scope
"""
self.fitting_net_variables = get_fitting_net_variables_from_graph_def(graph_def, suffix=suffix)
if self.layer_name is not None:
# shared variables have no suffix
shared_variables = get_fitting_net_variables_from_graph_def(graph_def, suffix="")
self.fitting_net_variables.update(shared_variables)
if self.numb_fparam > 0:
self.fparam_avg = get_tensor_by_name_from_graph(graph, 'fitting_attr%s/t_fparam_avg' % suffix)
self.fparam_inv_std = get_tensor_by_name_from_graph(graph, 'fitting_attr%s/t_fparam_istd' % suffix)
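The sharing logic added to `_build_lower` above relies on TensorFlow 1.x variable scopes: a layer whose `layer_name` entry is set is built under a scope named `share_<name>` (plus a type suffix where type-specific sub-networks exist) with `reuse=tf.AUTO_REUSE`, so every fitting that passes the same name binds to the same weights. A minimal sketch of that mechanism, using a plain dense layer as a stand-in for deepmd's `one_layer` (the scope names below are illustrative only):

```python
# Sketch only: identical scope names plus AUTO_REUSE make two "fitting
# networks" share the same parameters, while a normally named layer does not.
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()


def dense(x, units, name, reuse):
    # stand-in for deepmd's one_layer; only the naming/reuse behavior matters
    with tf.variable_scope(name, reuse=reuse):
        w = tf.get_variable("matrix", [x.shape[-1], units], tf.float64)
        b = tf.get_variable("bias", [units], tf.float64)
    return tf.matmul(x, w) + b


x = tf.placeholder(tf.float64, [None, 4])
y1 = dense(x, 8, name="share_l0", reuse=tf.AUTO_REUSE)  # first fitting
y2 = dense(x, 8, name="share_l0", reuse=tf.AUTO_REUSE)  # second fitting, same weights
y3 = dense(x, 8, name="layer_0_other", reuse=None)      # unshared layer, own weights

print(sorted(v.name for v in tf.trainable_variables()))
# ['layer_0_other/bias:0', 'layer_0_other/matrix:0',
#  'share_l0/bias:0', 'share_l0/matrix:0']
```

This is also why `init_variables` above queries the graph a second time with `suffix=""`: shared variables live outside the per-model suffix, so they would otherwise be missed when restoring parameters from a frozen graph.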
10 changes: 9 additions & 1 deletion deepmd/utils/argcheck.py
@@ -318,6 +318,13 @@ def fitting_ener():
doc_rcond = 'The condition number used to determine the inital energy shift for each type of atoms.'
doc_seed = 'Random seed for parameter initialization of the fitting net'
doc_atom_ener = 'Specify the atomic energy in vacuum for each type'
doc_layer_name = (
"The name of the each layer. The length of this list should be equal to n_neuron + 1. "
"If two layers, either in the same fitting or different fittings, "
"have the same name, they will share the same neural network parameters. "
"The shape of these layers should be the same. "
"If null is given for a layer, parameters will not be shared."
)

return [
Argument("numb_fparam", int, optional = True, default = 0, doc = doc_numb_fparam),
@@ -329,7 +336,8 @@ def fitting_ener():
Argument("trainable", [list,bool], optional = True, default = True, doc = doc_trainable),
Argument("rcond", float, optional = True, default = 1e-3, doc = doc_rcond),
Argument("seed", [int,None], optional = True, doc = doc_seed),
Argument("atom_ener", list, optional = True, default = [], doc = doc_atom_ener)
Argument("atom_ener", list, optional = True, default = [], doc = doc_atom_ener),
Argument("layer_name", list, optional = True, doc = doc_layer_name),
]


28 changes: 28 additions & 0 deletions doc/train/multi-task-training.md
@@ -93,3 +93,31 @@ Finally, you can perform the modified multi-task training from the frozen model
```bash
$ dp train input.json --init_frz_model graph.pb
```

## Share layers among energy fitting networks

The multi-task training can be used to train multiple levels of energies (e.g. DFT and CCSD(T)) at the same time.
In this situation, one can set {ref}`model/fitting_net[ener]/layer_name` to share some of the layers among fitting networks.
The architecture of the layers with the same name should be the same.

For example, if one wants to share the first and the third hidden layers between two three-hidden-layer fitting networks, the following parameters should be set. Each `layer_name` list has four entries: one for each of the three hidden layers and one for the final output layer.
```json
"fitting_net_dict": {
"ccsd": {
"neuron": [
240,
240,
240
],
"layer_name": ["l0", null, "l2", null]
},
"wb97m": {
"neuron": [
240,
240,
240
],
"layer_name": ["l0", null, "l2", null]
}
}
```
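Because layers that share a name must have identical shapes, it can help to write out the (input, output) dimension of every layer, including the final single-output layer, and check that each name always maps to the same pair. A minimal sketch of that bookkeeping, assuming a hypothetical descriptor output size of 128 (in practice the first layer's input dimension is set by the descriptor and any frame/atomic parameters):

```python
# Sketch only: verify that a layer_name configuration is consistent, i.e. every
# shared name corresponds to layers with the same (in_dim, out_dim).

def layer_shapes(neuron, input_dim):
    """(in_dim, out_dim) for each hidden layer plus the final 1-D output layer."""
    dims = [input_dim] + list(neuron) + [1]
    return list(zip(dims[:-1], dims[1:]))

fittings = {
    "ccsd":  {"neuron": [240, 240, 240], "layer_name": ["l0", None, "l2", None]},
    "wb97m": {"neuron": [240, 240, 240], "layer_name": ["l0", None, "l2", None]},
}

shared = {}
for key, cfg in fittings.items():
    assert len(cfg["layer_name"]) == len(cfg["neuron"]) + 1
    for name, shape in zip(cfg["layer_name"], layer_shapes(cfg["neuron"], input_dim=128)):
        if name is None:
            continue  # unnamed layers are never shared
        assert shared.setdefault(name, shape) == shape, (key, name, shape)

print(shared)  # {'l0': (128, 240), 'l2': (240, 240)}
```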
121 changes: 121 additions & 0 deletions source/tests/test_layer_name.py
@@ -0,0 +1,121 @@
import numpy as np

from deepmd.env import tf
from common import gen_data, del_data, j_loader
from common import DataSystem
from deepmd.descriptor import DescrptSeA
from deepmd.fit import EnerFitting, DipoleFittingSeA
from deepmd.model import MultiModel
from deepmd.common import j_must_have

GLOBAL_ENER_FLOAT_PRECISION = tf.float64
GLOBAL_TF_FLOAT_PRECISION = tf.float64
GLOBAL_NP_FLOAT_PRECISION = np.float64


class TestModel(tf.test.TestCase):
def setUp(self):
gen_data()

def tearDown(self):
del_data()

def test_model(self):
"""Two fittings which share the same parameters should give the same result"""
jfile = 'water_layer_name.json'
jdata = j_loader(jfile)

systems = j_must_have(jdata, 'systems')
set_pfx = j_must_have(jdata, 'set_prefix')
batch_size = j_must_have(jdata, 'batch_size')
test_size = j_must_have(jdata, 'numb_test')
batch_size = 1
test_size = 1
rcut = j_must_have(jdata['model']['descriptor'], 'rcut')

data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)

test_data = data.get_test()
numb_test = 1

jdata['model']['descriptor'].pop('type', None)
jdata['model']['descriptor']['multi_task'] = True
descrpt = DescrptSeA(**jdata['model']['descriptor'], uniform_seed=True)
fitting_dict = {}
fitting_type_dict = {}
for fitting_key in jdata['model']['fitting_net_dict']:
item_fitting_param = jdata['model']['fitting_net_dict'][fitting_key]
item_fitting_type = item_fitting_param.get('type', 'ener')
fitting_type_dict[fitting_key] = item_fitting_type
item_fitting_param.pop('type', None)
item_fitting_param.pop('fit_diag', None)
item_fitting_param['descrpt'] = descrpt
if item_fitting_type == 'ener':
fitting_dict[fitting_key] = EnerFitting(**item_fitting_param, uniform_seed=True)
elif item_fitting_type == 'dipole':
fitting_dict[fitting_key] = DipoleFittingSeA(**item_fitting_param, uniform_seed=True)
else:
raise RuntimeError('Test should not be here!')
model = MultiModel(descrpt, fitting_dict, fitting_type_dict)

input_data = {'coord': [test_data['coord']],
'box': [test_data['box']],
'type': [test_data['type']],
'natoms_vec': [test_data['natoms_vec']],
'default_mesh': [test_data['default_mesh']]
}

for fitting_key in jdata['model']['fitting_net_dict']:
model._compute_input_stat(input_data, fitting_key=fitting_key)
model.descrpt.merge_input_stats(model.descrpt.stat_dict)
model.descrpt.bias_atom_e = data.compute_energy_shift()

t_prop_c = tf.placeholder(tf.float32, [5], name='t_prop_c')
t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name='t_energy')
t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_force')
t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_virial')
t_atom_ener = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_atom_ener')
t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
t_type = tf.placeholder(tf.int32, [None], name='i_type')
t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name='i_natoms')
t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
t_mesh = tf.placeholder(tf.int32, [None], name='i_mesh')
is_training = tf.placeholder(tf.bool)
t_fparam = None

model_pred \
= model.build(t_coord,
t_type,
t_natoms,
t_box,
t_mesh,
t_fparam,
suffix="_layer_name",
reuse=False)

e_energy1 = model_pred['water_ener']['energy']
e_force1 = model_pred['water_ener']['force']
e_virial1 = model_pred['water_ener']['virial']
e_energy2 = model_pred['water_ener2']['energy']
e_force2 = model_pred['water_ener2']['force']
e_virial2 = model_pred['water_ener2']['virial']
feed_dict_test = {t_prop_c: test_data['prop_c'],
t_energy: test_data['energy'][:numb_test],
t_force: np.reshape(test_data['force'][:numb_test, :], [-1]),
t_virial: np.reshape(test_data['virial'][:numb_test, :], [-1]),
t_atom_ener: np.reshape(test_data['atom_ener'][:numb_test, :], [-1]),
t_coord: np.reshape(test_data['coord'][:numb_test, :], [-1]),
t_box: test_data['box'][:numb_test, :],
t_type: np.reshape(test_data['type'][:numb_test, :], [-1]),
t_natoms: test_data['natoms_vec'],
t_mesh: test_data['default_mesh'],
is_training: False}

with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
[e1, f1, v1, e2, f2, v2] = sess.run(
[e_energy1, e_force1, e_virial1, e_energy2, e_force2, e_virial2],
feed_dict=feed_dict_test)
np.testing.assert_allclose(e1, e2, rtol=1e-5, atol=1e-5)
np.testing.assert_allclose(f1, f2, rtol=1e-5, atol=1e-5)
np.testing.assert_allclose(v1, v2, rtol=1e-5, atol=1e-5)
