Merge pull request #365 from BrikerMan/develop
Release 1.1.5
BrikerMan authored Apr 25, 2020
2 parents 41e36f5 + 9ec0ed7 commit 889ba1e
Showing 8 changed files with 56 additions and 19 deletions.
9 changes: 9 additions & 0 deletions docs/about/release-notes.md
@@ -17,6 +17,10 @@ pip show kashgari

## Current Release

### [1.1.5] - 2020.04.25

- 🐛 Fix transformer embedding loading error by loading the required custom objects. ([#358])

### [1.1.4] - 2020.03.30

- 🐛 Fix bert embedding v2 error, set default to not-trainable. ([#354], [#346])
@@ -163,6 +167,10 @@ Here is how the existing versions changes
- fix classification model evaluate result output
- change test settings

[1.1.5]: https://github.com/BrikerMan/Kashgari/compare/v1.1.4...v1.1.5
[1.1.4]: https://github.com/BrikerMan/Kashgari/compare/v1.1.3...v1.1.4
[1.1.3]: https://github.com/BrikerMan/Kashgari/compare/v1.1.2...v1.1.3
[1.1.2]: https://github.com/BrikerMan/Kashgari/compare/v1.1.1...v1.1.2
[1.1.1]: https://github.com/BrikerMan/Kashgari/compare/v1.1.0...v1.1.1
[1.1.0]: https://github.com/BrikerMan/Kashgari/compare/v1.0.0...v1.1.0
[1.0.0]: https://github.com/BrikerMan/Kashgari/compare/v0.5.4...v1.0.0
@@ -185,3 +193,4 @@ Here is how the existing versions changes
[#303]: https://github.com/BrikerMan/Kashgari/issues/303
[#346]: https://github.com/BrikerMan/Kashgari/issues/346
[#354]: https://github.com/BrikerMan/Kashgari/issues/354
[#358]: https://github.com/BrikerMan/Kashgari/issues/358
10 changes: 3 additions & 7 deletions docs/embeddings/bert-embedding.md
@@ -103,10 +103,6 @@ train_x = [sample]

| model | provider | Language | Link | info |
| ---------------- | -------------------- | -------------- | ---------------- | ----------------------------- |
| BERT official | Google | Multi Language | [link][bert] | |
| ERNIE | Baidu | Chinese | [link][ernie] | Unofficial Tensorflow Version |
| Chinese BERT WWM | 哈工大讯飞联合实验室 | Chinese | [link][bert-wwm] | Use Tensorflow Version |

[bert]: https://github.com/google-research/bert
[ernie]: https://github.com/ArthurRizar/tensorflow_ernie
[bert-wwm]: https://github.com/ymcui/Chinese-BERT-wwm#%E4%B8%AD%E6%96%87%E6%A8%A1%E5%9E%8B%E4%B8%8B%E8%BD%BD
| BERT official | Google | Multi Language | [link](https://github.com/google-research/bert) | |
| ERNIE | Baidu | Chinese | [link](https://github.com/ArthurRizar/tensorflow_ernie) | Unofficial Tensorflow Version |
| Chinese BERT WWM | 哈工大讯飞联合实验室 | Chinese | [link](https://github.com/ymcui/Chinese-BERT-wwm#%E4%B8%AD%E6%96%87%E6%A8%A1%E5%9E%8B%E4%B8%8B%E8%BD%BD) | Use Tensorflow Version |
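
For reference, a minimal sketch of wiring one of these downloads into `BERTEmbedding` (the folder path is a placeholder, and the keyword arguments are assumptions based on the Kashgari 1.x docs):

```python
import kashgari
from kashgari.embeddings import BERTEmbedding

# '<bert_model_folder>' is a placeholder for one of the unpacked checkpoints from
# the table above (it should contain vocab.txt, bert_config.json and the ckpt files).
bert_embed = BERTEmbedding('<bert_model_folder>',
                           task=kashgari.CLASSIFICATION,
                           sequence_length=100)
```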
3 changes: 2 additions & 1 deletion docs/embeddings/index.md
@@ -6,7 +6,8 @@ Kashgari provides several embeddings for language representation. Embedding laye
| ----------------------------------------------------------- | --------------------------------------------------------------------------- |
| [BareEmbedding](bare-embedding.md) | random init `tf.keras.layers.Embedding` layer for text sequence embedding |
| [WordEmbedding](word-embedding.md) | pre-trained Word2Vec embedding |
| [BERTEmbedding](bert-embedding.md) | pre-trained BERT embedding |
| [BERTEmbedding](bert-embedding.md) | pre-trained BERT embedding (BERT, ERNIE) |
| [TransformerEmbedding](bert-embedding_v2.md) | pre-trained Transformer embedding (BERT, ALBERT, RoBERTa, NEZHA) |
| [GPT2Embedding](gpt2-embedding.md) | pre-trained GPT-2 embedding |
| [NumericFeaturesEmbedding](numeric-features-embedding.md) | random init `tf.keras.layers.Embedding` layer for numeric feature embedding |
| [StackedEmbedding](./stacked-embedding.md) | stack other embeddings for multi-input model |
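
A rough usage sketch for the newly listed `TransformerEmbedding` (paths are placeholders; the positional order and the `bert_type` keyword are assumptions taken from bert-embedding_v2.md, so check that page for the authoritative signature):

```python
import os
import kashgari
from kashgari.embeddings import TransformerEmbedding

model_folder = '<albert_model_folder>'  # placeholder for an unpacked ALBERT checkpoint
embed = TransformerEmbedding(os.path.join(model_folder, 'vocab_chinese.txt'),
                             os.path.join(model_folder, 'albert_config.json'),
                             os.path.join(model_folder, 'model.ckpt-best'),
                             bert_type='albert',  # assumption: keyword as documented in bert-embedding_v2.md
                             task=kashgari.CLASSIFICATION,
                             sequence_length=100)
```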
2 changes: 2 additions & 0 deletions kashgari/__init__.py
@@ -19,8 +19,10 @@

import keras_bert
from kashgari.macros import TaskType, config
from bert4keras.layers import custom_objects as bert4keras_custom_objects

custom_objects = keras_bert.get_custom_objects()
custom_objects.update(bert4keras_custom_objects)
CLASSIFICATION = TaskType.CLASSIFICATION
LABELING = TaskType.LABELING
SCORING = TaskType.SCORING
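
For context, this merged dictionary is what lets Keras resolve `keras_bert` / `bert4keras` layers by name during deserialization; a minimal sketch using the scope helper from `kashgari.utils` (the `.h5` path is hypothetical):

```python
import tensorflow as tf
from kashgari.utils import custom_object_scope

# kashgari.custom_objects now carries custom layers from both keras_bert and
# bert4keras, so deserialization inside this scope can resolve them by name.
with custom_object_scope():
    model = tf.keras.models.load_model('my_saved_model.h5')  # hypothetical path
```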
5 changes: 2 additions & 3 deletions kashgari/embeddings/base_embedding.py
@@ -43,7 +43,7 @@ def _load_saved_instance(cls,
                             config_dict: Dict,
                             model_path: str,
                             tf_model: keras.Model):

        from kashgari import utils
        processor_info = config_dict['processor']
        processor_class = pydoc.locate(f"{processor_info['module']}.{processor_info['class_name']}")
        processor = processor_class(**processor_info['config'])
@@ -52,8 +52,7 @@ def _load_saved_instance(cls,
                               from_saved_model=True, **config_dict['config'])

        embed_model_json_str = json.dumps(config_dict['embed_model'])
        instance.embed_model = keras.models.model_from_json(embed_model_json_str,
                                                            custom_objects=kashgari.custom_objects)
        instance.embed_model = utils._custom_load_keras_model_from_json(embed_model_json_str)

        # Load Weights from model
        for layer in instance.embed_model.layers:
6 changes: 2 additions & 4 deletions kashgari/embeddings/stacked_embedding.py
@@ -29,6 +29,7 @@ def _load_saved_instance(cls,
                             config_dict: Dict,
                             model_path: str,
                             tf_model: keras.Model):
        from kashgari import utils
        embeddings = []
        for embed_info in config_dict['embeddings']:
            embed_class = pydoc.locate(f"{embed_info['module']}.{embed_info['class_name']}")
@@ -38,12 +39,9 @@
            embeddings.append(embedding)
        instance = cls(embeddings=embeddings,
                       from_saved_model=True)
        print('----')
        print(instance.embeddings)

        embed_model_json_str = json.dumps(config_dict['embed_model'])
        instance.embed_model = keras.models.model_from_json(embed_model_json_str,
                                                            custom_objects=kashgari.custom_objects)
        instance.embed_model = utils._custom_load_keras_model_from_json(embed_model_json_str)
        # Load Weights from model
        for layer in instance.embed_model.layers:
            layer.set_weights(tf_model.get_layer(layer.name).get_weights())
2 changes: 1 addition & 1 deletion kashgari/tasks/labeling/models.py
@@ -16,7 +16,7 @@
from kashgari.layers import L
from kashgari.layers.crf import CRF

from kashgari.utils import custom_objects
from kashgari import custom_objects

custom_objects['CRF'] = CRF

38 changes: 35 additions & 3 deletions kashgari/utils.py
@@ -21,7 +21,7 @@
import tensorflow as tf
from tensorflow.python import keras, saved_model

from kashgari import custom_objects
import kashgari
from kashgari.embeddings.base_embedding import Embedding
from kashgari.layers.crf import CRF
from kashgari.processors.base_processor import BaseProcessor
@@ -43,7 +43,7 @@ def get_list_subset(target: List, index_list: List[int]) -> List:


def custom_object_scope():
    return tf.keras.utils.custom_object_scope(custom_objects)
    return tf.keras.utils.custom_object_scope(kashgari.custom_objects)


def load_model(model_path: str,
@@ -57,14 +57,22 @@ def load_model(model_path: str,
    Returns:
    """
    import keras_bert
    with open(os.path.join(model_path, 'model_info.json'), 'r') as f:
        model_info = json.load(f)

    model_class = pydoc.locate(f"{model_info['module']}.{model_info['class_name']}")
    model_json_str = json.dumps(model_info['tf_model'])

    model = model_class()
    model.tf_model = tf.keras.models.model_from_json(model_json_str, custom_objects)

    # Fix loading bug caused by custom objects naming duplication in keras_bert and bert4keras
    custom_obj_1 = kashgari.custom_objects
    custom_obj_2 = dict(custom_obj_1)
    custom_obj_2.update(keras_bert.get_custom_objects())

    model.tf_model = _custom_load_keras_model_from_json(model_json_str)

    if load_weights:
        model.tf_model.load_weights(os.path.join(model_path, 'model_weights.h5'))

@@ -82,6 +90,30 @@ def load_model(model_path: str,
    return model
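
As an aside, a hedged round-trip sketch of the loader above (the directory name is hypothetical; it assumes a model previously persisted with `model.save()`):

```python
from kashgari.utils import load_model

# 'saved_model_dir' is a hypothetical directory previously produced by
# some_model.save('saved_model_dir'); load_weights=False restores the
# architecture only, as handled above.
model = load_model('saved_model_dir')
arch_only = load_model('saved_model_dir', load_weights=False)
```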


def _custom_load_keras_model_from_json(json_str):
    # Fix loading bug caused by custom objects naming duplication in keras_bert and bert4keras
    import keras_bert
    custom_obj_1 = kashgari.custom_objects
    custom_obj_2 = dict(custom_obj_1)
    custom_obj_2.update(keras_bert.get_custom_objects())

    model, exp = None, None
    for custom_obj in [
        custom_obj_1,
        custom_obj_2
    ]:
        try:
            model = tf.keras.models.model_from_json(json_str, custom_obj)
            break
        except Exception as e:
            exp = e

    if model:
        return model
    else:
        raise exp
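
To see why the two-pass retry exists, one can inspect the name overlap between the two custom-object registries (illustration only; the exact overlapping names depend on the installed `keras_bert` and `bert4keras` versions):

```python
import keras_bert
from bert4keras.layers import custom_objects as bert4keras_custom_objects

# Names present in both registries are the duplication the comment above refers to.
# After the merge in kashgari/__init__.py the bert4keras class wins for a shared
# name, which is why loading falls back to a keras_bert-flavoured dict when the
# first attempt fails.
overlap = set(keras_bert.get_custom_objects()) & set(bert4keras_custom_objects)
print(sorted(overlap))
```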


def load_processor(model_path: str) -> BaseProcessor:
"""
Load processor from model
