Merge pull request #365 from BrikerMan/develop
Release 1.1.5
BrikerMan authored Apr 25, 2020
2 parents 41e36f5 + 9ec0ed7 commit 889ba1e
Showing 8 changed files with 56 additions and 19 deletions.
9 changes: 9 additions & 0 deletions docs/about/release-notes.md
@@ -17,6 +17,10 @@ pip show kashgari

## Current Release

### [1.1.5] - 2020.04.25

- 🐛 Fix transformer embedding loading error by loading the required custom objects. ([#358])

### [1.1.4] - 2020.03.30

- 🐛 Fix bert embedding v2 error, set default to not-trainable. ([#354], [#346])
@@ -163,6 +167,10 @@ Here is how the existing versions changes
- fix classification model evaluate result output
- change test settings

[1.1.5]: https://github.com/BrikerMan/Kashgari/compare/v1.1.4...v1.1.5
[1.1.4]: https://github.com/BrikerMan/Kashgari/compare/v1.1.3...v1.1.4
[1.1.3]: https://github.com/BrikerMan/Kashgari/compare/v1.1.2...v1.1.3
[1.1.2]: https://github.com/BrikerMan/Kashgari/compare/v1.1.1...v1.1.2
[1.1.1]: https://github.com/BrikerMan/Kashgari/compare/v1.1.0...v1.1.1
[1.1.0]: https://github.com/BrikerMan/Kashgari/compare/v1.0.0...v1.1.0
[1.0.0]: https://github.com/BrikerMan/Kashgari/compare/v0.5.4...v1.0.0
@@ -185,3 +193,4 @@ Here is how the existing versions changes
[#303]: https://github.com/BrikerMan/Kashgari/issues/303
[#346]: https://github.com/BrikerMan/Kashgari/issues/346
[#354]: https://github.com/BrikerMan/Kashgari/issues/354
[#358]: https://github.com/BrikerMan/Kashgari/issues/358
10 changes: 3 additions & 7 deletions docs/embeddings/bert-embedding.md
@@ -103,10 +103,6 @@ train_x = [sample]

| model | provider | Language | Link | info |
| ---------------- | -------------------- | -------------- | ---------------- | ----------------------------- |
| BERT official | Google | Multi Language | [link][bert] | |
| ERNIE | Baidu | Chinese | [link][ernie] | Unofficial Tensorflow Version |
| Chinese BERT WWM | 哈工大讯飞联合实验室 | Chinese | [link][bert-wwm] | Use Tensorflow Version |

[bert]: https://github.com/google-research/bert
[ernie]: https://github.com/ArthurRizar/tensorflow_ernie
[bert-wwm]: https://github.com/ymcui/Chinese-BERT-wwm#%E4%B8%AD%E6%96%87%E6%A8%A1%E5%9E%8B%E4%B8%8B%E8%BD%BD
| BERT official | Google | Multi Language | [link](https://github.com/google-research/bert) | |
| ERNIE | Baidu | Chinese | [link](https://github.com/ArthurRizar/tensorflow_ernie) | Unofficial Tensorflow Version |
| Chinese BERT WWM | 哈工大讯飞联合实验室 | Chinese | [link](https://github.com/ymcui/Chinese-BERT-wwm#%E4%B8%AD%E6%96%87%E6%A8%A1%E5%9E%8B%E4%B8%8B%E8%BD%BD) | Use Tensorflow Version |
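
For reference, a minimal sketch of wiring one of these downloads into `BERTEmbedding` (the folder path is a placeholder, and the keyword arguments are assumptions based on the Kashgari 1.x docs):

```python
import kashgari
from kashgari.embeddings import BERTEmbedding

# '<bert_model_folder>' is a placeholder for one of the unpacked checkpoints from
# the table above (it should contain vocab.txt, bert_config.json and the ckpt files).
bert_embed = BERTEmbedding('<bert_model_folder>',
                           task=kashgari.CLASSIFICATION,
                           sequence_length=100)
```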
3 changes: 2 additions & 1 deletion docs/embeddings/index.md
@@ -6,7 +6,8 @@ Kashgari provides several embeddings for language representation. Embedding laye
| ----------------------------------------------------------- | --------------------------------------------------------------------------- |
| [BareEmbedding](bare-embedding.md) | random init `tf.keras.layers.Embedding` layer for text sequence embedding |
| [WordEmbedding](word-embedding.md) | pre-trained Word2Vec embedding |
| [BERTEmbedding](bert-embedding.md) | pre-trained BERT embedding |
| [BERTEmbedding](bert-embedding.md) | pre-trained BERT embedding (BERT, ERNIE) |
| [TransformerEmbedding](bert-embedding_v2.md) | pre-trained Transformer embedding (BERT, ALBERT, RoBERTa, NEZHA) |
| [GPT2Embedding](gpt2-embedding.md) | pre-trained GPT-2 embedding |
| [NumericFeaturesEmbedding](numeric-features-embedding.md) | random init `tf.keras.layers.Embedding` layer for numeric feature embedding |
| [StackedEmbedding](./stacked-embedding.md) | stack other embeddings for multi-input model |
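
A rough usage sketch for the newly listed `TransformerEmbedding` (paths are placeholders; the positional order and the `bert_type` keyword are assumptions taken from bert-embedding_v2.md, so check that page for the authoritative signature):

```python
import os
import kashgari
from kashgari.embeddings import TransformerEmbedding

model_folder = '<albert_model_folder>'  # placeholder for an unpacked ALBERT checkpoint
embed = TransformerEmbedding(os.path.join(model_folder, 'vocab_chinese.txt'),
                             os.path.join(model_folder, 'albert_config.json'),
                             os.path.join(model_folder, 'model.ckpt-best'),
                             bert_type='albert',  # assumption: keyword as documented in bert-embedding_v2.md
                             task=kashgari.CLASSIFICATION,
                             sequence_length=100)
```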
2 changes: 2 additions & 0 deletions kashgari/__init__.py
@@ -19,8 +19,10 @@

import keras_bert
from kashgari.macros import TaskType, config
from bert4keras.layers import custom_objects as bert4keras_custom_objects

custom_objects = keras_bert.get_custom_objects()
custom_objects.update(bert4keras_custom_objects)
CLASSIFICATION = TaskType.CLASSIFICATION
LABELING = TaskType.LABELING
SCORING = TaskType.SCORING
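
For context, this merged dictionary is what lets Keras resolve `keras_bert` / `bert4keras` layers by name during deserialization; a minimal sketch using the scope helper from `kashgari.utils` (the `.h5` path is hypothetical):

```python
import tensorflow as tf
from kashgari.utils import custom_object_scope

# kashgari.custom_objects now carries custom layers from both keras_bert and
# bert4keras, so deserialization inside this scope can resolve them by name.
with custom_object_scope():
    model = tf.keras.models.load_model('my_saved_model.h5')  # hypothetical path
```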
5 changes: 2 additions & 3 deletions kashgari/embeddings/base_embedding.py
@@ -43,7 +43,7 @@ def _load_saved_instance(cls,
                             config_dict: Dict,
                             model_path: str,
                             tf_model: keras.Model):

        from kashgari import utils
        processor_info = config_dict['processor']
        processor_class = pydoc.locate(f"{processor_info['module']}.{processor_info['class_name']}")
        processor = processor_class(**processor_info['config'])
@@ -52,8 +52,7 @@ def _load_saved_instance(cls,
                               from_saved_model=True, **config_dict['config'])

        embed_model_json_str = json.dumps(config_dict['embed_model'])
        instance.embed_model = keras.models.model_from_json(embed_model_json_str,
                                                            custom_objects=kashgari.custom_objects)
        instance.embed_model = utils._custom_load_keras_model_from_json(embed_model_json_str)

        # Load Weights from model
        for layer in instance.embed_model.layers:
6 changes: 2 additions & 4 deletions kashgari/embeddings/stacked_embedding.py
@@ -29,6 +29,7 @@ def _load_saved_instance(cls,
                             config_dict: Dict,
                             model_path: str,
                             tf_model: keras.Model):
        from kashgari import utils
        embeddings = []
        for embed_info in config_dict['embeddings']:
            embed_class = pydoc.locate(f"{embed_info['module']}.{embed_info['class_name']}")
@@ -38,12 +39,9 @@
            embeddings.append(embedding)
        instance = cls(embeddings=embeddings,
                       from_saved_model=True)
        print('----')
        print(instance.embeddings)

        embed_model_json_str = json.dumps(config_dict['embed_model'])
        instance.embed_model = keras.models.model_from_json(embed_model_json_str,
                                                            custom_objects=kashgari.custom_objects)
        instance.embed_model = utils._custom_load_keras_model_from_json(embed_model_json_str)
        # Load Weights from model
        for layer in instance.embed_model.layers:
            layer.set_weights(tf_model.get_layer(layer.name).get_weights())
2 changes: 1 addition & 1 deletion kashgari/tasks/labeling/models.py
@@ -16,7 +16,7 @@
from kashgari.layers import L
from kashgari.layers.crf import CRF

from kashgari.utils import custom_objects
from kashgari import custom_objects

custom_objects['CRF'] = CRF

38 changes: 35 additions & 3 deletions kashgari/utils.py
@@ -21,7 +21,7 @@
import tensorflow as tf
from tensorflow.python import keras, saved_model

from kashgari import custom_objects
import kashgari
from kashgari.embeddings.base_embedding import Embedding
from kashgari.layers.crf import CRF
from kashgari.processors.base_processor import BaseProcessor
@@ -43,7 +43,7 @@ def get_list_subset(target: List, index_list: List[int]) -> List:


def custom_object_scope():
    return tf.keras.utils.custom_object_scope(custom_objects)
    return tf.keras.utils.custom_object_scope(kashgari.custom_objects)


def load_model(model_path: str,
@@ -57,14 +57,22 @@ def load_model(model_path: str,
    Returns:
    """
    import keras_bert
    with open(os.path.join(model_path, 'model_info.json'), 'r') as f:
        model_info = json.load(f)

    model_class = pydoc.locate(f"{model_info['module']}.{model_info['class_name']}")
    model_json_str = json.dumps(model_info['tf_model'])

    model = model_class()
    model.tf_model = tf.keras.models.model_from_json(model_json_str, custom_objects)

    # Fix loading bug caused by custom objects naming duplication in keras_bert and bert4keras
    custom_obj_1 = kashgari.custom_objects
    custom_obj_2 = dict(custom_obj_1)
    custom_obj_2.update(keras_bert.get_custom_objects())

    model.tf_model = _custom_load_keras_model_from_json(model_json_str)

    if load_weights:
        model.tf_model.load_weights(os.path.join(model_path, 'model_weights.h5'))

@@ -82,6 +90,30 @@ def load_model(model_path: str,
    return model
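
As an aside, a hedged round-trip sketch of the loader above (the directory name is hypothetical; it assumes a model previously persisted with `model.save()`):

```python
from kashgari.utils import load_model

# 'saved_model_dir' is a hypothetical directory previously produced by
# some_model.save('saved_model_dir'); load_weights=False restores the
# architecture only, as handled above.
model = load_model('saved_model_dir')
arch_only = load_model('saved_model_dir', load_weights=False)
```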


def _custom_load_keras_model_from_json(json_str):
    # Fix loading bug caused by custom objects naming duplication in keras_bert and bert4keras
    import keras_bert
    custom_obj_1 = kashgari.custom_objects
    custom_obj_2 = dict(custom_obj_1)
    custom_obj_2.update(keras_bert.get_custom_objects())

    model, exp = None, None
    for custom_obj in [
        custom_obj_1,
        custom_obj_2
    ]:
        try:
            model = tf.keras.models.model_from_json(json_str, custom_obj)
            break
        except Exception as e:
            exp = e

    if model:
        return model
    else:
        raise exp
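
To see why the two-pass retry exists, one can inspect the name overlap between the two custom-object registries (illustration only; the exact overlapping names depend on the installed `keras_bert` and `bert4keras` versions):

```python
import keras_bert
from bert4keras.layers import custom_objects as bert4keras_custom_objects

# Names present in both registries are the duplication the comment above refers to.
# After the merge in kashgari/__init__.py the bert4keras class wins for a shared
# name, which is why loading falls back to a keras_bert-flavoured dict when the
# first attempt fails.
overlap = set(keras_bert.get_custom_objects()) & set(bert4keras_custom_objects)
print(sorted(overlap))
```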


def load_processor(model_path: str) -> BaseProcessor:
"""
Load processor from model
