Skip to content

Commit

Permalink
Add default NLP model
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelhwn committed Dec 18, 2020
1 parent 24f7cf6 commit 17d8cbe
Show file tree
Hide file tree
Showing 13 changed files with 266 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/ai_modules/nlp_module/models/english_default/checkpoint
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
model_checkpoint_path: "component_5_DIETClassifier.tf_model"
all_model_checkpoint_paths: "component_5_DIETClassifier.tf_model"

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"-1:low": {"False": 0, "True": 1}, "-1:title": {"False": 2, "True": 3}, "-1:upper": {"False": 4, "True": 5}, "0:BOS": {"False": 6, "True": 7}, "0:EOS": {"False": 8, "True": 9}, "0:digit": {"False": 10}, "0:low": {"False": 11, "True": 12}, "0:title": {"False": 13, "True": 14}, "0:upper": {"False": 15, "True": 16}, "1:low": {"False": 17, "True": 18}, "1:title": {"False": 19, "True": 20}, "1:upper": {"False": 21, "True": 22}}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"text": {"my": {"py/object": "numpy.intc", "dtype": "int32", "value": 37}, "name": {"py/object": "numpy.intc", "dtype": "int32", "value": 39}, "is": {"py/object": "numpy.intc", "dtype": "int32", "value": 20}, "michael": {"py/object": "numpy.intc", "dtype": "int32", "value": 32}, "eric": {"py/object": "numpy.intc", "dtype": "int32", "value": 12}, "alice": {"py/object": "numpy.intc", "dtype": "int32", "value": 2}, "am": {"py/object": "numpy.intc", "dtype": "int32", "value": 3}, "john": {"py/object": "numpy.intc", "dtype": "int32", "value": 24}, "james": {"py/object": "numpy.intc", "dtype": "int32", "value": 23}, "kristin": {"py/object": "numpy.intc", "dtype": "int32", "value": 26}, "you": {"py/object": "numpy.intc", "dtype": "int32", "value": 66}, "can": {"py/object": "numpy.intc", "dtype": "int32", "value": 8}, "call": {"py/object": "numpy.intc", "dtype": "int32", "value": 7}, "me": {"py/object": "numpy.intc", "dtype": "int32", "value": 31}, "jack": {"py/object": "numpy.intc", "dtype": "int32", "value": 22}, "peter": {"py/object": "numpy.intc", "dtype": "int32", "value": 44}, "philip": {"py/object": "numpy.intc", "dtype": "int32", "value": 45}, "the": {"py/object": "numpy.intc", "dtype": "int32", "value": 55}, "victor": {"py/object": "numpy.intc", "dtype": "int32", "value": 62}, "morgan": {"py/object": "numpy.intc", "dtype": "int32", "value": 34}, "hey": {"py/object": "numpy.intc", "dtype": "int32", "value": 17}, "dude": {"py/object": "numpy.intc", "dtype": "int32", "value": 11}, "sam": {"py/object": "numpy.intc", "dtype": "int32", "value": 49}, "mario": {"py/object": "numpy.intc", "dtype": "int32", "value": 29}, "alex": {"py/object": "numpy.intc", "dtype": "int32", "value": 1}, "molly": {"py/object": "numpy.intc", "dtype": "int32", "value": 33}, "regina": {"py/object": "numpy.intc", "dtype": "int32", "value": 46}, "yes": {"py/object": "numpy.intc", "dtype": "int32", "value": 65}, "for": {"py/object": "numpy.intc", "dtype": "int32", "value": 13}, "sure": {"py/object": "numpy.intc", "dtype": "int32", "value": 52}, "that": {"py/object": "numpy.intc", "dtype": "int32", "value": 54}, "great": {"py/object": "numpy.intc", "dtype": "int32", "value": 16}, "idea": {"py/object": "numpy.intc", "dtype": "int32", "value": 18}, "why": {"py/object": "numpy.intc", "dtype": "int32", "value": 64}, "not": {"py/object": "numpy.intc", "dtype": "int32", "value": 42}, "let": {"py/object": "numpy.intc", "dtype": "int32", "value": 28}, "do": {"py/object": "numpy.intc", "dtype": "int32", "value": 10}, "it": {"py/object": "numpy.intc", "dtype": "int32", "value": 21}, "this": {"py/object": "numpy.intc", "dtype": "int32", "value": 56}, "no": {"py/object": "numpy.intc", "dtype": "int32", "value": 40}, "thanks": {"py/object": "numpy.intc", "dtype": "int32", "value": 53}, "today": {"py/object": "numpy.intc", "dtype": "int32", "value": 59}, "maybe": {"py/object": "numpy.intc", "dtype": "int32", "value": 30}, "another": {"py/object": "numpy.intc", "dtype": "int32", "value": 4}, "day": {"py/object": "numpy.intc", "dtype": "int32", "value": 9}, "some": {"py/object": "numpy.intc", "dtype": "int32", "value": 50}, "other": {"py/object": "numpy.intc", "dtype": "int32", "value": 43}, "time": {"py/object": "numpy.intc", "dtype": "int32", "value": 57}, "nope": {"py/object": "numpy.intc", "dtype": "int32", "value": 41}, "nah": {"py/object": "numpy.intc", "dtype": "int32", "value": 38}, "interested": {"py/object": "numpy.intc", "dtype": "int32", "value": 19}, "move": {"py/object": "numpy.intc", "dtype": "int32", "value": 35}, "forward": {"py/object": "numpy.intc", "dtype": "int32", "value": 14}, "up": {"py/object": "numpy.intc", "dtype": "int32", "value": 61}, "go": {"py/object": "numpy.intc", "dtype": "int32", "value": 15}, "straight": {"py/object": "numpy.intc", "dtype": "int32", "value": 51}, "ahead": {"py/object": "numpy.intc", "dtype": "int32", "value": 0}, "moving": {"py/object": "numpy.intc", "dtype": "int32", "value": 36}, "keep": {"py/object": "numpy.intc", "dtype": "int32", "value": 25}, "walk": {"py/object": "numpy.intc", "dtype": "int32", "value": 63}, "backward": {"py/object": "numpy.intc", "dtype": "int32", "value": 6}, "back": {"py/object": "numpy.intc", "dtype": "int32", "value": 5}, "turn": {"py/object": "numpy.intc", "dtype": "int32", "value": 60}, "left": {"py/object": "numpy.intc", "dtype": "int32", "value": 27}, "rotate": {"py/object": "numpy.intc", "dtype": "int32", "value": 48}, "to": {"py/object": "numpy.intc", "dtype": "int32", "value": 58}, "right": {"py/object": "numpy.intc", "dtype": "int32", "value": 47}}, "intent": {"user_give_name": {"py/object": "numpy.intc", "dtype": "int32", "value": 1}, "user_answer_affirmative": {"py/object": "numpy.intc", "dtype": "int32", "value": 0}, "user_remain_anonymous": {"py/object": "numpy.intc", "dtype": "int32", "value": 2}, "user_say_move_forward": {"py/object": "numpy.intc", "dtype": "int32", "value": 4}, "user_say_move_backward": {"py/object": "numpy.intc", "dtype": "int32", "value": 3}, "user_say_turn_left": {"py/object": "numpy.intc", "dtype": "int32", "value": 5}, "user_say_turn_right": {"py/object": "numpy.intc", "dtype": "int32", "value": 6}}, "response": null}

Large diffs are not rendered by default.

Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
[
{
"tag_name": "entity",
"ids_to_tags": {
"1": "B-person",
"2": "I-person",
"3": "L-person",
"4": "U-person",
"0": "O"
},
"tags_to_ids": {
"B-person": 1,
"I-person": 2,
"L-person": 3,
"U-person": 4,
"O": 0
},
"num_tags": 5
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"0": "user_answer_affirmative", "1": "user_give_name", "2": "user_remain_anonymous", "3": "user_say_move_backward", "4": "user_say_move_forward", "5": "user_say_turn_left", "6": "user_say_turn_right"}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
234 changes: 234 additions & 0 deletions src/ai_modules/nlp_module/models/english_default/metadata.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
{
"language": "en",
"pipeline": [
{
"intent_tokenization_flag": false,
"intent_split_symbol": "_",
"case_sensitive": true,
"name": "WhitespaceTokenizer",
"class": "rasa.nlu.tokenizers.whitespace_tokenizer.WhitespaceTokenizer"
},
{
"name": "RegexFeaturizer",
"file": "component_1_RegexFeaturizer.pkl",
"class": "rasa.nlu.featurizers.sparse_featurizer.regex_featurizer.RegexFeaturizer"
},
{
"features": [
[
"low",
"title",
"upper"
],
[
"BOS",
"EOS",
"low",
"upper",
"title",
"digit"
],
[
"low",
"title",
"upper"
]
],
"name": "LexicalSyntacticFeaturizer",
"file": "component_2_LexicalSyntacticFeaturizer",
"class": "rasa.nlu.featurizers.sparse_featurizer.lexical_syntactic_featurizer.LexicalSyntacticFeaturizer"
},
{
"use_shared_vocab": false,
"analyzer": "word",
"token_pattern": "(?u)\\b\\w\\w+\\b",
"strip_accents": null,
"stop_words": null,
"min_df": 1,
"max_df": 1.0,
"min_ngram": 1,
"max_ngram": 1,
"max_features": null,
"lowercase": true,
"OOV_token": null,
"OOV_words": [],
"name": "CountVectorsFeaturizer",
"file": "component_3_CountVectorsFeaturizer.pkl",
"class": "rasa.nlu.featurizers.sparse_featurizer.count_vectors_featurizer.CountVectorsFeaturizer"
},
{
"use_shared_vocab": false,
"analyzer": "char_wb",
"token_pattern": "(?u)\\b\\w\\w+\\b",
"strip_accents": null,
"stop_words": null,
"min_df": 1,
"max_df": 1.0,
"min_ngram": 1,
"max_ngram": 4,
"max_features": null,
"lowercase": true,
"OOV_token": null,
"OOV_words": [],
"name": "CountVectorsFeaturizer",
"file": "component_4_CountVectorsFeaturizer.pkl",
"class": "rasa.nlu.featurizers.sparse_featurizer.count_vectors_featurizer.CountVectorsFeaturizer"
},
{
"hidden_layers_sizes": {
"text": [],
"label": []
},
"share_hidden_layers": false,
"transformer_size": 256,
"number_of_transformer_layers": 2,
"number_of_attention_heads": 4,
"use_key_relative_attention": false,
"use_value_relative_attention": false,
"max_relative_position": null,
"unidirectional_encoder": false,
"batch_size": [
64,
256
],
"batch_strategy": "balanced",
"epochs": 100,
"random_seed": null,
"learning_rate": 0.001,
"embedding_dimension": 20,
"dense_dimension": {
"text": 512,
"label": 20
},
"number_of_negative_examples": 20,
"similarity_type": "inner",
"loss_type": "softmax",
"ranking_length": 10,
"maximum_positive_similarity": 0.8,
"maximum_negative_similarity": -0.4,
"use_maximum_negative_similarity": true,
"scale_loss": false,
"regularization_constant": 0.002,
"negative_margin_scale": 0.8,
"drop_rate": 0.2,
"drop_rate_attention": 0,
"weight_sparsity": 0.8,
"use_sparse_input_dropout": true,
"use_dense_input_dropout": true,
"evaluate_every_number_of_epochs": 20,
"evaluate_on_number_of_examples": 0,
"intent_classification": true,
"entity_recognition": true,
"use_masked_language_model": false,
"BILOU_flag": true,
"tensorboard_log_directory": null,
"tensorboard_log_level": "epoch",
"name": "DIETClassifier",
"file": "component_5_DIETClassifier",
"class": "rasa.nlu.classifiers.diet_classifier.DIETClassifier"
},
{
"BILOU_flag": true,
"features": [
[
"low",
"title",
"upper"
],
[
"low",
"bias",
"prefix5",
"prefix2",
"suffix5",
"suffix3",
"suffix2",
"upper",
"title",
"digit",
"pattern"
],
[
"low",
"title",
"upper"
]
],
"max_iterations": 50,
"L1_c": 0.1,
"L2_c": 0.1,
"name": "CRFEntityExtractor",
"files": {
"entity": "component_6_CRFEntityExtractor.entity.pkl"
},
"class": "rasa.nlu.extractors.crf_entity_extractor.CRFEntityExtractor"
},
{
"name": "EntitySynonymMapper",
"file": null,
"class": "rasa.nlu.extractors.entity_synonyms.EntitySynonymMapper"
},
{
"hidden_layers_sizes": {
"text": [
256,
128
],
"label": [
256,
128
]
},
"share_hidden_layers": false,
"transformer_size": null,
"number_of_transformer_layers": 0,
"number_of_attention_heads": 4,
"use_key_relative_attention": false,
"use_value_relative_attention": false,
"max_relative_position": null,
"unidirectional_encoder": false,
"batch_size": [
64,
256
],
"batch_strategy": "balanced",
"epochs": 100,
"random_seed": null,
"learning_rate": 0.001,
"embedding_dimension": 20,
"dense_dimension": {
"text": 512,
"label": 512
},
"number_of_negative_examples": 20,
"similarity_type": "inner",
"loss_type": "softmax",
"ranking_length": 10,
"maximum_positive_similarity": 0.8,
"maximum_negative_similarity": -0.4,
"use_maximum_negative_similarity": true,
"scale_loss": true,
"regularization_constant": 0.002,
"weight_sparsity": 0.0,
"negative_margin_scale": 0.8,
"drop_rate": 0.2,
"drop_rate_attention": 0,
"use_sparse_input_dropout": false,
"use_dense_input_dropout": false,
"evaluate_every_number_of_epochs": 20,
"evaluate_on_number_of_examples": 0,
"use_masked_language_model": false,
"retrieval_intent": null,
"tensorboard_log_directory": null,
"tensorboard_log_level": "epoch",
"name": "ResponseSelector",
"intent_classification": true,
"entity_recognition": false,
"BILOU_flag": null,
"file": null,
"class": "rasa.nlu.selectors.response_selector.ResponseSelector"
}
],
"trained_at": "20201211-201842",
"rasa_version": "1.10.18"
}

0 comments on commit 17d8cbe

Please sign in to comment.