Ran the training on Google Colab

PersephoneKarnstein · Jul 2, 2023 · a48e464 · a48e464
1 parent 66acec3
commit a48e464
Show file tree

Hide file tree

Showing 7 changed files with 22 additions and 17 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/README.md b/README.md
@@ -2,7 +2,10 @@
 
 The world is getting worse and transphobes have no imagination, so I figured a computer could probably do just as well thinking up talking points.
 
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/googlecolab/colabtools/blob/master/notebooks/colab-github-demo.ipynb)
 
+![](readme-content/Untitled-1.png)
+![](readme-content/Untitled-2.png)
 
 ## FAQ
 

diff --git a/models/model.h5 b/models/model.h5
diff --git a/models/model.json b/models/model.json
@@ -1,3 +1 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2c9b09ab56a2093bddbba4ffff890daccec700b44ae5a2da68a68b0d15563d26
-size 3803
+{"class_name": "Functional", "config": {"name": "model_1", "trainable": true, "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": [null, 56], "dtype": "int32", "sparse": false, "ragged": false, "name": "input_2"}, "name": "input_2", "inbound_nodes": []}, {"class_name": "keras_nlp>TokenAndPositionEmbedding", "config": {"name": "token_and_position_embedding_1", "trainable": true, "dtype": "float32", "vocabulary_size": 66216, "sequence_length": 56, "embedding_dim": 128, "embeddings_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "mask_zero": false}, "name": "token_and_position_embedding_1", "inbound_nodes": [[["input_2", 0, 0, {}]]]}, {"class_name": "keras_nlp>TransformerDecoder", "config": {"name": "transformer_decoder_4", "trainable": true, "dtype": "float32", "intermediate_dim": 256, "num_heads": 4, "dropout": 0.5, "activation": "relu", "layer_norm_epsilon": 1e-05, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "normalize_first": false, "build_input_shape": [null, 56, 128], "has_cross_attention": false}, "name": "transformer_decoder_4", "inbound_nodes": [[["token_and_position_embedding_1", 0, 0, {}]]]}, {"class_name": "keras_nlp>TransformerDecoder", "config": {"name": "transformer_decoder_5", "trainable": true, "dtype": "float32", "intermediate_dim": 256, "num_heads": 4, "dropout": 0.5, "activation": "relu", "layer_norm_epsilon": 1e-05, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "normalize_first": false, "build_input_shape": [null, 56, 128], "has_cross_attention": false}, "name": "transformer_decoder_5", "inbound_nodes": [[["transformer_decoder_4", 0, 0, {}]]]}, {"class_name": "keras_nlp>TransformerDecoder", "config": {"name": "transformer_decoder_6", "trainable": true, "dtype": "float32", "intermediate_dim": 256, "num_heads": 4, 
"dropout": 0.5, "activation": "relu", "layer_norm_epsilon": 1e-05, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "normalize_first": false, "build_input_shape": [null, 56, 128], "has_cross_attention": false}, "name": "transformer_decoder_6", "inbound_nodes": [[["transformer_decoder_5", 0, 0, {}]]]}, {"class_name": "keras_nlp>TransformerDecoder", "config": {"name": "transformer_decoder_7", "trainable": true, "dtype": "float32", "intermediate_dim": 256, "num_heads": 4, "dropout": 0.5, "activation": "relu", "layer_norm_epsilon": 1e-05, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "normalize_first": false, "build_input_shape": [null, 56, 128], "has_cross_attention": false}, "name": "transformer_decoder_7", "inbound_nodes": [[["transformer_decoder_6", 0, 0, {}]]]}, {"class_name": "Dropout", "config": {"name": "dropout_1", "trainable": true, "dtype": "float32", "rate": 0.4, "noise_shape": null, "seed": null}, "name": "dropout_1", "inbound_nodes": [[["transformer_decoder_7", 0, 0, {}]]]}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 66216, "activation": "softmax", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "dense_1", "inbound_nodes": [[["dropout_1", 0, 0, {}]]]}], "input_layers": [["input_2", 0, 0]], "output_layers": [["dense_1", 0, 0]]}, "keras_version": "2.12.0", "backend": "tensorflow"}
diff --git a/readme-content/Untitled-1.png b/readme-content/Untitled-1.png
diff --git a/readme-content/Untitled-2.png b/readme-content/Untitled-2.png
diff --git a/terfy/nlp-model.py b/terfy/nlp-model.py
@@ -1,11 +1,13 @@
 #https://stackabuse.com/gpt-style-text-generation-in-python-with-tensorflowkeras/
 
-import os, glob, keras_nlp, nltk.data, random
+import os, glob, keras_nlp, nltk.data, random,warnings
 import tensorflow as tf
 from tensorflow import keras
 from keras.models import model_from_json
 import numpy as np
 
+nltk.download('punkt', quiet=True)
+
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
 warnings.filterwarnings("ignore", category=UserWarning)
 
@@ -94,7 +96,7 @@ def create_model():
     inputs = keras.layers.Input(shape=(maxlen,), dtype=tf.int32)
     x = keras_nlp.layers.TokenAndPositionEmbedding(vocab_size, maxlen, embed_dim)(inputs)
     for i in range(4):
-        x = keras_nlp.layers.TransformerDecoder(intermediate_dim=embed_dim*2, num_heads=num_heads,                                                             dropout=0.5)(x)
+        x = keras_nlp.layers.TransformerDecoder(intermediate_dim=embed_dim*2, num_heads=num_heads, dropout=0.5)(x)
     do = keras.layers.Dropout(0.4)(x)
     outputs = keras.layers.Dense(vocab_size, activation='softmax')(do)
     model = keras.Model(inputs=inputs, outputs=outputs)
@@ -153,7 +155,6 @@ def generate_text(prompt, response_length=20):
         tokenized_prompt = vectorize_layer([decoded_sample])[:, :-1]
         predictions = model.predict([tokenized_prompt], verbose=0)
         sample_index = len(decoded_sample.strip().split())-1
-
         sampled_token = sample_token(predictions[0][sample_index])
         sampled_token = index_lookup[sampled_token]
         decoded_sample += " " + sampled_token
@@ -169,17 +170,20 @@ def save_model(model,path,filepath="models"):
 	# print("Saved model to disk")
 
 def load_model(filepath="models"):
-	path = os.getcwd()
-	# with redirect_stdout(open(os.devnull, 'w')):
-	json_file = open(path+'/'+filepath+'/model.json', 'r')
-	loaded_model_json = json_file.read()
-	json_file.close()
-	loaded_model = model_from_json(loaded_model_json)
-	# load weights into new model
-	loaded_model.load_weights(path+'/'+filepath+"/model.h5")
-	loaded_model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
-	print("Loaded model from disk")
-	return loaded_model
+    path = os.getcwd()
+    # with redirect_stdout(open(os.devnull, 'w')):
+    json_file = open(path+'/'+filepath+'/model.json', 'r')
+    loaded_model_json = json_file.read()
+    json_file.close()
+    loaded_model = model_from_json(loaded_model_json)
+    # load weights into new model
+    loaded_model.load_weights(path+'/'+filepath+"/model.h5")
+    loaded_model.compile(
+        optimizer="adam", 
+        loss='sparse_categorical_crossentropy',
+        metrics=[keras_nlp.metrics.Perplexity(), 'accuracy'])
+    print("Loaded model from disk")
+    return loaded_model
 
 path = os.getcwd()
 save_model(model,path)