From 63c4b7965149e64a041f068182dc12af511726b5 Mon Sep 17 00:00:00 2001 From: Benedikt Schifferer Date: Wed, 5 Jul 2023 12:49:52 +0000 Subject: [PATCH 1/4] init tutorial --- docs/source/toc.yaml | 14 + examples/tutorials/README.md | 3 + examples/tutorials/basic/README.md | 1 + examples/tutorials/pytorch/README.md | 1 + .../tutorials/tf/DLRM-Ranking-Model.ipynb | 1069 +++++++++++++++++ examples/tutorials/tf/README.md | 1 + 6 files changed, 1089 insertions(+) create mode 100644 examples/tutorials/README.md create mode 100644 examples/tutorials/basic/README.md create mode 100644 examples/tutorials/pytorch/README.md create mode 100644 examples/tutorials/tf/DLRM-Ranking-Model.ipynb create mode 100644 examples/tutorials/tf/README.md diff --git a/docs/source/toc.yaml b/docs/source/toc.yaml index 3140d57c0..b430e6b0c 100644 --- a/docs/source/toc.yaml +++ b/docs/source/toc.yaml @@ -4,6 +4,20 @@ subtrees: entries: - file: README.md title: Introduction + - file: examples/tutorials/README.md + title: Tutorials + entries: + - file: examples/tutorials/basic/README.md + title: Basic + - file: examples/tutorials/tf/README.md + title: TensorFlow + entries: + - file: examples/tutorials/tf/DLRM-Ranking-Model.ipynb + title: DLRM Ranking Model + - file: examples/tutorials/tf/TwoTower-Retrieval-Model.ipynb + title: TwoTower Retrieval Model + - file: examples/tutorials/pytorch/README.md + title: PyTorch - file: guide/recommender_system_guide.rst title: Recommender System Guide - file: examples/index diff --git a/examples/tutorials/README.md b/examples/tutorials/README.md new file mode 100644 index 000000000..a65758ec0 --- /dev/null +++ b/examples/tutorials/README.md @@ -0,0 +1,3 @@ +# Tutorials + + diff --git a/examples/tutorials/basic/README.md b/examples/tutorials/basic/README.md new file mode 100644 index 000000000..c6d2eb525 --- /dev/null +++ b/examples/tutorials/basic/README.md @@ -0,0 +1 @@ +# Basic \ No newline at end of file diff --git a/examples/tutorials/pytorch/README.md 
b/examples/tutorials/pytorch/README.md new file mode 100644 index 000000000..91d4e61bc --- /dev/null +++ b/examples/tutorials/pytorch/README.md @@ -0,0 +1 @@ +# PyTorch \ No newline at end of file diff --git a/examples/tutorials/tf/DLRM-Ranking-Model.ipynb b/examples/tutorials/tf/DLRM-Ranking-Model.ipynb new file mode 100644 index 000000000..f6e0754b8 --- /dev/null +++ b/examples/tutorials/tf/DLRM-Ranking-Model.ipynb @@ -0,0 +1,1069 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "efd42658", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-07-05 10:26:20.573752: I tensorflow/core/platform/cpu_feature_guard.cc:183] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: SSE3 SSE4.1 SSE4.2 AVX, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n" + ] + } + ], + "source": [ + "import os\n", + "\n", + "from merlin.datasets.entertainment import get_movielens\n", + "\n", + "input_path = os.environ.get(\"INPUT_DATA_DIR\", os.path.expanduser(\"~/merlin-framework/movielens/\"))\n", + "\n", + "get_movielens(variant=\"ml-1m\", path=input_path);" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "5351b22e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "README ratings.dat users.dat\r\n", + "movies.dat train.parquet users_converted.parquet\r\n", + "movies_converted.parquet \u001b[0m\u001b[01;34mtransformed\u001b[0m/ valid.parquet\r\n" + ] + } + ], + "source": [ + "ls {input_path}/ml-1m #noqa" + ] + }, + { + "cell_type": "code", + 
"execution_count": 3, + "id": "c529e6e3", + "metadata": {}, + "outputs": [], + "source": [ + "from merlin.core.dispatch import get_lib\n", + "\n", + "train = get_lib().read_parquet(os.path.join(input_path, 'ml-1m', 'train.parquet'))\n", + "valid = get_lib().read_parquet(os.path.join(input_path, 'ml-1m', 'valid.parquet'))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9f0a2009", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
userIdmovieIdratingtimestamp
348678204329761974666164
34878520463185974666372
447226275527215973192895
607075368528044966861784
249905150517323983671138
\n", + "
" + ], + "text/plain": [ + " userId movieId rating timestamp\n", + "348678 2043 2976 1 974666164\n", + "348785 2046 318 5 974666372\n", + "447226 2755 2721 5 973192895\n", + "607075 3685 2804 4 966861784\n", + "249905 1505 1732 3 983671138" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "b728a155", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(,\n", + " )" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import nvtabular as nvt\n", + "from merlin.schema.tags import Tags\n", + "\n", + "train_ds = nvt.Dataset(train)\n", + "valid_ds = nvt.Dataset(valid)\n", + "\n", + "train_ds, valid_ds" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "bebe2cf6", + "metadata": {}, + "outputs": [], + "source": [ + "categorical_features = ['userId', 'movieId'] >> nvt.ops.Categorify()" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "f5e97c6b", + "metadata": {}, + "outputs": [], + "source": [ + "target = ['rating'] >> nvt.ops.LambdaOp(lambda x: (x>3).astype('int')) >> nvt.ops.AddMetadata(tags=[Tags.BINARY_CLASSIFICATION, Tags.TARGET])" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "a2e10d81", + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "%3\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "AddMetadata\n", + "\n", + "\n", + "\n", + "5\n", + "\n", + "+\n", + "\n", + "\n", + "\n", + "0->5\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "4\n", + "\n", + "nvt.ops.LambdaOp(lambda x: (x>3).astype('int'))\n", + "\n", + "\n", + "\n", + "4->0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "1\n", + "\n", + "Categorify\n", + "\n", + "\n", + "\n", + "1->5\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "2\n", + "\n", + "SelectionOp\n", + 
"\n", + "\n", + "\n", + "2->1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "2_selector\n", + "\n", + "['userId', 'movieId']\n", + "\n", + "\n", + "\n", + "2_selector->2\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "3\n", + "\n", + "SelectionOp\n", + "\n", + "\n", + "\n", + "3->4\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "3_selector\n", + "\n", + "['rating']\n", + "\n", + "\n", + "\n", + "3_selector->3\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "6\n", + "\n", + "output cols\n", + "\n", + "\n", + "\n", + "5->6\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "output = categorical_features+target" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "22f033c1", + "metadata": {}, + "outputs": [], + "source": [ + "workflow = nvt.Workflow(output)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "e3b8c089", + "metadata": {}, + "outputs": [], + "source": [ + "workflow.fit_transform(train_ds).to_parquet('train')\n", + "workflow.transform(valid_ds).to_parquet('valid')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "f821cf69", + "metadata": {}, + "outputs": [], + "source": [ + "train_transformed = nvt.Dataset('train', engine='parquet')\n", + "valid_transformed = nvt.Dataset('valid', engine='parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "49ee4a25", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nametagsdtypeis_listis_raggedproperties.num_bucketsproperties.freq_thresholdproperties.max_sizeproperties.cat_pathproperties.embedding_sizes.cardinalityproperties.embedding_sizes.dimensionproperties.domain.minproperties.domain.maxproperties.domain.name
0userId(Tags.CATEGORICAL)DType(name='int64', element_type=<ElementType....FalseFalseNaN0.00.0.//categories/unique.userId.parquet6043.0210.00.06042.0userId
1movieId(Tags.CATEGORICAL)DType(name='int64', element_type=<ElementType....FalseFalseNaN0.00.0.//categories/unique.movieId.parquet3683.0159.00.03682.0movieId
2rating(Tags.BINARY_CLASSIFICATION, Tags.TARGET)DType(name='int64', element_type=<ElementType....FalseFalseNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", + "
" + ], + "text/plain": [ + "[{'name': 'userId', 'tags': {}, 'properties': {'num_buckets': None, 'freq_threshold': 0.0, 'max_size': 0.0, 'cat_path': './/categories/unique.userId.parquet', 'embedding_sizes': {'cardinality': 6043.0, 'dimension': 210.0}, 'domain': {'min': 0, 'max': 6042, 'name': 'userId'}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'movieId', 'tags': {}, 'properties': {'num_buckets': None, 'freq_threshold': 0.0, 'max_size': 0.0, 'cat_path': './/categories/unique.movieId.parquet', 'embedding_sizes': {'cardinality': 3683.0, 'dimension': 159.0}, 'domain': {'min': 0, 'max': 3682, 'name': 'movieId'}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'rating', 'tags': {, }, 'properties': {}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_transformed.schema" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "d44eb819", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
userIdmovieIdrating
031435200
13171261
2593621901
3988951
48471730
\n", + "
" + ], + "text/plain": [ + " userId movieId rating\n", + "0 3143 520 0\n", + "1 3171 26 1\n", + "2 5936 2190 1\n", + "3 988 95 1\n", + "4 847 173 0" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_transformed.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "150b4f0a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[INFO]: sparse_operation_kit is imported\n", + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. 
The old module will be deleted in version 2.11.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[SOK INFO] Import /usr/local/lib/python3.8/dist-packages/merlin_sok-1.2.0-py3.8-linux-x86_64.egg/sparse_operation_kit/lib/libsok_experiment.so\n", + "[SOK INFO] Import /usr/local/lib/python3.8/dist-packages/merlin_sok-1.2.0-py3.8-linux-x86_64.egg/sparse_operation_kit/lib/libsok_experiment.so\n", + "[SOK INFO] Initialize finished, communication tool: horovod\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-07-05 10:26:54.777058: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:47] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n", + "2023-07-05 10:26:54.777165: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1638] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 16255 MB memory: -> device: 0, name: Tesla V100-SXM2-32GB-LS, pci bus id: 0000:07:00.0, compute capability: 7.0\n", + "2023-07-05 10:26:54.780396: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:47] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. 
Original config value was 0.\n", + "2023-07-05 10:26:54.780445: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1638] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 16255 MB memory: -> device: 1, name: Tesla V100-SXM2-32GB-LS, pci bus id: 0000:0a:00.0, compute capability: 7.0\n", + "2023-07-05 10:27:01.096455: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32\n", + "\t [[{{node Placeholder/_0}}]]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "778/782 [============================>.] - ETA: 0s - loss: 0.6216 - precision: 0.6355 - recall: 0.8855 - binary_accuracy: 0.6423 - auc: 0.6752 - regularization_loss: 0.0000e+00 - loss_batch: 0.6216" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-07-05 10:27:14.927580: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32\n", + "\t [[{{node Placeholder/_0}}]]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r", + "782/782 [==============================] - 16s 9ms/step - loss: 0.6213 - precision: 0.6358 - recall: 0.8852 - binary_accuracy: 0.6427 - auc: 0.6759 - regularization_loss: 0.0000e+00 - 
loss_batch: 0.6211 - val_loss: 0.5438 - val_precision: 0.7231 - val_recall: 0.8400 - val_binary_accuracy: 0.7220 - val_auc: 0.7886 - val_regularization_loss: 0.0000e+00 - val_loss_batch: 0.5553\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import tensorflow\n", + "import merlin.models.tf as mm\n", + "\n", + "model = mm.DLRMModel(\n", + " train_transformed.schema,\n", + " embedding_dim=64,\n", + " bottom_block=mm.MLPBlock([128, 64]),\n", + " top_block=mm.MLPBlock([128, 64, 32]),\n", + " prediction_tasks=mm.BinaryClassificationTask('rating')\n", + ")\n", + "\n", + "opt = tensorflow.optimizers.legacy.Adam(learning_rate=1e-3)\n", + "model.compile(optimizer=opt)\n", + "model.fit(train_transformed, validation_data=valid_transformed, batch_size=1024, epochs=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "5bba34be", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "workflow.remove_inputs(['rating'])" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "0b375c34", + "metadata": {}, + "outputs": [], + "source": [ + "from merlin.systems.dag.ensemble import Ensemble\n", + "from merlin.systems.dag.ops.workflow import TransformWorkflow\n", + "from merlin.systems.dag.ops.tensorflow import PredictTensorflow" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "686ca92f", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:absl:Function `_wrapped_model` contains input name(s) movieId, userId with unsupported characters which will be renamed to movieid, userid in the SavedModel.\n", + "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, 
prepare_list_features_layer_call_fn, prepare_list_features_layer_call_and_return_conditional_losses, output_layer_layer_call_fn while saving (showing 5 of 42). These functions will not be directly callable after loading.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /tmp/tmpf8xq84k4/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /tmp/tmpf8xq84k4/assets\n", + "WARNING:absl:Function `_wrapped_model` contains input name(s) movieId, userId with unsupported characters which will be renamed to movieid, userid in the SavedModel.\n", + "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, prepare_list_features_layer_call_fn, prepare_list_features_layer_call_and_return_conditional_losses, output_layer_layer_call_fn while saving (showing 5 of 42). These functions will not be directly callable after loading.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: ensemble4/1_predicttensorflowtriton/1/model.savedmodel/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: ensemble4/1_predicttensorflowtriton/1/model.savedmodel/assets\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. 
Compile it manually.\n" + ] + } + ], + "source": [ + "# Chain the fitted NVTabular workflow and the trained model into one serving graph.\n", + "\n", + "serving_operators = (\n", + " ['userId', 'movieId'] >> \n", + " TransformWorkflow(workflow) >>\n", + " PredictTensorflow(model)\n", + ")\n", + "ensemble = Ensemble(serving_operators, train_transformed.schema.remove_by_tag(Tags.TARGET))\n", + "\n", + "export_path = \"ensemble4\"\n", + "os.makedirs(export_path, exist_ok=True)  # tolerate re-running this cell\n", + "\n", + "ens_conf, node_confs = ensemble.export(export_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "fb793fcc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'ensemble4'" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "export_path" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "2ed9f170", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "client created.\n" + ] + } + ], + "source": [ + "import tritonclient.http as httpclient\n", + "\n", + "try:\n", + " triton_client = httpclient.InferenceServerClient(url=\"localhost:8000\", verbose=True)\n", + " print(\"client created.\")\n", + "except Exception as e:\n", + " print(\"channel creation failed: \" + str(e))" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "d0879dfc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "POST /v2/repository/index, headers None\n", + "\n", + "\n", + "bytearray(b'[{\"name\":\"0_transformworkflowtriton\",\"version\":\"1\",\"state\":\"READY\"},{\"name\":\"1_predicttensorflowtriton\",\"version\":\"1\",\"state\":\"READY\"},{\"name\":\"executor_model\",\"version\":\"1\",\"state\":\"READY\"}]')\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'name': '0_transformworkflowtriton', 'version': '1', 'state': 'READY'},\n", + " {'name': '1_predicttensorflowtriton', 'version': '1', 'state': 'READY'},\n", + " {'name': 
'executor_model', 'version': '1', 'state': 'READY'}]" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "triton_client.get_model_repository_index()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "87cd5998", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "POST /v2/repository/models/executor_model/load, headers None\n", + "{}\n", + "\n", + "Loaded model 'executor_model'\n", + "CPU times: user 687 ms, sys: 210 ms, total: 897 ms\n", + "Wall time: 32 s\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "triton_client.load_model(model_name=\"executor_model\")" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "57c8072a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " userId movieId\n", + "118806 768 2686\n", + "790460 4728 3160\n", + "451136 2777 11\n" + ] + } + ], + "source": [ + "from merlin.core.dispatch import get_lib\n", + "df_lib = get_lib()\n", + "\n", + "valid = df_lib.read_parquet(\n", + " os.path.join(input_path, 'ml-1m', 'valid.parquet'), columns=[\"userId\", \"movieId\"]\n", + ")\n", + "batch = valid[:3]\n", + "print(batch)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "457f78a0", + "metadata": {}, + "outputs": [], + "source": [ + "from merlin.schema import ColumnSchema, Schema\n", + "import merlin.dtypes as md\n", + "import merlin.systems.triton as merlin_triton\n", + "import tritonclient.grpc as grpcclient\n", + "\n", + "request_schema = Schema([\n", + " ColumnSchema(\"userId\", dtype=md.int64),\n", + " ColumnSchema(\"movieId\", dtype=md.int64),\n", + "])\n", + "\n", + "inputs = merlin_triton.convert_df_to_triton_input(request_schema, batch, grpcclient.InferInput)\n", + "\n", + "outputs = [\n", + " grpcclient.InferRequestedOutput(col)\n", + " for col in [\"rating/binary_classification_task\"]\n", + "]\n", + "\n", + "with 
grpcclient.InferenceServerClient(\"localhost:8001\") as client:\n", + " response = client.infer(\"executor_model\", inputs, request_id=\"1\", outputs=outputs)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "249a6043", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0.7387646]\n", + " [0.6237726]\n", + " [0.7899361]] (3, 1)\n" + ] + } + ], + "source": [ + "print(response.as_numpy(\"rating/binary_classification_task\"), response.as_numpy(\"rating/binary_classification_task\").shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "a3009009", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nametagsdtypeis_listis_ragged
0userId()DType(name='int64', element_type=<ElementType....FalseFalse
1movieId()DType(name='int64', element_type=<ElementType....FalseFalse
\n", + "
" + ], + "text/plain": [ + "[{'name': 'userId', 'tags': set(), 'properties': {}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'movieId', 'tags': set(), 'properties': {}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "workflow.input_schema" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "46798680", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fd7f8d4", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/tutorials/tf/README.md b/examples/tutorials/tf/README.md new file mode 100644 index 000000000..b13015ce9 --- /dev/null +++ b/examples/tutorials/tf/README.md @@ -0,0 +1 @@ +# TensorFlow Tutorials \ No newline at end of file From e79769edf596eb4ada6b578e481d0f78eacd9935 Mon Sep 17 00:00:00 2001 From: Benedikt Schifferer Date: Wed, 5 Jul 2023 12:54:17 +0000 Subject: [PATCH 2/4] remove retrieval --- docs/source/toc.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/toc.yaml b/docs/source/toc.yaml index b430e6b0c..dd798a1ff 100644 --- a/docs/source/toc.yaml +++ b/docs/source/toc.yaml @@ -14,8 +14,8 @@ 
subtrees: entries: - file: examples/tutorials/tf/DLRM-Ranking-Model.ipynb title: DLRM Ranking Model - - file: examples/tutorials/tf/TwoTower-Retrieval-Model.ipynb - title: TwoTower Retrieval Model +# - file: examples/tutorials/tf/TwoTower-Retrieval-Model.ipynb +# title: TwoTower Retrieval Model - file: examples/tutorials/pytorch/README.md title: PyTorch - file: guide/recommender_system_guide.rst From 4a511a48b32e4e23343e9ab332d5443e8db3449f Mon Sep 17 00:00:00 2001 From: Benedikt Schifferer Date: Wed, 5 Jul 2023 13:09:10 +0000 Subject: [PATCH 3/4] rename README.md to index.md --- docs/source/toc.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/toc.yaml b/docs/source/toc.yaml index dd798a1ff..cc57dc390 100644 --- a/docs/source/toc.yaml +++ b/docs/source/toc.yaml @@ -4,19 +4,19 @@ subtrees: entries: - file: README.md title: Introduction - - file: examples/tutorials/README.md + - file: examples/tutorials/index.md title: Tutorials entries: - - file: examples/tutorials/basic/README.md + - file: examples/tutorials/basic/index.md title: Basic - - file: examples/tutorials/tf/README.md + - file: examples/tutorials/tf/index.md title: TensorFlow entries: - file: examples/tutorials/tf/DLRM-Ranking-Model.ipynb title: DLRM Ranking Model # - file: examples/tutorials/tf/TwoTower-Retrieval-Model.ipynb # title: TwoTower Retrieval Model - - file: examples/tutorials/pytorch/README.md + - file: examples/tutorials/pytorch/index.md title: PyTorch - file: guide/recommender_system_guide.rst title: Recommender System Guide From 2950b3010c2903b344f5f7138fe8bdce9e93f92a Mon Sep 17 00:00:00 2001 From: Benedikt Schifferer Date: Wed, 5 Jul 2023 13:42:51 +0000 Subject: [PATCH 4/4] add title --- examples/tutorials/tf/DLRM-Ranking-Model.ipynb | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/examples/tutorials/tf/DLRM-Ranking-Model.ipynb b/examples/tutorials/tf/DLRM-Ranking-Model.ipynb index f6e0754b8..40c649a90 100644 --- 
a/examples/tutorials/tf/DLRM-Ranking-Model.ipynb +++ b/examples/tutorials/tf/DLRM-Ranking-Model.ipynb @@ -1,5 +1,13 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "a22c62bb", + "metadata": {}, + "source": [ + "# DLRM Ranking Model" + ] + }, { "cell_type": "code", "execution_count": 1,