From 0fe696e245874f7675ff3b4e72af325ba8f7d1b4 Mon Sep 17 00:00:00 2001 From: Rishi Chandra Date: Tue, 22 Oct 2024 16:18:02 +0000 Subject: [PATCH 1/4] Add Triton cell tags Signed-off-by: Rishi Chandra --- .../conditional_generation_tf.ipynb | 124 +++++++++++++++--- .../conditional_generation_torch.ipynb | 124 +++++++++++++++--- .../huggingface/pipelines_tf.ipynb | 66 ++++++++-- .../huggingface/pipelines_torch.ipynb | 66 ++++++++-- .../sentence_transformers_torch.ipynb | 72 ++++++++-- .../pytorch/image_classification_torch.ipynb | 71 ++++++++-- .../pytorch/regression_torch.ipynb | 67 ++++++++-- .../tensorflow/feature_columns_tf.ipynb | 87 +++++++++--- .../tensorflow/image_classification_tf.ipynb | 78 +++++++++-- .../tensorflow/keras-metadata_tf.ipynb | 64 +++++++-- .../tensorflow/text_classification_tf.ipynb | 66 ++++++++-- 11 files changed, 734 insertions(+), 151 deletions(-) diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/conditional_generation_tf.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/conditional_generation_tf.ipynb index e2c67eb9..3105e066 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/conditional_generation_tf.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/conditional_generation_tf.ipynb @@ -1039,7 +1039,11 @@ "cell_type": "code", "execution_count": 35, "id": "b858cf85-82e6-41ef-905b-d8c5d6fea492", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "import os" @@ -1049,7 +1053,11 @@ "cell_type": "code", "execution_count": 36, "id": "05ce7c77-d562-45e8-89bb-cd656aba5a5f", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "%%bash\n", @@ -1076,7 +1084,11 @@ "cell_type": "code", "execution_count": 37, "id": "afd00b7e-8150-4c95-a2e4-037e9c90f92a", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -1160,7 +1172,9 @@ "execution_count": 38, "id": "1a997c33-5202-466d-8304-b8c30f32978f", "metadata": { - "tags": [] + "tags": [ + "TRITON" + ] }, "outputs": [], "source": [ @@ -1175,7 +1189,11 @@ "cell_type": "code", "execution_count": 39, "id": "9dea1875-6b95-4fc0-926d-a625a441b33d", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "# only use first N examples, since this is slow\n", @@ -1186,7 +1204,11 @@ "cell_type": "code", "execution_count": 40, "id": "5d6c54e7-534d-406f-b8e6-fd592efd0ab2", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "# only use first sentence and add prefix for conditional generation\n", @@ -1201,7 +1223,11 @@ "cell_type": "code", "execution_count": 41, "id": "dc1bbbe3-4232-49e5-80f6-99976524b73b", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "# only use first 100 rows, since generation takes a while\n", @@ -1212,7 +1238,11 @@ "cell_type": "code", "execution_count": 42, "id": "5d10c61c-6102-4d19-8dd6-0c7b5b65343e", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stdout", @@ -1255,7 +1285,11 @@ "cell_type": "code", "execution_count": 43, "id": "2e0907da-a5d9-4c3b-9db4-ce5e70ca9bb4", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "def triton_fn(triton_uri, model_name):\n", @@ -1305,7 +1339,11 @@ "cell_type": "code", "execution_count": 44, "id": "9308bdd7-6f67-484d-8b51-dd1e1b2960ba", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "generate = predict_batch_udf(partial(triton_fn, triton_uri=\"localhost:8001\", model_name=\"hf_generation_tf\"),\n", @@ -1318,7 +1356,11 @@ "cell_type": "code", "execution_count": 45, "id": "38484ffd-370d-492b-8ca4-9eff9f242a9f", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -1354,7 +1396,11 @@ "cell_type": "code", "execution_count": 46, "id": "ebcb6699-3ac2-4529-ab0f-fab0a5e792da", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -1389,7 +1435,11 @@ "cell_type": "code", "execution_count": 47, "id": "e2ed18ad-d00b-472c-b2c3-047932f2105d", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -1424,7 +1474,11 @@ "cell_type": "code", "execution_count": 48, "id": "0cd64a1c-beb8-47d5-ac6f-e8525bb61176", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -1481,7 +1535,11 @@ "cell_type": "code", "execution_count": 49, "id": "af70fed8-0f2b-4ea7-841c-476afdf9b1c0", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -1500,7 +1558,11 @@ "cell_type": "code", "execution_count": 50, "id": "ef075e10-e22c-4236-9e0b-cb47cf2d3d06", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stdout", @@ -1543,7 +1605,11 @@ "cell_type": "code", "execution_count": 51, "id": "2e7e4af8-b815-4375-b851-8368309ee8e1", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -1578,7 +1644,11 @@ "cell_type": "code", "execution_count": 52, "id": "7b0aefb0-a96b-4791-a23c-1ce9b24eb20c", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -1613,7 +1683,11 @@ "cell_type": "code", "execution_count": 53, "id": "1214b75b-a373-4579-b4c6-0cb8627da776", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -1648,7 +1722,11 @@ "cell_type": "code", "execution_count": 54, "id": "c9dbd21f-9e37-4221-b765-80ba8c80b884", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -1715,7 +1793,11 @@ "cell_type": "code", "execution_count": 55, "id": "425d3b28-7705-45ba-8a18-ad34fc895219", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/conditional_generation_torch.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/conditional_generation_torch.ipynb index a09bede3..94cb7df1 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/conditional_generation_torch.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/conditional_generation_torch.ipynb @@ -960,7 +960,11 @@ "cell_type": "code", "execution_count": 28, "id": "b858cf85-82e6-41ef-905b-d8c5d6fea492", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "import os" @@ -970,7 +974,11 @@ "cell_type": "code", "execution_count": 29, "id": "05ce7c77-d562-45e8-89bb-cd656aba5a5f", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "%%bash\n", @@ -997,7 +1005,11 @@ "cell_type": "code", "execution_count": 30, "id": "afd00b7e-8150-4c95-a2e4-037e9c90f92a", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -1079,7 +1091,9 @@ "execution_count": 31, "id": "1a997c33-5202-466d-8304-b8c30f32978f", "metadata": { - "tags": [] + "tags": [ + "TRITON" + ] }, "outputs": [], "source": [ @@ -1094,7 +1108,11 @@ "cell_type": "code", "execution_count": 32, "id": "9dea1875-6b95-4fc0-926d-a625a441b33d", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "# only use first N examples, since this is slow\n", @@ -1105,7 +1123,11 @@ "cell_type": "code", "execution_count": 33, "id": "5d6c54e7-534d-406f-b8e6-fd592efd0ab2", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "# only use first sentence and add prefix for conditional generation\n", @@ -1120,7 +1142,11 @@ "cell_type": "code", "execution_count": 34, "id": "dc1bbbe3-4232-49e5-80f6-99976524b73b", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "# only use first 100 rows, since generation takes a while\n", @@ -1131,7 +1157,11 @@ "cell_type": "code", "execution_count": 35, "id": "5d10c61c-6102-4d19-8dd6-0c7b5b65343e", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stdout", @@ -1174,7 +1204,11 @@ "cell_type": "code", "execution_count": 36, "id": "2e0907da-a5d9-4c3b-9db4-ce5e70ca9bb4", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "def triton_fn(triton_uri, model_name):\n", @@ -1224,7 +1258,11 @@ "cell_type": "code", "execution_count": 37, "id": "9308bdd7-6f67-484d-8b51-dd1e1b2960ba", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "generate = predict_batch_udf(partial(triton_fn, triton_uri=\"localhost:8001\", model_name=\"hf_generation_torch\"),\n", @@ -1237,7 +1275,11 @@ "cell_type": "code", "execution_count": 38, "id": "38484ffd-370d-492b-8ca4-9eff9f242a9f", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -1273,7 +1315,11 @@ "cell_type": "code", "execution_count": 39, "id": "ebcb6699-3ac2-4529-ab0f-fab0a5e792da", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -1308,7 +1354,11 @@ "cell_type": "code", "execution_count": 40, "id": "e2ed18ad-d00b-472c-b2c3-047932f2105d", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -1343,7 +1393,11 @@ "cell_type": "code", "execution_count": 41, "id": "0cd64a1c-beb8-47d5-ac6f-e8525bb61176", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stdout", @@ -1386,7 +1440,11 @@ "cell_type": "code", "execution_count": 42, "id": "af70fed8-0f2b-4ea7-841c-476afdf9b1c0", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -1405,7 +1463,11 @@ "cell_type": "code", "execution_count": 43, "id": "ef075e10-e22c-4236-9e0b-cb47cf2d3d06", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stdout", @@ -1448,7 +1510,11 @@ "cell_type": "code", "execution_count": 44, "id": "2e7e4af8-b815-4375-b851-8368309ee8e1", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -1483,7 +1549,11 @@ "cell_type": "code", "execution_count": 45, "id": "7b0aefb0-a96b-4791-a23c-1ce9b24eb20c", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -1518,7 +1588,11 @@ "cell_type": "code", "execution_count": 46, "id": "1214b75b-a373-4579-b4c6-0cb8627da776", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -1553,7 +1627,11 @@ "cell_type": "code", "execution_count": 47, "id": "c9dbd21f-9e37-4221-b765-80ba8c80b884", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stdout", @@ -1606,7 +1684,11 @@ "cell_type": "code", "execution_count": 48, "id": "425d3b28-7705-45ba-8a18-ad34fc895219", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/pipelines_tf.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/pipelines_tf.ipynb index 115cfffc..dcba0be8 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/pipelines_tf.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/pipelines_tf.ipynb @@ -589,7 +589,11 @@ "cell_type": "code", "execution_count": 22, "id": "4d4be844-4b8c-47df-bd09-0c280c7ff16b", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "import numpy as np\n", @@ -604,7 +608,11 @@ "cell_type": "code", "execution_count": 23, "id": "7e53df9f-43cb-4c38-b8ac-dc2cbad99815", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "%%bash\n", @@ -631,7 +639,11 @@ "cell_type": "code", "execution_count": 24, "id": "144acb8e-4c08-40fc-a9ed-f721c409ee68", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -717,7 +729,11 @@ "cell_type": "code", "execution_count": 25, "id": "d53fb283-bf9e-4571-8c68-b75a41f1f067", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "# only use first sentence of IMDB reviews\n", @@ -732,7 +748,11 @@ "cell_type": "code", "execution_count": 26, "id": "29b0cc0d-c480-4e4a-bd41-207dc314cba5", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "def triton_fn(triton_uri, model_name):\n", @@ -782,7 +802,11 @@ "cell_type": "code", "execution_count": 27, "id": "3930cfcd-3284-4c6a-a9b5-36b8053fe899", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "from functools import partial\n", @@ -800,7 +824,11 @@ "cell_type": "code", "execution_count": 28, "id": "8eecbf23-4e9e-4d4c-8645-98209b25db2c", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -837,7 +865,11 @@ "cell_type": "code", "execution_count": 29, "id": "566ba28c-0ca4-4479-a24a-c8a362228b89", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -873,7 +905,11 @@ "cell_type": "code", "execution_count": 30, "id": "44c7e776-08da-484a-ba07-9d6add1a0f15", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -909,7 +945,11 @@ "cell_type": "code", "execution_count": 31, "id": "f61d79f8-661e-4d9e-a3aa-c0754b854603", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -976,7 +1016,11 @@ "cell_type": "code", "execution_count": 32, "id": "425d3b28-7705-45ba-8a18-ad34fc895219", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/pipelines_torch.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/pipelines_torch.ipynb index 47f45b67..1e99ed36 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/pipelines_torch.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/pipelines_torch.ipynb @@ -484,7 +484,11 @@ "cell_type": "code", "execution_count": 21, "id": "4d4be844-4b8c-47df-bd09-0c280c7ff16b", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "import numpy as np\n", @@ -499,7 +503,11 @@ "cell_type": "code", "execution_count": 22, "id": "7e53df9f-43cb-4c38-b8ac-dc2cbad99815", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "%%bash\n", @@ -526,7 +534,11 @@ "cell_type": "code", "execution_count": 23, "id": "144acb8e-4c08-40fc-a9ed-f721c409ee68", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -612,7 +624,11 @@ "cell_type": "code", "execution_count": 24, "id": "d53fb283-bf9e-4571-8c68-b75a41f1f067", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "# only use first sentence of IMDB reviews\n", @@ -627,7 +643,11 @@ "cell_type": "code", "execution_count": 25, "id": "29b0cc0d-c480-4e4a-bd41-207dc314cba5", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "def triton_fn(triton_uri, model_name):\n", @@ -677,7 +697,11 @@ "cell_type": "code", "execution_count": 26, "id": "3930cfcd-3284-4c6a-a9b5-36b8053fe899", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "from functools import partial\n", @@ -695,7 +719,11 @@ "cell_type": "code", "execution_count": 27, "id": "8eecbf23-4e9e-4d4c-8645-98209b25db2c", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -732,7 +760,11 @@ "cell_type": "code", "execution_count": 28, "id": "566ba28c-0ca4-4479-a24a-c8a362228b89", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -768,7 +800,11 @@ "cell_type": "code", "execution_count": 29, "id": "44c7e776-08da-484a-ba07-9d6add1a0f15", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -804,7 +840,11 @@ "cell_type": "code", "execution_count": 30, "id": "f61d79f8-661e-4d9e-a3aa-c0754b854603", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stdout", @@ -857,7 +897,11 @@ "cell_type": "code", "execution_count": 31, "id": "425d3b28-7705-45ba-8a18-ad34fc895219", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/sentence_transformers_torch.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/sentence_transformers_torch.ipynb index 4a8a0407..deac314d 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/sentence_transformers_torch.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/huggingface/sentence_transformers_torch.ipynb @@ -444,7 +444,11 @@ "cell_type": "code", "execution_count": 14, "id": "772e337e-1098-4c7b-ba81-8cb221a518e2", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "import numpy as np\n", @@ -458,7 +462,11 @@ "cell_type": "code", "execution_count": 15, "id": "69d0c93a-bb0b-46c5-9d28-7b08a2e70964", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "%%bash\n", @@ -485,7 +493,11 @@ "cell_type": "code", "execution_count": 16, "id": "1654cdc1-4f9a-4fd5-b7ac-6ca4215bde5d", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -566,7 +578,11 @@ "cell_type": "code", "execution_count": 17, "id": "2969d502-e97b-49d6-bf80-7d177ae867cf", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "from functools import partial\n", @@ -579,7 +595,11 @@ "cell_type": "code", "execution_count": 18, "id": "c8f1e6d6-6519-49e7-8465-4419547633b8", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -598,7 +618,11 @@ "cell_type": "code", "execution_count": 19, "id": "29b0cc0d-c480-4e4a-bd41-207dc314cba5", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "def triton_fn(triton_uri, model_name):\n", @@ -648,7 +672,11 @@ "cell_type": "code", "execution_count": 20, "id": "9c712b8f-6eb4-4fb8-9f0a-04feef847fea", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "encode = predict_batch_udf(partial(triton_fn, triton_uri=\"localhost:8001\", model_name=\"hf_transformer_torch\"),\n", @@ -661,7 +689,11 @@ "cell_type": "code", "execution_count": 21, "id": "934c1a1f-b126-45b0-9c15-265236820ad3", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stdout", @@ -683,7 +715,11 @@ "cell_type": "code", "execution_count": 22, "id": "f84cd3f6-b6a8-4142-859a-91f3c183457b", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stdout", @@ -704,7 +740,11 @@ "cell_type": "code", "execution_count": 23, "id": "921a4c01-e296-4406-be90-86f20c8c582d", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stdout", @@ -725,7 +765,11 @@ "cell_type": "code", "execution_count": 24, "id": "9f67584e-9c4e-474f-b6ea-7811b14d116e", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stdout", @@ -778,7 +822,11 @@ "cell_type": "code", "execution_count": 25, "id": "d8e5466b-b5dc-4fe1-9012-0c87cdd72962", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/pytorch/image_classification_torch.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/pytorch/image_classification_torch.ipynb index ea07ce28..b6d739a3 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/pytorch/image_classification_torch.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/pytorch/image_classification_torch.ipynb @@ -2007,7 +2007,11 @@ "cell_type": "code", "execution_count": 57, "id": "53ca290a-ccc3-4923-a292-944921bab36d", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "import numpy as np\n", @@ -2022,7 +2026,11 @@ "cell_type": "code", "execution_count": 58, "id": "8fa92fe4-2e04-4d82-a357-bfdfca38bd8c", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "%%bash\n", @@ -2047,7 +2055,11 @@ "cell_type": "code", "execution_count": null, "id": "5e869730-3597-4074-bab0-f87768f8996a", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "num_executors = 1\n", @@ -2103,7 +2115,11 @@ "cell_type": "code", "execution_count": 60, "id": "ab94d4d1-dac6-4474-9eb0-59478aa98f7d", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "data": { @@ -2126,7 +2142,10 @@ "execution_count": 61, "id": "12b5f2fc-52e9-428a-b683-6ab1b639aa24", "metadata": { - "scrolled": true + "scrolled": true, + "tags": [ + "TRITON" + ] }, "outputs": [ { @@ -2148,7 +2167,11 @@ "cell_type": "code", "execution_count": 62, "id": "960657d0-31c9-4df6-8eb8-ac3d23137f7a", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "def triton_fn(triton_uri, model_name):\n", @@ -2198,7 +2221,11 @@ "cell_type": "code", "execution_count": 63, "id": "0262fd4a-9845-44b9-8c75-1c105e7deeca", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "mnist = predict_batch_udf(partial(triton_fn, triton_uri=\"localhost:8001\", model_name=\"fashion_mnist\"),\n", @@ -2211,7 +2238,11 @@ "cell_type": "code", "execution_count": 64, "id": "fc5f6baa-052e-4b89-94b6-4821cf01952a", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stdout", @@ -2231,7 +2262,11 @@ "cell_type": "code", "execution_count": 65, "id": "a85dea35-e41d-482d-8a8f-52d3c108f038", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stdout", @@ -2251,7 +2286,11 @@ "cell_type": "code", "execution_count": 66, "id": "bc3f0dbe-c52b-41d6-8097-8cebaa5ee5a8", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stdout", @@ -2271,7 +2310,11 @@ "cell_type": "code", "execution_count": 67, "id": "99fb5e8d", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stdout", @@ -2318,7 +2361,11 @@ "cell_type": "code", "execution_count": null, "id": "ab2fe42f-a072-4370-bac2-52fd95363530", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "def stop_triton(it):\n", diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/pytorch/regression_torch.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/pytorch/regression_torch.ipynb index 5ccc22ec..3412f91a 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/pytorch/regression_torch.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/pytorch/regression_torch.ipynb @@ -2273,7 +2273,11 @@ "cell_type": "code", "execution_count": 73, "id": "a9ab4cdf-8103-447e-9ac8-944e2e527239", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "import numpy as np\n", @@ -2288,7 +2292,11 @@ "cell_type": "code", "execution_count": 74, "id": "6632636e-67a3-406c-832c-758aac4245fd", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "%%bash\n", @@ -2313,7 +2321,10 @@ "execution_count": 75, "id": "c6fd1612-de6a-461c-a2ad-1a3fcd277d66", "metadata": { - "scrolled": true + "scrolled": true, + "tags": [ + "TRITON" + ] }, "outputs": [ { @@ -2388,7 +2399,11 @@ "cell_type": "code", "execution_count": 76, "id": "5eae04bc-75ca-421a-87c8-ac507ce1f2f5", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "df = spark.read.parquet(\"california_housing\")" @@ -2399,7 +2414,9 @@ "execution_count": 77, "id": "b350bd8e-9b8f-4511-9ddf-76d917b21b5f", "metadata": { - "tags": [] + "tags": [ + "TRITON" + ] }, "outputs": [ { @@ -2429,7 +2446,11 @@ "cell_type": "code", "execution_count": 78, "id": "69b343ec-688d-4e4d-985e-db72beaaf00c", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "def triton_fn(triton_uri, model_name):\n", @@ -2479,7 +2500,11 @@ "cell_type": "code", "execution_count": 79, "id": "d3e64fda-117b-4810-a9a2-dd498239496f", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "classify = predict_batch_udf(partial(triton_fn, triton_uri=\"localhost:8001\", model_name=\"housing_model\"),\n", @@ -2492,7 +2517,11 @@ "cell_type": "code", "execution_count": 80, "id": "a24149a5-3adc-4089-8769-13cf1e44547a", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stdout", @@ -2514,7 +2543,11 @@ "cell_type": "code", "execution_count": 81, "id": "df2ce39f-30af-491a-8472-800fb1ce8458", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stdout", @@ -2535,7 +2568,11 @@ "cell_type": "code", "execution_count": 82, "id": "ca6f3eaa-9569-45d0-88bf-9aa0757e1ecb", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "# should raise ValueError\n", @@ -2548,7 +2585,9 @@ "execution_count": 83, "id": "b79c62c8-e1e8-4467-8aef-8939c31833b8", "metadata": { - "tags": [] + "tags": [ + "TRITON" + ] }, "outputs": [ { @@ -2602,7 +2641,11 @@ "cell_type": "code", "execution_count": 84, "id": "15e9b3df-f3c9-46bb-bbeb-42496f7663de", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/feature_columns_tf.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/feature_columns_tf.ipynb index 17af1c93..d159f4f1 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/feature_columns_tf.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/feature_columns_tf.ipynb @@ -1267,7 +1267,11 @@ "cell_type": "code", "execution_count": 45, "id": "2605d134-ef75-4d94-9b16-2c6d85f29bef", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "import numpy as np\n", @@ -1281,7 +1285,11 @@ "cell_type": "code", "execution_count": 46, "id": "4666e618-8038-4dc5-9be7-793aedbf4500", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -1316,7 +1324,10 @@ "execution_count": 47, "id": "a7fb146c-5319-4831-85f7-f2f3c084b042", "metadata": { - "scrolled": true + "scrolled": true, + "tags": [ + "TRITON" + ] }, "outputs": [ { @@ -1395,7 +1406,11 @@ "cell_type": "code", "execution_count": 48, "id": "fe8dc3e6-f1b1-4a24-85f4-0a5ecabef4c5", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "df = spark.read.parquet(\"datasets/petfinder-mini\")" @@ -1405,7 +1420,11 @@ "cell_type": "code", "execution_count": 49, "id": "ce92f041-930f-48ed-9a03-19f6c249ca27", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stdout", @@ -1433,7 +1452,11 @@ "cell_type": "code", "execution_count": 50, "id": "4cfb3f34-a215-4781-91bf-2bec85e15633", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stdout", @@ -1454,7 +1477,9 @@ "id": "b315ee72-62af-476b-a994-0dba72d5f96e", "metadata": { "scrolled": true, - "tags": [] + "tags": [ + "TRITON" + ] }, "outputs": [ { @@ -1475,7 +1500,11 @@ "cell_type": "code", "execution_count": 52, "id": "da004eca-f7ad-4ee3-aa88-a6a20c1b72e5", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "def triton_fn(triton_uri, model_name):\n", @@ -1545,7 +1574,11 @@ "cell_type": "code", "execution_count": 53, "id": "2ffb020e-dc93-456b-bee6-405611eee1e1", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "from functools import partial\n", @@ -1561,7 +1594,11 @@ "cell_type": "code", "execution_count": 54, "id": "7657f820-5ec2-4ac8-a107-4b58773d204a", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stdout", @@ -1597,7 +1634,7 @@ } ], "source": [ - "# FAILS: Op type not registered 'DenseBincount' WITHOUT custom python backend\n", + "# WITHOUT custom python backend, FAILS with: Op type not registered 'DenseBincount' \n", "df.withColumn(\"preds\", classify(struct(*columns))).show(truncate=10)" ] }, @@ -1605,7 +1642,11 @@ "cell_type": "code", "execution_count": 55, "id": "e6ff0356-becd-421f-aebb-272497d5ad6a", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -1633,7 +1674,11 @@ "cell_type": "code", "execution_count": 56, "id": "ce18ee7c-5958-4986-b200-6d986fcc6243", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -1669,7 +1714,9 @@ "execution_count": 57, "id": "0888ce40-b2c4-4aed-8ccb-6a8bcd00abc8", "metadata": { - "tags": [] + "tags": [ + "TRITON" + ] }, "outputs": [ { @@ -1698,7 +1745,11 @@ "cell_type": "code", "execution_count": 58, "id": "d45812b5-f584-41a4-a821-2b59e065671c", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stdout", @@ -1751,7 +1802,11 @@ "cell_type": "code", "execution_count": 59, "id": "6914f44f-677f-4db3-be09-783df8d11b8a", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/image_classification_tf.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/image_classification_tf.ipynb index 5add2686..f7df9f51 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/image_classification_tf.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/image_classification_tf.ipynb @@ -1950,7 +1950,11 @@ "cell_type": "code", "execution_count": 52, "id": "a64d19b1-ba4a-4dc7-b3a9-368dc47d0fd8", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "import os\n", @@ -1963,7 +1967,11 @@ "cell_type": "code", "execution_count": 53, "id": "8fa92fe4-2e04-4d82-a357-bfdfca38bd8c", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "%%bash\n", @@ -1988,7 +1996,11 @@ "cell_type": "code", "execution_count": 54, "id": "0f7ecb25-be16-40c4-bdbb-441e2f537000", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -2055,7 +2067,11 @@ "cell_type": "code", "execution_count": 55, "id": "43b93753-1d52-4060-9986-f24c30a67528", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "data": { @@ -2085,7 +2101,11 @@ "cell_type": "code", "execution_count": 56, "id": "3af08bd0-3838-4769-a8de-2643db4101c6", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "def triton_fn(triton_uri, model_name):\n", @@ -2135,7 +2155,11 @@ "cell_type": "code", "execution_count": 57, "id": "6658d2a1-ef7b-4ca1-9fb6-f2ac9050f3e5", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "from functools import partial\n", @@ -2150,7 +2174,11 @@ "cell_type": "code", "execution_count": 58, "id": "8397aa14-82fd-4351-a477-dc8e8b321fa2", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stdout", @@ -2170,7 +2198,11 @@ "cell_type": "code", "execution_count": 59, "id": "82698bd9-377a-4415-8971-835487f876cc", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stdout", @@ -2190,7 +2222,11 @@ "cell_type": "code", "execution_count": 60, "id": "419ad7bd-fa28-49d3-b98d-db9fba5aeaef", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stdout", @@ -2321,7 +2357,11 @@ "cell_type": "code", "execution_count": 61, "id": "79d90a26", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", @@ -2332,7 +2372,11 @@ "cell_type": "code", "execution_count": 62, "id": "4ca495f5", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "sample = preds.iloc[0]\n", @@ -2346,7 +2390,11 @@ "cell_type": "code", "execution_count": 63, "id": "a5d10903", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "data": { @@ -2380,7 +2428,11 @@ "cell_type": "code", "execution_count": 64, "id": "9c9fd967-5cd9-4265-add9-db5c1ccf9893", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/keras-metadata_tf.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/keras-metadata_tf.ipynb index e0683e38..007f6d8a 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/keras-metadata_tf.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/keras-metadata_tf.ipynb @@ -768,7 +768,11 @@ "cell_type": "code", "execution_count": 29, "id": "2605d134-ef75-4d94-9b16-2c6d85f29bef", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "import os\n", @@ -781,7 +785,11 @@ "cell_type": "code", "execution_count": 30, "id": "4666e618-8038-4dc5-9be7-793aedbf4500", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "%%bash\n", @@ -806,7 +814,11 @@ "cell_type": "code", "execution_count": 31, "id": "8c8c0744-0558-4dac-bbfe-8bdde4b2af2d", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -880,7 +892,11 @@ "cell_type": "code", "execution_count": 32, "id": "bcd46360-6851-4a9d-8590-c086e001242a", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "def triton_fn(triton_uri, model_name):\n", @@ -931,7 +947,11 @@ "cell_type": "code", "execution_count": 33, "id": "9fabcaeb-5a44-42bb-8097-5dbc2d0cee3e", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "from functools import partial\n", @@ -946,7 +966,11 @@ "cell_type": "code", "execution_count": 34, "id": "b17f33c8-a0f0-4bce-91f8-5838ba9b12a7", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "# spark.conf.set(\"spark.sql.execution.arrow.maxRecordsPerBatch\", \"1024\")\n", @@ -957,7 +981,11 @@ "cell_type": "code", "execution_count": 35, "id": "8e5b9e99-a1cf-43d3-a795-c7271a917057", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "df = spark.read.parquet(\"image_data.parquet\")" @@ -968,7 +996,9 @@ "execution_count": 36, "id": "e595473d-1a5d-46a6-a6ba-89d2ea903de9", "metadata": { - "tags": [] + "tags": [ + "TRITON" + ] }, "outputs": [ { @@ -1031,7 +1061,11 @@ "cell_type": "code", "execution_count": 37, "id": "5f66d468-e0b1-4589-8606-b3848063a823", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -1092,7 +1126,11 @@ "cell_type": "code", "execution_count": 38, "id": "632c4c3a-fa52-4c3d-b71e-7526286e353a", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -1137,7 +1175,11 @@ "cell_type": "code", "execution_count": 39, "id": "bbfcaa51-3b9f-43ff-a4a8-4b46766115b8", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/text_classification_tf.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/text_classification_tf.ipynb index 971bf393..98dba357 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/text_classification_tf.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/text_classification_tf.ipynb @@ -1374,7 +1374,11 @@ "cell_type": "code", "execution_count": 47, "id": "772e337e-1098-4c7b-ba81-8cb221a518e2", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "import numpy as np\n", @@ -1388,7 +1392,11 @@ "cell_type": "code", "execution_count": 48, "id": "69d0c93a-bb0b-46c5-9d28-7b08a2e70964", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "%%bash\n", @@ -1405,7 +1413,11 @@ "cell_type": "code", "execution_count": 49, "id": "f4f14c8f", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "import unicodedata\n", @@ -1444,7 +1456,11 @@ "cell_type": "code", "execution_count": 50, "id": "a7fb146c-5319-4831-85f7-f2f3c084b042", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", @@ -1522,7 +1538,11 @@ "cell_type": "code", "execution_count": 51, "id": "41106a02-236e-4cb3-ac51-76aa64b663c2", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stdout", @@ -1574,7 +1594,11 @@ "cell_type": "code", "execution_count": 52, "id": "8b763167-7f50-4278-9bc9-6c3433b62294", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "data": { @@ -1596,7 +1620,11 @@ "cell_type": "code", "execution_count": 53, "id": "29b0cc0d-c480-4e4a-bd41-207dc314cba5", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "def triton_fn(triton_uri, model_name):\n", @@ -1646,7 +1674,11 @@ "cell_type": "code", "execution_count": 54, "id": "8e06d33f-5cef-4a48-afc3-5d468f8ec2b4", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [], "source": [ "from functools import partial\n", @@ -1661,7 +1693,11 @@ "cell_type": "code", "execution_count": 55, "id": "d89e74ad-e551-4bfa-ad08-98725878630a", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stdout", @@ -1714,7 +1750,11 @@ "cell_type": "code", "execution_count": 56, "id": "b4fa7fc9-341c-49a6-9af2-e316f2355d67", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stdout", @@ -1770,7 +1810,11 @@ "cell_type": "code", "execution_count": 57, "id": "a71ac9b6-47a2-4306-bc40-9ce7b4e968ec", - "metadata": {}, + "metadata": { + "tags": [ + "TRITON" + ] + }, "outputs": [ { "name": "stderr", From 110c6f7ddd0198e83cb05c85ba376d7df6f2153f Mon Sep 17 00:00:00 2001 From: Rishi Chandra Date: Wed, 23 Oct 2024 15:22:13 +0000 Subject: [PATCH 2/4] Avoid relative paths in tf notebooks --- .../tensorflow/feature_columns_tf.ipynb | 30 +++++++------- .../tensorflow/text_classification_tf.ipynb | 40 +++++++++++-------- 2 files changed, 38 insertions(+), 32 deletions(-) diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/feature_columns_tf.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/feature_columns_tf.ipynb index d159f4f1..c943f868 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/feature_columns_tf.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/feature_columns_tf.ipynb @@ -23,7 +23,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "01162f42-0637-4dfe-8d7d-b577e4ffd017", "metadata": {}, "outputs": [ @@ -31,13 +31,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-10-03 17:38:52.548855: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", - "2024-10-03 17:38:52.555529: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", - "2024-10-03 17:38:52.563119: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", - "2024-10-03 17:38:52.565499: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", - "2024-10-03 17:38:52.571252: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "2024-10-23 15:06:03.148933: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", + "2024-10-23 15:06:03.156486: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-10-23 15:06:03.164237: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-10-23 15:06:03.166475: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-10-23 15:06:03.172837: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2024-10-03 17:38:52.894224: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" + "2024-10-23 15:06:03.498973: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" ] } ], @@ -51,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "9fa3e1b7-58cd-45f9-9fee-85f25a31c3c6", "metadata": {}, "outputs": [ @@ -78,22 +78,22 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 10, "id": "9326b072-a53c-40c4-a6cb-bd4d3d644d03", "metadata": {}, "outputs": [], "source": [ + "import os\n", "dataset_url = 'http://storage.googleapis.com/download.tensorflow.org/data/petfinder-mini.zip'\n", - "csv_file = 'datasets/petfinder-mini/petfinder-mini.csv'\n", "\n", - "tf.keras.utils.get_file('petfinder_mini.zip', dataset_url,\n", - " extract=True, cache_dir='.')\n", - "dataframe = pd.read_csv(csv_file)" + "data_dir = tf.keras.utils.get_file('petfinder_mini.zip', dataset_url, extract=True)\n", + "data_dir = os.path.join(os.path.dirname(data_dir), 'petfinder-mini/petfinder-mini.csv')\n", + "dataframe = pd.read_csv(data_dir)" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 11, "id": "e98480ef-d13d-44c0-a227-e9a22f9bf2b0", "metadata": {}, "outputs": [ @@ -253,7 +253,7 @@ "4 This handsome yet cute boy is up for adoption.... 3 2 " ] }, - "execution_count": 5, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/text_classification_tf.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/text_classification_tf.ipynb index 98dba357..567e0e03 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/text_classification_tf.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/text_classification_tf.ipynb @@ -23,7 +23,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "76f0f5df-502f-444e-b2ee-1122e1dea870", "metadata": {}, "outputs": [ @@ -31,13 +31,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-10-03 17:43:56.140645: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", - "2024-10-03 17:43:56.147227: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", - "2024-10-03 17:43:56.154601: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", - "2024-10-03 17:43:56.156763: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", - "2024-10-03 17:43:56.162424: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "2024-10-23 15:03:51.507387: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", + "2024-10-23 15:03:51.515051: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-10-23 15:03:51.522806: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-10-23 15:03:51.525092: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-10-23 15:03:51.531528: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2024-10-03 17:43:56.485452: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" + "2024-10-23 15:03:51.862292: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" ] } ], @@ -54,7 +54,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "a364ad5f-b269-45b5-ab8b-d8f34fb642b7", "metadata": {}, "outputs": [ @@ -72,7 +72,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "57b1d71f", "metadata": {}, "outputs": [], @@ -89,7 +89,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "d229c1b6-3967-46b5-9ea8-68f4b42dd211", "metadata": {}, "outputs": [ @@ -97,8 +97,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "Downloading data from https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\n", - "\u001b[1m84125825/84125825\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 0us/step\n" + "Downloading data from https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[1m84125825/84125825\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 0us/step\n" ] } ], @@ -106,7 +112,7 @@ "url = \"https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\"\n", "\n", "dataset = tf.keras.utils.get_file(\n", - " \"aclImdb_v1\", url, untar=True, cache_dir=\".\", cache_subdir=\"\"\n", + " \"aclImdb_v1\", url, untar=True,\n", ")\n", "\n", "dataset_dir = os.path.join(os.path.dirname(dataset), \"aclImdb\")" @@ -114,7 +120,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "id": "1f8038ae-8bc1-46bf-ae4c-6da08886c473", "metadata": {}, "outputs": [ @@ -124,7 +130,7 @@ "['README', 'imdb.vocab', 'test', 'train', 'imdbEr.txt']" ] }, - "execution_count": 6, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -135,7 +141,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "12faaa3f-3441-4361-b9eb-4317e8c2c2f7", "metadata": {}, "outputs": [ @@ -152,7 +158,7 @@ " 'unsup']" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } From e74b57877f72cb5f9ed9a4e97fc7f0cebdd36b48 Mon Sep 17 00:00:00 2001 From: Rishi Chandra Date: Thu, 24 Oct 2024 02:37:33 +0000 Subject: [PATCH 3/4] Use pathlib for relative dirs --- .../tensorflow/feature_columns_tf.ipynb | 32 ++++++++---- .../tensorflow/text_classification_tf.ipynb | 52 +++++++++---------- 2 files changed, 45 insertions(+), 39 deletions(-) diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/feature_columns_tf.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/feature_columns_tf.ipynb index c943f868..581db0da 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/feature_columns_tf.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/feature_columns_tf.ipynb @@ -31,13 +31,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-10-23 15:06:03.148933: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", - "2024-10-23 15:06:03.156486: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", - "2024-10-23 15:06:03.164237: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", - "2024-10-23 15:06:03.166475: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", - "2024-10-23 15:06:03.172837: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "2024-10-24 02:30:25.563383: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", + "2024-10-24 02:30:25.570788: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-10-24 02:30:25.578536: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-10-24 02:30:25.580796: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-10-24 02:30:25.587423: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2024-10-23 15:06:03.498973: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" + "2024-10-24 02:30:25.936008: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" ] } ], @@ -78,17 +78,27 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "9326b072-a53c-40c4-a6cb-bd4d3d644d03", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/rishic/.keras/datasets/petfinder_mini.zip\n" + ] + } + ], "source": [ + "import pathlib\n", "import os\n", "dataset_url = 'http://storage.googleapis.com/download.tensorflow.org/data/petfinder-mini.zip'\n", "\n", - "data_dir = tf.keras.utils.get_file('petfinder_mini.zip', dataset_url, extract=True)\n", - "data_dir = os.path.join(os.path.dirname(data_dir), 'petfinder-mini/petfinder-mini.csv')\n", - "dataframe = pd.read_csv(data_dir)" + "data_dir = tf.keras.utils.get_file(fname='petfinder_mini.zip', origin=dataset_url, extract=True)\n", + "data_dir = pathlib.Path(data_dir)\n", + "dataset = os.path.join(os.path.dirname(data_dir), 'petfinder-mini/petfinder-mini.csv')\n", + "dataframe = pd.read_csv(dataset)" ] }, { diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/text_classification_tf.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/text_classification_tf.ipynb index 567e0e03..eed58e55 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/text_classification_tf.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/text_classification_tf.ipynb @@ -31,13 +31,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-10-23 15:03:51.507387: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", - "2024-10-23 15:03:51.515051: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", - "2024-10-23 15:03:51.522806: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", - "2024-10-23 15:03:51.525092: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", - "2024-10-23 15:03:51.531528: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "2024-10-24 02:14:21.903863: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", + "2024-10-24 02:14:21.911570: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-10-24 02:14:21.919525: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-10-24 02:14:21.921779: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-10-24 02:14:21.928323: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2024-10-23 15:03:51.862292: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" + "2024-10-24 02:14:22.271168: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" ] } ], @@ -89,7 +89,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 11, "id": "d229c1b6-3967-46b5-9ea8-68f4b42dd211", "metadata": {}, "outputs": [ @@ -97,30 +97,25 @@ "name": "stdout", "output_type": "stream", "text": [ - "Downloading data from https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1m84125825/84125825\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 0us/step\n" + "Downloading data from https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\n", + "\u001b[1m84125825/84125825\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 0us/step\n" ] } ], "source": [ + "import pathlib\n", "url = \"https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\"\n", "\n", "dataset = tf.keras.utils.get_file(\n", - " \"aclImdb_v1\", url, untar=True,\n", + " fname=\"aclImdb\", origin=url, untar=True,\n", ")\n", "\n", - "dataset_dir = os.path.join(os.path.dirname(dataset), \"aclImdb\")" + "dataset_dir = pathlib.Path(dataset)" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 12, "id": "1f8038ae-8bc1-46bf-ae4c-6da08886c473", "metadata": {}, "outputs": [ @@ -130,7 +125,7 @@ "['README', 'imdb.vocab', 'test', 'train', 'imdbEr.txt']" ] }, - "execution_count": 5, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -141,7 +136,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 13, "id": "12faaa3f-3441-4361-b9eb-4317e8c2c2f7", "metadata": {}, "outputs": [ @@ -158,19 +153,20 @@ " 'unsup']" ] }, - "execution_count": 6, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_dir = os.path.join(dataset_dir, \"train\")\n", + "test_dir = os.path.join(dataset_dir, \"test\")\n", "os.listdir(train_dir)" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 14, "id": "152cc0cc-65d0-4e17-9ee8-222390df45b5", "metadata": {}, "outputs": [ @@ -190,7 +186,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 15, "id": "b2277f58-78c8-4a12-bc98-5103e7c81a35", "metadata": {}, "outputs": [], @@ -201,7 +197,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 17, "id": "ed83de92-ebb3-4170-b2bf-25265c6a6942", "metadata": {}, "outputs": [ @@ -217,7 +213,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-10-03 17:44:07.678162: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2021] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 44790 MB memory: -> device: 0, name: NVIDIA RTX A6000, pci bus id: 0000:01:00.0, compute capability: 8.6\n" + "2024-10-24 02:18:45.343343: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2021] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 46446 MB memory: -> device: 0, name: NVIDIA RTX A6000, pci bus id: 0000:01:00.0, compute capability: 8.6\n" ] } ], @@ -226,7 +222,7 @@ "seed = 42\n", "\n", "raw_train_ds = tf.keras.utils.text_dataset_from_directory(\n", - " \"aclImdb/train\",\n", + " train_dir,\n", " batch_size=batch_size,\n", " validation_split=0.2,\n", " subset=\"training\",\n", @@ -304,7 +300,7 @@ ], "source": [ "raw_val_ds = tf.keras.utils.text_dataset_from_directory(\n", - " \"aclImdb/train\",\n", + " train_dir,\n", " batch_size=batch_size,\n", " validation_split=0.2,\n", " subset=\"validation\",\n", @@ -328,7 +324,7 @@ ], "source": [ "raw_test_ds = tf.keras.utils.text_dataset_from_directory(\n", - " \"aclImdb/test\", batch_size=batch_size\n", + " test_dir, batch_size=batch_size\n", ")" ] }, From 63265c67668548761597edbe52bb98fc7bcf192c Mon Sep 17 00:00:00 2001 From: Rishi Chandra Date: Thu, 24 Oct 2024 16:22:24 +0000 Subject: [PATCH 4/4] Account for tensorflow extraction path bug --- .../tensorflow/feature_columns_tf.ipynb | 35 ++++++------- .../tensorflow/text_classification_tf.ipynb | 52 ++++++++++++------- 2 files changed, 49 insertions(+), 38 deletions(-) diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/feature_columns_tf.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/feature_columns_tf.ipynb index 581db0da..2ff37b6c 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/feature_columns_tf.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/feature_columns_tf.ipynb @@ -31,13 +31,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-10-24 02:30:25.563383: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", - "2024-10-24 02:30:25.570788: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", - "2024-10-24 02:30:25.578536: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", - "2024-10-24 02:30:25.580796: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", - "2024-10-24 02:30:25.587423: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "2024-10-24 16:04:17.711230: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", + "2024-10-24 16:04:17.719701: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-10-24 16:04:17.728758: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-10-24 16:04:17.731459: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-10-24 16:04:17.738797: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2024-10-24 02:30:25.936008: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" + "2024-10-24 16:04:18.115892: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" ] } ], @@ -78,27 +78,24 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 7, "id": "9326b072-a53c-40c4-a6cb-bd4d3d644d03", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/home/rishic/.keras/datasets/petfinder_mini.zip\n" - ] - } - ], + "outputs": [], "source": [ "import pathlib\n", "import os\n", "dataset_url = 'http://storage.googleapis.com/download.tensorflow.org/data/petfinder-mini.zip'\n", "\n", - "data_dir = tf.keras.utils.get_file(fname='petfinder_mini.zip', origin=dataset_url, extract=True)\n", + "data_dir = tf.keras.utils.get_file('petfinder_mini.zip', dataset_url, extract=True, cache_dir='.')\n", "data_dir = pathlib.Path(data_dir)\n", - "dataset = os.path.join(os.path.dirname(data_dir), 'petfinder-mini/petfinder-mini.csv')\n", - "dataframe = pd.read_csv(dataset)" + "try:\n", + " # pet-finder-mini might be under a parent a directory petfinder_mini_extracted. Check if this is the case:\n", + " dataset = os.path.join(os.path.dirname(data_dir), 'petfinder_mini_extracted/petfinder-mini/petfinder-mini.csv')\n", + " dataframe = pd.read_csv(dataset)\n", + "except:\n", + " dataset = os.path.join(os.path.dirname(data_dir), 'petfinder-mini/petfinder-mini.csv')\n", + " dataframe = pd.read_csv(dataset)" ] }, { diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/text_classification_tf.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/text_classification_tf.ipynb index eed58e55..63499611 100644 --- a/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/text_classification_tf.ipynb +++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/tensorflow/text_classification_tf.ipynb @@ -31,13 +31,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-10-24 02:14:21.903863: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", - "2024-10-24 02:14:21.911570: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", - "2024-10-24 02:14:21.919525: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", - "2024-10-24 02:14:21.921779: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", - "2024-10-24 02:14:21.928323: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "2024-10-24 16:15:43.020721: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", + "2024-10-24 16:15:43.028070: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-10-24 16:15:43.035674: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-10-24 16:15:43.037910: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-10-24 16:15:43.044256: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2024-10-24 02:14:22.271168: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" + "2024-10-24 16:15:43.368732: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" ] } ], @@ -89,19 +89,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "id": "d229c1b6-3967-46b5-9ea8-68f4b42dd211", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Downloading data from https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\n", - "\u001b[1m84125825/84125825\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 0us/step\n" - ] - } - ], + "outputs": [], "source": [ "import pathlib\n", "url = \"https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\"\n", @@ -115,7 +106,30 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, + "id": "bfa5177f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/rishic/.keras/datasets/aclImdb\n", + "/home/rishic/.keras/datasets/aclImdb\n" + ] + } + ], + "source": [ + "print(dataset_dir)\n", + "# aclImdb might be created as a directory containing a single directory aclImdb. Check if this is the case:\n", + "if os.path.exists(dataset_dir / \"aclImdb\"):\n", + " dataset_dir = dataset_dir / \"aclImdb\"\n", + "print(dataset_dir)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, "id": "1f8038ae-8bc1-46bf-ae4c-6da08886c473", "metadata": {}, "outputs": [ @@ -125,7 +139,7 @@ "['README', 'imdb.vocab', 'test', 'train', 'imdbEr.txt']" ] }, - "execution_count": 12, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" }