From 5c250ed748cc5cafb14602fc86621d3157879442 Mon Sep 17 00:00:00 2001
From: C-K-Loan <christian.kasim.loan@gmail.com>
Date: Thu, 21 Jan 2021 05:18:36 +0100
Subject: [PATCH] notebook link updates

---
 .../NLU_training_sentiment_classifier_demo.ipynb                | 2 +-
 .../NLU_training_sentiment_classifier_demo_IMDB.ipynb           | 2 +-
 .../NLU_training_sentiment_classifier_demo_apple_twitter.ipynb  | 2 +-
 .../NLU_training_sentiment_classifier_demo_finanical_news.ipynb | 2 +-
 .../NLU_training_sentiment_classifier_demo_reddit.ipynb         | 2 +-
 .../NLU_training_sentiment_classifier_demo_twitter.ipynb        | 2 +-
 .../NLU_training_multi_class_text_classifier_demo.ipynb         | 2 +-
 .../NLU_training_multi_class_text_classifier_demo_amazon.ipynb  | 2 +-
 ...raining_multi_class_text_classifier_demo_hotel_reviews.ipynb | 2 +-
 ...g_multi_class_text_classifier_demo_musical_instruments.ipynb | 2 +-
 .../NLU_training_multi_class_text_classifier_demo_wine.ipynb    | 2 +-
 .../NLU_traing_multi_label_classifier_E2e.ipynb                 | 2 +-
 ...g_multi_token_label_text_classifier_stackoverflow_tags.ipynb | 2 +-
 .../named_entity_recognition/NLU_training_NER_demo.ipynb        | 2 +-
 .../colab/Training/part_of_speech/NLU_training_POS_demo.ipynb   | 2 +-
 15 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo.ipynb b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo.ipynb
index 4c56b531..666b7068 100644
--- a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo.ipynb
+++ b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo.ipynb
@@ -1 +1 @@
-{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/collab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo.ipynb)\n","\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu pyspark==2.4.7 > /dev/null  \n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download Stock Market Sentiment dataset \n","https://www.kaggle.com/yash612/stockmarket-sentiment-dataset"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1608771929986,"user_tz":480,"elapsed":2813,"user":{"displayName":"Adam Morgan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh9F8KQIFyVYLTpwx5Hjc1H-jOwcRnh58ghzE2E8Q=s64","userId":"13394261526394139327"}},"outputId":"968a492a-fed0-4a7b-9eba-ebef9ff9ab47"},"source":["! wget http://ckl-it.de/wp-content/uploads/2020/11/stock_data.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2020-12-24 01:05:27--  http://ckl-it.de/wp-content/uploads/2020/11/stock_data.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 479973 (469K) [text/csv]\n","Saving to: ‘stock_data.csv.1’\n","\n","stock_data.csv.1    100%[===================>] 468.72K   324KB/s    in 1.4s    \n","\n","2020-12-24 01:05:29 (324 KB/s) - ‘stock_data.csv.1’ saved [479973/479973]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"x-rbTZLm_Uqs","executionInfo":{"status":"ok","timestamp":1608771936564,"user_tz":480,"elapsed":3170,"user":{"displayName":"Adam Morgan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh9F8KQIFyVYLTpwx5Hjc1H-jOwcRnh58ghzE2E8Q=s64","userId":"13394261526394139327"}},"outputId":"2d69e18e-285e-4337-cb61-372099f47bc3"},"source":["! 
pip install nlu pyspark==2.4.7"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Requirement already satisfied: nlu in /usr/local/lib/python3.6/dist-packages (1.0.5)\n","Requirement already satisfied: pyspark==2.4.7 in /usr/local/lib/python3.6/dist-packages (2.4.7)\n","Requirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from nlu) (1.1.5)\n","Requirement already satisfied: spark-nlp<2.7,>=2.6.2 in /usr/local/lib/python3.6/dist-packages (from nlu) (2.6.5)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from nlu) (1.19.4)\n","Requirement already satisfied: dataclasses in /usr/local/lib/python3.6/dist-packages (from nlu) (0.8)\n","Requirement already satisfied: pyarrow>=0.16.0 in /usr/local/lib/python3.6/dist-packages (from nlu) (2.0.0)\n","Requirement already satisfied: py4j==0.10.7 in /usr/local/lib/python3.6/dist-packages (from pyspark==2.4.7) (0.10.7)\n","Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.6/dist-packages (from pandas->nlu) (2.8.1)\n","Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas->nlu) (2018.9)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.6/dist-packages (from python-dateutil>=2.7.3->pandas->nlu) (1.15.0)\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"uDGIOASY_fRj","executionInfo":{"status":"ok","timestamp":1608771969641,"user_tz":480,"elapsed":26360,"user":{"displayName":"Adam Morgan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh9F8KQIFyVYLTpwx5Hjc1H-jOwcRnh58ghzE2E8Q=s64","userId":"13394261526394139327"}},"outputId":"caa60b4b-8819-4046-c0e8-d029434a4155"},"source":["import nlu\r\n","sentiment = nlu.load('sentiment')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["analyze_sentiment download started this may take some time.\n","Approx size to download 4.9 
MB\n","[OK!]\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":128},"id":"U0ENiuMc_kyb","executionInfo":{"status":"ok","timestamp":1608771986728,"user_tz":480,"elapsed":9983,"user":{"displayName":"Adam Morgan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh9F8KQIFyVYLTpwx5Hjc1H-jOwcRnh58ghzE2E8Q=s64","userId":"13394261526394139327"}},"outputId":"1b96fe79-8ae8-4ec3-e3fd-8c234ec15322"},"source":["sentiment.predict(\"I'm very very not at all happy\")"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>sentence</th>\n","      <th>sentiment_confidence</th>\n","      <th>checked</th>\n","      <th>sentiment</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>I'm very very not at all happy</td>\n","      <td>0.304300</td>\n","      <td>[I'm, very, very, not, at, all, happy]</td>\n","      <td>positive</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                    sentence  ... sentiment\n","origin_index                                  ...          \n","0             I'm very very not at all happy  ...  
positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":406},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1607932042536,"user_tz":-60,"elapsed":82460,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"5c8fa3de-3b18-4427-ee0f-d26128fb7012"},"source":["import pandas as pd\n","train_path = '/content/stock_data.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must have name 'y' name be of type str\n","train_df.columns=['text','y']\n","train_df.y = train_df.y.astype(str)\n","train_df.y = train_df.y.str.replace('-1','negative')\n","train_df.y = train_df.y.str.replace('1','positive')\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>y</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Kickers on my watchlist XIDE TIT SOQ PNK CPW B...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>user: AAP MOVIE. 
55% return for the FEA/GEED i...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>user I'd be afraid to short AMZN - they are lo...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>MNTA Over 12.00</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>OI  Over 21.37</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>5786</th>\n","      <td>Industry body CII said #discoms are likely to ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>5787</th>\n","      <td>#Gold prices slip below Rs 46,000 as #investor...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>5788</th>\n","      <td>Workers at Bajaj Auto have agreed to a 10% wag...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>5789</th>\n","      <td>#Sharemarket LIVE: Sensex off day’s high, up 6...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>5790</th>\n","      <td>#Sensex, #Nifty climb off day's highs, still u...</td>\n","      <td>positive</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5791 rows × 2 columns</p>\n","</div>"],"text/plain":["                                                   text         y\n","0     Kickers on my watchlist XIDE TIT SOQ PNK CPW B...  positive\n","1     user: AAP MOVIE. 55% return for the FEA/GEED i...  positive\n","2     user I'd be afraid to short AMZN - they are lo...  positive\n","3                                     MNTA Over 12.00    positive\n","4                                      OI  Over 21.37    positive\n","...                                                 ...       ...\n","5786  Industry body CII said #discoms are likely to ...  negative\n","5787  #Gold prices slip below Rs 46,000 as #investor...  
negative\n","5788  Workers at Bajaj Auto have agreed to a 10% wag...  positive\n","5789  #Sharemarket LIVE: Sensex off day’s high, up 6...  positive\n","5790  #Sensex, #Nifty climb off day's highs, still u...  positive\n","\n","[5791 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":667},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1607932220223,"user_tz":-60,"elapsed":260138,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"4baf0d66-f257-4c2b-8887-fe7dfe9dadd4"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix  and fit it on a datset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df)\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    negative       0.71      0.43      0.54      2106\n","     neutral       0.00      0.00      0.00         0\n","    positive       0.77      0.83      0.80      3685\n","\n","    accuracy                           0.69      5791\n","   macro avg       0.49      0.42      0.45      5791\n","weighted avg       0.75      0.69      0.70      5791\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>sentiment_confidence</th>\n","      <th>document</th>\n","      <th>sentiment</th>\n","      <th>y</th>\n","      <th>default_name_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Kickers on my watchlist XIDE TIT SOQ PNK CPW B...</td>\n","      <td>0.982228</td>\n","      <td>Kickers on my watchlist XIDE TIT SOQ PNK CPW B...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>[0.006487144622951746, -0.042024899274110794, ...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      
<td>user: AAP MOVIE. 55% return for the FEA/GEED i...</td>\n","      <td>0.880183</td>\n","      <td>user: AAP MOVIE. 55% return for the FEA/GEED i...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>[-0.03017628937959671, -0.0627138689160347, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>user I'd be afraid to short AMZN - they are lo...</td>\n","      <td>0.837914</td>\n","      <td>user I'd be afraid to short AMZN - they are lo...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>[0.05556508153676987, -0.016491785645484924, 0...</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>MNTA Over 12.00</td>\n","      <td>0.905505</td>\n","      <td>MNTA Over 12.00</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>[-0.01097656786441803, -0.02980119362473488, -...</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>OI  Over 21.37</td>\n","      <td>0.532368</td>\n","      <td>OI Over 21.37</td>\n","      <td>neutral</td>\n","      <td>positive</td>\n","      <td>[0.024849386885762215, 0.04679658263921738, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>5786</th>\n","      <td>Industry body CII said #discoms are likely to ...</td>\n","      <td>0.785020</td>\n","      <td>Industry body CII said #discoms are likely to ...</td>\n","      <td>negative</td>\n","      <td>negative</td>\n","      <td>[0.020985644310712814, -0.03145354613661766, -...</td>\n","    </tr>\n","    <tr>\n","      <th>5787</th>\n","      <td>#Gold prices slip below Rs 46,000 as #investor...</td>\n","      <td>0.861554</td>\n","      <td>#Gold prices slip below Rs 46,000 as #investor...</td>\n","      <td>negative</td>\n","      <td>negative</td>\n","      <td>[0.05627664923667908, 
0.012842322699725628, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>5788</th>\n","      <td>Workers at Bajaj Auto have agreed to a 10% wag...</td>\n","      <td>0.794606</td>\n","      <td>Workers at Bajaj Auto have agreed to a 10% wag...</td>\n","      <td>negative</td>\n","      <td>positive</td>\n","      <td>[0.01210737880319357, -0.02798214927315712, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>5789</th>\n","      <td>#Sharemarket LIVE: Sensex off day’s high, up 6...</td>\n","      <td>0.966394</td>\n","      <td>#Sharemarket LIVE: Sensex off day’s high, up 6...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>[0.0031773506198078394, -0.04296385496854782, ...</td>\n","    </tr>\n","    <tr>\n","      <th>5790</th>\n","      <td>#Sensex, #Nifty climb off day's highs, still u...</td>\n","      <td>0.987555</td>\n","      <td>#Sensex, #Nifty climb off day's highs, still u...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>[0.04964913800358772, -0.04634825885295868, -0...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5791 rows × 6 columns</p>\n","</div>"],"text/plain":["                                                           text  ...                            default_name_embeddings\n","origin_index                                                     ...                                                   \n","0             Kickers on my watchlist XIDE TIT SOQ PNK CPW B...  ...  [0.006487144622951746, -0.042024899274110794, ...\n","1             user: AAP MOVIE. 55% return for the FEA/GEED i...  ...  [-0.03017628937959671, -0.0627138689160347, -0...\n","2             user I'd be afraid to short AMZN - they are lo...  ...  [0.05556508153676987, -0.016491785645484924, 0...\n","3                                             MNTA Over 12.00    ...  [-0.01097656786441803, -0.02980119362473488, -...\n","4                                              OI  Over 21.37    ...  
[0.024849386885762215, 0.04679658263921738, -0...\n","...                                                         ...  ...                                                ...\n","5786          Industry body CII said #discoms are likely to ...  ...  [0.020985644310712814, -0.03145354613661766, -...\n","5787          #Gold prices slip below Rs 46,000 as #investor...  ...  [0.05627664923667908, 0.012842322699725628, -0...\n","5788          Workers at Bajaj Auto have agreed to a 10% wag...  ...  [0.01210737880319357, -0.02798214927315712, -0...\n","5789          #Sharemarket LIVE: Sensex off day’s high, up 6...  ...  [0.0031773506198078394, -0.04296385496854782, ...\n","5790          #Sensex, #Nifty climb off day's highs, still u...  ...  [0.04964913800358772, -0.04634825885295868, -0...\n","\n","[5791 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":107},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1607932222650,"user_tz":-60,"elapsed":262555,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"f38cb015-a48f-47a1-e2b7-5d5bb488beb9"},"source":["fitted_pipe.predict(\"Bitcoin is going to the moon!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      
<th>sentiment_confidence</th>\n","      <th>document</th>\n","      <th>sentiment</th>\n","      <th>default_name_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0.918913</td>\n","      <td>Bitcoin is going to the moon!</td>\n","      <td>positive</td>\n","      <td>[0.06468033790588379, -0.040837567299604416, -...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["             sentiment_confidence  ...                            default_name_embeddings\n","origin_index                       ...                                                   \n","0                        0.918913  ...  [0.06468033790588379, -0.040837567299604416, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1607932222651,"user_tz":-60,"elapsed":262549,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"cf184e49-084a-42d4-c95a-4de7c21cae16"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2)                 | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005)                    | Info: Learning Rate | Currently set to : 
0.005\n","pipe['sentiment_dl'].setBatchSize(64)                | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5)                 | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True)       | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6)               | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral')    | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512)               | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use')      | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+')    | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"])  | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0)            | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999)        | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True)  | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True)       | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([])        | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False)  | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0)            | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999)        | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')  | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":614},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1607932301821,"user_tz":-60,"elapsed":341713,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"0c0022d0-2bf6-44db-e737-30892668621f"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5)  \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["              precision    recall  f1-score   support\n","\n","    negative       0.79      0.67      0.72      2106\n","     neutral       0.00      0.00      0.00         0\n","    positive       0.84      0.87      0.85      3685\n","\n","    accuracy                           0.80      5791\n","   macro avg       0.54      0.51      0.53      5791\n","weighted avg       0.82      0.80      0.81      5791\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>sentiment_confidence</th>\n","      <th>document</th>\n","      <th>sentiment</th>\n","      <th>y</th>\n","      <th>default_name_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Kickers on my watchlist XIDE TIT SOQ PNK CPW B...</td>\n","      <td>0.999146</td>\n","      <td>Kickers on my watchlist XIDE TIT SOQ PNK CPW B...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>[0.006487144622951746, -0.042024899274110794, ...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>user: AAP MOVIE. 
55% return for the FEA/GEED i...</td>\n","      <td>0.941052</td>\n","      <td>user: AAP MOVIE. 55% return for the FEA/GEED i...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>[-0.03017628937959671, -0.0627138689160347, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>user I'd be afraid to short AMZN - they are lo...</td>\n","      <td>0.648649</td>\n","      <td>user I'd be afraid to short AMZN - they are lo...</td>\n","      <td>negative</td>\n","      <td>positive</td>\n","      <td>[0.05556508153676987, -0.016491785645484924, 0...</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>MNTA Over 12.00</td>\n","      <td>0.988186</td>\n","      <td>MNTA Over 12.00</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>[-0.01097656786441803, -0.02980119362473488, -...</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>OI  Over 21.37</td>\n","      <td>0.783930</td>\n","      <td>OI Over 21.37</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>[0.024849386885762215, 0.04679658263921738, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>5786</th>\n","      <td>Industry body CII said #discoms are likely to ...</td>\n","      <td>0.990443</td>\n","      <td>Industry body CII said #discoms are likely to ...</td>\n","      <td>negative</td>\n","      <td>negative</td>\n","      <td>[0.020985644310712814, -0.03145354613661766, -...</td>\n","    </tr>\n","    <tr>\n","      <th>5787</th>\n","      <td>#Gold prices slip below Rs 46,000 as #investor...</td>\n","      <td>0.999385</td>\n","      <td>#Gold prices slip below Rs 46,000 as #investor...</td>\n","      <td>negative</td>\n","      <td>negative</td>\n","      <td>[0.05627664923667908, 0.012842322699725628, 
-0...</td>\n","    </tr>\n","    <tr>\n","      <th>5788</th>\n","      <td>Workers at Bajaj Auto have agreed to a 10% wag...</td>\n","      <td>0.728881</td>\n","      <td>Workers at Bajaj Auto have agreed to a 10% wag...</td>\n","      <td>negative</td>\n","      <td>positive</td>\n","      <td>[0.01210737880319357, -0.02798214927315712, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>5789</th>\n","      <td>#Sharemarket LIVE: Sensex off day’s high, up 6...</td>\n","      <td>0.987245</td>\n","      <td>#Sharemarket LIVE: Sensex off day’s high, up 6...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>[0.0031773506198078394, -0.04296385496854782, ...</td>\n","    </tr>\n","    <tr>\n","      <th>5790</th>\n","      <td>#Sensex, #Nifty climb off day's highs, still u...</td>\n","      <td>0.999714</td>\n","      <td>#Sensex, #Nifty climb off day's highs, still u...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>[0.04964913800358772, -0.04634825885295868, -0...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5791 rows × 6 columns</p>\n","</div>"],"text/plain":["                                                           text  ...                            default_name_embeddings\n","origin_index                                                     ...                                                   \n","0             Kickers on my watchlist XIDE TIT SOQ PNK CPW B...  ...  [0.006487144622951746, -0.042024899274110794, ...\n","1             user: AAP MOVIE. 55% return for the FEA/GEED i...  ...  [-0.03017628937959671, -0.0627138689160347, -0...\n","2             user I'd be afraid to short AMZN - they are lo...  ...  [0.05556508153676987, -0.016491785645484924, 0...\n","3                                             MNTA Over 12.00    ...  [-0.01097656786441803, -0.02980119362473488, -...\n","4                                              OI  Over 21.37    ...  
[0.024849386885762215, 0.04679658263921738, -0...\n","...                                                         ...  ...                                                ...\n","5786          Industry body CII said #discoms are likely to ...  ...  [0.020985644310712814, -0.03145354613661766, -...\n","5787          #Gold prices slip below Rs 46,000 as #investor...  ...  [0.05627664923667908, 0.012842322699725628, -0...\n","5788          Workers at Bajaj Auto have agreed to a 10% wag...  ...  [0.01210737880319357, -0.02798214927315712, -0...\n","5789          #Sharemarket LIVE: Sensex off day’s high, up 6...  ...  [0.0031773506198078394, -0.04296385496854782, ...\n","5790          #Sensex, #Nifty climb off day's highs, still u...  ...  [0.04964913800358772, -0.04634825885295868, -0...\n","\n","[5791 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nxWFzQOhjWC8","executionInfo":{"status":"ok","timestamp":1607932301823,"user_tz":-60,"elapsed":341709,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"4dfe0938-a01e-4469-c4fa-8909deb02a2a"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language <en> NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language <fi> NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language <xx> NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":667},"id":"IKK_Ii_gjJfF","executionInfo":{"status":"ok","timestamp":1607932444818,"user_tz":-60,"elapsed":484698,"user":{"displayName":"Christian Kasim 
Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"dc1943bb-8f1f-4503-c7e4-8f4938ddf4aa"},"source":["trainable_pipe = nlu.load('embed_sentence.bert train.sentiment')\n","# We need to train longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(40)  \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L2_128 download started this may take some time.\n","Approximate size to download 16.1 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    negative       0.68      0.25      0.36      2106\n","     neutral       0.00      0.00      0.00         0\n","    positive       0.72      0.84      0.77      3685\n","\n","    accuracy                           0.63      5791\n","   macro avg       0.47      0.36      0.38      5791\n","weighted avg       0.71      0.63      0.63      5791\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" 
class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>sentiment_confidence</th>\n","      <th>document</th>\n","      <th>embed_sentence_bert_embeddings</th>\n","      <th>sentiment</th>\n","      <th>y</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Kickers on my watchlist XIDE TIT SOQ PNK CPW B...</td>\n","      <td>0.874224</td>\n","      <td>Kickers on my watchlist XIDE TIT SOQ PNK CPW B...</td>\n","      <td>[-0.9207571744918823, 0.21013416349887848, 0.1...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>user: AAP MOVIE. 55% return for the FEA/GEED i...</td>\n","      <td>0.647704</td>\n","      <td>user: AAP MOVIE. 
55% return for the FEA/GEED i...</td>\n","      <td>[-0.43004727363586426, 0.5101231336593628, -0....</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>user I'd be afraid to short AMZN - they are lo...</td>\n","      <td>0.780586</td>\n","      <td>user I'd be afraid to short AMZN - they are lo...</td>\n","      <td>[0.3040030300617218, 0.22862982749938965, -0.5...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>MNTA Over 12.00</td>\n","      <td>0.978046</td>\n","      <td>MNTA Over 12.00</td>\n","      <td>[-1.810348391532898, -0.4799138903617859, -0.7...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>OI  Over 21.37</td>\n","      <td>0.961256</td>\n","      <td>OI Over 21.37</td>\n","      <td>[-2.4639298915863037, 0.3879590630531311, -0.6...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>5786</th>\n","      <td>Industry body CII said #discoms are likely to ...</td>\n","      <td>0.759879</td>\n","      <td>Industry body CII said #discoms are likely to ...</td>\n","      <td>[-0.09503911435604095, 0.6293947696685791, 0.0...</td>\n","      <td>negative</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>5787</th>\n","      <td>#Gold prices slip below Rs 46,000 as #investor...</td>\n","      <td>0.759041</td>\n","      <td>#Gold prices slip below Rs 46,000 as #investor...</td>\n","      <td>[-0.1287938952445984, 0.28170245885849, 0.0280...</td>\n","      <td>negative</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>5788</th>\n","      <td>Workers at Bajaj Auto 
have agreed to a 10% wag...</td>\n","      <td>0.750849</td>\n","      <td>Workers at Bajaj Auto have agreed to a 10% wag...</td>\n","      <td>[-0.3395587205886841, 0.912406325340271, -0.32...</td>\n","      <td>negative</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>5789</th>\n","      <td>#Sharemarket LIVE: Sensex off day’s high, up 6...</td>\n","      <td>0.567143</td>\n","      <td>#Sharemarket LIVE: Sensex off day’s high, up 6...</td>\n","      <td>[-0.6081283092498779, 0.2732301354408264, 0.25...</td>\n","      <td>neutral</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>5790</th>\n","      <td>#Sensex, #Nifty climb off day's highs, still u...</td>\n","      <td>0.545603</td>\n","      <td>#Sensex, #Nifty climb off day's highs, still u...</td>\n","      <td>[-0.44862690567970276, 0.43264657258987427, 0....</td>\n","      <td>neutral</td>\n","      <td>positive</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5791 rows × 6 columns</p>\n","</div>"],"text/plain":["                                                           text  ...         y\n","origin_index                                                     ...          \n","0             Kickers on my watchlist XIDE TIT SOQ PNK CPW B...  ...  positive\n","1             user: AAP MOVIE. 55% return for the FEA/GEED i...  ...  positive\n","2             user I'd be afraid to short AMZN - they are lo...  ...  positive\n","3                                             MNTA Over 12.00    ...  positive\n","4                                              OI  Over 21.37    ...  positive\n","...                                                         ...  ...       ...\n","5786          Industry body CII said #discoms are likely to ...  ...  negative\n","5787          #Gold prices slip below Rs 46,000 as #investor...  ...  negative\n","5788          Workers at Bajaj Auto have agreed to a 10% wag...  ...  
positive\n","5789          #Sharemarket LIVE: Sensex off day’s high, up 6...  ...  positive\n","5790          #Sensex, #Nifty climb off day's highs, still u...  ...  positive\n","\n","[5791 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":10}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607932457549,"user_tz":-60,"elapsed":497423,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"c39e2854-fd34-4576-ebb2-352bc80fb3c8"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible!   
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":125},"executionInfo":{"status":"ok","timestamp":1607932462254,"user_tz":-60,"elapsed":502122,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"22426d96-3b57-4450-8af1-7a0c69de879e"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Tesla plans to invest 10M into the ML sector')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>sentiment_confidence</th>\n","      <th>document</th>\n","      <th>embed_sentence_bert_embeddings</th>\n","      <th>sentiment</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0.974726</td>\n","      <td>Tesla plans to invest 10M into the ML sector</td>\n","      <td>[-0.07111635059118271, 0.9532930850982666, -1....</td>\n","      <td>positive</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["             sentiment_confidence  ... 
sentiment\n","origin_index                       ...          \n","0                        0.974726  ...  positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":12}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607932462257,"user_tz":-60,"elapsed":502119,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"921f5de2-eeb7-4115-a427-1671e3390f1c"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')           | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])                 | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)                | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False)          | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)                 | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)                     | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)           | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)       | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)      | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')               | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)                   | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)                       | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32)                                | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False)                         | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(128)                               | Info: Number of embedding dimensions | Currently set to : 128\n","pipe['glove'].setMaxSentenceLength(128)                       | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False)                                | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L2_128')         | Info: unique reference name for identification | Currently set to : sent_small_bert_L2_128\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6)                        | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral')             | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative'])     | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L2_128')  | Info: unique reference name for identification | Currently set to : sent_small_bert_L2_128\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"73rQbUy-KLpb"},"source":[""],"execution_count":null,"outputs":[]}]}
\ No newline at end of file
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo.ipynb)\n","\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu pyspark==2.4.7 > /dev/null  \n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download Stock Market Sentiment dataset \n","https://www.kaggle.com/yash612/stockmarket-sentiment-dataset"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1608771929986,"user_tz":480,"elapsed":2813,"user":{"displayName":"Adam Morgan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh9F8KQIFyVYLTpwx5Hjc1H-jOwcRnh58ghzE2E8Q=s64","userId":"13394261526394139327"}},"outputId":"968a492a-fed0-4a7b-9eba-ebef9ff9ab47"},"source":["! wget http://ckl-it.de/wp-content/uploads/2020/11/stock_data.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2020-12-24 01:05:27--  http://ckl-it.de/wp-content/uploads/2020/11/stock_data.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 479973 (469K) [text/csv]\n","Saving to: ‘stock_data.csv.1’\n","\n","stock_data.csv.1    100%[===================>] 468.72K   324KB/s    in 1.4s    \n","\n","2020-12-24 01:05:29 (324 KB/s) - ‘stock_data.csv.1’ saved [479973/479973]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"x-rbTZLm_Uqs","executionInfo":{"status":"ok","timestamp":1608771936564,"user_tz":480,"elapsed":3170,"user":{"displayName":"Adam Morgan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh9F8KQIFyVYLTpwx5Hjc1H-jOwcRnh58ghzE2E8Q=s64","userId":"13394261526394139327"}},"outputId":"2d69e18e-285e-4337-cb61-372099f47bc3"},"source":["! 
pip install nlu pyspark==2.4.7"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Requirement already satisfied: nlu in /usr/local/lib/python3.6/dist-packages (1.0.5)\n","Requirement already satisfied: pyspark==2.4.7 in /usr/local/lib/python3.6/dist-packages (2.4.7)\n","Requirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from nlu) (1.1.5)\n","Requirement already satisfied: spark-nlp<2.7,>=2.6.2 in /usr/local/lib/python3.6/dist-packages (from nlu) (2.6.5)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from nlu) (1.19.4)\n","Requirement already satisfied: dataclasses in /usr/local/lib/python3.6/dist-packages (from nlu) (0.8)\n","Requirement already satisfied: pyarrow>=0.16.0 in /usr/local/lib/python3.6/dist-packages (from nlu) (2.0.0)\n","Requirement already satisfied: py4j==0.10.7 in /usr/local/lib/python3.6/dist-packages (from pyspark==2.4.7) (0.10.7)\n","Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.6/dist-packages (from pandas->nlu) (2.8.1)\n","Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas->nlu) (2018.9)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.6/dist-packages (from python-dateutil>=2.7.3->pandas->nlu) (1.15.0)\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"uDGIOASY_fRj","executionInfo":{"status":"ok","timestamp":1608771969641,"user_tz":480,"elapsed":26360,"user":{"displayName":"Adam Morgan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh9F8KQIFyVYLTpwx5Hjc1H-jOwcRnh58ghzE2E8Q=s64","userId":"13394261526394139327"}},"outputId":"caa60b4b-8819-4046-c0e8-d029434a4155"},"source":["import nlu\r\n","sentiment = nlu.load('sentiment')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["analyze_sentiment download started this may take some time.\n","Approx size to download 4.9 
MB\n","[OK!]\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":128},"id":"U0ENiuMc_kyb","executionInfo":{"status":"ok","timestamp":1608771986728,"user_tz":480,"elapsed":9983,"user":{"displayName":"Adam Morgan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh9F8KQIFyVYLTpwx5Hjc1H-jOwcRnh58ghzE2E8Q=s64","userId":"13394261526394139327"}},"outputId":"1b96fe79-8ae8-4ec3-e3fd-8c234ec15322"},"source":["sentiment.predict(\"I'm very very not at all happy\")"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>sentence</th>\n","      <th>sentiment_confidence</th>\n","      <th>checked</th>\n","      <th>sentiment</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>I'm very very not at all happy</td>\n","      <td>0.304300</td>\n","      <td>[I'm, very, very, not, at, all, happy]</td>\n","      <td>positive</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                    sentence  ... sentiment\n","origin_index                                  ...          \n","0             I'm very very not at all happy  ...  
positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":406},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1607932042536,"user_tz":-60,"elapsed":82460,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"5c8fa3de-3b18-4427-ee0f-d26128fb7012"},"source":["import pandas as pd\n","train_path = '/content/stock_data.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must have name 'y' name be of type str\n","train_df.columns=['text','y']\n","train_df.y = train_df.y.astype(str)\n","train_df.y = train_df.y.str.replace('-1','negative')\n","train_df.y = train_df.y.str.replace('1','positive')\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>y</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Kickers on my watchlist XIDE TIT SOQ PNK CPW B...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>user: AAP MOVIE. 
55% return for the FEA/GEED i...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>user I'd be afraid to short AMZN - they are lo...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>MNTA Over 12.00</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>OI  Over 21.37</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>5786</th>\n","      <td>Industry body CII said #discoms are likely to ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>5787</th>\n","      <td>#Gold prices slip below Rs 46,000 as #investor...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>5788</th>\n","      <td>Workers at Bajaj Auto have agreed to a 10% wag...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>5789</th>\n","      <td>#Sharemarket LIVE: Sensex off day’s high, up 6...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>5790</th>\n","      <td>#Sensex, #Nifty climb off day's highs, still u...</td>\n","      <td>positive</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5791 rows × 2 columns</p>\n","</div>"],"text/plain":["                                                   text         y\n","0     Kickers on my watchlist XIDE TIT SOQ PNK CPW B...  positive\n","1     user: AAP MOVIE. 55% return for the FEA/GEED i...  positive\n","2     user I'd be afraid to short AMZN - they are lo...  positive\n","3                                     MNTA Over 12.00    positive\n","4                                      OI  Over 21.37    positive\n","...                                                 ...       ...\n","5786  Industry body CII said #discoms are likely to ...  negative\n","5787  #Gold prices slip below Rs 46,000 as #investor...  
negative\n","5788  Workers at Bajaj Auto have agreed to a 10% wag...  positive\n","5789  #Sharemarket LIVE: Sensex off day’s high, up 6...  positive\n","5790  #Sensex, #Nifty climb off day's highs, still u...  positive\n","\n","[5791 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":667},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1607932220223,"user_tz":-60,"elapsed":260138,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"4baf0d66-f257-4c2b-8887-fe7dfe9dadd4"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix  and fit it on a dataset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df)\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","#sentence detector that is part of the pipe generates some NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    negative       0.71      0.43      0.54      2106\n","     neutral       0.00      0.00      0.00         0\n","    positive       0.77      0.83      0.80      3685\n","\n","    accuracy                           0.69      5791\n","   macro avg       0.49      0.42      0.45      5791\n","weighted avg       0.75      0.69      0.70      5791\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>sentiment_confidence</th>\n","      <th>document</th>\n","      <th>sentiment</th>\n","      <th>y</th>\n","      <th>default_name_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Kickers on my watchlist XIDE TIT SOQ PNK CPW B...</td>\n","      <td>0.982228</td>\n","      <td>Kickers on my watchlist XIDE TIT SOQ PNK CPW B...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>[0.006487144622951746, -0.042024899274110794, ...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      
<td>user: AAP MOVIE. 55% return for the FEA/GEED i...</td>\n","      <td>0.880183</td>\n","      <td>user: AAP MOVIE. 55% return for the FEA/GEED i...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>[-0.03017628937959671, -0.0627138689160347, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>user I'd be afraid to short AMZN - they are lo...</td>\n","      <td>0.837914</td>\n","      <td>user I'd be afraid to short AMZN - they are lo...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>[0.05556508153676987, -0.016491785645484924, 0...</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>MNTA Over 12.00</td>\n","      <td>0.905505</td>\n","      <td>MNTA Over 12.00</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>[-0.01097656786441803, -0.02980119362473488, -...</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>OI  Over 21.37</td>\n","      <td>0.532368</td>\n","      <td>OI Over 21.37</td>\n","      <td>neutral</td>\n","      <td>positive</td>\n","      <td>[0.024849386885762215, 0.04679658263921738, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>5786</th>\n","      <td>Industry body CII said #discoms are likely to ...</td>\n","      <td>0.785020</td>\n","      <td>Industry body CII said #discoms are likely to ...</td>\n","      <td>negative</td>\n","      <td>negative</td>\n","      <td>[0.020985644310712814, -0.03145354613661766, -...</td>\n","    </tr>\n","    <tr>\n","      <th>5787</th>\n","      <td>#Gold prices slip below Rs 46,000 as #investor...</td>\n","      <td>0.861554</td>\n","      <td>#Gold prices slip below Rs 46,000 as #investor...</td>\n","      <td>negative</td>\n","      <td>negative</td>\n","      <td>[0.05627664923667908, 
0.012842322699725628, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>5788</th>\n","      <td>Workers at Bajaj Auto have agreed to a 10% wag...</td>\n","      <td>0.794606</td>\n","      <td>Workers at Bajaj Auto have agreed to a 10% wag...</td>\n","      <td>negative</td>\n","      <td>positive</td>\n","      <td>[0.01210737880319357, -0.02798214927315712, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>5789</th>\n","      <td>#Sharemarket LIVE: Sensex off day’s high, up 6...</td>\n","      <td>0.966394</td>\n","      <td>#Sharemarket LIVE: Sensex off day’s high, up 6...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>[0.0031773506198078394, -0.04296385496854782, ...</td>\n","    </tr>\n","    <tr>\n","      <th>5790</th>\n","      <td>#Sensex, #Nifty climb off day's highs, still u...</td>\n","      <td>0.987555</td>\n","      <td>#Sensex, #Nifty climb off day's highs, still u...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>[0.04964913800358772, -0.04634825885295868, -0...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5791 rows × 6 columns</p>\n","</div>"],"text/plain":["                                                           text  ...                            default_name_embeddings\n","origin_index                                                     ...                                                   \n","0             Kickers on my watchlist XIDE TIT SOQ PNK CPW B...  ...  [0.006487144622951746, -0.042024899274110794, ...\n","1             user: AAP MOVIE. 55% return for the FEA/GEED i...  ...  [-0.03017628937959671, -0.0627138689160347, -0...\n","2             user I'd be afraid to short AMZN - they are lo...  ...  [0.05556508153676987, -0.016491785645484924, 0...\n","3                                             MNTA Over 12.00    ...  [-0.01097656786441803, -0.02980119362473488, -...\n","4                                              OI  Over 21.37    ...  
[0.024849386885762215, 0.04679658263921738, -0...\n","...                                                         ...  ...                                                ...\n","5786          Industry body CII said #discoms are likely to ...  ...  [0.020985644310712814, -0.03145354613661766, -...\n","5787          #Gold prices slip below Rs 46,000 as #investor...  ...  [0.05627664923667908, 0.012842322699725628, -0...\n","5788          Workers at Bajaj Auto have agreed to a 10% wag...  ...  [0.01210737880319357, -0.02798214927315712, -0...\n","5789          #Sharemarket LIVE: Sensex off day’s high, up 6...  ...  [0.0031773506198078394, -0.04296385496854782, ...\n","5790          #Sensex, #Nifty climb off day's highs, still u...  ...  [0.04964913800358772, -0.04634825885295868, -0...\n","\n","[5791 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":107},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1607932222650,"user_tz":-60,"elapsed":262555,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"f38cb015-a48f-47a1-e2b7-5d5bb488beb9"},"source":["fitted_pipe.predict(\"Bitcoin is going to the moon!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      
<th>sentiment_confidence</th>\n","      <th>document</th>\n","      <th>sentiment</th>\n","      <th>default_name_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0.918913</td>\n","      <td>Bitcoin is going to the moon!</td>\n","      <td>positive</td>\n","      <td>[0.06468033790588379, -0.040837567299604416, -...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["             sentiment_confidence  ...                            default_name_embeddings\n","origin_index                       ...                                                   \n","0                        0.918913  ...  [0.06468033790588379, -0.040837567299604416, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1607932222651,"user_tz":-60,"elapsed":262549,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"cf184e49-084a-42d4-c95a-4de7c21cae16"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2)                 | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005)                    | Info: Learning Rate | Currently set to : 
0.005\n","pipe['sentiment_dl'].setBatchSize(64)                | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5)                 | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True)       | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6)               | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral')    | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512)               | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use')      | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+')    | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"])  | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0)            | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999)        | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True)  | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True)       | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([])        | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False)  | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0)            | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999)        | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')  | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":614},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1607932301821,"user_tz":-60,"elapsed":341713,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"0c0022d0-2bf6-44db-e737-30892668621f"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5)  \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["              precision    recall  f1-score   support\n","\n","    negative       0.79      0.67      0.72      2106\n","     neutral       0.00      0.00      0.00         0\n","    positive       0.84      0.87      0.85      3685\n","\n","    accuracy                           0.80      5791\n","   macro avg       0.54      0.51      0.53      5791\n","weighted avg       0.82      0.80      0.81      5791\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>sentiment_confidence</th>\n","      <th>document</th>\n","      <th>sentiment</th>\n","      <th>y</th>\n","      <th>default_name_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Kickers on my watchlist XIDE TIT SOQ PNK CPW B...</td>\n","      <td>0.999146</td>\n","      <td>Kickers on my watchlist XIDE TIT SOQ PNK CPW B...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>[0.006487144622951746, -0.042024899274110794, ...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>user: AAP MOVIE. 
55% return for the FEA/GEED i...</td>\n","      <td>0.941052</td>\n","      <td>user: AAP MOVIE. 55% return for the FEA/GEED i...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>[-0.03017628937959671, -0.0627138689160347, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>user I'd be afraid to short AMZN - they are lo...</td>\n","      <td>0.648649</td>\n","      <td>user I'd be afraid to short AMZN - they are lo...</td>\n","      <td>negative</td>\n","      <td>positive</td>\n","      <td>[0.05556508153676987, -0.016491785645484924, 0...</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>MNTA Over 12.00</td>\n","      <td>0.988186</td>\n","      <td>MNTA Over 12.00</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>[-0.01097656786441803, -0.02980119362473488, -...</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>OI  Over 21.37</td>\n","      <td>0.783930</td>\n","      <td>OI Over 21.37</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>[0.024849386885762215, 0.04679658263921738, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>5786</th>\n","      <td>Industry body CII said #discoms are likely to ...</td>\n","      <td>0.990443</td>\n","      <td>Industry body CII said #discoms are likely to ...</td>\n","      <td>negative</td>\n","      <td>negative</td>\n","      <td>[0.020985644310712814, -0.03145354613661766, -...</td>\n","    </tr>\n","    <tr>\n","      <th>5787</th>\n","      <td>#Gold prices slip below Rs 46,000 as #investor...</td>\n","      <td>0.999385</td>\n","      <td>#Gold prices slip below Rs 46,000 as #investor...</td>\n","      <td>negative</td>\n","      <td>negative</td>\n","      <td>[0.05627664923667908, 0.012842322699725628, 
-0...</td>\n","    </tr>\n","    <tr>\n","      <th>5788</th>\n","      <td>Workers at Bajaj Auto have agreed to a 10% wag...</td>\n","      <td>0.728881</td>\n","      <td>Workers at Bajaj Auto have agreed to a 10% wag...</td>\n","      <td>negative</td>\n","      <td>positive</td>\n","      <td>[0.01210737880319357, -0.02798214927315712, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>5789</th>\n","      <td>#Sharemarket LIVE: Sensex off day’s high, up 6...</td>\n","      <td>0.987245</td>\n","      <td>#Sharemarket LIVE: Sensex off day’s high, up 6...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>[0.0031773506198078394, -0.04296385496854782, ...</td>\n","    </tr>\n","    <tr>\n","      <th>5790</th>\n","      <td>#Sensex, #Nifty climb off day's highs, still u...</td>\n","      <td>0.999714</td>\n","      <td>#Sensex, #Nifty climb off day's highs, still u...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>[0.04964913800358772, -0.04634825885295868, -0...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5791 rows × 6 columns</p>\n","</div>"],"text/plain":["                                                           text  ...                            default_name_embeddings\n","origin_index                                                     ...                                                   \n","0             Kickers on my watchlist XIDE TIT SOQ PNK CPW B...  ...  [0.006487144622951746, -0.042024899274110794, ...\n","1             user: AAP MOVIE. 55% return for the FEA/GEED i...  ...  [-0.03017628937959671, -0.0627138689160347, -0...\n","2             user I'd be afraid to short AMZN - they are lo...  ...  [0.05556508153676987, -0.016491785645484924, 0...\n","3                                             MNTA Over 12.00    ...  [-0.01097656786441803, -0.02980119362473488, -...\n","4                                              OI  Over 21.37    ...  
[0.024849386885762215, 0.04679658263921738, -0...\n","...                                                         ...  ...                                                ...\n","5786          Industry body CII said #discoms are likely to ...  ...  [0.020985644310712814, -0.03145354613661766, -...\n","5787          #Gold prices slip below Rs 46,000 as #investor...  ...  [0.05627664923667908, 0.012842322699725628, -0...\n","5788          Workers at Bajaj Auto have agreed to a 10% wag...  ...  [0.01210737880319357, -0.02798214927315712, -0...\n","5789          #Sharemarket LIVE: Sensex off day’s high, up 6...  ...  [0.0031773506198078394, -0.04296385496854782, ...\n","5790          #Sensex, #Nifty climb off day's highs, still u...  ...  [0.04964913800358772, -0.04634825885295868, -0...\n","\n","[5791 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nxWFzQOhjWC8","executionInfo":{"status":"ok","timestamp":1607932301823,"user_tz":-60,"elapsed":341709,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"4dfe0938-a01e-4469-c4fa-8909deb02a2a"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language <en> NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language <fi> NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language <xx> NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":667},"id":"IKK_Ii_gjJfF","executionInfo":{"status":"ok","timestamp":1607932444818,"user_tz":-60,"elapsed":484698,"user":{"displayName":"Christian Kasim 
Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"dc1943bb-8f1f-4503-c7e4-8f4938ddf4aa"},"source":["trainable_pipe = nlu.load('embed_sentence.bert train.sentiment')\n","# We need to train longer and use a smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(40)  \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L2_128 download started this may take some time.\n","Approximate size to download 16.1 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    negative       0.68      0.25      0.36      2106\n","     neutral       0.00      0.00      0.00         0\n","    positive       0.72      0.84      0.77      3685\n","\n","    accuracy                           0.63      5791\n","   macro avg       0.47      0.36      0.38      5791\n","weighted avg       0.71      0.63      0.63      5791\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" 
class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>sentiment_confidence</th>\n","      <th>document</th>\n","      <th>embed_sentence_bert_embeddings</th>\n","      <th>sentiment</th>\n","      <th>y</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Kickers on my watchlist XIDE TIT SOQ PNK CPW B...</td>\n","      <td>0.874224</td>\n","      <td>Kickers on my watchlist XIDE TIT SOQ PNK CPW B...</td>\n","      <td>[-0.9207571744918823, 0.21013416349887848, 0.1...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>user: AAP MOVIE. 55% return for the FEA/GEED i...</td>\n","      <td>0.647704</td>\n","      <td>user: AAP MOVIE. 
55% return for the FEA/GEED i...</td>\n","      <td>[-0.43004727363586426, 0.5101231336593628, -0....</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>user I'd be afraid to short AMZN - they are lo...</td>\n","      <td>0.780586</td>\n","      <td>user I'd be afraid to short AMZN - they are lo...</td>\n","      <td>[0.3040030300617218, 0.22862982749938965, -0.5...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>MNTA Over 12.00</td>\n","      <td>0.978046</td>\n","      <td>MNTA Over 12.00</td>\n","      <td>[-1.810348391532898, -0.4799138903617859, -0.7...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>OI  Over 21.37</td>\n","      <td>0.961256</td>\n","      <td>OI Over 21.37</td>\n","      <td>[-2.4639298915863037, 0.3879590630531311, -0.6...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>5786</th>\n","      <td>Industry body CII said #discoms are likely to ...</td>\n","      <td>0.759879</td>\n","      <td>Industry body CII said #discoms are likely to ...</td>\n","      <td>[-0.09503911435604095, 0.6293947696685791, 0.0...</td>\n","      <td>negative</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>5787</th>\n","      <td>#Gold prices slip below Rs 46,000 as #investor...</td>\n","      <td>0.759041</td>\n","      <td>#Gold prices slip below Rs 46,000 as #investor...</td>\n","      <td>[-0.1287938952445984, 0.28170245885849, 0.0280...</td>\n","      <td>negative</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>5788</th>\n","      <td>Workers at Bajaj Auto 
have agreed to a 10% wag...</td>\n","      <td>0.750849</td>\n","      <td>Workers at Bajaj Auto have agreed to a 10% wag...</td>\n","      <td>[-0.3395587205886841, 0.912406325340271, -0.32...</td>\n","      <td>negative</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>5789</th>\n","      <td>#Sharemarket LIVE: Sensex off day’s high, up 6...</td>\n","      <td>0.567143</td>\n","      <td>#Sharemarket LIVE: Sensex off day’s high, up 6...</td>\n","      <td>[-0.6081283092498779, 0.2732301354408264, 0.25...</td>\n","      <td>neutral</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>5790</th>\n","      <td>#Sensex, #Nifty climb off day's highs, still u...</td>\n","      <td>0.545603</td>\n","      <td>#Sensex, #Nifty climb off day's highs, still u...</td>\n","      <td>[-0.44862690567970276, 0.43264657258987427, 0....</td>\n","      <td>neutral</td>\n","      <td>positive</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5791 rows × 6 columns</p>\n","</div>"],"text/plain":["                                                           text  ...         y\n","origin_index                                                     ...          \n","0             Kickers on my watchlist XIDE TIT SOQ PNK CPW B...  ...  positive\n","1             user: AAP MOVIE. 55% return for the FEA/GEED i...  ...  positive\n","2             user I'd be afraid to short AMZN - they are lo...  ...  positive\n","3                                             MNTA Over 12.00    ...  positive\n","4                                              OI  Over 21.37    ...  positive\n","...                                                         ...  ...       ...\n","5786          Industry body CII said #discoms are likely to ...  ...  negative\n","5787          #Gold prices slip below Rs 46,000 as #investor...  ...  negative\n","5788          Workers at Bajaj Auto have agreed to a 10% wag...  ...  
positive\n","5789          #Sharemarket LIVE: Sensex off day’s high, up 6...  ...  positive\n","5790          #Sensex, #Nifty climb off day's highs, still u...  ...  positive\n","\n","[5791 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":10}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607932457549,"user_tz":-60,"elapsed":497423,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"c39e2854-fd34-4576-ebb2-352bc80fb3c8"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible!   
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":125},"executionInfo":{"status":"ok","timestamp":1607932462254,"user_tz":-60,"elapsed":502122,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"22426d96-3b57-4450-8af1-7a0c69de879e"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Tesla plans to invest 10M into the ML sector')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>sentiment_confidence</th>\n","      <th>document</th>\n","      <th>embed_sentence_bert_embeddings</th>\n","      <th>sentiment</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0.974726</td>\n","      <td>Tesla plans to invest 10M into the ML sector</td>\n","      <td>[-0.07111635059118271, 0.9532930850982666, -1....</td>\n","      <td>positive</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["             sentiment_confidence  ... 
sentiment\n","origin_index                       ...          \n","0                        0.974726  ...  positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":12}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607932462257,"user_tz":-60,"elapsed":502119,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"921f5de2-eeb7-4115-a427-1671e3390f1c"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')           | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])                 | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)                | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False)          | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)                 | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)                     | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)           | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)       | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)      | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')               | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)                   | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)                       | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32)                                | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False)                         | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(128)                               | Info: Number of embedding dimensions | Currently set to : 128\n","pipe['glove'].setMaxSentenceLength(128)                       | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False)                                | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L2_128')         | Info: unique reference name for identification | Currently set to : sent_small_bert_L2_128\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6)                        | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral')             | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative'])     | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L2_128')  | Info: unique reference name for identification | Currently set to : sent_small_bert_L2_128\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"73rQbUy-KLpb"},"source":[""],"execution_count":null,"outputs":[]}]}
\ No newline at end of file
diff --git a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_IMDB.ipynb b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_IMDB.ipynb
index af53f8a5..490dc371 100644
--- a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_IMDB.ipynb
+++ b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_IMDB.ipynb
@@ -1 +1 @@
-{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_IMDB.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/collab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_IMDB.ipynb)\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu pyspark==2.4.7 > /dev/null  \n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download IMDB dataset\n","https://www.kaggle.com/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews\n","\n","IMDB dataset having 50K movie reviews for natural language processing or Text analytics.\n","This is a dataset for binary sentiment classification containing substantially more data than previous benchmark datasets. We provide a set of 25,000 highly polar movie reviews for training and 25,000 for testing. So, predict the number of positive and negative reviews using either classification or deep learning algorithms.\n","For more dataset information, please go through the following link,\n","http://ai.stanford.edu/~amaas/data/sentiment/"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610788112062,"user_tz":-300,"elapsed":2594,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a99c604d-fad2-4ace-c9b3-13dcb5893e03"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/IMDB-Dataset.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:07:54--  http://ckl-it.de/wp-content/uploads/2021/01/IMDB-Dataset.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 3288450 (3.1M) [text/csv]\n","Saving to: ‘IMDB-Dataset.csv’\n","\n","IMDB-Dataset.csv    100%[===================>]   3.14M  2.29MB/s    in 1.4s    \n","\n","2021-01-16 09:07:56 (2.29 MB/s) - ‘IMDB-Dataset.csv’ saved [3288450/3288450]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610788130009,"user_tz":-300,"elapsed":1019,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"fa40642c-aeea-4506-b40e-3542a49a2ee9"},"source":["import pandas as pd\n","train_path = '/content/IMDB-Dataset.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must have name 'y' name be of type str\n","columns=['text','y']\n","train_df = train_df[columns]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>y</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>One of the other reviewers has mentioned that ...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>A wonderful little production. 
&lt;br /&gt;&lt;br /&gt;The...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>I thought this was a wonderful way to spend ti...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>Basically there's a family where a little boy ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>Petter Mattei's \"Love in the Time of Money\" is...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>2495</th>\n","      <td>Another great movie by Costa-Gavras. It's a gr...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>2496</th>\n","      <td>Though structured totally different from the b...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>2497</th>\n","      <td>Handsome and dashing British airline pilot Geo...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>2498</th>\n","      <td>This film breeches the fine line between satir...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>2499</th>\n","      <td>Mardi Gras: Made in China provides a wonderful...</td>\n","      <td>positive</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>2500 rows × 2 columns</p>\n","</div>"],"text/plain":["                                                   text         y\n","0     One of the other reviewers has mentioned that ...  positive\n","1     A wonderful little production. <br /><br />The...  positive\n","2     I thought this was a wonderful way to spend ti...  positive\n","3     Basically there's a family where a little boy ...  negative\n","4     Petter Mattei's \"Love in the Time of Money\" is...  positive\n","...                                                 ...       ...\n","2495  Another great movie by Costa-Gavras. It's a gr...  
negative\n","2496  Though structured totally different from the b...  positive\n","2497  Handsome and dashing British airline pilot Geo...  positive\n","2498  This film breeches the fine line between satir...  negative\n","2499  Mardi Gras: Made in China provides a wonderful...  positive\n","\n","[2500 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609464660630,"user_tz":-300,"elapsed":19440,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3be85e48-38c8-4a7c-bbd7-226e204fa739"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix  and fit it on a datset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    negative       0.70      0.70      0.70        27\n","     neutral       0.00      0.00      0.00         0\n","    positive       0.79      0.65      0.71        23\n","\n","    accuracy                           0.68        50\n","   macro avg       0.50      0.45      0.47        50\n","weighted avg       0.74      0.68      0.71        50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>default_name_embeddings</th>\n","      <th>sentiment</th>\n","      <th>sentiment_confidence</th>\n","      <th>y</th>\n","      <th>document</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>One of the other reviewers has mentioned that ...</td>\n","      <td>[-0.04935329407453537, -0.01034686528146267, -...</td>\n","      <td>positive</td>\n","      <td>0.968638</td>\n","      <td>positive</td>\n","      <td>One of the other reviewers has mentioned that ...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      
<td>A wonderful little production. &lt;br /&gt;&lt;br /&gt;The...</td>\n","      <td>[0.040489643812179565, -0.054199717938899994, ...</td>\n","      <td>negative</td>\n","      <td>0.990273</td>\n","      <td>positive</td>\n","      <td>A wonderful little production. &lt;br /&gt;&lt;br /&gt;The...</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>I thought this was a wonderful way to spend ti...</td>\n","      <td>[0.026364900171756744, 0.07112795859575272, 0....</td>\n","      <td>negative</td>\n","      <td>0.957352</td>\n","      <td>positive</td>\n","      <td>I thought this was a wonderful way to spend ti...</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>Basically there's a family where a little boy ...</td>\n","      <td>[-0.05151151493191719, 0.008207003585994244, -...</td>\n","      <td>negative</td>\n","      <td>0.958503</td>\n","      <td>negative</td>\n","      <td>Basically there's a family where a little boy ...</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>Petter Mattei's \"Love in the Time of Money\" is...</td>\n","      <td>[0.06880538165569305, 0.019250543788075447, -0...</td>\n","      <td>positive</td>\n","      <td>0.999108</td>\n","      <td>positive</td>\n","      <td>Petter Mattei's \"Love in the Time of Money\" is...</td>\n","    </tr>\n","    <tr>\n","      <th>5</th>\n","      <td>Probably my all-time favorite movie, a story o...</td>\n","      <td>[0.004764211364090443, 0.027671916410326958, -...</td>\n","      <td>positive</td>\n","      <td>0.993937</td>\n","      <td>positive</td>\n","      <td>Probably my all-time favorite movie, a story o...</td>\n","    </tr>\n","    <tr>\n","      <th>6</th>\n","      <td>I sure would like to see a resurrection of a u...</td>\n","      <td>[-0.03813941031694412, -0.03322296217083931, 0...</td>\n","      <td>positive</td>\n","      <td>0.974884</td>\n","      <td>positive</td>\n","      <td>I sure would like to see a resurrection of a 
u...</td>\n","    </tr>\n","    <tr>\n","      <th>7</th>\n","      <td>This show was an amazing, fresh &amp; innovative i...</td>\n","      <td>[0.010670202784240246, -0.04322813078761101, -...</td>\n","      <td>negative</td>\n","      <td>0.721451</td>\n","      <td>negative</td>\n","      <td>This show was an amazing, fresh &amp; innovative i...</td>\n","    </tr>\n","    <tr>\n","      <th>8</th>\n","      <td>Encouraged by the positive comments about this...</td>\n","      <td>[0.010801736265420914, -0.07724311947822571, -...</td>\n","      <td>positive</td>\n","      <td>0.884824</td>\n","      <td>negative</td>\n","      <td>Encouraged by the positive comments about this...</td>\n","    </tr>\n","    <tr>\n","      <th>9</th>\n","      <td>If you like original gut wrenching laughter yo...</td>\n","      <td>[-0.0245585348457098, 0.0005475765210576355, -...</td>\n","      <td>negative</td>\n","      <td>0.850509</td>\n","      <td>positive</td>\n","      <td>If you like original gut wrenching laughter yo...</td>\n","    </tr>\n","    <tr>\n","      <th>10</th>\n","      <td>Phil the Alien is one of those quirky films wh...</td>\n","      <td>[0.023403573781251907, 0.017464609816670418, -...</td>\n","      <td>negative</td>\n","      <td>0.836944</td>\n","      <td>negative</td>\n","      <td>Phil the Alien is one of those quirky films wh...</td>\n","    </tr>\n","    <tr>\n","      <th>11</th>\n","      <td>I saw this movie when I was about 12 when it c...</td>\n","      <td>[-0.046517230570316315, -0.025949953123927116,...</td>\n","      <td>negative</td>\n","      <td>0.999218</td>\n","      <td>negative</td>\n","      <td>I saw this movie when I was about 12 when it c...</td>\n","    </tr>\n","    <tr>\n","      <th>12</th>\n","      <td>So im not a big fan of Boll's work but then ag...</td>\n","      <td>[0.0032458826899528503, -0.013339877128601074,...</td>\n","      <td>negative</td>\n","      <td>0.999841</td>\n","      <td>negative</td>\n","      
<td>So im not a big fan of Boll's work but then ag...</td>\n","    </tr>\n","    <tr>\n","      <th>13</th>\n","      <td>The cast played Shakespeare.&lt;br /&gt;&lt;br /&gt;Shakes...</td>\n","      <td>[0.044309284538030624, 0.061706289649009705, -...</td>\n","      <td>neutral</td>\n","      <td>0.504574</td>\n","      <td>negative</td>\n","      <td>The cast played Shakespeare.&lt;br /&gt;&lt;br /&gt;Shakes...</td>\n","    </tr>\n","    <tr>\n","      <th>14</th>\n","      <td>This a fantastic movie of three prisoners who ...</td>\n","      <td>[0.005487383343279362, -0.005359508562833071, ...</td>\n","      <td>positive</td>\n","      <td>0.956110</td>\n","      <td>positive</td>\n","      <td>This a fantastic movie of three prisoners who ...</td>\n","    </tr>\n","    <tr>\n","      <th>15</th>\n","      <td>Kind of drawn in by the erotic scenes, only to...</td>\n","      <td>[0.04357790946960449, -0.034652918577194214, -...</td>\n","      <td>negative</td>\n","      <td>0.990112</td>\n","      <td>negative</td>\n","      <td>Kind of drawn in by the erotic scenes, only to...</td>\n","    </tr>\n","    <tr>\n","      <th>16</th>\n","      <td>Some films just simply should not be remade. T...</td>\n","      <td>[0.006823724135756493, -0.0692802369594574, -0...</td>\n","      <td>negative</td>\n","      <td>0.996081</td>\n","      <td>positive</td>\n","      <td>Some films just simply should not be remade. 
T...</td>\n","    </tr>\n","    <tr>\n","      <th>17</th>\n","      <td>This movie made it into one of my top 10 most ...</td>\n","      <td>[-0.013747279532253742, -0.0038213622756302357...</td>\n","      <td>negative</td>\n","      <td>0.999338</td>\n","      <td>negative</td>\n","      <td>This movie made it into one of my top 10 most ...</td>\n","    </tr>\n","    <tr>\n","      <th>18</th>\n","      <td>I remember this film,it was the first film i h...</td>\n","      <td>[-0.005101265385746956, 0.022435873746871948, ...</td>\n","      <td>positive</td>\n","      <td>0.986708</td>\n","      <td>positive</td>\n","      <td>I remember this film,it was the first film i h...</td>\n","    </tr>\n","    <tr>\n","      <th>19</th>\n","      <td>An awful film! It must have been up against so...</td>\n","      <td>[0.011224010959267616, -0.007102800067514181, ...</td>\n","      <td>negative</td>\n","      <td>0.998881</td>\n","      <td>negative</td>\n","      <td>An awful film! It must have been up against so...</td>\n","    </tr>\n","    <tr>\n","      <th>20</th>\n","      <td>After the success of Die Hard and it's sequels...</td>\n","      <td>[0.022048521786928177, -0.020497862249612808, ...</td>\n","      <td>negative</td>\n","      <td>0.650546</td>\n","      <td>positive</td>\n","      <td>After the success of Die Hard and it's sequels...</td>\n","    </tr>\n","    <tr>\n","      <th>21</th>\n","      <td>I had the terrible misfortune of having to vie...</td>\n","      <td>[-0.010102338157594204, -0.05102328583598137, ...</td>\n","      <td>negative</td>\n","      <td>0.999930</td>\n","      <td>negative</td>\n","      <td>I had the terrible misfortune of having to vie...</td>\n","    </tr>\n","    <tr>\n","      <th>22</th>\n","      <td>What an absolutely stunning movie, if you have...</td>\n","      <td>[-0.016428396105766296, 0.007074637804180384, ...</td>\n","      <td>positive</td>\n","      <td>0.981123</td>\n","      <td>positive</td>\n","      <td>What 
an absolutely stunning movie, if you have...</td>\n","    </tr>\n","    <tr>\n","      <th>23</th>\n","      <td>First of all, let's get a few things straight ...</td>\n","      <td>[-0.06437410414218903, -0.029181038960814476, ...</td>\n","      <td>negative</td>\n","      <td>0.957470</td>\n","      <td>negative</td>\n","      <td>First of all, let's get a few things straight ...</td>\n","    </tr>\n","    <tr>\n","      <th>24</th>\n","      <td>This was the worst movie I saw at WorldFest an...</td>\n","      <td>[0.03901044651865959, 0.06355303525924683, -0....</td>\n","      <td>negative</td>\n","      <td>0.999471</td>\n","      <td>negative</td>\n","      <td>This was the worst movie I saw at WorldFest an...</td>\n","    </tr>\n","    <tr>\n","      <th>25</th>\n","      <td>The Karen Carpenter Story shows a little more ...</td>\n","      <td>[-0.021897025406360626, 0.04400184750556946, 0...</td>\n","      <td>positive</td>\n","      <td>0.997247</td>\n","      <td>positive</td>\n","      <td>The Karen Carpenter Story shows a little more ...</td>\n","    </tr>\n","    <tr>\n","      <th>26</th>\n","      <td>\"The Cell\" is an exotic masterpiece, a dizzyin...</td>\n","      <td>[0.0439823754131794, -0.007468021009117365, -0...</td>\n","      <td>positive</td>\n","      <td>0.996351</td>\n","      <td>positive</td>\n","      <td>\"The Cell\" is an exotic masterpiece, a dizzyin...</td>\n","    </tr>\n","    <tr>\n","      <th>27</th>\n","      <td>This film tried to be too many things all at o...</td>\n","      <td>[-0.004155139438807964, -0.03771881386637688, ...</td>\n","      <td>neutral</td>\n","      <td>0.570219</td>\n","      <td>negative</td>\n","      <td>This film tried to be too many things all at o...</td>\n","    </tr>\n","    <tr>\n","      <th>28</th>\n","      <td>This movie was so frustrating. 
Everything seem...</td>\n","      <td>[0.015594013035297394, -0.007509331218898296, ...</td>\n","      <td>negative</td>\n","      <td>0.999906</td>\n","      <td>negative</td>\n","      <td>This movie was so frustrating. Everything seem...</td>\n","    </tr>\n","    <tr>\n","      <th>29</th>\n","      <td>'War movie' is a Hollywood genre that has been...</td>\n","      <td>[-0.036022596061229706, -0.006816706154495478,...</td>\n","      <td>negative</td>\n","      <td>0.733068</td>\n","      <td>positive</td>\n","      <td>'War movie' is a Hollywood genre that has been...</td>\n","    </tr>\n","    <tr>\n","      <th>30</th>\n","      <td>Taut and organically gripping, Edward Dmytryk'...</td>\n","      <td>[0.0312348585575819, -0.04670163244009018, -0....</td>\n","      <td>positive</td>\n","      <td>0.995882</td>\n","      <td>positive</td>\n","      <td>Taut and organically gripping, Edward Dmytryk'...</td>\n","    </tr>\n","    <tr>\n","      <th>31</th>\n","      <td>\"Ardh Satya\" is one of the finest film ever ma...</td>\n","      <td>[0.060114260762929916, -0.0590929239988327, -0...</td>\n","      <td>positive</td>\n","      <td>0.999671</td>\n","      <td>positive</td>\n","      <td>\"Ardh Satya\" is one of the finest film ever ma...</td>\n","    </tr>\n","    <tr>\n","      <th>32</th>\n","      <td>My first exposure to the Templarios &amp; not a go...</td>\n","      <td>[0.013515714555978775, -0.004898980725556612, ...</td>\n","      <td>negative</td>\n","      <td>0.999994</td>\n","      <td>negative</td>\n","      <td>My first exposure to the Templarios &amp; not a go...</td>\n","    </tr>\n","    <tr>\n","      <th>33</th>\n","      <td>One of the most significant quotes from the en...</td>\n","      <td>[0.022280631586909294, -0.00839739479124546, -...</td>\n","      <td>positive</td>\n","      <td>0.997032</td>\n","      <td>positive</td>\n","      <td>One of the most significant quotes from the en...</td>\n","    </tr>\n","    <tr>\n","      
<th>34</th>\n","      <td>I watched this film not really expecting much,...</td>\n","      <td>[0.009434111416339874, -0.046402934938669205, ...</td>\n","      <td>negative</td>\n","      <td>0.992625</td>\n","      <td>negative</td>\n","      <td>I watched this film not really expecting much,...</td>\n","    </tr>\n","    <tr>\n","      <th>35</th>\n","      <td>I bought this film at Blockbuster for $3.00, b...</td>\n","      <td>[0.011683089658617973, -0.047437384724617004, ...</td>\n","      <td>negative</td>\n","      <td>0.999485</td>\n","      <td>negative</td>\n","      <td>I bought this film at Blockbuster for $3.00, b...</td>\n","    </tr>\n","    <tr>\n","      <th>36</th>\n","      <td>The plot is about the death of little children...</td>\n","      <td>[-0.0348515659570694, 0.01680166646838188, -0....</td>\n","      <td>neutral</td>\n","      <td>0.537487</td>\n","      <td>negative</td>\n","      <td>The plot is about the death of little children...</td>\n","    </tr>\n","    <tr>\n","      <th>37</th>\n","      <td>Ever watched a movie that lost the plot? Well,...</td>\n","      <td>[-0.02899913117289543, 0.0164097361266613, -0....</td>\n","      <td>negative</td>\n","      <td>0.998984</td>\n","      <td>negative</td>\n","      <td>Ever watched a movie that lost the plot? 
Well,...</td>\n","    </tr>\n","    <tr>\n","      <th>38</th>\n","      <td>Okay, so this series kind of takes the route o...</td>\n","      <td>[0.002110496163368225, 0.02887572906911373, -0...</td>\n","      <td>positive</td>\n","      <td>0.965860</td>\n","      <td>positive</td>\n","      <td>Okay, so this series kind of takes the route o...</td>\n","    </tr>\n","    <tr>\n","      <th>39</th>\n","      <td>After sitting through this pile of dung, my hu...</td>\n","      <td>[0.013781447894871235, -0.010363072156906128, ...</td>\n","      <td>positive</td>\n","      <td>0.697232</td>\n","      <td>negative</td>\n","      <td>After sitting through this pile of dung, my hu...</td>\n","    </tr>\n","    <tr>\n","      <th>40</th>\n","      <td>It had all the clichés of movies of this type ...</td>\n","      <td>[0.03799372911453247, -0.038665950298309326, -...</td>\n","      <td>negative</td>\n","      <td>0.998307</td>\n","      <td>negative</td>\n","      <td>It had all the clichés of movies of this type ...</td>\n","    </tr>\n","    <tr>\n","      <th>41</th>\n","      <td>This movie is based on the book, \"A Many Splen...</td>\n","      <td>[-0.00033091730438172817, -0.05126418545842171...</td>\n","      <td>positive</td>\n","      <td>0.998454</td>\n","      <td>positive</td>\n","      <td>This movie is based on the book, \"A Many Splen...</td>\n","    </tr>\n","    <tr>\n","      <th>42</th>\n","      <td>Of all the films I have seen, this one, The Ra...</td>\n","      <td>[0.014630819670855999, -0.04907294735312462, -...</td>\n","      <td>negative</td>\n","      <td>0.999938</td>\n","      <td>negative</td>\n","      <td>Of all the films I have seen, this one, The Ra...</td>\n","    </tr>\n","    <tr>\n","      <th>43</th>\n","      <td>I had heard good things about \"States of Grace...</td>\n","      <td>[0.027017194777727127, 0.002088379580527544, 0...</td>\n","      <td>positive</td>\n","      <td>0.884466</td>\n","      <td>negative</td>\n","      
<td>I had heard good things about \"States of Grace...</td>\n","    </tr>\n","    <tr>\n","      <th>44</th>\n","      <td>This movie struck home for me. Being 29, I rem...</td>\n","      <td>[-0.0009387845057062805, -0.048219360411167145...</td>\n","      <td>negative</td>\n","      <td>0.969341</td>\n","      <td>positive</td>\n","      <td>This movie struck home for me. Being 29, I rem...</td>\n","    </tr>\n","    <tr>\n","      <th>45</th>\n","      <td>As a disclaimer, I've seen the movie 5-6 times...</td>\n","      <td>[0.0065035647712647915, 0.00230638706125319, 0...</td>\n","      <td>negative</td>\n","      <td>0.967124</td>\n","      <td>positive</td>\n","      <td>As a disclaimer, I've seen the movie 5-6 times...</td>\n","    </tr>\n","    <tr>\n","      <th>46</th>\n","      <td>Protocol is an implausible movie whose only sa...</td>\n","      <td>[0.05113476142287254, 0.04671141505241394, -0....</td>\n","      <td>neutral</td>\n","      <td>0.593109</td>\n","      <td>negative</td>\n","      <td>Protocol is an implausible movie whose only sa...</td>\n","    </tr>\n","    <tr>\n","      <th>47</th>\n","      <td>How this film could be classified as Drama, I ...</td>\n","      <td>[0.011419376358389854, -0.0828876867890358, -0...</td>\n","      <td>negative</td>\n","      <td>0.991421</td>\n","      <td>negative</td>\n","      <td>How this film could be classified as Drama, I ...</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>Preston Sturgis' THE POWER AND THE GLORY was u...</td>\n","      <td>[0.024031344801187515, 0.03399205952882767, 0....</td>\n","      <td>positive</td>\n","      <td>0.994996</td>\n","      <td>positive</td>\n","      <td>Preston Sturgis' THE POWER AND THE GLORY was u...</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>Average (and surprisingly tame) Fulci giallo w...</td>\n","      <td>[0.015038557350635529, -0.0037642912939190865,...</td>\n","      <td>positive</td>\n","      
<td>0.996770</td>\n","      <td>negative</td>\n","      <td>Average (and surprisingly tame) Fulci giallo w...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                                           text  ...                                           document\n","origin_index                                                     ...                                                   \n","0             One of the other reviewers has mentioned that ...  ...  One of the other reviewers has mentioned that ...\n","1             A wonderful little production. <br /><br />The...  ...  A wonderful little production. <br /><br />The...\n","2             I thought this was a wonderful way to spend ti...  ...  I thought this was a wonderful way to spend ti...\n","3             Basically there's a family where a little boy ...  ...  Basically there's a family where a little boy ...\n","4             Petter Mattei's \"Love in the Time of Money\" is...  ...  Petter Mattei's \"Love in the Time of Money\" is...\n","5             Probably my all-time favorite movie, a story o...  ...  Probably my all-time favorite movie, a story o...\n","6             I sure would like to see a resurrection of a u...  ...  I sure would like to see a resurrection of a u...\n","7             This show was an amazing, fresh & innovative i...  ...  This show was an amazing, fresh & innovative i...\n","8             Encouraged by the positive comments about this...  ...  Encouraged by the positive comments about this...\n","9             If you like original gut wrenching laughter yo...  ...  If you like original gut wrenching laughter yo...\n","10            Phil the Alien is one of those quirky films wh...  ...  Phil the Alien is one of those quirky films wh...\n","11            I saw this movie when I was about 12 when it c...  ...  I saw this movie when I was about 12 when it c...\n","12            So im not a big fan of Boll's work but then ag...  ...  
So im not a big fan of Boll's work but then ag...\n","13            The cast played Shakespeare.<br /><br />Shakes...  ...  The cast played Shakespeare.<br /><br />Shakes...\n","14            This a fantastic movie of three prisoners who ...  ...  This a fantastic movie of three prisoners who ...\n","15            Kind of drawn in by the erotic scenes, only to...  ...  Kind of drawn in by the erotic scenes, only to...\n","16            Some films just simply should not be remade. T...  ...  Some films just simply should not be remade. T...\n","17            This movie made it into one of my top 10 most ...  ...  This movie made it into one of my top 10 most ...\n","18            I remember this film,it was the first film i h...  ...  I remember this film,it was the first film i h...\n","19            An awful film! It must have been up against so...  ...  An awful film! It must have been up against so...\n","20            After the success of Die Hard and it's sequels...  ...  After the success of Die Hard and it's sequels...\n","21            I had the terrible misfortune of having to vie...  ...  I had the terrible misfortune of having to vie...\n","22            What an absolutely stunning movie, if you have...  ...  What an absolutely stunning movie, if you have...\n","23            First of all, let's get a few things straight ...  ...  First of all, let's get a few things straight ...\n","24            This was the worst movie I saw at WorldFest an...  ...  This was the worst movie I saw at WorldFest an...\n","25            The Karen Carpenter Story shows a little more ...  ...  The Karen Carpenter Story shows a little more ...\n","26            \"The Cell\" is an exotic masterpiece, a dizzyin...  ...  \"The Cell\" is an exotic masterpiece, a dizzyin...\n","27            This film tried to be too many things all at o...  ...  This film tried to be too many things all at o...\n","28            This movie was so frustrating. Everything seem...  ...  
This movie was so frustrating. Everything seem...\n","29            'War movie' is a Hollywood genre that has been...  ...  'War movie' is a Hollywood genre that has been...\n","30            Taut and organically gripping, Edward Dmytryk'...  ...  Taut and organically gripping, Edward Dmytryk'...\n","31            \"Ardh Satya\" is one of the finest film ever ma...  ...  \"Ardh Satya\" is one of the finest film ever ma...\n","32            My first exposure to the Templarios & not a go...  ...  My first exposure to the Templarios & not a go...\n","33            One of the most significant quotes from the en...  ...  One of the most significant quotes from the en...\n","34            I watched this film not really expecting much,...  ...  I watched this film not really expecting much,...\n","35            I bought this film at Blockbuster for $3.00, b...  ...  I bought this film at Blockbuster for $3.00, b...\n","36            The plot is about the death of little children...  ...  The plot is about the death of little children...\n","37            Ever watched a movie that lost the plot? Well,...  ...  Ever watched a movie that lost the plot? Well,...\n","38            Okay, so this series kind of takes the route o...  ...  Okay, so this series kind of takes the route o...\n","39            After sitting through this pile of dung, my hu...  ...  After sitting through this pile of dung, my hu...\n","40            It had all the clichés of movies of this type ...  ...  It had all the clichés of movies of this type ...\n","41            This movie is based on the book, \"A Many Splen...  ...  This movie is based on the book, \"A Many Splen...\n","42            Of all the films I have seen, this one, The Ra...  ...  Of all the films I have seen, this one, The Ra...\n","43            I had heard good things about \"States of Grace...  ...  I had heard good things about \"States of Grace...\n","44            This movie struck home for me. Being 29, I rem...  ...  
This movie struck home for me. Being 29, I rem...\n","45            As a disclaimer, I've seen the movie 5-6 times...  ...  As a disclaimer, I've seen the movie 5-6 times...\n","46            Protocol is an implausible movie whose only sa...  ...  Protocol is an implausible movie whose only sa...\n","47            How this film could be classified as Drama, I ...  ...  How this film could be classified as Drama, I ...\n","48            Preston Sturgis' THE POWER AND THE GLORY was u...  ...  Preston Sturgis' THE POWER AND THE GLORY was u...\n","49            Average (and surprisingly tame) Fulci giallo w...  ...  Average (and surprisingly tame) Fulci giallo w...\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609464663328,"user_tz":-300,"elapsed":2733,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ae604bdd-49fb-4b5e-978e-5190dd03b227"},"source":["fitted_pipe.predict('It was one of the best films i have ever watched in my entire life !!')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>default_name_embeddings</th>\n","      <th>sentiment</th>\n","      <th>sentiment_confidence</th>\n","      <th>document</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","   
   <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>[0.06468033790588379, -0.040837567299604416, -...</td>\n","      <td>positive</td>\n","      <td>0.982375</td>\n","      <td>Bitcoin is going to the moon!</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                        default_name_embeddings  ...                       document\n","origin_index                                                     ...                               \n","0             [0.06468033790588379, -0.040837567299604416, -...  ...  Bitcoin is going to the moon!\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609464663334,"user_tz":-300,"elapsed":31,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a9059197-9e1c-4afe-ca3b-97c6d310f60c"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2)                 | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005)                    | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64)                | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5)                 | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True)       | Info: Whether to use stdout in 
addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6)               | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral')    | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+')    | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"])  | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0)            | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999)        | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True)  | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True)       | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([])        | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False)  | Info: whether to explode each 
sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0)            | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999)        | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512)               | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use')      | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')  | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609464673090,"user_tz":-300,"elapsed":9777,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"164e4f03-f48a-4347-95e8-fd3509bf146e"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5)  \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["              precision    recall  f1-score   support\n","\n","    negative       0.81      0.96      0.88        27\n","     neutral       0.00      0.00      0.00         0\n","    positive       0.94      0.70      0.80        23\n","\n","    accuracy                           0.84        50\n","   macro avg       0.58      0.55      0.56        50\n","weighted avg       0.87      0.84      0.84        50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>default_name_embeddings</th>\n","      <th>sentiment</th>\n","      <th>sentiment_confidence</th>\n","      <th>y</th>\n","      <th>document</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>One of the other reviewers has mentioned that ...</td>\n","      <td>[-0.04935329407453537, -0.01034686528146267, -...</td>\n","      <td>positive</td>\n","      <td>0.966858</td>\n","      <td>positive</td>\n","      <td>One of the other reviewers has mentioned that ...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>A wonderful little production. 
&lt;br /&gt;&lt;br /&gt;The...</td>\n","      <td>[0.040489643812179565, -0.054199717938899994, ...</td>\n","      <td>negative</td>\n","      <td>0.985679</td>\n","      <td>positive</td>\n","      <td>A wonderful little production. &lt;br /&gt;&lt;br /&gt;The...</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>I thought this was a wonderful way to spend ti...</td>\n","      <td>[0.026364900171756744, 0.07112795859575272, 0....</td>\n","      <td>negative</td>\n","      <td>0.988745</td>\n","      <td>positive</td>\n","      <td>I thought this was a wonderful way to spend ti...</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>Basically there's a family where a little boy ...</td>\n","      <td>[-0.05151151493191719, 0.008207003585994244, -...</td>\n","      <td>negative</td>\n","      <td>0.999291</td>\n","      <td>negative</td>\n","      <td>Basically there's a family where a little boy ...</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>Petter Mattei's \"Love in the Time of Money\" is...</td>\n","      <td>[0.06880538165569305, 0.019250543788075447, -0...</td>\n","      <td>positive</td>\n","      <td>0.999684</td>\n","      <td>positive</td>\n","      <td>Petter Mattei's \"Love in the Time of Money\" is...</td>\n","    </tr>\n","    <tr>\n","      <th>5</th>\n","      <td>Probably my all-time favorite movie, a story o...</td>\n","      <td>[0.004764211364090443, 0.027671916410326958, -...</td>\n","      <td>positive</td>\n","      <td>0.996598</td>\n","      <td>positive</td>\n","      <td>Probably my all-time favorite movie, a story o...</td>\n","    </tr>\n","    <tr>\n","      <th>6</th>\n","      <td>I sure would like to see a resurrection of a u...</td>\n","      <td>[-0.03813941031694412, -0.03322296217083931, 0...</td>\n","      <td>positive</td>\n","      <td>0.960203</td>\n","      <td>positive</td>\n","      <td>I sure would like to see a resurrection of a u...</td>\n","    </tr>\n","    
<tr>\n","      <th>7</th>\n","      <td>This show was an amazing, fresh &amp; innovative i...</td>\n","      <td>[0.010670202784240246, -0.04322813078761101, -...</td>\n","      <td>negative</td>\n","      <td>0.753273</td>\n","      <td>negative</td>\n","      <td>This show was an amazing, fresh &amp; innovative i...</td>\n","    </tr>\n","    <tr>\n","      <th>8</th>\n","      <td>Encouraged by the positive comments about this...</td>\n","      <td>[0.010801736265420914, -0.07724311947822571, -...</td>\n","      <td>negative</td>\n","      <td>0.958928</td>\n","      <td>negative</td>\n","      <td>Encouraged by the positive comments about this...</td>\n","    </tr>\n","    <tr>\n","      <th>9</th>\n","      <td>If you like original gut wrenching laughter yo...</td>\n","      <td>[-0.0245585348457098, 0.0005475765210576355, -...</td>\n","      <td>neutral</td>\n","      <td>0.536441</td>\n","      <td>positive</td>\n","      <td>If you like original gut wrenching laughter yo...</td>\n","    </tr>\n","    <tr>\n","      <th>10</th>\n","      <td>Phil the Alien is one of those quirky films wh...</td>\n","      <td>[0.023403573781251907, 0.017464609816670418, -...</td>\n","      <td>negative</td>\n","      <td>0.959978</td>\n","      <td>negative</td>\n","      <td>Phil the Alien is one of those quirky films wh...</td>\n","    </tr>\n","    <tr>\n","      <th>11</th>\n","      <td>I saw this movie when I was about 12 when it c...</td>\n","      <td>[-0.046517230570316315, -0.025949953123927116,...</td>\n","      <td>negative</td>\n","      <td>0.999949</td>\n","      <td>negative</td>\n","      <td>I saw this movie when I was about 12 when it c...</td>\n","    </tr>\n","    <tr>\n","      <th>12</th>\n","      <td>So im not a big fan of Boll's work but then ag...</td>\n","      <td>[0.0032458826899528503, -0.013339877128601074,...</td>\n","      <td>negative</td>\n","      <td>0.999997</td>\n","      <td>negative</td>\n","      <td>So im not a big fan of Boll's 
work but then ag...</td>\n","    </tr>\n","    <tr>\n","      <th>13</th>\n","      <td>The cast played Shakespeare.&lt;br /&gt;&lt;br /&gt;Shakes...</td>\n","      <td>[0.044309284538030624, 0.061706289649009705, -...</td>\n","      <td>negative</td>\n","      <td>0.984033</td>\n","      <td>negative</td>\n","      <td>The cast played Shakespeare.&lt;br /&gt;&lt;br /&gt;Shakes...</td>\n","    </tr>\n","    <tr>\n","      <th>14</th>\n","      <td>This a fantastic movie of three prisoners who ...</td>\n","      <td>[0.005487383343279362, -0.005359508562833071, ...</td>\n","      <td>positive</td>\n","      <td>0.775998</td>\n","      <td>positive</td>\n","      <td>This a fantastic movie of three prisoners who ...</td>\n","    </tr>\n","    <tr>\n","      <th>15</th>\n","      <td>Kind of drawn in by the erotic scenes, only to...</td>\n","      <td>[0.04357790946960449, -0.034652918577194214, -...</td>\n","      <td>negative</td>\n","      <td>0.999683</td>\n","      <td>negative</td>\n","      <td>Kind of drawn in by the erotic scenes, only to...</td>\n","    </tr>\n","    <tr>\n","      <th>16</th>\n","      <td>Some films just simply should not be remade. T...</td>\n","      <td>[0.006823724135756493, -0.0692802369594574, -0...</td>\n","      <td>negative</td>\n","      <td>0.999245</td>\n","      <td>positive</td>\n","      <td>Some films just simply should not be remade. 
T...</td>\n","    </tr>\n","    <tr>\n","      <th>17</th>\n","      <td>This movie made it into one of my top 10 most ...</td>\n","      <td>[-0.013747279532253742, -0.0038213622756302357...</td>\n","      <td>negative</td>\n","      <td>0.999970</td>\n","      <td>negative</td>\n","      <td>This movie made it into one of my top 10 most ...</td>\n","    </tr>\n","    <tr>\n","      <th>18</th>\n","      <td>I remember this film,it was the first film i h...</td>\n","      <td>[-0.005101265385746956, 0.022435873746871948, ...</td>\n","      <td>positive</td>\n","      <td>0.975574</td>\n","      <td>positive</td>\n","      <td>I remember this film,it was the first film i h...</td>\n","    </tr>\n","    <tr>\n","      <th>19</th>\n","      <td>An awful film! It must have been up against so...</td>\n","      <td>[0.011224010959267616, -0.007102800067514181, ...</td>\n","      <td>negative</td>\n","      <td>0.999990</td>\n","      <td>negative</td>\n","      <td>An awful film! It must have been up against so...</td>\n","    </tr>\n","    <tr>\n","      <th>20</th>\n","      <td>After the success of Die Hard and it's sequels...</td>\n","      <td>[0.022048521786928177, -0.020497862249612808, ...</td>\n","      <td>positive</td>\n","      <td>0.951596</td>\n","      <td>positive</td>\n","      <td>After the success of Die Hard and it's sequels...</td>\n","    </tr>\n","    <tr>\n","      <th>21</th>\n","      <td>I had the terrible misfortune of having to vie...</td>\n","      <td>[-0.010102338157594204, -0.05102328583598137, ...</td>\n","      <td>negative</td>\n","      <td>0.999999</td>\n","      <td>negative</td>\n","      <td>I had the terrible misfortune of having to vie...</td>\n","    </tr>\n","    <tr>\n","      <th>22</th>\n","      <td>What an absolutely stunning movie, if you have...</td>\n","      <td>[-0.016428396105766296, 0.007074637804180384, ...</td>\n","      <td>positive</td>\n","      <td>0.931946</td>\n","      <td>positive</td>\n","      <td>What 
an absolutely stunning movie, if you have...</td>\n","    </tr>\n","    <tr>\n","      <th>23</th>\n","      <td>First of all, let's get a few things straight ...</td>\n","      <td>[-0.06437410414218903, -0.029181038960814476, ...</td>\n","      <td>negative</td>\n","      <td>0.990350</td>\n","      <td>negative</td>\n","      <td>First of all, let's get a few things straight ...</td>\n","    </tr>\n","    <tr>\n","      <th>24</th>\n","      <td>This was the worst movie I saw at WorldFest an...</td>\n","      <td>[0.03901044651865959, 0.06355303525924683, -0....</td>\n","      <td>negative</td>\n","      <td>0.999986</td>\n","      <td>negative</td>\n","      <td>This was the worst movie I saw at WorldFest an...</td>\n","    </tr>\n","    <tr>\n","      <th>25</th>\n","      <td>The Karen Carpenter Story shows a little more ...</td>\n","      <td>[-0.021897025406360626, 0.04400184750556946, 0...</td>\n","      <td>positive</td>\n","      <td>0.999463</td>\n","      <td>positive</td>\n","      <td>The Karen Carpenter Story shows a little more ...</td>\n","    </tr>\n","    <tr>\n","      <th>26</th>\n","      <td>\"The Cell\" is an exotic masterpiece, a dizzyin...</td>\n","      <td>[0.0439823754131794, -0.007468021009117365, -0...</td>\n","      <td>positive</td>\n","      <td>0.998291</td>\n","      <td>positive</td>\n","      <td>\"The Cell\" is an exotic masterpiece, a dizzyin...</td>\n","    </tr>\n","    <tr>\n","      <th>27</th>\n","      <td>This film tried to be too many things all at o...</td>\n","      <td>[-0.004155139438807964, -0.03771881386637688, ...</td>\n","      <td>negative</td>\n","      <td>0.865707</td>\n","      <td>negative</td>\n","      <td>This film tried to be too many things all at o...</td>\n","    </tr>\n","    <tr>\n","      <th>28</th>\n","      <td>This movie was so frustrating. 
Everything seem...</td>\n","      <td>[0.015594013035297394, -0.007509331218898296, ...</td>\n","      <td>negative</td>\n","      <td>0.999998</td>\n","      <td>negative</td>\n","      <td>This movie was so frustrating. Everything seem...</td>\n","    </tr>\n","    <tr>\n","      <th>29</th>\n","      <td>'War movie' is a Hollywood genre that has been...</td>\n","      <td>[-0.036022596061229706, -0.006816706154495478,...</td>\n","      <td>negative</td>\n","      <td>0.993793</td>\n","      <td>positive</td>\n","      <td>'War movie' is a Hollywood genre that has been...</td>\n","    </tr>\n","    <tr>\n","      <th>30</th>\n","      <td>Taut and organically gripping, Edward Dmytryk'...</td>\n","      <td>[0.0312348585575819, -0.04670163244009018, -0....</td>\n","      <td>positive</td>\n","      <td>0.997460</td>\n","      <td>positive</td>\n","      <td>Taut and organically gripping, Edward Dmytryk'...</td>\n","    </tr>\n","    <tr>\n","      <th>31</th>\n","      <td>\"Ardh Satya\" is one of the finest film ever ma...</td>\n","      <td>[0.060114260762929916, -0.0590929239988327, -0...</td>\n","      <td>positive</td>\n","      <td>0.999880</td>\n","      <td>positive</td>\n","      <td>\"Ardh Satya\" is one of the finest film ever ma...</td>\n","    </tr>\n","    <tr>\n","      <th>32</th>\n","      <td>My first exposure to the Templarios &amp; not a go...</td>\n","      <td>[0.013515714555978775, -0.004898980725556612, ...</td>\n","      <td>negative</td>\n","      <td>1.000000</td>\n","      <td>negative</td>\n","      <td>My first exposure to the Templarios &amp; not a go...</td>\n","    </tr>\n","    <tr>\n","      <th>33</th>\n","      <td>One of the most significant quotes from the en...</td>\n","      <td>[0.022280631586909294, -0.00839739479124546, -...</td>\n","      <td>positive</td>\n","      <td>0.999292</td>\n","      <td>positive</td>\n","      <td>One of the most significant quotes from the en...</td>\n","    </tr>\n","    <tr>\n","      
<th>34</th>\n","      <td>I watched this film not really expecting much,...</td>\n","      <td>[0.009434111416339874, -0.046402934938669205, ...</td>\n","      <td>negative</td>\n","      <td>0.999848</td>\n","      <td>negative</td>\n","      <td>I watched this film not really expecting much,...</td>\n","    </tr>\n","    <tr>\n","      <th>35</th>\n","      <td>I bought this film at Blockbuster for $3.00, b...</td>\n","      <td>[0.011683089658617973, -0.047437384724617004, ...</td>\n","      <td>negative</td>\n","      <td>0.999993</td>\n","      <td>negative</td>\n","      <td>I bought this film at Blockbuster for $3.00, b...</td>\n","    </tr>\n","    <tr>\n","      <th>36</th>\n","      <td>The plot is about the death of little children...</td>\n","      <td>[-0.0348515659570694, 0.01680166646838188, -0....</td>\n","      <td>negative</td>\n","      <td>0.997690</td>\n","      <td>negative</td>\n","      <td>The plot is about the death of little children...</td>\n","    </tr>\n","    <tr>\n","      <th>37</th>\n","      <td>Ever watched a movie that lost the plot? Well,...</td>\n","      <td>[-0.02899913117289543, 0.0164097361266613, -0....</td>\n","      <td>negative</td>\n","      <td>0.999995</td>\n","      <td>negative</td>\n","      <td>Ever watched a movie that lost the plot? 
Well,...</td>\n","    </tr>\n","    <tr>\n","      <th>38</th>\n","      <td>Okay, so this series kind of takes the route o...</td>\n","      <td>[0.002110496163368225, 0.02887572906911373, -0...</td>\n","      <td>positive</td>\n","      <td>0.993408</td>\n","      <td>positive</td>\n","      <td>Okay, so this series kind of takes the route o...</td>\n","    </tr>\n","    <tr>\n","      <th>39</th>\n","      <td>After sitting through this pile of dung, my hu...</td>\n","      <td>[0.013781447894871235, -0.010363072156906128, ...</td>\n","      <td>negative</td>\n","      <td>0.905860</td>\n","      <td>negative</td>\n","      <td>After sitting through this pile of dung, my hu...</td>\n","    </tr>\n","    <tr>\n","      <th>40</th>\n","      <td>It had all the clichés of movies of this type ...</td>\n","      <td>[0.03799372911453247, -0.038665950298309326, -...</td>\n","      <td>negative</td>\n","      <td>0.999892</td>\n","      <td>negative</td>\n","      <td>It had all the clichés of movies of this type ...</td>\n","    </tr>\n","    <tr>\n","      <th>41</th>\n","      <td>This movie is based on the book, \"A Many Splen...</td>\n","      <td>[-0.00033091730438172817, -0.05126418545842171...</td>\n","      <td>positive</td>\n","      <td>0.999837</td>\n","      <td>positive</td>\n","      <td>This movie is based on the book, \"A Many Splen...</td>\n","    </tr>\n","    <tr>\n","      <th>42</th>\n","      <td>Of all the films I have seen, this one, The Ra...</td>\n","      <td>[0.014630819670855999, -0.04907294735312462, -...</td>\n","      <td>negative</td>\n","      <td>1.000000</td>\n","      <td>negative</td>\n","      <td>Of all the films I have seen, this one, The Ra...</td>\n","    </tr>\n","    <tr>\n","      <th>43</th>\n","      <td>I had heard good things about \"States of Grace...</td>\n","      <td>[0.027017194777727127, 0.002088379580527544, 0...</td>\n","      <td>negative</td>\n","      <td>0.978662</td>\n","      <td>negative</td>\n","      
<td>I had heard good things about \"States of Grace...</td>\n","    </tr>\n","    <tr>\n","      <th>44</th>\n","      <td>This movie struck home for me. Being 29, I rem...</td>\n","      <td>[-0.0009387845057062805, -0.048219360411167145...</td>\n","      <td>negative</td>\n","      <td>0.993965</td>\n","      <td>positive</td>\n","      <td>This movie struck home for me. Being 29, I rem...</td>\n","    </tr>\n","    <tr>\n","      <th>45</th>\n","      <td>As a disclaimer, I've seen the movie 5-6 times...</td>\n","      <td>[0.0065035647712647915, 0.00230638706125319, 0...</td>\n","      <td>negative</td>\n","      <td>0.999341</td>\n","      <td>positive</td>\n","      <td>As a disclaimer, I've seen the movie 5-6 times...</td>\n","    </tr>\n","    <tr>\n","      <th>46</th>\n","      <td>Protocol is an implausible movie whose only sa...</td>\n","      <td>[0.05113476142287254, 0.04671141505241394, -0....</td>\n","      <td>negative</td>\n","      <td>0.913287</td>\n","      <td>negative</td>\n","      <td>Protocol is an implausible movie whose only sa...</td>\n","    </tr>\n","    <tr>\n","      <th>47</th>\n","      <td>How this film could be classified as Drama, I ...</td>\n","      <td>[0.011419376358389854, -0.0828876867890358, -0...</td>\n","      <td>negative</td>\n","      <td>0.999841</td>\n","      <td>negative</td>\n","      <td>How this film could be classified as Drama, I ...</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>Preston Sturgis' THE POWER AND THE GLORY was u...</td>\n","      <td>[0.024031344801187515, 0.03399205952882767, 0....</td>\n","      <td>positive</td>\n","      <td>0.998516</td>\n","      <td>positive</td>\n","      <td>Preston Sturgis' THE POWER AND THE GLORY was u...</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>Average (and surprisingly tame) Fulci giallo w...</td>\n","      <td>[0.015038557350635529, -0.0037642912939190865,...</td>\n","      <td>positive</td>\n","      
<td>0.995483</td>\n","      <td>negative</td>\n","      <td>Average (and surprisingly tame) Fulci giallo w...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                                           text  ...                                           document\n","origin_index                                                     ...                                                   \n","0             One of the other reviewers has mentioned that ...  ...  One of the other reviewers has mentioned that ...\n","1             A wonderful little production. <br /><br />The...  ...  A wonderful little production. <br /><br />The...\n","2             I thought this was a wonderful way to spend ti...  ...  I thought this was a wonderful way to spend ti...\n","3             Basically there's a family where a little boy ...  ...  Basically there's a family where a little boy ...\n","4             Petter Mattei's \"Love in the Time of Money\" is...  ...  Petter Mattei's \"Love in the Time of Money\" is...\n","5             Probably my all-time favorite movie, a story o...  ...  Probably my all-time favorite movie, a story o...\n","6             I sure would like to see a resurrection of a u...  ...  I sure would like to see a resurrection of a u...\n","7             This show was an amazing, fresh & innovative i...  ...  This show was an amazing, fresh & innovative i...\n","8             Encouraged by the positive comments about this...  ...  Encouraged by the positive comments about this...\n","9             If you like original gut wrenching laughter yo...  ...  If you like original gut wrenching laughter yo...\n","10            Phil the Alien is one of those quirky films wh...  ...  Phil the Alien is one of those quirky films wh...\n","11            I saw this movie when I was about 12 when it c...  ...  I saw this movie when I was about 12 when it c...\n","12            So im not a big fan of Boll's work but then ag...  ...  
So im not a big fan of Boll's work but then ag...\n","13            The cast played Shakespeare.<br /><br />Shakes...  ...  The cast played Shakespeare.<br /><br />Shakes...\n","14            This a fantastic movie of three prisoners who ...  ...  This a fantastic movie of three prisoners who ...\n","15            Kind of drawn in by the erotic scenes, only to...  ...  Kind of drawn in by the erotic scenes, only to...\n","16            Some films just simply should not be remade. T...  ...  Some films just simply should not be remade. T...\n","17            This movie made it into one of my top 10 most ...  ...  This movie made it into one of my top 10 most ...\n","18            I remember this film,it was the first film i h...  ...  I remember this film,it was the first film i h...\n","19            An awful film! It must have been up against so...  ...  An awful film! It must have been up against so...\n","20            After the success of Die Hard and it's sequels...  ...  After the success of Die Hard and it's sequels...\n","21            I had the terrible misfortune of having to vie...  ...  I had the terrible misfortune of having to vie...\n","22            What an absolutely stunning movie, if you have...  ...  What an absolutely stunning movie, if you have...\n","23            First of all, let's get a few things straight ...  ...  First of all, let's get a few things straight ...\n","24            This was the worst movie I saw at WorldFest an...  ...  This was the worst movie I saw at WorldFest an...\n","25            The Karen Carpenter Story shows a little more ...  ...  The Karen Carpenter Story shows a little more ...\n","26            \"The Cell\" is an exotic masterpiece, a dizzyin...  ...  \"The Cell\" is an exotic masterpiece, a dizzyin...\n","27            This film tried to be too many things all at o...  ...  This film tried to be too many things all at o...\n","28            This movie was so frustrating. Everything seem...  ...  
This movie was so frustrating. Everything seem...\n","29            'War movie' is a Hollywood genre that has been...  ...  'War movie' is a Hollywood genre that has been...\n","30            Taut and organically gripping, Edward Dmytryk'...  ...  Taut and organically gripping, Edward Dmytryk'...\n","31            \"Ardh Satya\" is one of the finest film ever ma...  ...  \"Ardh Satya\" is one of the finest film ever ma...\n","32            My first exposure to the Templarios & not a go...  ...  My first exposure to the Templarios & not a go...\n","33            One of the most significant quotes from the en...  ...  One of the most significant quotes from the en...\n","34            I watched this film not really expecting much,...  ...  I watched this film not really expecting much,...\n","35            I bought this film at Blockbuster for $3.00, b...  ...  I bought this film at Blockbuster for $3.00, b...\n","36            The plot is about the death of little children...  ...  The plot is about the death of little children...\n","37            Ever watched a movie that lost the plot? Well,...  ...  Ever watched a movie that lost the plot? Well,...\n","38            Okay, so this series kind of takes the route o...  ...  Okay, so this series kind of takes the route o...\n","39            After sitting through this pile of dung, my hu...  ...  After sitting through this pile of dung, my hu...\n","40            It had all the clichés of movies of this type ...  ...  It had all the clichés of movies of this type ...\n","41            This movie is based on the book, \"A Many Splen...  ...  This movie is based on the book, \"A Many Splen...\n","42            Of all the films I have seen, this one, The Ra...  ...  Of all the films I have seen, this one, The Ra...\n","43            I had heard good things about \"States of Grace...  ...  I had heard good things about \"States of Grace...\n","44            This movie struck home for me. Being 29, I rem...  ...  
This movie struck home for me. Being 29, I rem...\n","45            As a disclaimer, I've seen the movie 5-6 times...  ...  As a disclaimer, I've seen the movie 5-6 times...\n","46            Protocol is an implausible movie whose only sa...  ...  Protocol is an implausible movie whose only sa...\n","47            How this film could be classified as Drama, I ...  ...  How this film could be classified as Drama, I ...\n","48            Preston Sturgis' THE POWER AND THE GLORY was u...  ...  Preston Sturgis' THE POWER AND THE GLORY was u...\n","49            Average (and surprisingly tame) Fulci giallo w...  ...  Average (and surprisingly tame) Fulci giallo w...\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"IKK_Ii_gjJfF","executionInfo":{"status":"ok","timestamp":1609469926255,"user_tz":-300,"elapsed":140492,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"64f54fdd-699a-4559-f6e4-74b7b5f3e92e"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(120)  \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    negative       0.85      0.81      0.83      1234\n","     neutral       0.00      0.00      0.00         0\n","    positive       0.87      0.79      0.83      1266\n","\n","    accuracy                           0.80      2500\n","   macro avg       0.57      0.54      0.55      2500\n","weighted avg       0.86      0.80      0.83      2500\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. 
Lets save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","executionInfo":{"status":"ok","timestamp":1609470097011,"user_tz":-300,"elapsed":170766,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"59899be4-e33a-4b5e-ff37-df6a9a3994b2"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible!   \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":127},"id":"SO4uz45MoRgp","executionInfo":{"status":"ok","timestamp":1609470112616,"user_tz":-300,"elapsed":15622,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"cbe7fc37-7794-4c28-d1de-5ba88d3db58b"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('It was one of the best films i have ever watched in my entire life !!')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>sentiment</th>\n","      
<th>en_embed_sentence_small_bert_L12_768_embeddings</th>\n","      <th>sentiment_confidence</th>\n","      <th>document</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>positive</td>\n","      <td>[0.09222018718719482, 0.11720675230026245, 0.1...</td>\n","      <td>0.999543</td>\n","      <td>It was one of the best films i have ever watch...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["             sentiment  ...                                           document\n","origin_index            ...                                                   \n","0             positive  ...  It was one of the best films i have ever watch...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"e0CVlkk9v6Qi","executionInfo":{"status":"ok","timestamp":1609470112618,"user_tz":-300,"elapsed":17,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c7467f93-d619-470f-fd40-c2be1805b83f"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')            | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])                  | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)                 | Info: whether 
detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False)           | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)                  | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)                      | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)            | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)        | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)       | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')                | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)                    | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)                        | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32)                                 | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False)                          | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768)                                | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128)                        | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False)                                 | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768')         | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6)                         | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral')              | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative'])      | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768')  | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]}
\ No newline at end of file
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_IMDB.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_IMDB.ipynb)\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","## 2 class IMDB Movie sentiment classifier training\n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu pyspark==2.4.7 > /dev/null  \n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download IMDB dataset\n","https://www.kaggle.com/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews\n","\n","The IMDB dataset contains 50K movie reviews for natural language processing or text analytics.\n","This is a dataset for binary sentiment classification containing substantially more data than previous benchmark datasets. We provide a set of 25,000 highly polar movie reviews for training and 25,000 for testing. So, predict the number of positive and negative reviews using either classification or deep learning algorithms.\n","For more dataset information, see the following link:\n","http://ai.stanford.edu/~amaas/data/sentiment/\n","\n","**Note:** the CSV file downloaded in the next cell is a smaller sample containing 2,500 reviews."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610788112062,"user_tz":-300,"elapsed":2594,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a99c604d-fad2-4ace-c9b3-13dcb5893e03"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/IMDB-Dataset.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:07:54--  http://ckl-it.de/wp-content/uploads/2021/01/IMDB-Dataset.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 3288450 (3.1M) [text/csv]\n","Saving to: ‘IMDB-Dataset.csv’\n","\n","IMDB-Dataset.csv    100%[===================>]   3.14M  2.29MB/s    in 1.4s    \n","\n","2021-01-16 09:07:56 (2.29 MB/s) - ‘IMDB-Dataset.csv’ saved [3288450/3288450]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610788130009,"user_tz":-300,"elapsed":1019,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"fa40642c-aeea-4506-b40e-3542a49a2ee9"},"source":["import pandas as pd\n","train_path = '/content/IMDB-Dataset.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must be named 'y' and be of type str\n","columns=['text','y']\n","train_df = train_df[columns]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>y</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>One of the other reviewers has mentioned that ...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>A wonderful little production. 
&lt;br /&gt;&lt;br /&gt;The...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>I thought this was a wonderful way to spend ti...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>Basically there's a family where a little boy ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>Petter Mattei's \"Love in the Time of Money\" is...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>2495</th>\n","      <td>Another great movie by Costa-Gavras. It's a gr...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>2496</th>\n","      <td>Though structured totally different from the b...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>2497</th>\n","      <td>Handsome and dashing British airline pilot Geo...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>2498</th>\n","      <td>This film breeches the fine line between satir...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>2499</th>\n","      <td>Mardi Gras: Made in China provides a wonderful...</td>\n","      <td>positive</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>2500 rows × 2 columns</p>\n","</div>"],"text/plain":["                                                   text         y\n","0     One of the other reviewers has mentioned that ...  positive\n","1     A wonderful little production. <br /><br />The...  positive\n","2     I thought this was a wonderful way to spend ti...  positive\n","3     Basically there's a family where a little boy ...  negative\n","4     Petter Mattei's \"Love in the Time of Money\" is...  positive\n","...                                                 ...       ...\n","2495  Another great movie by Costa-Gavras. It's a gr...  
negative\n","2496  Though structured totally different from the b...  positive\n","2497  Handsome and dashing British airline pilot Geo...  positive\n","2498  This film breeches the fine line between satir...  negative\n","2499  Mardi Gras: Made in China provides a wonderful...  positive\n","\n","[2500 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609464660630,"user_tz":-300,"elapsed":19440,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3be85e48-38c8-4a7c-bbd7-226e204fa739"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix  and fit it on a dataset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates some NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    negative       0.70      0.70      0.70        27\n","     neutral       0.00      0.00      0.00         0\n","    positive       0.79      0.65      0.71        23\n","\n","    accuracy                           0.68        50\n","   macro avg       0.50      0.45      0.47        50\n","weighted avg       0.74      0.68      0.71        50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>default_name_embeddings</th>\n","      <th>sentiment</th>\n","      <th>sentiment_confidence</th>\n","      <th>y</th>\n","      <th>document</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>One of the other reviewers has mentioned that ...</td>\n","      <td>[-0.04935329407453537, -0.01034686528146267, -...</td>\n","      <td>positive</td>\n","      <td>0.968638</td>\n","      <td>positive</td>\n","      <td>One of the other reviewers has mentioned that ...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      
<td>A wonderful little production. &lt;br /&gt;&lt;br /&gt;The...</td>\n","      <td>[0.040489643812179565, -0.054199717938899994, ...</td>\n","      <td>negative</td>\n","      <td>0.990273</td>\n","      <td>positive</td>\n","      <td>A wonderful little production. &lt;br /&gt;&lt;br /&gt;The...</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>I thought this was a wonderful way to spend ti...</td>\n","      <td>[0.026364900171756744, 0.07112795859575272, 0....</td>\n","      <td>negative</td>\n","      <td>0.957352</td>\n","      <td>positive</td>\n","      <td>I thought this was a wonderful way to spend ti...</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>Basically there's a family where a little boy ...</td>\n","      <td>[-0.05151151493191719, 0.008207003585994244, -...</td>\n","      <td>negative</td>\n","      <td>0.958503</td>\n","      <td>negative</td>\n","      <td>Basically there's a family where a little boy ...</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>Petter Mattei's \"Love in the Time of Money\" is...</td>\n","      <td>[0.06880538165569305, 0.019250543788075447, -0...</td>\n","      <td>positive</td>\n","      <td>0.999108</td>\n","      <td>positive</td>\n","      <td>Petter Mattei's \"Love in the Time of Money\" is...</td>\n","    </tr>\n","    <tr>\n","      <th>5</th>\n","      <td>Probably my all-time favorite movie, a story o...</td>\n","      <td>[0.004764211364090443, 0.027671916410326958, -...</td>\n","      <td>positive</td>\n","      <td>0.993937</td>\n","      <td>positive</td>\n","      <td>Probably my all-time favorite movie, a story o...</td>\n","    </tr>\n","    <tr>\n","      <th>6</th>\n","      <td>I sure would like to see a resurrection of a u...</td>\n","      <td>[-0.03813941031694412, -0.03322296217083931, 0...</td>\n","      <td>positive</td>\n","      <td>0.974884</td>\n","      <td>positive</td>\n","      <td>I sure would like to see a resurrection of a 
u...</td>\n","    </tr>\n","    <tr>\n","      <th>7</th>\n","      <td>This show was an amazing, fresh &amp; innovative i...</td>\n","      <td>[0.010670202784240246, -0.04322813078761101, -...</td>\n","      <td>negative</td>\n","      <td>0.721451</td>\n","      <td>negative</td>\n","      <td>This show was an amazing, fresh &amp; innovative i...</td>\n","    </tr>\n","    <tr>\n","      <th>8</th>\n","      <td>Encouraged by the positive comments about this...</td>\n","      <td>[0.010801736265420914, -0.07724311947822571, -...</td>\n","      <td>positive</td>\n","      <td>0.884824</td>\n","      <td>negative</td>\n","      <td>Encouraged by the positive comments about this...</td>\n","    </tr>\n","    <tr>\n","      <th>9</th>\n","      <td>If you like original gut wrenching laughter yo...</td>\n","      <td>[-0.0245585348457098, 0.0005475765210576355, -...</td>\n","      <td>negative</td>\n","      <td>0.850509</td>\n","      <td>positive</td>\n","      <td>If you like original gut wrenching laughter yo...</td>\n","    </tr>\n","    <tr>\n","      <th>10</th>\n","      <td>Phil the Alien is one of those quirky films wh...</td>\n","      <td>[0.023403573781251907, 0.017464609816670418, -...</td>\n","      <td>negative</td>\n","      <td>0.836944</td>\n","      <td>negative</td>\n","      <td>Phil the Alien is one of those quirky films wh...</td>\n","    </tr>\n","    <tr>\n","      <th>11</th>\n","      <td>I saw this movie when I was about 12 when it c...</td>\n","      <td>[-0.046517230570316315, -0.025949953123927116,...</td>\n","      <td>negative</td>\n","      <td>0.999218</td>\n","      <td>negative</td>\n","      <td>I saw this movie when I was about 12 when it c...</td>\n","    </tr>\n","    <tr>\n","      <th>12</th>\n","      <td>So im not a big fan of Boll's work but then ag...</td>\n","      <td>[0.0032458826899528503, -0.013339877128601074,...</td>\n","      <td>negative</td>\n","      <td>0.999841</td>\n","      <td>negative</td>\n","      
<td>So im not a big fan of Boll's work but then ag...</td>\n","    </tr>\n","    <tr>\n","      <th>13</th>\n","      <td>The cast played Shakespeare.&lt;br /&gt;&lt;br /&gt;Shakes...</td>\n","      <td>[0.044309284538030624, 0.061706289649009705, -...</td>\n","      <td>neutral</td>\n","      <td>0.504574</td>\n","      <td>negative</td>\n","      <td>The cast played Shakespeare.&lt;br /&gt;&lt;br /&gt;Shakes...</td>\n","    </tr>\n","    <tr>\n","      <th>14</th>\n","      <td>This a fantastic movie of three prisoners who ...</td>\n","      <td>[0.005487383343279362, -0.005359508562833071, ...</td>\n","      <td>positive</td>\n","      <td>0.956110</td>\n","      <td>positive</td>\n","      <td>This a fantastic movie of three prisoners who ...</td>\n","    </tr>\n","    <tr>\n","      <th>15</th>\n","      <td>Kind of drawn in by the erotic scenes, only to...</td>\n","      <td>[0.04357790946960449, -0.034652918577194214, -...</td>\n","      <td>negative</td>\n","      <td>0.990112</td>\n","      <td>negative</td>\n","      <td>Kind of drawn in by the erotic scenes, only to...</td>\n","    </tr>\n","    <tr>\n","      <th>16</th>\n","      <td>Some films just simply should not be remade. T...</td>\n","      <td>[0.006823724135756493, -0.0692802369594574, -0...</td>\n","      <td>negative</td>\n","      <td>0.996081</td>\n","      <td>positive</td>\n","      <td>Some films just simply should not be remade. 
T...</td>\n","    </tr>\n","    <tr>\n","      <th>17</th>\n","      <td>This movie made it into one of my top 10 most ...</td>\n","      <td>[-0.013747279532253742, -0.0038213622756302357...</td>\n","      <td>negative</td>\n","      <td>0.999338</td>\n","      <td>negative</td>\n","      <td>This movie made it into one of my top 10 most ...</td>\n","    </tr>\n","    <tr>\n","      <th>18</th>\n","      <td>I remember this film,it was the first film i h...</td>\n","      <td>[-0.005101265385746956, 0.022435873746871948, ...</td>\n","      <td>positive</td>\n","      <td>0.986708</td>\n","      <td>positive</td>\n","      <td>I remember this film,it was the first film i h...</td>\n","    </tr>\n","    <tr>\n","      <th>19</th>\n","      <td>An awful film! It must have been up against so...</td>\n","      <td>[0.011224010959267616, -0.007102800067514181, ...</td>\n","      <td>negative</td>\n","      <td>0.998881</td>\n","      <td>negative</td>\n","      <td>An awful film! It must have been up against so...</td>\n","    </tr>\n","    <tr>\n","      <th>20</th>\n","      <td>After the success of Die Hard and it's sequels...</td>\n","      <td>[0.022048521786928177, -0.020497862249612808, ...</td>\n","      <td>negative</td>\n","      <td>0.650546</td>\n","      <td>positive</td>\n","      <td>After the success of Die Hard and it's sequels...</td>\n","    </tr>\n","    <tr>\n","      <th>21</th>\n","      <td>I had the terrible misfortune of having to vie...</td>\n","      <td>[-0.010102338157594204, -0.05102328583598137, ...</td>\n","      <td>negative</td>\n","      <td>0.999930</td>\n","      <td>negative</td>\n","      <td>I had the terrible misfortune of having to vie...</td>\n","    </tr>\n","    <tr>\n","      <th>22</th>\n","      <td>What an absolutely stunning movie, if you have...</td>\n","      <td>[-0.016428396105766296, 0.007074637804180384, ...</td>\n","      <td>positive</td>\n","      <td>0.981123</td>\n","      <td>positive</td>\n","      <td>What 
an absolutely stunning movie, if you have...</td>\n","    </tr>\n","    <tr>\n","      <th>23</th>\n","      <td>First of all, let's get a few things straight ...</td>\n","      <td>[-0.06437410414218903, -0.029181038960814476, ...</td>\n","      <td>negative</td>\n","      <td>0.957470</td>\n","      <td>negative</td>\n","      <td>First of all, let's get a few things straight ...</td>\n","    </tr>\n","    <tr>\n","      <th>24</th>\n","      <td>This was the worst movie I saw at WorldFest an...</td>\n","      <td>[0.03901044651865959, 0.06355303525924683, -0....</td>\n","      <td>negative</td>\n","      <td>0.999471</td>\n","      <td>negative</td>\n","      <td>This was the worst movie I saw at WorldFest an...</td>\n","    </tr>\n","    <tr>\n","      <th>25</th>\n","      <td>The Karen Carpenter Story shows a little more ...</td>\n","      <td>[-0.021897025406360626, 0.04400184750556946, 0...</td>\n","      <td>positive</td>\n","      <td>0.997247</td>\n","      <td>positive</td>\n","      <td>The Karen Carpenter Story shows a little more ...</td>\n","    </tr>\n","    <tr>\n","      <th>26</th>\n","      <td>\"The Cell\" is an exotic masterpiece, a dizzyin...</td>\n","      <td>[0.0439823754131794, -0.007468021009117365, -0...</td>\n","      <td>positive</td>\n","      <td>0.996351</td>\n","      <td>positive</td>\n","      <td>\"The Cell\" is an exotic masterpiece, a dizzyin...</td>\n","    </tr>\n","    <tr>\n","      <th>27</th>\n","      <td>This film tried to be too many things all at o...</td>\n","      <td>[-0.004155139438807964, -0.03771881386637688, ...</td>\n","      <td>neutral</td>\n","      <td>0.570219</td>\n","      <td>negative</td>\n","      <td>This film tried to be too many things all at o...</td>\n","    </tr>\n","    <tr>\n","      <th>28</th>\n","      <td>This movie was so frustrating. 
Everything seem...</td>\n","      <td>[0.015594013035297394, -0.007509331218898296, ...</td>\n","      <td>negative</td>\n","      <td>0.999906</td>\n","      <td>negative</td>\n","      <td>This movie was so frustrating. Everything seem...</td>\n","    </tr>\n","    <tr>\n","      <th>29</th>\n","      <td>'War movie' is a Hollywood genre that has been...</td>\n","      <td>[-0.036022596061229706, -0.006816706154495478,...</td>\n","      <td>negative</td>\n","      <td>0.733068</td>\n","      <td>positive</td>\n","      <td>'War movie' is a Hollywood genre that has been...</td>\n","    </tr>\n","    <tr>\n","      <th>30</th>\n","      <td>Taut and organically gripping, Edward Dmytryk'...</td>\n","      <td>[0.0312348585575819, -0.04670163244009018, -0....</td>\n","      <td>positive</td>\n","      <td>0.995882</td>\n","      <td>positive</td>\n","      <td>Taut and organically gripping, Edward Dmytryk'...</td>\n","    </tr>\n","    <tr>\n","      <th>31</th>\n","      <td>\"Ardh Satya\" is one of the finest film ever ma...</td>\n","      <td>[0.060114260762929916, -0.0590929239988327, -0...</td>\n","      <td>positive</td>\n","      <td>0.999671</td>\n","      <td>positive</td>\n","      <td>\"Ardh Satya\" is one of the finest film ever ma...</td>\n","    </tr>\n","    <tr>\n","      <th>32</th>\n","      <td>My first exposure to the Templarios &amp; not a go...</td>\n","      <td>[0.013515714555978775, -0.004898980725556612, ...</td>\n","      <td>negative</td>\n","      <td>0.999994</td>\n","      <td>negative</td>\n","      <td>My first exposure to the Templarios &amp; not a go...</td>\n","    </tr>\n","    <tr>\n","      <th>33</th>\n","      <td>One of the most significant quotes from the en...</td>\n","      <td>[0.022280631586909294, -0.00839739479124546, -...</td>\n","      <td>positive</td>\n","      <td>0.997032</td>\n","      <td>positive</td>\n","      <td>One of the most significant quotes from the en...</td>\n","    </tr>\n","    <tr>\n","      
<th>34</th>\n","      <td>I watched this film not really expecting much,...</td>\n","      <td>[0.009434111416339874, -0.046402934938669205, ...</td>\n","      <td>negative</td>\n","      <td>0.992625</td>\n","      <td>negative</td>\n","      <td>I watched this film not really expecting much,...</td>\n","    </tr>\n","    <tr>\n","      <th>35</th>\n","      <td>I bought this film at Blockbuster for $3.00, b...</td>\n","      <td>[0.011683089658617973, -0.047437384724617004, ...</td>\n","      <td>negative</td>\n","      <td>0.999485</td>\n","      <td>negative</td>\n","      <td>I bought this film at Blockbuster for $3.00, b...</td>\n","    </tr>\n","    <tr>\n","      <th>36</th>\n","      <td>The plot is about the death of little children...</td>\n","      <td>[-0.0348515659570694, 0.01680166646838188, -0....</td>\n","      <td>neutral</td>\n","      <td>0.537487</td>\n","      <td>negative</td>\n","      <td>The plot is about the death of little children...</td>\n","    </tr>\n","    <tr>\n","      <th>37</th>\n","      <td>Ever watched a movie that lost the plot? Well,...</td>\n","      <td>[-0.02899913117289543, 0.0164097361266613, -0....</td>\n","      <td>negative</td>\n","      <td>0.998984</td>\n","      <td>negative</td>\n","      <td>Ever watched a movie that lost the plot? 
Well,...</td>\n","    </tr>\n","    <tr>\n","      <th>38</th>\n","      <td>Okay, so this series kind of takes the route o...</td>\n","      <td>[0.002110496163368225, 0.02887572906911373, -0...</td>\n","      <td>positive</td>\n","      <td>0.965860</td>\n","      <td>positive</td>\n","      <td>Okay, so this series kind of takes the route o...</td>\n","    </tr>\n","    <tr>\n","      <th>39</th>\n","      <td>After sitting through this pile of dung, my hu...</td>\n","      <td>[0.013781447894871235, -0.010363072156906128, ...</td>\n","      <td>positive</td>\n","      <td>0.697232</td>\n","      <td>negative</td>\n","      <td>After sitting through this pile of dung, my hu...</td>\n","    </tr>\n","    <tr>\n","      <th>40</th>\n","      <td>It had all the clichés of movies of this type ...</td>\n","      <td>[0.03799372911453247, -0.038665950298309326, -...</td>\n","      <td>negative</td>\n","      <td>0.998307</td>\n","      <td>negative</td>\n","      <td>It had all the clichés of movies of this type ...</td>\n","    </tr>\n","    <tr>\n","      <th>41</th>\n","      <td>This movie is based on the book, \"A Many Splen...</td>\n","      <td>[-0.00033091730438172817, -0.05126418545842171...</td>\n","      <td>positive</td>\n","      <td>0.998454</td>\n","      <td>positive</td>\n","      <td>This movie is based on the book, \"A Many Splen...</td>\n","    </tr>\n","    <tr>\n","      <th>42</th>\n","      <td>Of all the films I have seen, this one, The Ra...</td>\n","      <td>[0.014630819670855999, -0.04907294735312462, -...</td>\n","      <td>negative</td>\n","      <td>0.999938</td>\n","      <td>negative</td>\n","      <td>Of all the films I have seen, this one, The Ra...</td>\n","    </tr>\n","    <tr>\n","      <th>43</th>\n","      <td>I had heard good things about \"States of Grace...</td>\n","      <td>[0.027017194777727127, 0.002088379580527544, 0...</td>\n","      <td>positive</td>\n","      <td>0.884466</td>\n","      <td>negative</td>\n","      
<td>I had heard good things about \"States of Grace...</td>\n","    </tr>\n","    <tr>\n","      <th>44</th>\n","      <td>This movie struck home for me. Being 29, I rem...</td>\n","      <td>[-0.0009387845057062805, -0.048219360411167145...</td>\n","      <td>negative</td>\n","      <td>0.969341</td>\n","      <td>positive</td>\n","      <td>This movie struck home for me. Being 29, I rem...</td>\n","    </tr>\n","    <tr>\n","      <th>45</th>\n","      <td>As a disclaimer, I've seen the movie 5-6 times...</td>\n","      <td>[0.0065035647712647915, 0.00230638706125319, 0...</td>\n","      <td>negative</td>\n","      <td>0.967124</td>\n","      <td>positive</td>\n","      <td>As a disclaimer, I've seen the movie 5-6 times...</td>\n","    </tr>\n","    <tr>\n","      <th>46</th>\n","      <td>Protocol is an implausible movie whose only sa...</td>\n","      <td>[0.05113476142287254, 0.04671141505241394, -0....</td>\n","      <td>neutral</td>\n","      <td>0.593109</td>\n","      <td>negative</td>\n","      <td>Protocol is an implausible movie whose only sa...</td>\n","    </tr>\n","    <tr>\n","      <th>47</th>\n","      <td>How this film could be classified as Drama, I ...</td>\n","      <td>[0.011419376358389854, -0.0828876867890358, -0...</td>\n","      <td>negative</td>\n","      <td>0.991421</td>\n","      <td>negative</td>\n","      <td>How this film could be classified as Drama, I ...</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>Preston Sturgis' THE POWER AND THE GLORY was u...</td>\n","      <td>[0.024031344801187515, 0.03399205952882767, 0....</td>\n","      <td>positive</td>\n","      <td>0.994996</td>\n","      <td>positive</td>\n","      <td>Preston Sturgis' THE POWER AND THE GLORY was u...</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>Average (and surprisingly tame) Fulci giallo w...</td>\n","      <td>[0.015038557350635529, -0.0037642912939190865,...</td>\n","      <td>positive</td>\n","      
<td>0.996770</td>\n","      <td>negative</td>\n","      <td>Average (and surprisingly tame) Fulci giallo w...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                                           text  ...                                           document\n","origin_index                                                     ...                                                   \n","0             One of the other reviewers has mentioned that ...  ...  One of the other reviewers has mentioned that ...\n","1             A wonderful little production. <br /><br />The...  ...  A wonderful little production. <br /><br />The...\n","2             I thought this was a wonderful way to spend ti...  ...  I thought this was a wonderful way to spend ti...\n","3             Basically there's a family where a little boy ...  ...  Basically there's a family where a little boy ...\n","4             Petter Mattei's \"Love in the Time of Money\" is...  ...  Petter Mattei's \"Love in the Time of Money\" is...\n","5             Probably my all-time favorite movie, a story o...  ...  Probably my all-time favorite movie, a story o...\n","6             I sure would like to see a resurrection of a u...  ...  I sure would like to see a resurrection of a u...\n","7             This show was an amazing, fresh & innovative i...  ...  This show was an amazing, fresh & innovative i...\n","8             Encouraged by the positive comments about this...  ...  Encouraged by the positive comments about this...\n","9             If you like original gut wrenching laughter yo...  ...  If you like original gut wrenching laughter yo...\n","10            Phil the Alien is one of those quirky films wh...  ...  Phil the Alien is one of those quirky films wh...\n","11            I saw this movie when I was about 12 when it c...  ...  I saw this movie when I was about 12 when it c...\n","12            So im not a big fan of Boll's work but then ag...  ...  
So im not a big fan of Boll's work but then ag...\n","13            The cast played Shakespeare.<br /><br />Shakes...  ...  The cast played Shakespeare.<br /><br />Shakes...\n","14            This a fantastic movie of three prisoners who ...  ...  This a fantastic movie of three prisoners who ...\n","15            Kind of drawn in by the erotic scenes, only to...  ...  Kind of drawn in by the erotic scenes, only to...\n","16            Some films just simply should not be remade. T...  ...  Some films just simply should not be remade. T...\n","17            This movie made it into one of my top 10 most ...  ...  This movie made it into one of my top 10 most ...\n","18            I remember this film,it was the first film i h...  ...  I remember this film,it was the first film i h...\n","19            An awful film! It must have been up against so...  ...  An awful film! It must have been up against so...\n","20            After the success of Die Hard and it's sequels...  ...  After the success of Die Hard and it's sequels...\n","21            I had the terrible misfortune of having to vie...  ...  I had the terrible misfortune of having to vie...\n","22            What an absolutely stunning movie, if you have...  ...  What an absolutely stunning movie, if you have...\n","23            First of all, let's get a few things straight ...  ...  First of all, let's get a few things straight ...\n","24            This was the worst movie I saw at WorldFest an...  ...  This was the worst movie I saw at WorldFest an...\n","25            The Karen Carpenter Story shows a little more ...  ...  The Karen Carpenter Story shows a little more ...\n","26            \"The Cell\" is an exotic masterpiece, a dizzyin...  ...  \"The Cell\" is an exotic masterpiece, a dizzyin...\n","27            This film tried to be too many things all at o...  ...  This film tried to be too many things all at o...\n","28            This movie was so frustrating. Everything seem...  ...  
This movie was so frustrating. Everything seem...\n","29            'War movie' is a Hollywood genre that has been...  ...  'War movie' is a Hollywood genre that has been...\n","30            Taut and organically gripping, Edward Dmytryk'...  ...  Taut and organically gripping, Edward Dmytryk'...\n","31            \"Ardh Satya\" is one of the finest film ever ma...  ...  \"Ardh Satya\" is one of the finest film ever ma...\n","32            My first exposure to the Templarios & not a go...  ...  My first exposure to the Templarios & not a go...\n","33            One of the most significant quotes from the en...  ...  One of the most significant quotes from the en...\n","34            I watched this film not really expecting much,...  ...  I watched this film not really expecting much,...\n","35            I bought this film at Blockbuster for $3.00, b...  ...  I bought this film at Blockbuster for $3.00, b...\n","36            The plot is about the death of little children...  ...  The plot is about the death of little children...\n","37            Ever watched a movie that lost the plot? Well,...  ...  Ever watched a movie that lost the plot? Well,...\n","38            Okay, so this series kind of takes the route o...  ...  Okay, so this series kind of takes the route o...\n","39            After sitting through this pile of dung, my hu...  ...  After sitting through this pile of dung, my hu...\n","40            It had all the clichés of movies of this type ...  ...  It had all the clichés of movies of this type ...\n","41            This movie is based on the book, \"A Many Splen...  ...  This movie is based on the book, \"A Many Splen...\n","42            Of all the films I have seen, this one, The Ra...  ...  Of all the films I have seen, this one, The Ra...\n","43            I had heard good things about \"States of Grace...  ...  I had heard good things about \"States of Grace...\n","44            This movie struck home for me. Being 29, I rem...  ...  
This movie struck home for me. Being 29, I rem...\n","45            As a disclaimer, I've seen the movie 5-6 times...  ...  As a disclaimer, I've seen the movie 5-6 times...\n","46            Protocol is an implausible movie whose only sa...  ...  Protocol is an implausible movie whose only sa...\n","47            How this film could be classified as Drama, I ...  ...  How this film could be classified as Drama, I ...\n","48            Preston Sturgis' THE POWER AND THE GLORY was u...  ...  Preston Sturgis' THE POWER AND THE GLORY was u...\n","49            Average (and surprisingly tame) Fulci giallo w...  ...  Average (and surprisingly tame) Fulci giallo w...\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609464663328,"user_tz":-300,"elapsed":2733,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ae604bdd-49fb-4b5e-978e-5190dd03b227"},"source":["fitted_pipe.predict('It was one of the best films i have ever watched in my entire life !!')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>default_name_embeddings</th>\n","      <th>sentiment</th>\n","      <th>sentiment_confidence</th>\n","      <th>document</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","   
   <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>[0.06468033790588379, -0.040837567299604416, -...</td>\n","      <td>positive</td>\n","      <td>0.982375</td>\n","      <td>Bitcoin is going to the moon!</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                        default_name_embeddings  ...                       document\n","origin_index                                                     ...                               \n","0             [0.06468033790588379, -0.040837567299604416, -...  ...  Bitcoin is going to the moon!\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609464663334,"user_tz":-300,"elapsed":31,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a9059197-9e1c-4afe-ca3b-97c6d310f60c"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2)                 | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005)                    | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64)                | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5)                 | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True)       | Info: Whether to use stdout in 
addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6)               | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral')    | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+')    | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"])  | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0)            | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999)        | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True)  | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True)       | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([])        | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False)  | Info: whether to explode each 
sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0)            | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999)        | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512)               | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use')      | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')  | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609464673090,"user_tz":-300,"elapsed":9777,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"164e4f03-f48a-4347-95e8-fd3509bf146e"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5)  \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["              precision    recall  f1-score   support\n","\n","    negative       0.81      0.96      0.88        27\n","     neutral       0.00      0.00      0.00         0\n","    positive       0.94      0.70      0.80        23\n","\n","    accuracy                           0.84        50\n","   macro avg       0.58      0.55      0.56        50\n","weighted avg       0.87      0.84      0.84        50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>default_name_embeddings</th>\n","      <th>sentiment</th>\n","      <th>sentiment_confidence</th>\n","      <th>y</th>\n","      <th>document</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>One of the other reviewers has mentioned that ...</td>\n","      <td>[-0.04935329407453537, -0.01034686528146267, -...</td>\n","      <td>positive</td>\n","      <td>0.966858</td>\n","      <td>positive</td>\n","      <td>One of the other reviewers has mentioned that ...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>A wonderful little production. 
&lt;br /&gt;&lt;br /&gt;The...</td>\n","      <td>[0.040489643812179565, -0.054199717938899994, ...</td>\n","      <td>negative</td>\n","      <td>0.985679</td>\n","      <td>positive</td>\n","      <td>A wonderful little production. &lt;br /&gt;&lt;br /&gt;The...</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>I thought this was a wonderful way to spend ti...</td>\n","      <td>[0.026364900171756744, 0.07112795859575272, 0....</td>\n","      <td>negative</td>\n","      <td>0.988745</td>\n","      <td>positive</td>\n","      <td>I thought this was a wonderful way to spend ti...</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>Basically there's a family where a little boy ...</td>\n","      <td>[-0.05151151493191719, 0.008207003585994244, -...</td>\n","      <td>negative</td>\n","      <td>0.999291</td>\n","      <td>negative</td>\n","      <td>Basically there's a family where a little boy ...</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>Petter Mattei's \"Love in the Time of Money\" is...</td>\n","      <td>[0.06880538165569305, 0.019250543788075447, -0...</td>\n","      <td>positive</td>\n","      <td>0.999684</td>\n","      <td>positive</td>\n","      <td>Petter Mattei's \"Love in the Time of Money\" is...</td>\n","    </tr>\n","    <tr>\n","      <th>5</th>\n","      <td>Probably my all-time favorite movie, a story o...</td>\n","      <td>[0.004764211364090443, 0.027671916410326958, -...</td>\n","      <td>positive</td>\n","      <td>0.996598</td>\n","      <td>positive</td>\n","      <td>Probably my all-time favorite movie, a story o...</td>\n","    </tr>\n","    <tr>\n","      <th>6</th>\n","      <td>I sure would like to see a resurrection of a u...</td>\n","      <td>[-0.03813941031694412, -0.03322296217083931, 0...</td>\n","      <td>positive</td>\n","      <td>0.960203</td>\n","      <td>positive</td>\n","      <td>I sure would like to see a resurrection of a u...</td>\n","    </tr>\n","    
<tr>\n","      <th>7</th>\n","      <td>This show was an amazing, fresh &amp; innovative i...</td>\n","      <td>[0.010670202784240246, -0.04322813078761101, -...</td>\n","      <td>negative</td>\n","      <td>0.753273</td>\n","      <td>negative</td>\n","      <td>This show was an amazing, fresh &amp; innovative i...</td>\n","    </tr>\n","    <tr>\n","      <th>8</th>\n","      <td>Encouraged by the positive comments about this...</td>\n","      <td>[0.010801736265420914, -0.07724311947822571, -...</td>\n","      <td>negative</td>\n","      <td>0.958928</td>\n","      <td>negative</td>\n","      <td>Encouraged by the positive comments about this...</td>\n","    </tr>\n","    <tr>\n","      <th>9</th>\n","      <td>If you like original gut wrenching laughter yo...</td>\n","      <td>[-0.0245585348457098, 0.0005475765210576355, -...</td>\n","      <td>neutral</td>\n","      <td>0.536441</td>\n","      <td>positive</td>\n","      <td>If you like original gut wrenching laughter yo...</td>\n","    </tr>\n","    <tr>\n","      <th>10</th>\n","      <td>Phil the Alien is one of those quirky films wh...</td>\n","      <td>[0.023403573781251907, 0.017464609816670418, -...</td>\n","      <td>negative</td>\n","      <td>0.959978</td>\n","      <td>negative</td>\n","      <td>Phil the Alien is one of those quirky films wh...</td>\n","    </tr>\n","    <tr>\n","      <th>11</th>\n","      <td>I saw this movie when I was about 12 when it c...</td>\n","      <td>[-0.046517230570316315, -0.025949953123927116,...</td>\n","      <td>negative</td>\n","      <td>0.999949</td>\n","      <td>negative</td>\n","      <td>I saw this movie when I was about 12 when it c...</td>\n","    </tr>\n","    <tr>\n","      <th>12</th>\n","      <td>So im not a big fan of Boll's work but then ag...</td>\n","      <td>[0.0032458826899528503, -0.013339877128601074,...</td>\n","      <td>negative</td>\n","      <td>0.999997</td>\n","      <td>negative</td>\n","      <td>So im not a big fan of Boll's 
work but then ag...</td>\n","    </tr>\n","    <tr>\n","      <th>13</th>\n","      <td>The cast played Shakespeare.&lt;br /&gt;&lt;br /&gt;Shakes...</td>\n","      <td>[0.044309284538030624, 0.061706289649009705, -...</td>\n","      <td>negative</td>\n","      <td>0.984033</td>\n","      <td>negative</td>\n","      <td>The cast played Shakespeare.&lt;br /&gt;&lt;br /&gt;Shakes...</td>\n","    </tr>\n","    <tr>\n","      <th>14</th>\n","      <td>This a fantastic movie of three prisoners who ...</td>\n","      <td>[0.005487383343279362, -0.005359508562833071, ...</td>\n","      <td>positive</td>\n","      <td>0.775998</td>\n","      <td>positive</td>\n","      <td>This a fantastic movie of three prisoners who ...</td>\n","    </tr>\n","    <tr>\n","      <th>15</th>\n","      <td>Kind of drawn in by the erotic scenes, only to...</td>\n","      <td>[0.04357790946960449, -0.034652918577194214, -...</td>\n","      <td>negative</td>\n","      <td>0.999683</td>\n","      <td>negative</td>\n","      <td>Kind of drawn in by the erotic scenes, only to...</td>\n","    </tr>\n","    <tr>\n","      <th>16</th>\n","      <td>Some films just simply should not be remade. T...</td>\n","      <td>[0.006823724135756493, -0.0692802369594574, -0...</td>\n","      <td>negative</td>\n","      <td>0.999245</td>\n","      <td>positive</td>\n","      <td>Some films just simply should not be remade. 
T...</td>\n","    </tr>\n","    <tr>\n","      <th>17</th>\n","      <td>This movie made it into one of my top 10 most ...</td>\n","      <td>[-0.013747279532253742, -0.0038213622756302357...</td>\n","      <td>negative</td>\n","      <td>0.999970</td>\n","      <td>negative</td>\n","      <td>This movie made it into one of my top 10 most ...</td>\n","    </tr>\n","    <tr>\n","      <th>18</th>\n","      <td>I remember this film,it was the first film i h...</td>\n","      <td>[-0.005101265385746956, 0.022435873746871948, ...</td>\n","      <td>positive</td>\n","      <td>0.975574</td>\n","      <td>positive</td>\n","      <td>I remember this film,it was the first film i h...</td>\n","    </tr>\n","    <tr>\n","      <th>19</th>\n","      <td>An awful film! It must have been up against so...</td>\n","      <td>[0.011224010959267616, -0.007102800067514181, ...</td>\n","      <td>negative</td>\n","      <td>0.999990</td>\n","      <td>negative</td>\n","      <td>An awful film! It must have been up against so...</td>\n","    </tr>\n","    <tr>\n","      <th>20</th>\n","      <td>After the success of Die Hard and it's sequels...</td>\n","      <td>[0.022048521786928177, -0.020497862249612808, ...</td>\n","      <td>positive</td>\n","      <td>0.951596</td>\n","      <td>positive</td>\n","      <td>After the success of Die Hard and it's sequels...</td>\n","    </tr>\n","    <tr>\n","      <th>21</th>\n","      <td>I had the terrible misfortune of having to vie...</td>\n","      <td>[-0.010102338157594204, -0.05102328583598137, ...</td>\n","      <td>negative</td>\n","      <td>0.999999</td>\n","      <td>negative</td>\n","      <td>I had the terrible misfortune of having to vie...</td>\n","    </tr>\n","    <tr>\n","      <th>22</th>\n","      <td>What an absolutely stunning movie, if you have...</td>\n","      <td>[-0.016428396105766296, 0.007074637804180384, ...</td>\n","      <td>positive</td>\n","      <td>0.931946</td>\n","      <td>positive</td>\n","      <td>What 
an absolutely stunning movie, if you have...</td>\n","    </tr>\n","    <tr>\n","      <th>23</th>\n","      <td>First of all, let's get a few things straight ...</td>\n","      <td>[-0.06437410414218903, -0.029181038960814476, ...</td>\n","      <td>negative</td>\n","      <td>0.990350</td>\n","      <td>negative</td>\n","      <td>First of all, let's get a few things straight ...</td>\n","    </tr>\n","    <tr>\n","      <th>24</th>\n","      <td>This was the worst movie I saw at WorldFest an...</td>\n","      <td>[0.03901044651865959, 0.06355303525924683, -0....</td>\n","      <td>negative</td>\n","      <td>0.999986</td>\n","      <td>negative</td>\n","      <td>This was the worst movie I saw at WorldFest an...</td>\n","    </tr>\n","    <tr>\n","      <th>25</th>\n","      <td>The Karen Carpenter Story shows a little more ...</td>\n","      <td>[-0.021897025406360626, 0.04400184750556946, 0...</td>\n","      <td>positive</td>\n","      <td>0.999463</td>\n","      <td>positive</td>\n","      <td>The Karen Carpenter Story shows a little more ...</td>\n","    </tr>\n","    <tr>\n","      <th>26</th>\n","      <td>\"The Cell\" is an exotic masterpiece, a dizzyin...</td>\n","      <td>[0.0439823754131794, -0.007468021009117365, -0...</td>\n","      <td>positive</td>\n","      <td>0.998291</td>\n","      <td>positive</td>\n","      <td>\"The Cell\" is an exotic masterpiece, a dizzyin...</td>\n","    </tr>\n","    <tr>\n","      <th>27</th>\n","      <td>This film tried to be too many things all at o...</td>\n","      <td>[-0.004155139438807964, -0.03771881386637688, ...</td>\n","      <td>negative</td>\n","      <td>0.865707</td>\n","      <td>negative</td>\n","      <td>This film tried to be too many things all at o...</td>\n","    </tr>\n","    <tr>\n","      <th>28</th>\n","      <td>This movie was so frustrating. 
Everything seem...</td>\n","      <td>[0.015594013035297394, -0.007509331218898296, ...</td>\n","      <td>negative</td>\n","      <td>0.999998</td>\n","      <td>negative</td>\n","      <td>This movie was so frustrating. Everything seem...</td>\n","    </tr>\n","    <tr>\n","      <th>29</th>\n","      <td>'War movie' is a Hollywood genre that has been...</td>\n","      <td>[-0.036022596061229706, -0.006816706154495478,...</td>\n","      <td>negative</td>\n","      <td>0.993793</td>\n","      <td>positive</td>\n","      <td>'War movie' is a Hollywood genre that has been...</td>\n","    </tr>\n","    <tr>\n","      <th>30</th>\n","      <td>Taut and organically gripping, Edward Dmytryk'...</td>\n","      <td>[0.0312348585575819, -0.04670163244009018, -0....</td>\n","      <td>positive</td>\n","      <td>0.997460</td>\n","      <td>positive</td>\n","      <td>Taut and organically gripping, Edward Dmytryk'...</td>\n","    </tr>\n","    <tr>\n","      <th>31</th>\n","      <td>\"Ardh Satya\" is one of the finest film ever ma...</td>\n","      <td>[0.060114260762929916, -0.0590929239988327, -0...</td>\n","      <td>positive</td>\n","      <td>0.999880</td>\n","      <td>positive</td>\n","      <td>\"Ardh Satya\" is one of the finest film ever ma...</td>\n","    </tr>\n","    <tr>\n","      <th>32</th>\n","      <td>My first exposure to the Templarios &amp; not a go...</td>\n","      <td>[0.013515714555978775, -0.004898980725556612, ...</td>\n","      <td>negative</td>\n","      <td>1.000000</td>\n","      <td>negative</td>\n","      <td>My first exposure to the Templarios &amp; not a go...</td>\n","    </tr>\n","    <tr>\n","      <th>33</th>\n","      <td>One of the most significant quotes from the en...</td>\n","      <td>[0.022280631586909294, -0.00839739479124546, -...</td>\n","      <td>positive</td>\n","      <td>0.999292</td>\n","      <td>positive</td>\n","      <td>One of the most significant quotes from the en...</td>\n","    </tr>\n","    <tr>\n","      
<th>34</th>\n","      <td>I watched this film not really expecting much,...</td>\n","      <td>[0.009434111416339874, -0.046402934938669205, ...</td>\n","      <td>negative</td>\n","      <td>0.999848</td>\n","      <td>negative</td>\n","      <td>I watched this film not really expecting much,...</td>\n","    </tr>\n","    <tr>\n","      <th>35</th>\n","      <td>I bought this film at Blockbuster for $3.00, b...</td>\n","      <td>[0.011683089658617973, -0.047437384724617004, ...</td>\n","      <td>negative</td>\n","      <td>0.999993</td>\n","      <td>negative</td>\n","      <td>I bought this film at Blockbuster for $3.00, b...</td>\n","    </tr>\n","    <tr>\n","      <th>36</th>\n","      <td>The plot is about the death of little children...</td>\n","      <td>[-0.0348515659570694, 0.01680166646838188, -0....</td>\n","      <td>negative</td>\n","      <td>0.997690</td>\n","      <td>negative</td>\n","      <td>The plot is about the death of little children...</td>\n","    </tr>\n","    <tr>\n","      <th>37</th>\n","      <td>Ever watched a movie that lost the plot? Well,...</td>\n","      <td>[-0.02899913117289543, 0.0164097361266613, -0....</td>\n","      <td>negative</td>\n","      <td>0.999995</td>\n","      <td>negative</td>\n","      <td>Ever watched a movie that lost the plot? 
Well,...</td>\n","    </tr>\n","    <tr>\n","      <th>38</th>\n","      <td>Okay, so this series kind of takes the route o...</td>\n","      <td>[0.002110496163368225, 0.02887572906911373, -0...</td>\n","      <td>positive</td>\n","      <td>0.993408</td>\n","      <td>positive</td>\n","      <td>Okay, so this series kind of takes the route o...</td>\n","    </tr>\n","    <tr>\n","      <th>39</th>\n","      <td>After sitting through this pile of dung, my hu...</td>\n","      <td>[0.013781447894871235, -0.010363072156906128, ...</td>\n","      <td>negative</td>\n","      <td>0.905860</td>\n","      <td>negative</td>\n","      <td>After sitting through this pile of dung, my hu...</td>\n","    </tr>\n","    <tr>\n","      <th>40</th>\n","      <td>It had all the clichés of movies of this type ...</td>\n","      <td>[0.03799372911453247, -0.038665950298309326, -...</td>\n","      <td>negative</td>\n","      <td>0.999892</td>\n","      <td>negative</td>\n","      <td>It had all the clichés of movies of this type ...</td>\n","    </tr>\n","    <tr>\n","      <th>41</th>\n","      <td>This movie is based on the book, \"A Many Splen...</td>\n","      <td>[-0.00033091730438172817, -0.05126418545842171...</td>\n","      <td>positive</td>\n","      <td>0.999837</td>\n","      <td>positive</td>\n","      <td>This movie is based on the book, \"A Many Splen...</td>\n","    </tr>\n","    <tr>\n","      <th>42</th>\n","      <td>Of all the films I have seen, this one, The Ra...</td>\n","      <td>[0.014630819670855999, -0.04907294735312462, -...</td>\n","      <td>negative</td>\n","      <td>1.000000</td>\n","      <td>negative</td>\n","      <td>Of all the films I have seen, this one, The Ra...</td>\n","    </tr>\n","    <tr>\n","      <th>43</th>\n","      <td>I had heard good things about \"States of Grace...</td>\n","      <td>[0.027017194777727127, 0.002088379580527544, 0...</td>\n","      <td>negative</td>\n","      <td>0.978662</td>\n","      <td>negative</td>\n","      
<td>I had heard good things about \"States of Grace...</td>\n","    </tr>\n","    <tr>\n","      <th>44</th>\n","      <td>This movie struck home for me. Being 29, I rem...</td>\n","      <td>[-0.0009387845057062805, -0.048219360411167145...</td>\n","      <td>negative</td>\n","      <td>0.993965</td>\n","      <td>positive</td>\n","      <td>This movie struck home for me. Being 29, I rem...</td>\n","    </tr>\n","    <tr>\n","      <th>45</th>\n","      <td>As a disclaimer, I've seen the movie 5-6 times...</td>\n","      <td>[0.0065035647712647915, 0.00230638706125319, 0...</td>\n","      <td>negative</td>\n","      <td>0.999341</td>\n","      <td>positive</td>\n","      <td>As a disclaimer, I've seen the movie 5-6 times...</td>\n","    </tr>\n","    <tr>\n","      <th>46</th>\n","      <td>Protocol is an implausible movie whose only sa...</td>\n","      <td>[0.05113476142287254, 0.04671141505241394, -0....</td>\n","      <td>negative</td>\n","      <td>0.913287</td>\n","      <td>negative</td>\n","      <td>Protocol is an implausible movie whose only sa...</td>\n","    </tr>\n","    <tr>\n","      <th>47</th>\n","      <td>How this film could be classified as Drama, I ...</td>\n","      <td>[0.011419376358389854, -0.0828876867890358, -0...</td>\n","      <td>negative</td>\n","      <td>0.999841</td>\n","      <td>negative</td>\n","      <td>How this film could be classified as Drama, I ...</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>Preston Sturgis' THE POWER AND THE GLORY was u...</td>\n","      <td>[0.024031344801187515, 0.03399205952882767, 0....</td>\n","      <td>positive</td>\n","      <td>0.998516</td>\n","      <td>positive</td>\n","      <td>Preston Sturgis' THE POWER AND THE GLORY was u...</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>Average (and surprisingly tame) Fulci giallo w...</td>\n","      <td>[0.015038557350635529, -0.0037642912939190865,...</td>\n","      <td>positive</td>\n","      
<td>0.995483</td>\n","      <td>negative</td>\n","      <td>Average (and surprisingly tame) Fulci giallo w...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                                           text  ...                                           document\n","origin_index                                                     ...                                                   \n","0             One of the other reviewers has mentioned that ...  ...  One of the other reviewers has mentioned that ...\n","1             A wonderful little production. <br /><br />The...  ...  A wonderful little production. <br /><br />The...\n","2             I thought this was a wonderful way to spend ti...  ...  I thought this was a wonderful way to spend ti...\n","3             Basically there's a family where a little boy ...  ...  Basically there's a family where a little boy ...\n","4             Petter Mattei's \"Love in the Time of Money\" is...  ...  Petter Mattei's \"Love in the Time of Money\" is...\n","5             Probably my all-time favorite movie, a story o...  ...  Probably my all-time favorite movie, a story o...\n","6             I sure would like to see a resurrection of a u...  ...  I sure would like to see a resurrection of a u...\n","7             This show was an amazing, fresh & innovative i...  ...  This show was an amazing, fresh & innovative i...\n","8             Encouraged by the positive comments about this...  ...  Encouraged by the positive comments about this...\n","9             If you like original gut wrenching laughter yo...  ...  If you like original gut wrenching laughter yo...\n","10            Phil the Alien is one of those quirky films wh...  ...  Phil the Alien is one of those quirky films wh...\n","11            I saw this movie when I was about 12 when it c...  ...  I saw this movie when I was about 12 when it c...\n","12            So im not a big fan of Boll's work but then ag...  ...  
So im not a big fan of Boll's work but then ag...\n","13            The cast played Shakespeare.<br /><br />Shakes...  ...  The cast played Shakespeare.<br /><br />Shakes...\n","14            This a fantastic movie of three prisoners who ...  ...  This a fantastic movie of three prisoners who ...\n","15            Kind of drawn in by the erotic scenes, only to...  ...  Kind of drawn in by the erotic scenes, only to...\n","16            Some films just simply should not be remade. T...  ...  Some films just simply should not be remade. T...\n","17            This movie made it into one of my top 10 most ...  ...  This movie made it into one of my top 10 most ...\n","18            I remember this film,it was the first film i h...  ...  I remember this film,it was the first film i h...\n","19            An awful film! It must have been up against so...  ...  An awful film! It must have been up against so...\n","20            After the success of Die Hard and it's sequels...  ...  After the success of Die Hard and it's sequels...\n","21            I had the terrible misfortune of having to vie...  ...  I had the terrible misfortune of having to vie...\n","22            What an absolutely stunning movie, if you have...  ...  What an absolutely stunning movie, if you have...\n","23            First of all, let's get a few things straight ...  ...  First of all, let's get a few things straight ...\n","24            This was the worst movie I saw at WorldFest an...  ...  This was the worst movie I saw at WorldFest an...\n","25            The Karen Carpenter Story shows a little more ...  ...  The Karen Carpenter Story shows a little more ...\n","26            \"The Cell\" is an exotic masterpiece, a dizzyin...  ...  \"The Cell\" is an exotic masterpiece, a dizzyin...\n","27            This film tried to be too many things all at o...  ...  This film tried to be too many things all at o...\n","28            This movie was so frustrating. Everything seem...  ...  
This movie was so frustrating. Everything seem...\n","29            'War movie' is a Hollywood genre that has been...  ...  'War movie' is a Hollywood genre that has been...\n","30            Taut and organically gripping, Edward Dmytryk'...  ...  Taut and organically gripping, Edward Dmytryk'...\n","31            \"Ardh Satya\" is one of the finest film ever ma...  ...  \"Ardh Satya\" is one of the finest film ever ma...\n","32            My first exposure to the Templarios & not a go...  ...  My first exposure to the Templarios & not a go...\n","33            One of the most significant quotes from the en...  ...  One of the most significant quotes from the en...\n","34            I watched this film not really expecting much,...  ...  I watched this film not really expecting much,...\n","35            I bought this film at Blockbuster for $3.00, b...  ...  I bought this film at Blockbuster for $3.00, b...\n","36            The plot is about the death of little children...  ...  The plot is about the death of little children...\n","37            Ever watched a movie that lost the plot? Well,...  ...  Ever watched a movie that lost the plot? Well,...\n","38            Okay, so this series kind of takes the route o...  ...  Okay, so this series kind of takes the route o...\n","39            After sitting through this pile of dung, my hu...  ...  After sitting through this pile of dung, my hu...\n","40            It had all the clichés of movies of this type ...  ...  It had all the clichés of movies of this type ...\n","41            This movie is based on the book, \"A Many Splen...  ...  This movie is based on the book, \"A Many Splen...\n","42            Of all the films I have seen, this one, The Ra...  ...  Of all the films I have seen, this one, The Ra...\n","43            I had heard good things about \"States of Grace...  ...  I had heard good things about \"States of Grace...\n","44            This movie struck home for me. Being 29, I rem...  ...  
This movie struck home for me. Being 29, I rem...\n","45            As a disclaimer, I've seen the movie 5-6 times...  ...  As a disclaimer, I've seen the movie 5-6 times...\n","46            Protocol is an implausible movie whose only sa...  ...  Protocol is an implausible movie whose only sa...\n","47            How this film could be classified as Drama, I ...  ...  How this film could be classified as Drama, I ...\n","48            Preston Sturgis' THE POWER AND THE GLORY was u...  ...  Preston Sturgis' THE POWER AND THE GLORY was u...\n","49            Average (and surprisingly tame) Fulci giallo w...  ...  Average (and surprisingly tame) Fulci giallo w...\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possible sentence embedding we could use. 
Let's use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"IKK_Ii_gjJfF","executionInfo":{"status":"ok","timestamp":1609469926255,"user_tz":-300,"elapsed":140492,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"64f54fdd-699a-4559-f6e4-74b7b5f3e92e"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train longer and use a smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(120)  \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    negative       0.85      0.81      0.83      1234\n","     neutral       0.00      0.00      0.00         0\n","    positive       0.87      0.79      0.83      1266\n","\n","    accuracy                           0.80      2500\n","   macro avg       0.57      0.54      0.55      2500\n","weighted avg       0.86      0.80      0.83      2500\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. 
Let's save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","executionInfo":{"status":"ok","timestamp":1609470097011,"user_tz":-300,"elapsed":170766,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"59899be4-e33a-4b5e-ff37-df6a9a3994b2"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Let's load the model from HDD.\n","This makes offline NLU usage possible!   \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":127},"id":"SO4uz45MoRgp","executionInfo":{"status":"ok","timestamp":1609470112616,"user_tz":-300,"elapsed":15622,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"cbe7fc37-7794-4c28-d1de-5ba88d3db58b"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('It was one of the best films i have ever watched in my entire life !!')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>sentiment</th>\n","      
<th>en_embed_sentence_small_bert_L12_768_embeddings</th>\n","      <th>sentiment_confidence</th>\n","      <th>document</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>positive</td>\n","      <td>[0.09222018718719482, 0.11720675230026245, 0.1...</td>\n","      <td>0.999543</td>\n","      <td>It was one of the best films i have ever watch...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["             sentiment  ...                                           document\n","origin_index            ...                                                   \n","0             positive  ...  It was one of the best films i have ever watch...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"e0CVlkk9v6Qi","executionInfo":{"status":"ok","timestamp":1609470112618,"user_tz":-300,"elapsed":17,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c7467f93-d619-470f-fd40-c2be1805b83f"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')            | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])                  | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)                 | Info: whether 
detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False)           | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)                  | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)                      | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)            | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)        | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)       | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')                | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)                    | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)                        | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32)                                 | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False)                          | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768)                                | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128)                        | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False)                                 | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768')         | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6)                         | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral')              | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative'])      | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768')  | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]}
\ No newline at end of file
diff --git a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_apple_twitter.ipynb b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_apple_twitter.ipynb
index e2b1cd02..3808340d 100644
--- a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_apple_twitter.ipynb
+++ b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_apple_twitter.ipynb
@@ -1 +1 @@
-{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_apple_twitter.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"RIV-9vEqxTBB"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\r\n","\r\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_apple_twitter.ipynb)\r\n","\r\n","\r\n","\r\n","# Training a Sentiment Analysis Classifier with NLU \r\n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \r\n","\r\n","This notebook showcases the following features : \r\n","\r\n","- How to train the deep learning classifier\r\n","- How to store a pipeline to disk\r\n","- How to load the pipeline from disk (Enables NLU offline mode)\r\n","\r\n"]},{"cell_type":"code","metadata":{"id":"05-mAOF6ol-0"},"source":["import os\r\n","from sklearn.metrics import classification_report\r\n","! apt-get update -qq > /dev/null   \r\n","# Install java\r\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\r\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\r\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\r\n","! pip install nlu pyspark==2.4.7 > /dev/null  \r\n","\r\n","\r\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download appple twitter  Sentiment dataset \n","https://www.kaggle.com/seriousran/appletwittersentimenttexts\n","\n","this dataset contains tweets made towards apple and today we are going to train our model to predict whether the tweet contains sentiment!\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1609468082890,"user_tz":-300,"elapsed":77740,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a791d4cf-bfa3-4cc6-a60d-c885afe2e917"},"source":["! wget https://raw.githubusercontent.com/ahmedlone127/nlu-master/main/apple-twitter-sentiment-texts.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-01 02:27:38--  https://raw.githubusercontent.com/ahmedlone127/nlu-master/main/apple-twitter-sentiment-texts.csv\n","Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n","Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 31678 (31K) [text/plain]\n","Saving to: ‘apple-twitter-sentiment-texts.csv’\n","\n","apple-twitter-senti 100%[===================>]  30.94K  --.-KB/s    in 0.002s  \n","\n","2021-01-01 02:27:39 (12.9 MB/s) - ‘apple-twitter-sentiment-texts.csv’ saved [31678/31678]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1609468083287,"user_tz":-300,"elapsed":78124,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1a23969f-abf0-4bc3-e2ec-0879b2b77cad"},"source":["import pandas as pd\n","train_path = '/content/apple-twitter-sentiment-texts.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must have name 'y' name be of type str\n","columns=['text','y']\n","train_df = train_df[columns]\n","train_df = train_df[~train_df[\"y\"].isin([\"neuteral\"])]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>y</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>@Apple  you need to sort your phones out.</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>Wow. 
Yall needa step it up @Apple RT @heynyla:...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>I'm surprised there isn't more talk about what...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>Realised the reason @apple make huge phones is...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>Apple Inc. CEO Donates $291K To Pennsylvania S...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>281</th>\n","      <td>@apple so thanks for being greedy assholes who...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>282</th>\n","      <td>@apple iCal AGAIN!!! it reset all my recurring...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>283</th>\n","      <td>Just did my first transaction with @Apple Pay ...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>284</th>\n","      <td>RT @JPDesloges: Kantar Worldpanel: iPhone sale...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>285</th>\n","      <td>Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...</td>\n","      <td>positive</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>286 rows × 2 columns</p>\n","</div>"],"text/plain":["                                                  text         y\n","0            @Apple  you need to sort your phones out.  negative\n","1    Wow. Yall needa step it up @Apple RT @heynyla:...  negative\n","2    I'm surprised there isn't more talk about what...  negative\n","3    Realised the reason @apple make huge phones is...  negative\n","4    Apple Inc. CEO Donates $291K To Pennsylvania S...  positive\n","..                                                 ...       ...\n","281  @apple so thanks for being greedy assholes who...  
negative\n","282  @apple iCal AGAIN!!! it reset all my recurring...  negative\n","283  Just did my first transaction with @Apple Pay ...  positive\n","284  RT @JPDesloges: Kantar Worldpanel: iPhone sale...  positive\n","285  Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...  positive\n","\n","[286 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":845},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609468191792,"user_tz":-300,"elapsed":186618,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"41d18f44-64e1-4766-a8cf-4545813930d7"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix  and fit it on a datset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    negative       0.91      0.80      0.85       143\n","     neutral       0.00      0.00      0.00         0\n","    positive       0.82      0.91      0.86       143\n","\n","    accuracy                           0.86       286\n","   macro avg       0.58      0.57      0.57       286\n","weighted avg       0.86      0.86      0.86       286\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>sentiment_confidence</th>\n","      <th>y</th>\n","      <th>default_name_embeddings</th>\n","      <th>text</th>\n","      <th>document</th>\n","      <th>sentiment</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0.998447</td>\n","      <td>negative</td>\n","      <td>[-0.01731022447347641, 0.010604134760797024, -...</td>\n","      <td>@Apple  you need to sort your phones out.</td>\n","      <td>@Apple you need to sort your phones out.</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      
<td>0.990570</td>\n","      <td>negative</td>\n","      <td>[0.019931159913539886, -0.04991159215569496, -...</td>\n","      <td>Wow. Yall needa step it up @Apple RT @heynyla:...</td>\n","      <td>Wow. Yall needa step it up @Apple RT @heynyla:...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>0.969844</td>\n","      <td>negative</td>\n","      <td>[0.01646081730723381, -0.02681073546409607, -0...</td>\n","      <td>I'm surprised there isn't more talk about what...</td>\n","      <td>I'm surprised there isn't more talk about what...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>0.996128</td>\n","      <td>negative</td>\n","      <td>[0.04638500511646271, -0.037105873227119446, -...</td>\n","      <td>Realised the reason @apple make huge phones is...</td>\n","      <td>Realised the reason @apple make huge phones is...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>0.959235</td>\n","      <td>positive</td>\n","      <td>[-0.028623634949326515, 0.03947276994585991, -...</td>\n","      <td>Apple Inc. CEO Donates $291K To Pennsylvania S...</td>\n","      <td>Apple Inc. 
CEO Donates $291K To Pennsylvania S...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>281</th>\n","      <td>0.978435</td>\n","      <td>negative</td>\n","      <td>[0.03778046742081642, 0.03407461196184158, 0.0...</td>\n","      <td>@apple so thanks for being greedy assholes who...</td>\n","      <td>@apple so thanks for being greedy assholes who...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>282</th>\n","      <td>0.623791</td>\n","      <td>negative</td>\n","      <td>[-0.013547728769481182, -0.001025827950797975,...</td>\n","      <td>@apple iCal AGAIN!!! it reset all my recurring...</td>\n","      <td>@apple iCal AGAIN!!! it reset all my recurring...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>283</th>\n","      <td>0.999104</td>\n","      <td>positive</td>\n","      <td>[-0.0015363194979727268, -0.01644994132220745,...</td>\n","      <td>Just did my first transaction with @Apple Pay ...</td>\n","      <td>Just did my first transaction with @Apple Pay ...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>284</th>\n","      <td>0.999854</td>\n","      <td>positive</td>\n","      <td>[0.0656985342502594, 0.028557728976011276, -0....</td>\n","      <td>RT @JPDesloges: Kantar Worldpanel: iPhone sale...</td>\n","      <td>RT @JPDesloges: Kantar Worldpanel: iPhone sale...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>285</th>\n","      <td>0.983244</td>\n","      <td>positive</td>\n","      <td>[0.02311933971941471, 0.05785432830452919, -0....</td>\n","      <td>Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...</td>\n","      <td>Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...</td>\n","      <td>positive</td>\n","    </tr>\n","  
</tbody>\n","</table>\n","<p>286 rows × 6 columns</p>\n","</div>"],"text/plain":["             sentiment_confidence  ... sentiment\n","origin_index                       ...          \n","0                        0.998447  ...  negative\n","1                        0.990570  ...  positive\n","2                        0.969844  ...  negative\n","3                        0.996128  ...  negative\n","4                        0.959235  ...  positive\n","...                           ...  ...       ...\n","281                      0.978435  ...  negative\n","282                      0.623791  ...  positive\n","283                      0.999104  ...  positive\n","284                      0.999854  ...  positive\n","285                      0.983244  ...  positive\n","\n","[286 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"id":"qdCUg2MR0PD2","colab":{"base_uri":"https://localhost:8080/","height":110},"executionInfo":{"status":"ok","timestamp":1609468194339,"user_tz":-300,"elapsed":189158,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"00d8c7b6-22e1-4979-8c51-58471540a3dd"},"source":["fitted_pipe.predict('I hate the newest update')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>sentiment_confidence</th>\n","      <th>default_name_embeddings</th>\n","      <th>document</th>\n","      <th>sentiment</th>\n","    
</tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0.996097</td>\n","      <td>[0.06468033790588379, -0.040837567299604416, -...</td>\n","      <td>Bitcoin is going to the moon!</td>\n","      <td>positive</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["             sentiment_confidence  ... sentiment\n","origin_index                       ...          \n","0                        0.996097  ...  positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"id":"UtsAUGTmOTms","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609468194341,"user_tz":-300,"elapsed":189154,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3ab00ec5-5894-400f-c6c9-e32099fed1f5"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2)                 | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005)                    | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64)                | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5)                 | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True)       | Info: Whether to use stdout in addition to Spark logs. 
| Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6)               | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral')    | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True)  | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True)       | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([])        | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False)  | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0)            | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999)        | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+')    | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"])  | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0)            | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999)        | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512)               | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use')      | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')  | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"id":"mptfvHx-MMMX","colab":{"base_uri":"https://localhost:8080/","height":793},"executionInfo":{"status":"ok","timestamp":1609468205048,"user_tz":-300,"elapsed":199854,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"9c9a1628-3034-4be0-94bc-7c109d2c3263"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5)  \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:100])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = 
fitted_pipe.predict(train_df.iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["              precision    recall  f1-score   support\n","\n","    negative       0.96      0.85      0.90       143\n","     neutral       0.00      0.00      0.00         0\n","    positive       0.87      0.95      0.91       143\n","\n","    accuracy                           0.90       286\n","   macro avg       0.61      0.60      0.60       286\n","weighted avg       0.92      0.90      0.91       286\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>sentiment_confidence</th>\n","      <th>y</th>\n","      <th>default_name_embeddings</th>\n","      <th>text</th>\n","      <th>document</th>\n","      <th>sentiment</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0.999738</td>\n","      <td>negative</td>\n","      <td>[-0.01731022447347641, 0.010604134760797024, -...</td>\n","      <td>@Apple  you need to sort your phones out.</td>\n","      <td>@Apple you need to sort your phones out.</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      
<th>1</th>\n","      <td>0.937319</td>\n","      <td>negative</td>\n","      <td>[0.019931159913539886, -0.04991159215569496, -...</td>\n","      <td>Wow. Yall needa step it up @Apple RT @heynyla:...</td>\n","      <td>Wow. Yall needa step it up @Apple RT @heynyla:...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>0.974594</td>\n","      <td>negative</td>\n","      <td>[0.01646081730723381, -0.02681073546409607, -0...</td>\n","      <td>I'm surprised there isn't more talk about what...</td>\n","      <td>I'm surprised there isn't more talk about what...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>0.997196</td>\n","      <td>negative</td>\n","      <td>[0.04638500511646271, -0.037105873227119446, -...</td>\n","      <td>Realised the reason @apple make huge phones is...</td>\n","      <td>Realised the reason @apple make huge phones is...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>0.709098</td>\n","      <td>positive</td>\n","      <td>[-0.028623634949326515, 0.03947276994585991, -...</td>\n","      <td>Apple Inc. CEO Donates $291K To Pennsylvania S...</td>\n","      <td>Apple Inc. 
CEO Donates $291K To Pennsylvania S...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>281</th>\n","      <td>0.984257</td>\n","      <td>negative</td>\n","      <td>[0.03778046742081642, 0.03407461196184158, 0.0...</td>\n","      <td>@apple so thanks for being greedy assholes who...</td>\n","      <td>@apple so thanks for being greedy assholes who...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>282</th>\n","      <td>0.904880</td>\n","      <td>negative</td>\n","      <td>[-0.013547728769481182, -0.001025827950797975,...</td>\n","      <td>@apple iCal AGAIN!!! it reset all my recurring...</td>\n","      <td>@apple iCal AGAIN!!! it reset all my recurring...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>283</th>\n","      <td>0.995687</td>\n","      <td>positive</td>\n","      <td>[-0.0015363194979727268, -0.01644994132220745,...</td>\n","      <td>Just did my first transaction with @Apple Pay ...</td>\n","      <td>Just did my first transaction with @Apple Pay ...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>284</th>\n","      <td>0.998746</td>\n","      <td>positive</td>\n","      <td>[0.0656985342502594, 0.028557728976011276, -0....</td>\n","      <td>RT @JPDesloges: Kantar Worldpanel: iPhone sale...</td>\n","      <td>RT @JPDesloges: Kantar Worldpanel: iPhone sale...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>285</th>\n","      <td>0.710708</td>\n","      <td>positive</td>\n","      <td>[0.02311933971941471, 0.05785432830452919, -0....</td>\n","      <td>Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...</td>\n","      <td>Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...</td>\n","      <td>positive</td>\n","    </tr>\n","  
</tbody>\n","</table>\n","<p>286 rows × 6 columns</p>\n","</div>"],"text/plain":["             sentiment_confidence  ... sentiment\n","origin_index                       ...          \n","0                        0.999738  ...  negative\n","1                        0.937319  ...  positive\n","2                        0.974594  ...  negative\n","3                        0.997196  ...  negative\n","4                        0.709098  ...  positive\n","...                           ...  ...       ...\n","281                      0.984257  ...  negative\n","282                      0.904880  ...  negative\n","283                      0.995687  ...  positive\n","284                      0.998746  ...  positive\n","285                      0.710708  ...  positive\n","\n","[286 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609468205058,"user_tz":-300,"elapsed":199858,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"11560398-8fb9-4110-aed3-f7d9c1f71268"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language <en> NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language <fi> NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language <xx> NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609468415116,"user_tz":-300,"elapsed":409908,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b8e4f245-595a-40f3-9e1d-76f71e76b74e"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train 
longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(110)  \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    negative       0.96      0.85      0.90       143\n","     neutral       0.00      0.00      0.00         0\n","    positive       0.92      0.92      0.92       143\n","\n","    accuracy                           0.88       286\n","   macro avg       0.63      0.59      0.61       286\n","weighted avg       0.94      0.88      0.91       286\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. 
Lets save the model"]},{"cell_type":"code","metadata":{"id":"bZZpObLOtqo8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609468632998,"user_tz":-300,"elapsed":627783,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e6a87d34-ce84-4968-c3a0-9aade476874b"},"source":["stored_model_path = './models/classifier_dl_trained' \r\n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible!   \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":127},"executionInfo":{"status":"ok","timestamp":1609468646911,"user_tz":-300,"elapsed":641690,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"454b2c7d-7c32-4cc2-cf25-a52b5a879abd"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('I hate the newest update')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>sentiment_confidence</th>\n","      
<th>en_embed_sentence_small_bert_L12_768_embeddings</th>\n","      <th>document</th>\n","      <th>sentiment</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0.974083</td>\n","      <td>[-0.058236218988895416, -0.3061041235923767, 0...</td>\n","      <td>I hate it</td>\n","      <td>negative</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["             sentiment_confidence  ... sentiment\n","origin_index                       ...          \n","0                        0.974083  ...  negative\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609468646914,"user_tz":-300,"elapsed":641685,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"80ce5918-3803-45f4-e10f-300144342295"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')            | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)       | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')                | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)                    | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)                        | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])                  | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)                 | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False)           | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)                  | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)                      | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)            | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)        | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32)                                 | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False)                          | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768)                                | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128)                        | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False)                                 | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768')         | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6)                         | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral')              | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative'])      | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768')  | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]}
\ No newline at end of file
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_apple_twitter.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"RIV-9vEqxTBB"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\r\n","\r\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_apple_twitter.ipynb)\r\n","\r\n","\r\n","\r\n","# Training a Sentiment Analysis Classifier with NLU \r\n","## 2 class Apple Tweets sentiment classifier training\r\n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \r\n","\r\n","This notebook showcases the following features : \r\n","\r\n","- How to train the deep learning classifier\r\n","- How to store a pipeline to disk\r\n","- How to load the pipeline from disk (Enables NLU offline mode)\r\n","\r\n"]},{"cell_type":"code","metadata":{"id":"05-mAOF6ol-0"},"source":["import os\r\n","from sklearn.metrics import classification_report\r\n","! apt-get update -qq > /dev/null   \r\n","# Install java\r\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\r\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\r\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\r\n","! pip install nlu pyspark==2.4.7 > /dev/null  \r\n","\r\n","\r\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download the Apple Twitter sentiment dataset \n","https://www.kaggle.com/seriousran/appletwittersentimenttexts\n","\n","This dataset contains tweets directed at Apple. We are going to train our model to predict whether a tweet's sentiment is positive or negative!\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1609468082890,"user_tz":-300,"elapsed":77740,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a791d4cf-bfa3-4cc6-a60d-c885afe2e917"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/apple-twitter-sentiment-texts.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-01 02:27:38--  https://raw.githubusercontent.com/ahmedlone127/nlu-master/main/apple-twitter-sentiment-texts.csv\n","Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n","Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 31678 (31K) [text/plain]\n","Saving to: ‘apple-twitter-sentiment-texts.csv’\n","\n","apple-twitter-senti 100%[===================>]  30.94K  --.-KB/s    in 0.002s  \n","\n","2021-01-01 02:27:39 (12.9 MB/s) - ‘apple-twitter-sentiment-texts.csv’ saved [31678/31678]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1609468083287,"user_tz":-300,"elapsed":78124,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1a23969f-abf0-4bc3-e2ec-0879b2b77cad"},"source":["import pandas as pd\n","train_path = '/content/apple-twitter-sentiment-texts.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must have name 'y' name be of type str\n","columns=['text','y']\n","train_df = train_df[columns]\n","train_df = train_df[~train_df[\"y\"].isin([\"neuteral\"])]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>y</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>@Apple  you need to sort your phones out.</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>Wow. 
Yall needa step it up @Apple RT @heynyla:...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>I'm surprised there isn't more talk about what...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>Realised the reason @apple make huge phones is...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>Apple Inc. CEO Donates $291K To Pennsylvania S...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>281</th>\n","      <td>@apple so thanks for being greedy assholes who...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>282</th>\n","      <td>@apple iCal AGAIN!!! it reset all my recurring...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>283</th>\n","      <td>Just did my first transaction with @Apple Pay ...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>284</th>\n","      <td>RT @JPDesloges: Kantar Worldpanel: iPhone sale...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>285</th>\n","      <td>Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...</td>\n","      <td>positive</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>286 rows × 2 columns</p>\n","</div>"],"text/plain":["                                                  text         y\n","0            @Apple  you need to sort your phones out.  negative\n","1    Wow. Yall needa step it up @Apple RT @heynyla:...  negative\n","2    I'm surprised there isn't more talk about what...  negative\n","3    Realised the reason @apple make huge phones is...  negative\n","4    Apple Inc. CEO Donates $291K To Pennsylvania S...  positive\n","..                                                 ...       ...\n","281  @apple so thanks for being greedy assholes who...  
negative\n","282  @apple iCal AGAIN!!! it reset all my recurring...  negative\n","283  Just did my first transaction with @Apple Pay ...  positive\n","284  RT @JPDesloges: Kantar Worldpanel: iPhone sale...  positive\n","285  Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...  positive\n","\n","[286 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":845},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609468191792,"user_tz":-300,"elapsed":186618,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"41d18f44-64e1-4766-a8cf-4545813930d7"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix  and fit it on a dataset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates some NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    negative       0.91      0.80      0.85       143\n","     neutral       0.00      0.00      0.00         0\n","    positive       0.82      0.91      0.86       143\n","\n","    accuracy                           0.86       286\n","   macro avg       0.58      0.57      0.57       286\n","weighted avg       0.86      0.86      0.86       286\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>sentiment_confidence</th>\n","      <th>y</th>\n","      <th>default_name_embeddings</th>\n","      <th>text</th>\n","      <th>document</th>\n","      <th>sentiment</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0.998447</td>\n","      <td>negative</td>\n","      <td>[-0.01731022447347641, 0.010604134760797024, -...</td>\n","      <td>@Apple  you need to sort your phones out.</td>\n","      <td>@Apple you need to sort your phones out.</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      
<td>0.990570</td>\n","      <td>negative</td>\n","      <td>[0.019931159913539886, -0.04991159215569496, -...</td>\n","      <td>Wow. Yall needa step it up @Apple RT @heynyla:...</td>\n","      <td>Wow. Yall needa step it up @Apple RT @heynyla:...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>0.969844</td>\n","      <td>negative</td>\n","      <td>[0.01646081730723381, -0.02681073546409607, -0...</td>\n","      <td>I'm surprised there isn't more talk about what...</td>\n","      <td>I'm surprised there isn't more talk about what...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>0.996128</td>\n","      <td>negative</td>\n","      <td>[0.04638500511646271, -0.037105873227119446, -...</td>\n","      <td>Realised the reason @apple make huge phones is...</td>\n","      <td>Realised the reason @apple make huge phones is...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>0.959235</td>\n","      <td>positive</td>\n","      <td>[-0.028623634949326515, 0.03947276994585991, -...</td>\n","      <td>Apple Inc. CEO Donates $291K To Pennsylvania S...</td>\n","      <td>Apple Inc. 
CEO Donates $291K To Pennsylvania S...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>281</th>\n","      <td>0.978435</td>\n","      <td>negative</td>\n","      <td>[0.03778046742081642, 0.03407461196184158, 0.0...</td>\n","      <td>@apple so thanks for being greedy assholes who...</td>\n","      <td>@apple so thanks for being greedy assholes who...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>282</th>\n","      <td>0.623791</td>\n","      <td>negative</td>\n","      <td>[-0.013547728769481182, -0.001025827950797975,...</td>\n","      <td>@apple iCal AGAIN!!! it reset all my recurring...</td>\n","      <td>@apple iCal AGAIN!!! it reset all my recurring...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>283</th>\n","      <td>0.999104</td>\n","      <td>positive</td>\n","      <td>[-0.0015363194979727268, -0.01644994132220745,...</td>\n","      <td>Just did my first transaction with @Apple Pay ...</td>\n","      <td>Just did my first transaction with @Apple Pay ...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>284</th>\n","      <td>0.999854</td>\n","      <td>positive</td>\n","      <td>[0.0656985342502594, 0.028557728976011276, -0....</td>\n","      <td>RT @JPDesloges: Kantar Worldpanel: iPhone sale...</td>\n","      <td>RT @JPDesloges: Kantar Worldpanel: iPhone sale...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>285</th>\n","      <td>0.983244</td>\n","      <td>positive</td>\n","      <td>[0.02311933971941471, 0.05785432830452919, -0....</td>\n","      <td>Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...</td>\n","      <td>Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...</td>\n","      <td>positive</td>\n","    </tr>\n","  
</tbody>\n","</table>\n","<p>286 rows × 6 columns</p>\n","</div>"],"text/plain":["             sentiment_confidence  ... sentiment\n","origin_index                       ...          \n","0                        0.998447  ...  negative\n","1                        0.990570  ...  positive\n","2                        0.969844  ...  negative\n","3                        0.996128  ...  negative\n","4                        0.959235  ...  positive\n","...                           ...  ...       ...\n","281                      0.978435  ...  negative\n","282                      0.623791  ...  positive\n","283                      0.999104  ...  positive\n","284                      0.999854  ...  positive\n","285                      0.983244  ...  positive\n","\n","[286 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"id":"qdCUg2MR0PD2","colab":{"base_uri":"https://localhost:8080/","height":110},"executionInfo":{"status":"ok","timestamp":1609468194339,"user_tz":-300,"elapsed":189158,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"00d8c7b6-22e1-4979-8c51-58471540a3dd"},"source":["fitted_pipe.predict('I hate the newest update')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>sentiment_confidence</th>\n","      <th>default_name_embeddings</th>\n","      <th>document</th>\n","      <th>sentiment</th>\n","    
</tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0.996097</td>\n","      <td>[0.06468033790588379, -0.040837567299604416, -...</td>\n","      <td>Bitcoin is going to the moon!</td>\n","      <td>positive</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["             sentiment_confidence  ... sentiment\n","origin_index                       ...          \n","0                        0.996097  ...  positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"id":"UtsAUGTmOTms","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609468194341,"user_tz":-300,"elapsed":189154,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3ab00ec5-5894-400f-c6c9-e32099fed1f5"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2)                 | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005)                    | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64)                | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5)                 | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True)       | Info: Whether to use stdout in addition to Spark logs. 
| Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6)               | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral')    | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True)  | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True)       | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([])        | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False)  | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0)            | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999)        | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+')    | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"])  | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0)            | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999)        | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512)               | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use')      | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')  | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"id":"mptfvHx-MMMX","colab":{"base_uri":"https://localhost:8080/","height":793},"executionInfo":{"status":"ok","timestamp":1609468205048,"user_tz":-300,"elapsed":199854,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"9c9a1628-3034-4be0-94bc-7c109d2c3263"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5)  \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:100])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = 
fitted_pipe.predict(train_df.iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["              precision    recall  f1-score   support\n","\n","    negative       0.96      0.85      0.90       143\n","     neutral       0.00      0.00      0.00         0\n","    positive       0.87      0.95      0.91       143\n","\n","    accuracy                           0.90       286\n","   macro avg       0.61      0.60      0.60       286\n","weighted avg       0.92      0.90      0.91       286\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>sentiment_confidence</th>\n","      <th>y</th>\n","      <th>default_name_embeddings</th>\n","      <th>text</th>\n","      <th>document</th>\n","      <th>sentiment</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0.999738</td>\n","      <td>negative</td>\n","      <td>[-0.01731022447347641, 0.010604134760797024, -...</td>\n","      <td>@Apple  you need to sort your phones out.</td>\n","      <td>@Apple you need to sort your phones out.</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      
<th>1</th>\n","      <td>0.937319</td>\n","      <td>negative</td>\n","      <td>[0.019931159913539886, -0.04991159215569496, -...</td>\n","      <td>Wow. Yall needa step it up @Apple RT @heynyla:...</td>\n","      <td>Wow. Yall needa step it up @Apple RT @heynyla:...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>0.974594</td>\n","      <td>negative</td>\n","      <td>[0.01646081730723381, -0.02681073546409607, -0...</td>\n","      <td>I'm surprised there isn't more talk about what...</td>\n","      <td>I'm surprised there isn't more talk about what...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>0.997196</td>\n","      <td>negative</td>\n","      <td>[0.04638500511646271, -0.037105873227119446, -...</td>\n","      <td>Realised the reason @apple make huge phones is...</td>\n","      <td>Realised the reason @apple make huge phones is...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>0.709098</td>\n","      <td>positive</td>\n","      <td>[-0.028623634949326515, 0.03947276994585991, -...</td>\n","      <td>Apple Inc. CEO Donates $291K To Pennsylvania S...</td>\n","      <td>Apple Inc. 
CEO Donates $291K To Pennsylvania S...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>281</th>\n","      <td>0.984257</td>\n","      <td>negative</td>\n","      <td>[0.03778046742081642, 0.03407461196184158, 0.0...</td>\n","      <td>@apple so thanks for being greedy assholes who...</td>\n","      <td>@apple so thanks for being greedy assholes who...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>282</th>\n","      <td>0.904880</td>\n","      <td>negative</td>\n","      <td>[-0.013547728769481182, -0.001025827950797975,...</td>\n","      <td>@apple iCal AGAIN!!! it reset all my recurring...</td>\n","      <td>@apple iCal AGAIN!!! it reset all my recurring...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>283</th>\n","      <td>0.995687</td>\n","      <td>positive</td>\n","      <td>[-0.0015363194979727268, -0.01644994132220745,...</td>\n","      <td>Just did my first transaction with @Apple Pay ...</td>\n","      <td>Just did my first transaction with @Apple Pay ...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>284</th>\n","      <td>0.998746</td>\n","      <td>positive</td>\n","      <td>[0.0656985342502594, 0.028557728976011276, -0....</td>\n","      <td>RT @JPDesloges: Kantar Worldpanel: iPhone sale...</td>\n","      <td>RT @JPDesloges: Kantar Worldpanel: iPhone sale...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>285</th>\n","      <td>0.710708</td>\n","      <td>positive</td>\n","      <td>[0.02311933971941471, 0.05785432830452919, -0....</td>\n","      <td>Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...</td>\n","      <td>Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...</td>\n","      <td>positive</td>\n","    </tr>\n","  
</tbody>\n","</table>\n","<p>286 rows × 6 columns</p>\n","</div>"],"text/plain":["             sentiment_confidence  ... sentiment\n","origin_index                       ...          \n","0                        0.999738  ...  negative\n","1                        0.937319  ...  positive\n","2                        0.974594  ...  negative\n","3                        0.997196  ...  negative\n","4                        0.709098  ...  positive\n","...                           ...  ...       ...\n","281                      0.984257  ...  negative\n","282                      0.904880  ...  negative\n","283                      0.995687  ...  positive\n","284                      0.998746  ...  positive\n","285                      0.710708  ...  positive\n","\n","[286 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609468205058,"user_tz":-300,"elapsed":199858,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"11560398-8fb9-4110-aed3-f7d9c1f71268"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language <en> NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language <fi> NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language <xx> NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609468415116,"user_tz":-300,"elapsed":409908,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b8e4f245-595a-40f3-9e1d-76f71e76b74e"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train 
longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(110)  \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    negative       0.96      0.85      0.90       143\n","     neutral       0.00      0.00      0.00         0\n","    positive       0.92      0.92      0.92       143\n","\n","    accuracy                           0.88       286\n","   macro avg       0.63      0.59      0.61       286\n","weighted avg       0.94      0.88      0.91       286\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. 
Lets save the model"]},{"cell_type":"code","metadata":{"id":"bZZpObLOtqo8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609468632998,"user_tz":-300,"elapsed":627783,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e6a87d34-ce84-4968-c3a0-9aade476874b"},"source":["stored_model_path = './models/classifier_dl_trained' \r\n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible!   \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":127},"executionInfo":{"status":"ok","timestamp":1609468646911,"user_tz":-300,"elapsed":641690,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"454b2c7d-7c32-4cc2-cf25-a52b5a879abd"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('I hate the newest update')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>sentiment_confidence</th>\n","      
<th>en_embed_sentence_small_bert_L12_768_embeddings</th>\n","      <th>document</th>\n","      <th>sentiment</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0.974083</td>\n","      <td>[-0.058236218988895416, -0.3061041235923767, 0...</td>\n","      <td>I hate it</td>\n","      <td>negative</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["             sentiment_confidence  ... sentiment\n","origin_index                       ...          \n","0                        0.974083  ...  negative\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609468646914,"user_tz":-300,"elapsed":641685,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"80ce5918-3803-45f4-e10f-300144342295"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')            | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)       | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')                | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)                    | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)                        | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])                  | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)                 | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False)           | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)                  | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)                      | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)            | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)        | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32)                                 | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False)                          | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768)                                | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128)                        | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False)                                 | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768')         | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6)                         | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral')              | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative'])      | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768')  | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]}
\ No newline at end of file
diff --git a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_finanical_news.ipynb b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_finanical_news.ipynb
index 4d0dd044..81f918c0 100644
--- a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_finanical_news.ipynb
+++ b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_finanical_news.ipynb
@@ -1 +1 @@
-{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_finanical_news.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_finanical_news.ipynb)\n","\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu pyspark==2.4.7 > /dev/null  \n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download Finanical News  Sentiment dataset \n","https://www.kaggle.com/ankurzing/sentiment-analysis-for-financial-news\n","\n","This dataset contains the sentiments for financial news headlines from the perspective of a retail investor. Further details about the dataset can be found in: Malo, P., Sinha, A., Takala, P., Korhonen, P. and Wallenius, J. (2014): “Good debt or bad debt: Detecting semantic orientations in economic texts.” Journal of the American Society for Information Science and Technology."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610788018304,"user_tz":-300,"elapsed":2399,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"f494fab0-8f9c-4087-f554-31a21764a207"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/all-data.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:06:20--  http://ckl-it.de/wp-content/uploads/2021/01/all-data.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 704799 (688K) [text/csv]\n","Saving to: ‘all-data.csv’\n","\n","all-data.csv        100%[===================>] 688.28K  1.09MB/s    in 0.6s    \n","\n","2021-01-16 09:06:21 (1.09 MB/s) - ‘all-data.csv’ saved [704799/704799]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610788018314,"user_tz":-300,"elapsed":660,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e1e2496a-8df8-4e5d-db53-63d62ef1f050"},"source":["import pandas as pd\n","train_path = '/content/all-data.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must have name 'y' name be of type str\n","columns=['text','y']\n","train_df = train_df[columns]\n","train_df = train_df[~train_df[\"y\"].isin([\"neutral\"])]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>y</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>1</th>\n","      <td>The international electronic industry company ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>With the new production plant the company woul...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>According to the company 's updated strategy f...</td>\n","      
<td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>5</th>\n","      <td>For the last quarter of 2010 , Componenta 's n...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>4839</th>\n","      <td>HELSINKI Thomson Financial - Shares in Cargote...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>4840</th>\n","      <td>LONDON MarketWatch -- Share prices ended lower...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>4842</th>\n","      <td>Operating profit fell to EUR 35.4 mn from EUR ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>4843</th>\n","      <td>Net sales of the Paper segment decreased to EU...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>4844</th>\n","      <td>Sales in Finland decreased by 10.5 % in Januar...</td>\n","      <td>negative</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>1967 rows × 2 columns</p>\n","</div>"],"text/plain":["                                                   text         y\n","1     The international electronic industry company ...  negative\n","2     With the new production plant the company woul...  positive\n","3     According to the company 's updated strategy f...  positive\n","4     FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...  positive\n","5     For the last quarter of 2010 , Componenta 's n...  positive\n","...                                                 ...       ...\n","4839  HELSINKI Thomson Financial - Shares in Cargote...  negative\n","4840  LONDON MarketWatch -- Share prices ended lower...  negative\n","4842  Operating profit fell to EUR 35.4 mn from EUR ...  
negative\n","4843  Net sales of the Paper segment decreased to EU...  negative\n","4844  Sales in Finland decreased by 10.5 % in Januar...  negative\n","\n","[1967 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609264914996,"user_tz":-300,"elapsed":191025,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"6dc536e4-252e-4324-e070-cd477a79330d"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix  and fit it on a datset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    negative       0.00      0.00      0.00         1\n","    positive       0.98      1.00      0.99        49\n","\n","    accuracy                           0.98        50\n","   macro avg       0.49      0.50      0.49        50\n","weighted avg       0.96      0.98      0.97        50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>document</th>\n","      <th>sentiment</th>\n","      <th>y</th>\n","      <th>sentiment_confidence</th>\n","      <th>text</th>\n","      <th>default_name_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>1</th>\n","      <td>The international electronic industry company ...</td>\n","      <td>positive</td>\n","      <td>negative</td>\n","      <td>1.000000</td>\n","      <td>The international electronic industry company ...</td>\n","      <td>[0.002136496128514409, 0.07194118946790695, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>With the new production plant the company 
woul...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>With the new production plant the company woul...</td>\n","      <td>[0.05198746547102928, 0.03577739745378494, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>According to the company 's updated strategy f...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>According to the company 's updated strategy f...</td>\n","      <td>[0.03416536748409271, 0.04053246229887009, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...</td>\n","      <td>[0.07730763405561447, -0.045694783329963684, -...</td>\n","    </tr>\n","    <tr>\n","      <th>5</th>\n","      <td>For the last quarter of 2010 , Componenta 's n...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>For the last quarter of 2010 , Componenta 's n...</td>\n","      <td>[0.05603468790650368, 0.04817350581288338, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>6</th>\n","      <td>In the third quarter of 2010 , net sales incre...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>In the third quarter of 2010 , net sales incre...</td>\n","      <td>[0.037710510194301605, 0.037198420614004135, -...</td>\n","    </tr>\n","    <tr>\n","      <th>7</th>\n","      <td>Operating profit rose to EUR 13.1 mn from EUR ...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Operating profit rose to EUR 13.1 mn from EUR ...</td>\n","      <td>[0.04557091370224953, 0.0453636609017849, -0.0...</td>\n","    </tr>\n","    <tr>\n","      <th>8</th>\n","      
<td>Operating profit totalled EUR 21.1 mn , up fro...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Operating profit totalled EUR 21.1 mn , up fro...</td>\n","      <td>[0.05191247910261154, 0.059505216777324677, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>9</th>\n","      <td>TeliaSonera TLSN said the offer is in line wit...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>TeliaSonera TLSN said the offer is in line wit...</td>\n","      <td>[0.07441692799329758, -0.0487477071583271, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>10</th>\n","      <td>STORA ENSO , NORSKE SKOG , M-REAL , UPM-KYMMEN...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>STORA ENSO , NORSKE SKOG , M-REAL , UPM-KYMMEN...</td>\n","      <td>[0.03200741112232208, 0.03773287683725357, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>11</th>\n","      <td>A purchase agreement for 7,200 tons of gasolin...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>A purchase agreement for 7,200 tons of gasolin...</td>\n","      <td>[0.05590442568063736, 0.041032955050468445, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>12</th>\n","      <td>Finnish Talentum reports its operating profit ...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Finnish Talentum reports its operating profit ...</td>\n","      <td>[0.06596074998378754, 0.05897102504968643, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>13</th>\n","      <td>Clothing retail chain Sepp+Æl+Æ 's sales incre...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Clothing retail chain Sepp+Æl+Æ 's sales incre...</td>\n","      <td>[0.03395465016365051, 0.05171804875135422, 0.0...</td>\n","    </tr>\n","  
  <tr>\n","      <th>14</th>\n","      <td>Consolidated net sales increased 16 % to reach...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Consolidated net sales increased 16 % to reach...</td>\n","      <td>[0.060446273535490036, 0.03799470514059067, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>15</th>\n","      <td>Foundries division reports its sales increased...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Foundries division reports its sales increased...</td>\n","      <td>[0.0494563989341259, 0.05158388614654541, -0.0...</td>\n","    </tr>\n","    <tr>\n","      <th>16</th>\n","      <td>HELSINKI ( AFX ) - Shares closed higher , led ...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>HELSINKI ( AFX ) - Shares closed higher , led ...</td>\n","      <td>[0.0629865899682045, -0.045351240783929825, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>17</th>\n","      <td>Incap Contract Manufacturing Services Pvt Ltd ...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Incap Contract Manufacturing Services Pvt Ltd ...</td>\n","      <td>[0.05365738272666931, -0.055247869342565536, -...</td>\n","    </tr>\n","    <tr>\n","      <th>18</th>\n","      <td>Its board of directors will propose a dividend...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Its board of directors will propose a dividend...</td>\n","      <td>[0.0692642331123352, 0.02292279154062271, -0.0...</td>\n","    </tr>\n","    <tr>\n","      <th>19</th>\n","      <td>Lifetree was founded in 2000 , and its revenue...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Lifetree was founded in 2000 , and its revenue...</td>\n","      <td>[0.0810408890247345, 
0.039108917117118835, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>20</th>\n","      <td>( Filippova ) A trilateral agreement on invest...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>0.999998</td>\n","      <td>( Filippova ) A trilateral agreement on invest...</td>\n","      <td>[0.05172618478536606, 0.02967883087694645, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>21</th>\n","      <td>MegaFon 's subscriber base increased 16.1 % in...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>MegaFon 's subscriber base increased 16.1 % in...</td>\n","      <td>[0.03825156390666962, 0.001971189398318529, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>22</th>\n","      <td>Net income from life insurance doubled to EUR ...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Net income from life insurance doubled to EUR ...</td>\n","      <td>[0.05222763866186142, 0.05695151165127754, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>23</th>\n","      <td>Net sales increased to EUR193 .3 m from EUR179...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Net sales increased to EUR193 .3 m from EUR179...</td>\n","      <td>[0.02272764965891838, 0.016222774982452393, 0....</td>\n","    </tr>\n","    <tr>\n","      <th>24</th>\n","      <td>Net sales surged by 18.5 % to EUR167 .8 m. Tel...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Net sales surged by 18.5 % to EUR167 .8 m. 
Tel...</td>\n","      <td>[0.05020830035209656, 0.03307913616299629, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>25</th>\n","      <td>Nordea Group 's operating profit increased in ...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Nordea Group 's operating profit increased in ...</td>\n","      <td>[0.0497022308409214, 0.023793146014213562, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>26</th>\n","      <td>Operating profit for the nine-month period inc...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Operating profit for the nine-month period inc...</td>\n","      <td>[0.04339126497507095, 0.024815633893013, -0.02...</td>\n","    </tr>\n","    <tr>\n","      <th>27</th>\n","      <td>Operating profit for the nine-month period inc...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Operating profit for the nine-month period inc...</td>\n","      <td>[0.035663120448589325, 0.03037247434258461, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>28</th>\n","      <td>Operating profit for the three-month period in...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Operating profit for the three-month period in...</td>\n","      <td>[0.029575243592262268, 0.007764187641441822, -...</td>\n","    </tr>\n","    <tr>\n","      <th>29</th>\n","      <td>The Brazilian unit of Finnish security solutio...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>The Brazilian unit of Finnish security solutio...</td>\n","      <td>[0.047570426017045975, -0.023694489151239395, ...</td>\n","    </tr>\n","    <tr>\n","      <th>30</th>\n","      <td>The company 's net profit rose 11.4 % on the y...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      
<td>The company 's net profit rose 11.4 % on the y...</td>\n","      <td>[0.06896018236875534, 0.046189870685338974, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>31</th>\n","      <td>The Lithuanian beer market made up 14.41 milli...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>0.999999</td>\n","      <td>The Lithuanian beer market made up 14.41 milli...</td>\n","      <td>[0.0020184037275612354, -0.044685497879981995,...</td>\n","    </tr>\n","    <tr>\n","      <th>32</th>\n","      <td>Viking Line 's cargo revenue increased by 5.4 ...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Viking Line 's cargo revenue increased by 5.4 ...</td>\n","      <td>[-0.007756179664283991, -0.04868081212043762, ...</td>\n","    </tr>\n","    <tr>\n","      <th>33</th>\n","      <td>The fair value of the property portfolio doubl...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>The fair value of the property portfolio doubl...</td>\n","      <td>[0.06604734063148499, -0.025070184841752052, 0...</td>\n","    </tr>\n","    <tr>\n","      <th>34</th>\n","      <td>10 February 2011 - Finnish media company Sanom...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>10 February 2011 - Finnish media company Sanom...</td>\n","      <td>[0.05996786803007126, 0.03255663812160492, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>35</th>\n","      <td>A Helsinki : ELIiV today reported EPS of EUR1 ...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>0.999999</td>\n","      <td>A Helsinki : ELIiV today reported EPS of EUR1 ...</td>\n","      <td>[0.051878154277801514, -0.03290269523859024, -...</td>\n","    </tr>\n","    <tr>\n","      <th>36</th>\n","      <td>Aspo Plc STOCK EXCHANGE RELEASE February 11 , ...</td>\n","      <td>positive</td>\n","      
<td>positive</td>\n","      <td>1.000000</td>\n","      <td>Aspo Plc STOCK EXCHANGE RELEASE February 11 , ...</td>\n","      <td>[0.03545805439352989, -0.04956813529133797, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>37</th>\n","      <td>Commission income increased by 22 % to EUR 4.4...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Commission income increased by 22 % to EUR 4.4...</td>\n","      <td>[0.05664118379354477, 0.004533933009952307, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>38</th>\n","      <td>In January , traffic , measured in revenue pas...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>In January , traffic , measured in revenue pas...</td>\n","      <td>[-0.026962362229824066, 0.010590712539851665, ...</td>\n","    </tr>\n","    <tr>\n","      <th>39</th>\n","      <td>In January-September 2010 , Fiskars ' net prof...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>In January-September 2010 , Fiskars ' net prof...</td>\n","      <td>[0.056088510900735855, 0.0369233600795269, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>40</th>\n","      <td>Net income from life insurance rose to EUR 16....</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Net income from life insurance rose to EUR 16....</td>\n","      <td>[0.05793088302016258, 0.06312950700521469, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>41</th>\n","      <td>Nyrstar has also agreed to supply to Talvivaar...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Nyrstar has also agreed to supply to Talvivaar...</td>\n","      <td>[0.004785533994436264, 0.004442625679075718, -...</td>\n","    </tr>\n","    <tr>\n","      <th>42</th>\n","      <td>Sales for both the Department Store Division 
a...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Sales for both the Department Store Division a...</td>\n","      <td>[-0.050088364630937576, 0.04885219410061836, 0...</td>\n","    </tr>\n","    <tr>\n","      <th>43</th>\n","      <td>Sales have risen in other export markets .</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Sales have risen in other export markets .</td>\n","      <td>[0.058916959911584854, 0.018443405628204346, -...</td>\n","    </tr>\n","    <tr>\n","      <th>44</th>\n","      <td>Sales increased due to growing market rates an...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Sales increased due to growing market rates an...</td>\n","      <td>[0.047733016312122345, 0.010620158165693283, 0...</td>\n","    </tr>\n","    <tr>\n","      <th>45</th>\n","      <td>The agreement strengthens our long-term partne...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>The agreement strengthens our long-term partne...</td>\n","      <td>[0.06433788686990738, 0.027824176475405693, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>46</th>\n","      <td>The agreement was signed with Biohit Healthcar...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>The agreement was signed with Biohit Healthcar...</td>\n","      <td>[0.03612205758690834, 0.038267459720373154, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>47</th>\n","      <td>The company also estimates the already carried...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>The company also estimates the already carried...</td>\n","      <td>[0.04304526373744011, 0.023360760882496834, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>The company 's 
order book stood at 1.5 bln eur...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>The company 's order book stood at 1.5 bln eur...</td>\n","      <td>[0.036210183054208755, -0.010278576985001564, ...</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>The company said that paper demand increased i...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>The company said that paper demand increased i...</td>\n","      <td>[0.06558039039373398, 0.04877239838242531, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>50</th>\n","      <td>The world 's second largest stainless steel ma...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>The world 's second largest stainless steel ma...</td>\n","      <td>[0.04267223924398422, 0.03184577450156212, -0....</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                                       document  ...                            default_name_embeddings\n","origin_index                                                     ...                                                   \n","1             The international electronic industry company ...  ...  [0.002136496128514409, 0.07194118946790695, -0...\n","2             With the new production plant the company woul...  ...  [0.05198746547102928, 0.03577739745378494, -0....\n","3             According to the company 's updated strategy f...  ...  [0.03416536748409271, 0.04053246229887009, -0....\n","4             FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...  ...  [0.07730763405561447, -0.045694783329963684, -...\n","5             For the last quarter of 2010 , Componenta 's n...  ...  [0.05603468790650368, 0.04817350581288338, -0....\n","6             In the third quarter of 2010 , net sales incre...  ...  
[0.037710510194301605, 0.037198420614004135, -...\n","7             Operating profit rose to EUR 13.1 mn from EUR ...  ...  [0.04557091370224953, 0.0453636609017849, -0.0...\n","8             Operating profit totalled EUR 21.1 mn , up fro...  ...  [0.05191247910261154, 0.059505216777324677, -0...\n","9             TeliaSonera TLSN said the offer is in line wit...  ...  [0.07441692799329758, -0.0487477071583271, -0....\n","10            STORA ENSO , NORSKE SKOG , M-REAL , UPM-KYMMEN...  ...  [0.03200741112232208, 0.03773287683725357, -0....\n","11            A purchase agreement for 7,200 tons of gasolin...  ...  [0.05590442568063736, 0.041032955050468445, -0...\n","12            Finnish Talentum reports its operating profit ...  ...  [0.06596074998378754, 0.05897102504968643, -0....\n","13            Clothing retail chain Sepp+Æl+Æ 's sales incre...  ...  [0.03395465016365051, 0.05171804875135422, 0.0...\n","14            Consolidated net sales increased 16 % to reach...  ...  [0.060446273535490036, 0.03799470514059067, -0...\n","15            Foundries division reports its sales increased...  ...  [0.0494563989341259, 0.05158388614654541, -0.0...\n","16            HELSINKI ( AFX ) - Shares closed higher , led ...  ...  [0.0629865899682045, -0.045351240783929825, -0...\n","17            Incap Contract Manufacturing Services Pvt Ltd ...  ...  [0.05365738272666931, -0.055247869342565536, -...\n","18            Its board of directors will propose a dividend...  ...  [0.0692642331123352, 0.02292279154062271, -0.0...\n","19            Lifetree was founded in 2000 , and its revenue...  ...  [0.0810408890247345, 0.039108917117118835, -0....\n","20            ( Filippova ) A trilateral agreement on invest...  ...  [0.05172618478536606, 0.02967883087694645, -0....\n","21            MegaFon 's subscriber base increased 16.1 % in...  ...  [0.03825156390666962, 0.001971189398318529, -0...\n","22            Net income from life insurance doubled to EUR ...  ...  
[0.05222763866186142, 0.05695151165127754, -0....\n","23            Net sales increased to EUR193 .3 m from EUR179...  ...  [0.02272764965891838, 0.016222774982452393, 0....\n","24            Net sales surged by 18.5 % to EUR167 .8 m. Tel...  ...  [0.05020830035209656, 0.03307913616299629, -0....\n","25            Nordea Group 's operating profit increased in ...  ...  [0.0497022308409214, 0.023793146014213562, -0....\n","26            Operating profit for the nine-month period inc...  ...  [0.04339126497507095, 0.024815633893013, -0.02...\n","27            Operating profit for the nine-month period inc...  ...  [0.035663120448589325, 0.03037247434258461, -0...\n","28            Operating profit for the three-month period in...  ...  [0.029575243592262268, 0.007764187641441822, -...\n","29            The Brazilian unit of Finnish security solutio...  ...  [0.047570426017045975, -0.023694489151239395, ...\n","30            The company 's net profit rose 11.4 % on the y...  ...  [0.06896018236875534, 0.046189870685338974, -0...\n","31            The Lithuanian beer market made up 14.41 milli...  ...  [0.0020184037275612354, -0.044685497879981995,...\n","32            Viking Line 's cargo revenue increased by 5.4 ...  ...  [-0.007756179664283991, -0.04868081212043762, ...\n","33            The fair value of the property portfolio doubl...  ...  [0.06604734063148499, -0.025070184841752052, 0...\n","34            10 February 2011 - Finnish media company Sanom...  ...  [0.05996786803007126, 0.03255663812160492, -0....\n","35            A Helsinki : ELIiV today reported EPS of EUR1 ...  ...  [0.051878154277801514, -0.03290269523859024, -...\n","36            Aspo Plc STOCK EXCHANGE RELEASE February 11 , ...  ...  [0.03545805439352989, -0.04956813529133797, -0...\n","37            Commission income increased by 22 % to EUR 4.4...  ...  [0.05664118379354477, 0.004533933009952307, -0...\n","38            In January , traffic , measured in revenue pas...  ...  
[-0.026962362229824066, 0.010590712539851665, ...\n","39            In January-September 2010 , Fiskars ' net prof...  ...  [0.056088510900735855, 0.0369233600795269, -0....\n","40            Net income from life insurance rose to EUR 16....  ...  [0.05793088302016258, 0.06312950700521469, -0....\n","41            Nyrstar has also agreed to supply to Talvivaar...  ...  [0.004785533994436264, 0.004442625679075718, -...\n","42            Sales for both the Department Store Division a...  ...  [-0.050088364630937576, 0.04885219410061836, 0...\n","43                   Sales have risen in other export markets .  ...  [0.058916959911584854, 0.018443405628204346, -...\n","44            Sales increased due to growing market rates an...  ...  [0.047733016312122345, 0.010620158165693283, 0...\n","45            The agreement strengthens our long-term partne...  ...  [0.06433788686990738, 0.027824176475405693, -0...\n","46            The agreement was signed with Biohit Healthcar...  ...  [0.03612205758690834, 0.038267459720373154, -0...\n","47            The company also estimates the already carried...  ...  [0.04304526373744011, 0.023360760882496834, -0...\n","48            The company 's order book stood at 1.5 bln eur...  ...  [0.036210183054208755, -0.010278576985001564, ...\n","49            The company said that paper demand increased i...  ...  [0.06558039039373398, 0.04877239838242531, -0....\n","50            The world 's second largest stainless steel ma...  ...  
[0.04267223924398422, 0.03184577450156212, -0....\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":107},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609264917602,"user_tz":-300,"elapsed":193623,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"8fe5b9aa-c87a-42d3-e00d-920e63ca6aa4"},"source":["fitted_pipe.predict('According to the most recent update there has been a major decrese in the rate of oil')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>document</th>\n","      <th>sentiment</th>\n","      <th>sentiment_confidence</th>\n","      <th>default_name_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Bitcoin is going to the moon!</td>\n","      <td>positive</td>\n","      <td>0.999994</td>\n","      <td>[0.06468033790588379, -0.040837567299604416, -...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                   document  ...                            default_name_embeddings\n","origin_index                                 ...                                                   
\n","0             Bitcoin is going to the moon!  ...  [0.06468033790588379, -0.040837567299604416, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609264917604,"user_tz":-300,"elapsed":193620,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ac9c8b1a-7fdd-4a6f-bdfd-1dbb823d9bf4"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2)                 | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005)                    | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64)                | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5)                 | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True)       | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6)               | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral')    | Info: In case the score is less than threshold, what should be the label. Default is neutral. 
| Currently set to : neutral\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512)               | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use')      | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+')    | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"])  | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0)            | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999)        | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True)  | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True)       | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([])        | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False)  | Info: whether to explode each sentence into a different row, for better parallelization. 
Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0)            | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999)        | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')  | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":753},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609264924472,"user_tz":-300,"elapsed":200484,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1dd94bc8-09c8-45db-ab81-bbd64acb8a4b"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5)  \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:100])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["              precision    recall  f1-score   support\n","\n","    negative       0.00      0.00      0.00         1\n","    positive       0.99      1.00      0.99        99\n","\n","    accuracy                           0.99       100\n","   macro avg       0.49      0.50      0.50       100\n","weighted avg       0.98      0.99      0.99       100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>document</th>\n","      <th>sentiment</th>\n","      <th>y</th>\n","      <th>sentiment_confidence</th>\n","      <th>text</th>\n","      <th>default_name_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>1</th>\n","      <td>The international electronic industry company ...</td>\n","      <td>positive</td>\n","      <td>negative</td>\n","      <td>1.000000</td>\n","      <td>The international electronic industry company ...</td>\n","      <td>[0.002136496128514409, 0.07194118946790695, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>With the new production plant the company woul...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>With the 
new production plant the company woul...</td>\n","      <td>[0.05198746547102928, 0.03577739745378494, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>According to the company 's updated strategy f...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>According to the company 's updated strategy f...</td>\n","      <td>[0.03416536748409271, 0.04053246229887009, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...</td>\n","      <td>[0.07730763405561447, -0.045694783329963684, -...</td>\n","    </tr>\n","    <tr>\n","      <th>5</th>\n","      <td>For the last quarter of 2010 , Componenta 's n...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>For the last quarter of 2010 , Componenta 's n...</td>\n","      <td>[0.05603468790650368, 0.04817350581288338, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>116</th>\n","      <td>Operating profit margin increased from 11.2 % ...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Operating profit margin increased from 11.2 % ...</td>\n","      <td>[0.01058729737997055, -0.008798183873295784, -...</td>\n","    </tr>\n","    <tr>\n","      <th>117</th>\n","      <td>Operating profit rose to EUR 3.11 mn from EUR ...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Operating profit rose to EUR 3.11 mn from EUR ...</td>\n","      <td>[0.03610285371541977, 0.04256380349397659, 
-0....</td>\n","    </tr>\n","    <tr>\n","      <th>118</th>\n","      <td>Operating profit rose to EUR 5mn from EUR 2.8 ...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Operating profit rose to EUR 5mn from EUR 2.8 ...</td>\n","      <td>[0.04815328121185303, 0.050376053899526596, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>119</th>\n","      <td>Operating profit was EUR 24.5 mn , up from EUR...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Operating profit was EUR 24.5 mn , up from EUR...</td>\n","      <td>[0.048205215483903885, 0.05145161226391792, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>120</th>\n","      <td>Ramirent 's net sales in the second quarterend...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Ramirent 's net sales in the second quarterend...</td>\n","      <td>[0.0638015866279602, 0.0272374227643013, -0.04...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>100 rows × 6 columns</p>\n","</div>"],"text/plain":["                                                       document  ...                            default_name_embeddings\n","origin_index                                                     ...                                                   \n","1             The international electronic industry company ...  ...  [0.002136496128514409, 0.07194118946790695, -0...\n","2             With the new production plant the company woul...  ...  [0.05198746547102928, 0.03577739745378494, -0....\n","3             According to the company 's updated strategy f...  ...  [0.03416536748409271, 0.04053246229887009, -0....\n","4             FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...  ...  [0.07730763405561447, -0.045694783329963684, -...\n","5             For the last quarter of 2010 , Componenta 's n...  ...  
[0.05603468790650368, 0.04817350581288338, -0....\n","...                                                         ...  ...                                                ...\n","116           Operating profit margin increased from 11.2 % ...  ...  [0.01058729737997055, -0.008798183873295784, -...\n","117           Operating profit rose to EUR 3.11 mn from EUR ...  ...  [0.03610285371541977, 0.04256380349397659, -0....\n","118           Operating profit rose to EUR 5mn from EUR 2.8 ...  ...  [0.04815328121185303, 0.050376053899526596, -0...\n","119           Operating profit was EUR 24.5 mn , up from EUR...  ...  [0.048205215483903885, 0.05145161226391792, -0...\n","120           Ramirent 's net sales in the second quarterend...  ...  [0.0638015866279602, 0.0272374227643013, -0.04...\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609264924477,"user_tz":-300,"elapsed":200483,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e259763c-470b-4d46-b3d1-28cf545f5dcd"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language <en> NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language <fi> NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language <xx> NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"IKK_Ii_gjJfF","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609266286092,"user_tz":-300,"elapsed":1562094,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"4237752f-4fbe-4235-b33d-5d7b8ba29d48"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to 
train longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(70)  \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    negative       0.88      0.87      0.88       604\n","     neutral       0.00      0.00      0.00         0\n","    positive       0.96      0.92      0.94      1363\n","\n","    accuracy                           0.91      1967\n","   macro avg       0.62      0.60      0.61      1967\n","weighted avg       0.94      0.91      0.92      1967\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. 
Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609266449598,"user_tz":-300,"elapsed":1725594,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b31b5e1e-3f09-4ab3-e97a-fb32ac87b319"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible!   \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":124},"executionInfo":{"status":"ok","timestamp":1609266465229,"user_tz":-300,"elapsed":1741220,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"5d9cc34a-693c-44d7-e50a-6e0ca5d4e024"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('According to the most recent update there has been a major decrese in the rate of oil')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      
<th>document</th>\n","      <th>sentiment</th>\n","      <th>sentiment_confidence</th>\n","      <th>en_embed_sentence_small_bert_L12_768_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Tesla plans to invest 10M into the ML sector</td>\n","      <td>positive</td>\n","      <td>0.999980</td>\n","      <td>[0.15737222135066986, 0.2598555386066437, 0.85...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                                  document  ...    en_embed_sentence_small_bert_L12_768_embeddings\n","origin_index                                                ...                                                   \n","0             Tesla plans to invest 10M into the ML sector  ...  [0.15737222135066986, 0.2598555386066437, 0.85...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609266465232,"user_tz":-300,"elapsed":1741218,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ec54f7c0-8174-4fd4-9db8-51c1d15be3eb"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')            | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])                  | Info: characters used to 
explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)                 | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False)           | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)                  | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)                      | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)            | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)        | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)       | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')                | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)                    | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)                        | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32)                                 | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False)                          | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768)                                | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128)                        | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False)                                 | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768')         | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6)                         | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral')              | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative'])      | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768')  | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]}
\ No newline at end of file
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_finanical_news.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_finanical_news.ipynb)\n","\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","## 2 class Finance News sentiment classifier training\n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu pyspark==2.4.7 > /dev/null  \n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download Financial News Sentiment dataset \n","https://www.kaggle.com/ankurzing/sentiment-analysis-for-financial-news\n","\n","This dataset contains the sentiments for financial news headlines from the perspective of a retail investor. Further details about the dataset can be found in: Malo, P., Sinha, A., Takala, P., Korhonen, P. and Wallenius, J. (2014): “Good debt or bad debt: Detecting semantic orientations in economic texts.” Journal of the American Society for Information Science and Technology."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610788018304,"user_tz":-300,"elapsed":2399,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"f494fab0-8f9c-4087-f554-31a21764a207"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/all-data.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:06:20--  http://ckl-it.de/wp-content/uploads/2021/01/all-data.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 704799 (688K) [text/csv]\n","Saving to: ‘all-data.csv’\n","\n","all-data.csv        100%[===================>] 688.28K  1.09MB/s    in 0.6s    \n","\n","2021-01-16 09:06:21 (1.09 MB/s) - ‘all-data.csv’ saved [704799/704799]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610788018314,"user_tz":-300,"elapsed":660,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e1e2496a-8df8-4e5d-db53-63d62ef1f050"},"source":["import pandas as pd\n","train_path = '/content/all-data.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must have name 'y' name be of type str\n","columns=['text','y']\n","train_df = train_df[columns]\n","train_df = train_df[~train_df[\"y\"].isin([\"neutral\"])]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>y</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>1</th>\n","      <td>The international electronic industry company ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>With the new production plant the company woul...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>According to the company 's updated strategy f...</td>\n","      
<td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>5</th>\n","      <td>For the last quarter of 2010 , Componenta 's n...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>4839</th>\n","      <td>HELSINKI Thomson Financial - Shares in Cargote...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>4840</th>\n","      <td>LONDON MarketWatch -- Share prices ended lower...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>4842</th>\n","      <td>Operating profit fell to EUR 35.4 mn from EUR ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>4843</th>\n","      <td>Net sales of the Paper segment decreased to EU...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>4844</th>\n","      <td>Sales in Finland decreased by 10.5 % in Januar...</td>\n","      <td>negative</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>1967 rows × 2 columns</p>\n","</div>"],"text/plain":["                                                   text         y\n","1     The international electronic industry company ...  negative\n","2     With the new production plant the company woul...  positive\n","3     According to the company 's updated strategy f...  positive\n","4     FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...  positive\n","5     For the last quarter of 2010 , Componenta 's n...  positive\n","...                                                 ...       ...\n","4839  HELSINKI Thomson Financial - Shares in Cargote...  negative\n","4840  LONDON MarketWatch -- Share prices ended lower...  negative\n","4842  Operating profit fell to EUR 35.4 mn from EUR ...  
negative\n","4843  Net sales of the Paper segment decreased to EU...  negative\n","4844  Sales in Finland decreased by 10.5 % in Januar...  negative\n","\n","[1967 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609264914996,"user_tz":-300,"elapsed":191025,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"6dc536e4-252e-4324-e070-cd477a79330d"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix and fit it on a dataset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates some NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    negative       0.00      0.00      0.00         1\n","    positive       0.98      1.00      0.99        49\n","\n","    accuracy                           0.98        50\n","   macro avg       0.49      0.50      0.49        50\n","weighted avg       0.96      0.98      0.97        50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>document</th>\n","      <th>sentiment</th>\n","      <th>y</th>\n","      <th>sentiment_confidence</th>\n","      <th>text</th>\n","      <th>default_name_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>1</th>\n","      <td>The international electronic industry company ...</td>\n","      <td>positive</td>\n","      <td>negative</td>\n","      <td>1.000000</td>\n","      <td>The international electronic industry company ...</td>\n","      <td>[0.002136496128514409, 0.07194118946790695, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>With the new production plant the company 
woul...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>With the new production plant the company woul...</td>\n","      <td>[0.05198746547102928, 0.03577739745378494, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>According to the company 's updated strategy f...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>According to the company 's updated strategy f...</td>\n","      <td>[0.03416536748409271, 0.04053246229887009, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...</td>\n","      <td>[0.07730763405561447, -0.045694783329963684, -...</td>\n","    </tr>\n","    <tr>\n","      <th>5</th>\n","      <td>For the last quarter of 2010 , Componenta 's n...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>For the last quarter of 2010 , Componenta 's n...</td>\n","      <td>[0.05603468790650368, 0.04817350581288338, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>6</th>\n","      <td>In the third quarter of 2010 , net sales incre...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>In the third quarter of 2010 , net sales incre...</td>\n","      <td>[0.037710510194301605, 0.037198420614004135, -...</td>\n","    </tr>\n","    <tr>\n","      <th>7</th>\n","      <td>Operating profit rose to EUR 13.1 mn from EUR ...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Operating profit rose to EUR 13.1 mn from EUR ...</td>\n","      <td>[0.04557091370224953, 0.0453636609017849, -0.0...</td>\n","    </tr>\n","    <tr>\n","      <th>8</th>\n","      
<td>Operating profit totalled EUR 21.1 mn , up fro...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Operating profit totalled EUR 21.1 mn , up fro...</td>\n","      <td>[0.05191247910261154, 0.059505216777324677, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>9</th>\n","      <td>TeliaSonera TLSN said the offer is in line wit...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>TeliaSonera TLSN said the offer is in line wit...</td>\n","      <td>[0.07441692799329758, -0.0487477071583271, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>10</th>\n","      <td>STORA ENSO , NORSKE SKOG , M-REAL , UPM-KYMMEN...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>STORA ENSO , NORSKE SKOG , M-REAL , UPM-KYMMEN...</td>\n","      <td>[0.03200741112232208, 0.03773287683725357, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>11</th>\n","      <td>A purchase agreement for 7,200 tons of gasolin...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>A purchase agreement for 7,200 tons of gasolin...</td>\n","      <td>[0.05590442568063736, 0.041032955050468445, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>12</th>\n","      <td>Finnish Talentum reports its operating profit ...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Finnish Talentum reports its operating profit ...</td>\n","      <td>[0.06596074998378754, 0.05897102504968643, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>13</th>\n","      <td>Clothing retail chain Sepp+Æl+Æ 's sales incre...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Clothing retail chain Sepp+Æl+Æ 's sales incre...</td>\n","      <td>[0.03395465016365051, 0.05171804875135422, 0.0...</td>\n","    </tr>\n","  
  <tr>\n","      <th>14</th>\n","      <td>Consolidated net sales increased 16 % to reach...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Consolidated net sales increased 16 % to reach...</td>\n","      <td>[0.060446273535490036, 0.03799470514059067, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>15</th>\n","      <td>Foundries division reports its sales increased...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Foundries division reports its sales increased...</td>\n","      <td>[0.0494563989341259, 0.05158388614654541, -0.0...</td>\n","    </tr>\n","    <tr>\n","      <th>16</th>\n","      <td>HELSINKI ( AFX ) - Shares closed higher , led ...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>HELSINKI ( AFX ) - Shares closed higher , led ...</td>\n","      <td>[0.0629865899682045, -0.045351240783929825, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>17</th>\n","      <td>Incap Contract Manufacturing Services Pvt Ltd ...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Incap Contract Manufacturing Services Pvt Ltd ...</td>\n","      <td>[0.05365738272666931, -0.055247869342565536, -...</td>\n","    </tr>\n","    <tr>\n","      <th>18</th>\n","      <td>Its board of directors will propose a dividend...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Its board of directors will propose a dividend...</td>\n","      <td>[0.0692642331123352, 0.02292279154062271, -0.0...</td>\n","    </tr>\n","    <tr>\n","      <th>19</th>\n","      <td>Lifetree was founded in 2000 , and its revenue...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Lifetree was founded in 2000 , and its revenue...</td>\n","      <td>[0.0810408890247345, 
0.039108917117118835, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>20</th>\n","      <td>( Filippova ) A trilateral agreement on invest...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>0.999998</td>\n","      <td>( Filippova ) A trilateral agreement on invest...</td>\n","      <td>[0.05172618478536606, 0.02967883087694645, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>21</th>\n","      <td>MegaFon 's subscriber base increased 16.1 % in...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>MegaFon 's subscriber base increased 16.1 % in...</td>\n","      <td>[0.03825156390666962, 0.001971189398318529, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>22</th>\n","      <td>Net income from life insurance doubled to EUR ...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Net income from life insurance doubled to EUR ...</td>\n","      <td>[0.05222763866186142, 0.05695151165127754, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>23</th>\n","      <td>Net sales increased to EUR193 .3 m from EUR179...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Net sales increased to EUR193 .3 m from EUR179...</td>\n","      <td>[0.02272764965891838, 0.016222774982452393, 0....</td>\n","    </tr>\n","    <tr>\n","      <th>24</th>\n","      <td>Net sales surged by 18.5 % to EUR167 .8 m. Tel...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Net sales surged by 18.5 % to EUR167 .8 m. 
Tel...</td>\n","      <td>[0.05020830035209656, 0.03307913616299629, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>25</th>\n","      <td>Nordea Group 's operating profit increased in ...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Nordea Group 's operating profit increased in ...</td>\n","      <td>[0.0497022308409214, 0.023793146014213562, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>26</th>\n","      <td>Operating profit for the nine-month period inc...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Operating profit for the nine-month period inc...</td>\n","      <td>[0.04339126497507095, 0.024815633893013, -0.02...</td>\n","    </tr>\n","    <tr>\n","      <th>27</th>\n","      <td>Operating profit for the nine-month period inc...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Operating profit for the nine-month period inc...</td>\n","      <td>[0.035663120448589325, 0.03037247434258461, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>28</th>\n","      <td>Operating profit for the three-month period in...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Operating profit for the three-month period in...</td>\n","      <td>[0.029575243592262268, 0.007764187641441822, -...</td>\n","    </tr>\n","    <tr>\n","      <th>29</th>\n","      <td>The Brazilian unit of Finnish security solutio...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>The Brazilian unit of Finnish security solutio...</td>\n","      <td>[0.047570426017045975, -0.023694489151239395, ...</td>\n","    </tr>\n","    <tr>\n","      <th>30</th>\n","      <td>The company 's net profit rose 11.4 % on the y...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      
<td>The company 's net profit rose 11.4 % on the y...</td>\n","      <td>[0.06896018236875534, 0.046189870685338974, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>31</th>\n","      <td>The Lithuanian beer market made up 14.41 milli...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>0.999999</td>\n","      <td>The Lithuanian beer market made up 14.41 milli...</td>\n","      <td>[0.0020184037275612354, -0.044685497879981995,...</td>\n","    </tr>\n","    <tr>\n","      <th>32</th>\n","      <td>Viking Line 's cargo revenue increased by 5.4 ...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Viking Line 's cargo revenue increased by 5.4 ...</td>\n","      <td>[-0.007756179664283991, -0.04868081212043762, ...</td>\n","    </tr>\n","    <tr>\n","      <th>33</th>\n","      <td>The fair value of the property portfolio doubl...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>The fair value of the property portfolio doubl...</td>\n","      <td>[0.06604734063148499, -0.025070184841752052, 0...</td>\n","    </tr>\n","    <tr>\n","      <th>34</th>\n","      <td>10 February 2011 - Finnish media company Sanom...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>10 February 2011 - Finnish media company Sanom...</td>\n","      <td>[0.05996786803007126, 0.03255663812160492, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>35</th>\n","      <td>A Helsinki : ELIiV today reported EPS of EUR1 ...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>0.999999</td>\n","      <td>A Helsinki : ELIiV today reported EPS of EUR1 ...</td>\n","      <td>[0.051878154277801514, -0.03290269523859024, -...</td>\n","    </tr>\n","    <tr>\n","      <th>36</th>\n","      <td>Aspo Plc STOCK EXCHANGE RELEASE February 11 , ...</td>\n","      <td>positive</td>\n","      
<td>positive</td>\n","      <td>1.000000</td>\n","      <td>Aspo Plc STOCK EXCHANGE RELEASE February 11 , ...</td>\n","      <td>[0.03545805439352989, -0.04956813529133797, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>37</th>\n","      <td>Commission income increased by 22 % to EUR 4.4...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Commission income increased by 22 % to EUR 4.4...</td>\n","      <td>[0.05664118379354477, 0.004533933009952307, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>38</th>\n","      <td>In January , traffic , measured in revenue pas...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>In January , traffic , measured in revenue pas...</td>\n","      <td>[-0.026962362229824066, 0.010590712539851665, ...</td>\n","    </tr>\n","    <tr>\n","      <th>39</th>\n","      <td>In January-September 2010 , Fiskars ' net prof...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>In January-September 2010 , Fiskars ' net prof...</td>\n","      <td>[0.056088510900735855, 0.0369233600795269, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>40</th>\n","      <td>Net income from life insurance rose to EUR 16....</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Net income from life insurance rose to EUR 16....</td>\n","      <td>[0.05793088302016258, 0.06312950700521469, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>41</th>\n","      <td>Nyrstar has also agreed to supply to Talvivaar...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Nyrstar has also agreed to supply to Talvivaar...</td>\n","      <td>[0.004785533994436264, 0.004442625679075718, -...</td>\n","    </tr>\n","    <tr>\n","      <th>42</th>\n","      <td>Sales for both the Department Store Division 
a...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Sales for both the Department Store Division a...</td>\n","      <td>[-0.050088364630937576, 0.04885219410061836, 0...</td>\n","    </tr>\n","    <tr>\n","      <th>43</th>\n","      <td>Sales have risen in other export markets .</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Sales have risen in other export markets .</td>\n","      <td>[0.058916959911584854, 0.018443405628204346, -...</td>\n","    </tr>\n","    <tr>\n","      <th>44</th>\n","      <td>Sales increased due to growing market rates an...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Sales increased due to growing market rates an...</td>\n","      <td>[0.047733016312122345, 0.010620158165693283, 0...</td>\n","    </tr>\n","    <tr>\n","      <th>45</th>\n","      <td>The agreement strengthens our long-term partne...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>The agreement strengthens our long-term partne...</td>\n","      <td>[0.06433788686990738, 0.027824176475405693, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>46</th>\n","      <td>The agreement was signed with Biohit Healthcar...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>The agreement was signed with Biohit Healthcar...</td>\n","      <td>[0.03612205758690834, 0.038267459720373154, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>47</th>\n","      <td>The company also estimates the already carried...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>The company also estimates the already carried...</td>\n","      <td>[0.04304526373744011, 0.023360760882496834, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>The company 's 
order book stood at 1.5 bln eur...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>The company 's order book stood at 1.5 bln eur...</td>\n","      <td>[0.036210183054208755, -0.010278576985001564, ...</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>The company said that paper demand increased i...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>The company said that paper demand increased i...</td>\n","      <td>[0.06558039039373398, 0.04877239838242531, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>50</th>\n","      <td>The world 's second largest stainless steel ma...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>The world 's second largest stainless steel ma...</td>\n","      <td>[0.04267223924398422, 0.03184577450156212, -0....</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                                       document  ...                            default_name_embeddings\n","origin_index                                                     ...                                                   \n","1             The international electronic industry company ...  ...  [0.002136496128514409, 0.07194118946790695, -0...\n","2             With the new production plant the company woul...  ...  [0.05198746547102928, 0.03577739745378494, -0....\n","3             According to the company 's updated strategy f...  ...  [0.03416536748409271, 0.04053246229887009, -0....\n","4             FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...  ...  [0.07730763405561447, -0.045694783329963684, -...\n","5             For the last quarter of 2010 , Componenta 's n...  ...  [0.05603468790650368, 0.04817350581288338, -0....\n","6             In the third quarter of 2010 , net sales incre...  ...  
[0.037710510194301605, 0.037198420614004135, -...\n","7             Operating profit rose to EUR 13.1 mn from EUR ...  ...  [0.04557091370224953, 0.0453636609017849, -0.0...\n","8             Operating profit totalled EUR 21.1 mn , up fro...  ...  [0.05191247910261154, 0.059505216777324677, -0...\n","9             TeliaSonera TLSN said the offer is in line wit...  ...  [0.07441692799329758, -0.0487477071583271, -0....\n","10            STORA ENSO , NORSKE SKOG , M-REAL , UPM-KYMMEN...  ...  [0.03200741112232208, 0.03773287683725357, -0....\n","11            A purchase agreement for 7,200 tons of gasolin...  ...  [0.05590442568063736, 0.041032955050468445, -0...\n","12            Finnish Talentum reports its operating profit ...  ...  [0.06596074998378754, 0.05897102504968643, -0....\n","13            Clothing retail chain Sepp+Æl+Æ 's sales incre...  ...  [0.03395465016365051, 0.05171804875135422, 0.0...\n","14            Consolidated net sales increased 16 % to reach...  ...  [0.060446273535490036, 0.03799470514059067, -0...\n","15            Foundries division reports its sales increased...  ...  [0.0494563989341259, 0.05158388614654541, -0.0...\n","16            HELSINKI ( AFX ) - Shares closed higher , led ...  ...  [0.0629865899682045, -0.045351240783929825, -0...\n","17            Incap Contract Manufacturing Services Pvt Ltd ...  ...  [0.05365738272666931, -0.055247869342565536, -...\n","18            Its board of directors will propose a dividend...  ...  [0.0692642331123352, 0.02292279154062271, -0.0...\n","19            Lifetree was founded in 2000 , and its revenue...  ...  [0.0810408890247345, 0.039108917117118835, -0....\n","20            ( Filippova ) A trilateral agreement on invest...  ...  [0.05172618478536606, 0.02967883087694645, -0....\n","21            MegaFon 's subscriber base increased 16.1 % in...  ...  [0.03825156390666962, 0.001971189398318529, -0...\n","22            Net income from life insurance doubled to EUR ...  ...  
[0.05222763866186142, 0.05695151165127754, -0....\n","23            Net sales increased to EUR193 .3 m from EUR179...  ...  [0.02272764965891838, 0.016222774982452393, 0....\n","24            Net sales surged by 18.5 % to EUR167 .8 m. Tel...  ...  [0.05020830035209656, 0.03307913616299629, -0....\n","25            Nordea Group 's operating profit increased in ...  ...  [0.0497022308409214, 0.023793146014213562, -0....\n","26            Operating profit for the nine-month period inc...  ...  [0.04339126497507095, 0.024815633893013, -0.02...\n","27            Operating profit for the nine-month period inc...  ...  [0.035663120448589325, 0.03037247434258461, -0...\n","28            Operating profit for the three-month period in...  ...  [0.029575243592262268, 0.007764187641441822, -...\n","29            The Brazilian unit of Finnish security solutio...  ...  [0.047570426017045975, -0.023694489151239395, ...\n","30            The company 's net profit rose 11.4 % on the y...  ...  [0.06896018236875534, 0.046189870685338974, -0...\n","31            The Lithuanian beer market made up 14.41 milli...  ...  [0.0020184037275612354, -0.044685497879981995,...\n","32            Viking Line 's cargo revenue increased by 5.4 ...  ...  [-0.007756179664283991, -0.04868081212043762, ...\n","33            The fair value of the property portfolio doubl...  ...  [0.06604734063148499, -0.025070184841752052, 0...\n","34            10 February 2011 - Finnish media company Sanom...  ...  [0.05996786803007126, 0.03255663812160492, -0....\n","35            A Helsinki : ELIiV today reported EPS of EUR1 ...  ...  [0.051878154277801514, -0.03290269523859024, -...\n","36            Aspo Plc STOCK EXCHANGE RELEASE February 11 , ...  ...  [0.03545805439352989, -0.04956813529133797, -0...\n","37            Commission income increased by 22 % to EUR 4.4...  ...  [0.05664118379354477, 0.004533933009952307, -0...\n","38            In January , traffic , measured in revenue pas...  ...  
[-0.026962362229824066, 0.010590712539851665, ...\n","39            In January-September 2010 , Fiskars ' net prof...  ...  [0.056088510900735855, 0.0369233600795269, -0....\n","40            Net income from life insurance rose to EUR 16....  ...  [0.05793088302016258, 0.06312950700521469, -0....\n","41            Nyrstar has also agreed to supply to Talvivaar...  ...  [0.004785533994436264, 0.004442625679075718, -...\n","42            Sales for both the Department Store Division a...  ...  [-0.050088364630937576, 0.04885219410061836, 0...\n","43                   Sales have risen in other export markets .  ...  [0.058916959911584854, 0.018443405628204346, -...\n","44            Sales increased due to growing market rates an...  ...  [0.047733016312122345, 0.010620158165693283, 0...\n","45            The agreement strengthens our long-term partne...  ...  [0.06433788686990738, 0.027824176475405693, -0...\n","46            The agreement was signed with Biohit Healthcar...  ...  [0.03612205758690834, 0.038267459720373154, -0...\n","47            The company also estimates the already carried...  ...  [0.04304526373744011, 0.023360760882496834, -0...\n","48            The company 's order book stood at 1.5 bln eur...  ...  [0.036210183054208755, -0.010278576985001564, ...\n","49            The company said that paper demand increased i...  ...  [0.06558039039373398, 0.04877239838242531, -0....\n","50            The world 's second largest stainless steel ma...  ...  
[0.04267223924398422, 0.03184577450156212, -0....\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":107},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609264917602,"user_tz":-300,"elapsed":193623,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"8fe5b9aa-c87a-42d3-e00d-920e63ca6aa4"},"source":["fitted_pipe.predict('According to the most recent update there has been a major decrese in the rate of oil')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>document</th>\n","      <th>sentiment</th>\n","      <th>sentiment_confidence</th>\n","      <th>default_name_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Bitcoin is going to the moon!</td>\n","      <td>positive</td>\n","      <td>0.999994</td>\n","      <td>[0.06468033790588379, -0.040837567299604416, -...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                   document  ...                            default_name_embeddings\n","origin_index                                 ...                                                   
\n","0             Bitcoin is going to the moon!  ...  [0.06468033790588379, -0.040837567299604416, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609264917604,"user_tz":-300,"elapsed":193620,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ac9c8b1a-7fdd-4a6f-bdfd-1dbb823d9bf4"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2)                 | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005)                    | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64)                | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5)                 | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True)       | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6)               | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral')    | Info: In case the score is less than threshold, what should be the label. Default is neutral. 
| Currently set to : neutral\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512)               | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use')      | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+')    | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"])  | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0)            | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999)        | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True)  | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True)       | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([])        | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False)  | Info: whether to explode each sentence into a different row, for better parallelization. 
Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0)            | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999)        | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')  | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":753},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609264924472,"user_tz":-300,"elapsed":200484,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1dd94bc8-09c8-45db-ab81-bbd64acb8a4b"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5)  \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:100])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["              precision    recall  f1-score   support\n","\n","    negative       0.00      0.00      0.00         1\n","    positive       0.99      1.00      0.99        99\n","\n","    accuracy                           0.99       100\n","   macro avg       0.49      0.50      0.50       100\n","weighted avg       0.98      0.99      0.99       100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>document</th>\n","      <th>sentiment</th>\n","      <th>y</th>\n","      <th>sentiment_confidence</th>\n","      <th>text</th>\n","      <th>default_name_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>1</th>\n","      <td>The international electronic industry company ...</td>\n","      <td>positive</td>\n","      <td>negative</td>\n","      <td>1.000000</td>\n","      <td>The international electronic industry company ...</td>\n","      <td>[0.002136496128514409, 0.07194118946790695, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>With the new production plant the company woul...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>With the 
new production plant the company woul...</td>\n","      <td>[0.05198746547102928, 0.03577739745378494, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>According to the company 's updated strategy f...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>According to the company 's updated strategy f...</td>\n","      <td>[0.03416536748409271, 0.04053246229887009, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...</td>\n","      <td>[0.07730763405561447, -0.045694783329963684, -...</td>\n","    </tr>\n","    <tr>\n","      <th>5</th>\n","      <td>For the last quarter of 2010 , Componenta 's n...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>For the last quarter of 2010 , Componenta 's n...</td>\n","      <td>[0.05603468790650368, 0.04817350581288338, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>116</th>\n","      <td>Operating profit margin increased from 11.2 % ...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Operating profit margin increased from 11.2 % ...</td>\n","      <td>[0.01058729737997055, -0.008798183873295784, -...</td>\n","    </tr>\n","    <tr>\n","      <th>117</th>\n","      <td>Operating profit rose to EUR 3.11 mn from EUR ...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Operating profit rose to EUR 3.11 mn from EUR ...</td>\n","      <td>[0.03610285371541977, 0.04256380349397659, 
-0....</td>\n","    </tr>\n","    <tr>\n","      <th>118</th>\n","      <td>Operating profit rose to EUR 5mn from EUR 2.8 ...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Operating profit rose to EUR 5mn from EUR 2.8 ...</td>\n","      <td>[0.04815328121185303, 0.050376053899526596, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>119</th>\n","      <td>Operating profit was EUR 24.5 mn , up from EUR...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Operating profit was EUR 24.5 mn , up from EUR...</td>\n","      <td>[0.048205215483903885, 0.05145161226391792, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>120</th>\n","      <td>Ramirent 's net sales in the second quarterend...</td>\n","      <td>positive</td>\n","      <td>positive</td>\n","      <td>1.000000</td>\n","      <td>Ramirent 's net sales in the second quarterend...</td>\n","      <td>[0.0638015866279602, 0.0272374227643013, -0.04...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>100 rows × 6 columns</p>\n","</div>"],"text/plain":["                                                       document  ...                            default_name_embeddings\n","origin_index                                                     ...                                                   \n","1             The international electronic industry company ...  ...  [0.002136496128514409, 0.07194118946790695, -0...\n","2             With the new production plant the company woul...  ...  [0.05198746547102928, 0.03577739745378494, -0....\n","3             According to the company 's updated strategy f...  ...  [0.03416536748409271, 0.04053246229887009, -0....\n","4             FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...  ...  [0.07730763405561447, -0.045694783329963684, -...\n","5             For the last quarter of 2010 , Componenta 's n...  ...  
[0.05603468790650368, 0.04817350581288338, -0....\n","...                                                         ...  ...                                                ...\n","116           Operating profit margin increased from 11.2 % ...  ...  [0.01058729737997055, -0.008798183873295784, -...\n","117           Operating profit rose to EUR 3.11 mn from EUR ...  ...  [0.03610285371541977, 0.04256380349397659, -0....\n","118           Operating profit rose to EUR 5mn from EUR 2.8 ...  ...  [0.04815328121185303, 0.050376053899526596, -0...\n","119           Operating profit was EUR 24.5 mn , up from EUR...  ...  [0.048205215483903885, 0.05145161226391792, -0...\n","120           Ramirent 's net sales in the second quarterend...  ...  [0.0638015866279602, 0.0272374227643013, -0.04...\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609264924477,"user_tz":-300,"elapsed":200483,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e259763c-470b-4d46-b3d1-28cf545f5dcd"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language <en> NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language <fi> NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language <xx> NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"IKK_Ii_gjJfF","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609266286092,"user_tz":-300,"elapsed":1562094,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"4237752f-4fbe-4235-b33d-5d7b8ba29d48"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to 
train longer and use a smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(70)  \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    negative       0.88      0.87      0.88       604\n","     neutral       0.00      0.00      0.00         0\n","    positive       0.96      0.92      0.94      1363\n","\n","    accuracy                           0.91      1967\n","   macro avg       0.62      0.60      0.61      1967\n","weighted avg       0.94      0.91      0.92      1967\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. 
Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609266449598,"user_tz":-300,"elapsed":1725594,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b31b5e1e-3f09-4ab3-e97a-fb32ac87b319"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offline NLU usage possible!   \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":124},"executionInfo":{"status":"ok","timestamp":1609266465229,"user_tz":-300,"elapsed":1741220,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"5d9cc34a-693c-44d7-e50a-6e0ca5d4e024"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('According to the most recent update there has been a major decrese in the rate of oil')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      
<th>document</th>\n","      <th>sentiment</th>\n","      <th>sentiment_confidence</th>\n","      <th>en_embed_sentence_small_bert_L12_768_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Tesla plans to invest 10M into the ML sector</td>\n","      <td>positive</td>\n","      <td>0.999980</td>\n","      <td>[0.15737222135066986, 0.2598555386066437, 0.85...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                                  document  ...    en_embed_sentence_small_bert_L12_768_embeddings\n","origin_index                                                ...                                                   \n","0             Tesla plans to invest 10M into the ML sector  ...  [0.15737222135066986, 0.2598555386066437, 0.85...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609266465232,"user_tz":-300,"elapsed":1741218,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ec54f7c0-8174-4fd4-9db8-51c1d15be3eb"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')            | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])                  | Info: characters used to 
explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)                 | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False)           | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)                  | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)                      | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)            | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)        | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)       | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')                | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)                    | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)                        | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32)                                 | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False)                          | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768)                                | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128)                        | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False)                                 | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768')         | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6)                         | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral')              | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative'])      | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768')  | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]}
\ No newline at end of file
diff --git a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_reddit.ipynb b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_reddit.ipynb
index bcb938e7..f21ed5a0 100644
--- a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_reddit.ipynb
+++ b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_reddit.ipynb
@@ -1 +1 @@
-{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_reddit.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_reddit.ipynb)\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator)  from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install  pyspark==2.4.7 \n","! pip install nlu > /dev/null    \n","\n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download Reddit  Sentiment dataset \n","https://www.kaggle.com/cosmos98/twitter-and-reddit-sentimental-analysis-dataset\n","#Context\n","\n","This is was a Dataset Created as a part of the university Project On Sentimental Analysis On Multi-Source Social Media Platforms using PySpark."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610788117464,"user_tz":-300,"elapsed":1827,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"69719e54-e5be-4a26-bcde-b913531cbcd9"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/Reddit_Data.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:08:00--  http://ckl-it.de/wp-content/uploads/2021/01/Reddit_Data.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 153265 (150K) [text/csv]\n","Saving to: ‘Reddit_Data.csv’\n","\n","Reddit_Data.csv     100%[===================>] 149.67K   402KB/s    in 0.4s    \n","\n","2021-01-16 09:08:01 (402 KB/s) - ‘Reddit_Data.csv’ saved [153265/153265]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610788135165,"user_tz":-300,"elapsed":1025,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"cdd2f3e1-3f96-4a5b-9291-34bce078fbf0"},"source":["import pandas as pd\n","train_path = '/content/Reddit_Data.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","columns=['text','y']\n","train_df = train_df[columns]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>y</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>its true they had cut the power what douchebag...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>fuck giroud better finishing like this month</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>looks shit now but still proud made</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>pelor the burning hate the best evil god</td>\n","      
<td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>can ask what you with something this powerful</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>595</th>\n","      <td>bangali desh bechne main sabse aage</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>596</th>\n","      <td>national media channels were gaged not cover t...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>597</th>\n","      <td>been following these threads from the beginni...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>598</th>\n","      <td>pretty sure this sarcasm satire the news 1500...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>599</th>\n","      <td>much would love for namo our next hard imagin...</td>\n","      <td>positive</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>600 rows × 2 columns</p>\n","</div>"],"text/plain":["                                                  text         y\n","0    its true they had cut the power what douchebag...  positive\n","1        fuck giroud better finishing like this month   positive\n","2                 looks shit now but still proud made   positive\n","3            pelor the burning hate the best evil god   negative\n","4       can ask what you with something this powerful   positive\n","..                                                 ...       ...\n","595               bangali desh bechne main sabse aage   positive\n","596  national media channels were gaged not cover t...  positive\n","597   been following these threads from the beginni...  negative\n","598   pretty sure this sarcasm satire the news 1500...  positive\n","599   much would love for namo our next hard imagin...  
positive\n","\n","[600 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":2}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609522047844,"user_tz":-300,"elapsed":222057,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"83383a78-d68b-43a0-a253-318696580942"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix  and fit it on a datset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    negative       0.67      1.00      0.80        24\n","     neutral       0.00      0.00      0.00         0\n","    positive       1.00      0.23      0.38        26\n","\n","    accuracy                           0.60        50\n","   macro avg       0.56      0.41      0.39        50\n","weighted avg       0.84      0.60      0.58        50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>document</th>\n","      <th>y</th>\n","      <th>text</th>\n","      <th>sentiment</th>\n","      <th>default_name_embeddings</th>\n","      <th>sentiment_confidence</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>its true they had cut the power what douchebag...</td>\n","      <td>positive</td>\n","      <td>its true they had cut the power what douchebag...</td>\n","      <td>negative</td>\n","      <td>[0.033111296594142914, 0.053994592279195786, -...</td>\n","      <td>0.632922</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      
<td>fuck giroud better finishing like this month</td>\n","      <td>positive</td>\n","      <td>fuck giroud better finishing like this month</td>\n","      <td>neutral</td>\n","      <td>[0.0678204670548439, 0.01411951333284378, -0.0...</td>\n","      <td>0.558096</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>looks shit now but still proud made</td>\n","      <td>positive</td>\n","      <td>looks shit now but still proud made</td>\n","      <td>neutral</td>\n","      <td>[0.03247416764497757, -0.09844464808702469, -0...</td>\n","      <td>0.594104</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>pelor the burning hate the best evil god</td>\n","      <td>negative</td>\n","      <td>pelor the burning hate the best evil god</td>\n","      <td>negative</td>\n","      <td>[0.04032062739133835, 0.07666623592376709, -0....</td>\n","      <td>0.699286</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>can ask what you with something this powerful</td>\n","      <td>positive</td>\n","      <td>can ask what you with something this powerful</td>\n","      <td>positive</td>\n","      <td>[0.015518003143370152, -0.05116305500268936, -...</td>\n","      <td>0.615222</td>\n","    </tr>\n","    <tr>\n","      <th>5</th>\n","      <td>aapâ shazia ilmi from puram constituency lag...</td>\n","      <td>negative</td>\n","      <td>aapâ shazia ilmi from puram constituency lag...</td>\n","      <td>negative</td>\n","      <td>[0.02478150464594364, -0.06508146971464157, -0...</td>\n","      <td>0.751383</td>\n","    </tr>\n","    <tr>\n","      <th>6</th>\n","      <td>fuck yeah</td>\n","      <td>negative</td>\n","      <td>fuck yeah</td>\n","      <td>negative</td>\n","      <td>[0.04602408409118652, -0.025047995150089264, -...</td>\n","      <td>0.663185</td>\n","    </tr>\n","    <tr>\n","      <th>7</th>\n","      <td>honestly really surprised alice ranked that lo...</td>\n","      <td>positive</td>\n","      <td>honestly 
really surprised alice ranked that lo...</td>\n","      <td>positive</td>\n","      <td>[-0.035716041922569275, -0.04127982258796692, ...</td>\n","      <td>0.605483</td>\n","    </tr>\n","    <tr>\n","      <th>8</th>\n","      <td>didn care about politics before now hate</td>\n","      <td>negative</td>\n","      <td>didn care about politics before now hate</td>\n","      <td>negative</td>\n","      <td>[-0.006816444452852011, 0.06221264973282814, -...</td>\n","      <td>0.701191</td>\n","    </tr>\n","    <tr>\n","      <th>9</th>\n","      <td>hard nips and goosebumps</td>\n","      <td>negative</td>\n","      <td>hard nips and goosebumps</td>\n","      <td>negative</td>\n","      <td>[-0.02919699251651764, -0.030449824407696724, ...</td>\n","      <td>0.629745</td>\n","    </tr>\n","    <tr>\n","      <th>10</th>\n","      <td>varadabhai ndtv trying too well dilute bjp tre...</td>\n","      <td>negative</td>\n","      <td>varadabhai ndtv trying too well dilute bjp tre...</td>\n","      <td>negative</td>\n","      <td>[0.04727796092629433, -0.06792476028203964, -0...</td>\n","      <td>0.756238</td>\n","    </tr>\n","    <tr>\n","      <th>11</th>\n","      <td>old man has lost his mind</td>\n","      <td>positive</td>\n","      <td>old man has lost his mind</td>\n","      <td>neutral</td>\n","      <td>[0.039657335728406906, -0.04277808964252472, -...</td>\n","      <td>0.502476</td>\n","    </tr>\n","    <tr>\n","      <th>12</th>\n","      <td>why this being downvoted you might ask both mo...</td>\n","      <td>negative</td>\n","      <td>why this being downvoted you might ask both mo...</td>\n","      <td>negative</td>\n","      <td>[0.06581216305494308, -0.06079106032848358, -0...</td>\n","      <td>0.710366</td>\n","    </tr>\n","    <tr>\n","      <th>13</th>\n","      <td>hasnt changed all apolitical before simply don...</td>\n","      <td>positive</td>\n","      <td>hasnt changed all apolitical before simply do...</td>\n","      <td>negative</td>\n","  
    <td>[0.03509754315018654, -0.004639611579477787, -...</td>\n","      <td>0.603606</td>\n","    </tr>\n","    <tr>\n","      <th>14</th>\n","      <td>for one campaign pretty much just snatched the...</td>\n","      <td>negative</td>\n","      <td>for one campaign pretty much just snatched the...</td>\n","      <td>negative</td>\n","      <td>[0.017386479303240776, 0.0443551279604435, -0....</td>\n","      <td>0.631376</td>\n","    </tr>\n","    <tr>\n","      <th>15</th>\n","      <td>vajpayee managed forge much broader coalition ...</td>\n","      <td>positive</td>\n","      <td>vajpayee managed forge much broader coalition ...</td>\n","      <td>negative</td>\n","      <td>[0.0372871570289135, -0.051079731434583664, -0...</td>\n","      <td>0.685135</td>\n","    </tr>\n","    <tr>\n","      <th>16</th>\n","      <td>lol this only proves how desperate they are ge...</td>\n","      <td>positive</td>\n","      <td>lol this only proves how desperate they are ge...</td>\n","      <td>negative</td>\n","      <td>[0.05233633145689964, -0.03147873282432556, 0....</td>\n","      <td>0.624959</td>\n","    </tr>\n","    <tr>\n","      <th>17</th>\n","      <td>dont hate aap but your questions are example w...</td>\n","      <td>negative</td>\n","      <td>dont hate aap but your questions are example ...</td>\n","      <td>negative</td>\n","      <td>[0.026356497779488564, -0.04044198617339134, -...</td>\n","      <td>0.769971</td>\n","    </tr>\n","    <tr>\n","      <th>18</th>\n","      <td>what were the other policies you discussed not...</td>\n","      <td>negative</td>\n","      <td>what were the other policies you discussed not...</td>\n","      <td>negative</td>\n","      <td>[-0.07521010935306549, 0.008543566800653934, 0...</td>\n","      <td>0.669384</td>\n","    </tr>\n","    <tr>\n","      <th>19</th>\n","      <td>wow lots favorites this bracket haqua tsukushi...</td>\n","      <td>positive</td>\n","      <td>wow lots favorites this bracket haqua 
tsukushi...</td>\n","      <td>neutral</td>\n","      <td>[-0.0693160742521286, -0.015458519570529461, -...</td>\n","      <td>0.593471</td>\n","    </tr>\n","    <tr>\n","      <th>20</th>\n","      <td>sorry know this isn what you asked just ventin...</td>\n","      <td>negative</td>\n","      <td>sorry know this isn what you asked just ventin...</td>\n","      <td>negative</td>\n","      <td>[0.016777772456407547, -0.05478338897228241, -...</td>\n","      <td>0.745406</td>\n","    </tr>\n","    <tr>\n","      <th>21</th>\n","      <td>coming out strongly against gujarat chief mini...</td>\n","      <td>positive</td>\n","      <td>coming out strongly against gujarat chief min...</td>\n","      <td>negative</td>\n","      <td>[0.06856723129749298, -0.019821858033537865, -...</td>\n","      <td>0.694449</td>\n","    </tr>\n","    <tr>\n","      <th>22</th>\n","      <td>there one tool bjp can use their manifesto whi...</td>\n","      <td>positive</td>\n","      <td>there one tool bjp can use their manifesto whi...</td>\n","      <td>negative</td>\n","      <td>[0.057847339659929276, -0.05365725979208946, -...</td>\n","      <td>0.623127</td>\n","    </tr>\n","    <tr>\n","      <th>23</th>\n","      <td>jakiro spotted the middle top maybe</td>\n","      <td>positive</td>\n","      <td>jakiro spotted the middle top maybe</td>\n","      <td>neutral</td>\n","      <td>[-0.011690962128341198, -0.024473998695611954,...</td>\n","      <td>0.575394</td>\n","    </tr>\n","    <tr>\n","      <th>24</th>\n","      <td>family mormon have never tried explain them th...</td>\n","      <td>positive</td>\n","      <td>family mormon have never tried explain them t...</td>\n","      <td>positive</td>\n","      <td>[0.03987010195851326, -0.0009543427731841803, ...</td>\n","      <td>0.606252</td>\n","    </tr>\n","    <tr>\n","      <th>25</th>\n","      <td>with these results would have grudgingly accep...</td>\n","      <td>negative</td>\n","      <td>with these results would have 
grudgingly accep...</td>\n","      <td>negative</td>\n","      <td>[0.034668292850255966, -0.05392604321241379, -...</td>\n","      <td>0.736970</td>\n","    </tr>\n","    <tr>\n","      <th>26</th>\n","      <td>tea partier expresses support for namo after e...</td>\n","      <td>negative</td>\n","      <td>tea partier expresses support for namo after ...</td>\n","      <td>negative</td>\n","      <td>[0.032365716993808746, -0.056087080389261246, ...</td>\n","      <td>0.760564</td>\n","    </tr>\n","    <tr>\n","      <th>27</th>\n","      <td>politically would stupid move take stand right...</td>\n","      <td>negative</td>\n","      <td>politically would stupid move take stand right...</td>\n","      <td>negative</td>\n","      <td>[-0.00040777752292342484, -0.01262842211872339...</td>\n","      <td>0.674769</td>\n","    </tr>\n","    <tr>\n","      <th>28</th>\n","      <td>wtf why</td>\n","      <td>negative</td>\n","      <td>wtf why</td>\n","      <td>negative</td>\n","      <td>[0.025807170197367668, -0.07080958038568497, -...</td>\n","      <td>0.635538</td>\n","    </tr>\n","    <tr>\n","      <th>29</th>\n","      <td>have actually seen lot users views change duri...</td>\n","      <td>positive</td>\n","      <td>have actually seen lot users views change dur...</td>\n","      <td>negative</td>\n","      <td>[-0.009333955124020576, 0.01388698909431696, -...</td>\n","      <td>0.662819</td>\n","    </tr>\n","    <tr>\n","      <th>30</th>\n","      <td>truth told there not insignificant percentage ...</td>\n","      <td>positive</td>\n","      <td>truth told there not insignificant percentage ...</td>\n","      <td>negative</td>\n","      <td>[0.03927519917488098, -0.05597652122378349, -0...</td>\n","      <td>0.713786</td>\n","    </tr>\n","    <tr>\n","      <th>31</th>\n","      <td>was anti bjp and neutral cong became anti bjp ...</td>\n","      <td>positive</td>\n","      <td>was anti bjp and neutral cong became anti bjp ...</td>\n","      
<td>negative</td>\n","      <td>[0.03805134445428848, -0.030298737809062004, -...</td>\n","      <td>0.732909</td>\n","    </tr>\n","    <tr>\n","      <th>32</th>\n","      <td>most religions have dogmatic orthodox well eso...</td>\n","      <td>positive</td>\n","      <td>most religions have dogmatic orthodox well eso...</td>\n","      <td>positive</td>\n","      <td>[0.03939439728856087, -0.02040349319577217, -0...</td>\n","      <td>0.625969</td>\n","    </tr>\n","    <tr>\n","      <th>33</th>\n","      <td>laureatte sen said christian schools are perfe...</td>\n","      <td>positive</td>\n","      <td>laureatte sen said christian schools are perfe...</td>\n","      <td>neutral</td>\n","      <td>[0.05267934128642082, 0.05836360529065132, 0.0...</td>\n","      <td>0.510249</td>\n","    </tr>\n","    <tr>\n","      <th>34</th>\n","      <td>need stop watching the garbage that you watch ...</td>\n","      <td>positive</td>\n","      <td>need stop watching the garbage that you watch...</td>\n","      <td>neutral</td>\n","      <td>[-0.012382612563669682, 0.01988200470805168, 0...</td>\n","      <td>0.552975</td>\n","    </tr>\n","    <tr>\n","      <th>35</th>\n","      <td>gandhi mandela hitler mao plato chandragupt ma...</td>\n","      <td>negative</td>\n","      <td>gandhi mandela hitler mao plato chandragupt ma...</td>\n","      <td>negative</td>\n","      <td>[0.027552243322134018, 0.013075066730380058, 0...</td>\n","      <td>0.719779</td>\n","    </tr>\n","    <tr>\n","      <th>36</th>\n","      <td>hate aap for the other thread points such the ...</td>\n","      <td>negative</td>\n","      <td>hate aap for the other thread points such the...</td>\n","      <td>negative</td>\n","      <td>[0.01461736112833023, -0.038017574697732925, -...</td>\n","      <td>0.756800</td>\n","    </tr>\n","    <tr>\n","      <th>37</th>\n","      <td>absolutely agree with you subsidies the worst ...</td>\n","      <td>negative</td>\n","      <td>absolutely agree with you 
subsidies the worst ...</td>\n","      <td>negative</td>\n","      <td>[0.010974399745464325, 0.0033110962249338627, ...</td>\n","      <td>0.655372</td>\n","    </tr>\n","    <tr>\n","      <th>38</th>\n","      <td>are you corrupt mind have you benefited throug...</td>\n","      <td>negative</td>\n","      <td>are you corrupt mind have you benefited throu...</td>\n","      <td>negative</td>\n","      <td>[0.03834373503923416, -0.06521473079919815, -0...</td>\n","      <td>0.752354</td>\n","    </tr>\n","    <tr>\n","      <th>39</th>\n","      <td>congress needs bogeyman modi without the bad g...</td>\n","      <td>positive</td>\n","      <td>congress needs bogeyman modi without the bad g...</td>\n","      <td>negative</td>\n","      <td>[0.03138439729809761, -0.06221967190504074, -0...</td>\n","      <td>0.703794</td>\n","    </tr>\n","    <tr>\n","      <th>40</th>\n","      <td>protip don type uppercase text all caps harder...</td>\n","      <td>negative</td>\n","      <td>protip don type uppercase text all caps harder...</td>\n","      <td>negative</td>\n","      <td>[0.044019922614097595, 0.025341013446450233, 0...</td>\n","      <td>0.673459</td>\n","    </tr>\n","    <tr>\n","      <th>41</th>\n","      <td>brother trog very wrathful indeed but his will...</td>\n","      <td>positive</td>\n","      <td>brother trog very wrathful indeed but his wil...</td>\n","      <td>neutral</td>\n","      <td>[-0.024625714868307114, 0.06193268671631813, 0...</td>\n","      <td>0.537965</td>\n","    </tr>\n","    <tr>\n","      <th>42</th>\n","      <td>start off saying that the craftsmanship this p...</td>\n","      <td>positive</td>\n","      <td>start off saying that the craftsmanship this ...</td>\n","      <td>positive</td>\n","      <td>[0.05780623108148575, -0.06291749328374863, -0...</td>\n","      <td>0.723931</td>\n","    </tr>\n","    <tr>\n","      <th>43</th>\n","      <td>have made request unban namoarmy hell moron ho...</td>\n","      
<td>negative</td>\n","      <td>have made request unban namoarmy hell moron h...</td>\n","      <td>negative</td>\n","      <td>[0.015555822290480137, -0.012748800218105316, ...</td>\n","      <td>0.718607</td>\n","    </tr>\n","    <tr>\n","      <th>44</th>\n","      <td>child modi worked his fatherâ tea shop and y...</td>\n","      <td>negative</td>\n","      <td>child modi worked his fatherâ tea shop and ...</td>\n","      <td>negative</td>\n","      <td>[0.05774841830134392, -0.059567004442214966, -...</td>\n","      <td>0.743616</td>\n","    </tr>\n","    <tr>\n","      <th>45</th>\n","      <td>namo tea yuupea horrible rhyme know</td>\n","      <td>negative</td>\n","      <td>namo tea yuupea horrible rhyme know</td>\n","      <td>negative</td>\n","      <td>[0.025534288957715034, 0.004176765214651823, -...</td>\n","      <td>0.760347</td>\n","    </tr>\n","    <tr>\n","      <th>46</th>\n","      <td>great agility from akpom cut back and bend</td>\n","      <td>positive</td>\n","      <td>great agility from akpom cut back and bend</td>\n","      <td>positive</td>\n","      <td>[0.06865684688091278, -0.02164856530725956, -0...</td>\n","      <td>0.670042</td>\n","    </tr>\n","    <tr>\n","      <th>47</th>\n","      <td>from undecided pro aap they are not perfect bu...</td>\n","      <td>positive</td>\n","      <td>from undecided pro aap they are not perfect bu...</td>\n","      <td>negative</td>\n","      <td>[0.01590304635465145, -0.0683458000421524, -0....</td>\n","      <td>0.647296</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>woah there don insane with pray mean you don w...</td>\n","      <td>negative</td>\n","      <td>woah there don insane with pray mean you don w...</td>\n","      <td>negative</td>\n","      <td>[0.050547026097774506, -0.01725909113883972, 0...</td>\n","      <td>0.711541</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>porngress wont announce their candidate cuz th...</td>\n","      
<td>positive</td>\n","      <td>porngress wont announce their candidate cuz th...</td>\n","      <td>negative</td>\n","      <td>[0.05935536324977875, -0.051609162241220474, -...</td>\n","      <td>0.671247</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                                       document  ... sentiment_confidence\n","origin_index                                                     ...                     \n","0             its true they had cut the power what douchebag...  ...             0.632922\n","1                  fuck giroud better finishing like this month  ...             0.558096\n","2                           looks shit now but still proud made  ...             0.594104\n","3                      pelor the burning hate the best evil god  ...             0.699286\n","4                 can ask what you with something this powerful  ...             0.615222\n","5             aapâ shazia ilmi from puram constituency lag...  ...             0.751383\n","6                                                     fuck yeah  ...             0.663185\n","7             honestly really surprised alice ranked that lo...  ...             0.605483\n","8                      didn care about politics before now hate  ...             0.701191\n","9                                      hard nips and goosebumps  ...             0.629745\n","10            varadabhai ndtv trying too well dilute bjp tre...  ...             0.756238\n","11                                    old man has lost his mind  ...             0.502476\n","12            why this being downvoted you might ask both mo...  ...             0.710366\n","13            hasnt changed all apolitical before simply don...  ...             0.603606\n","14            for one campaign pretty much just snatched the...  ...             0.631376\n","15            vajpayee managed forge much broader coalition ...  ...             
0.685135\n","16            lol this only proves how desperate they are ge...  ...             0.624959\n","17            dont hate aap but your questions are example w...  ...             0.769971\n","18            what were the other policies you discussed not...  ...             0.669384\n","19            wow lots favorites this bracket haqua tsukushi...  ...             0.593471\n","20            sorry know this isn what you asked just ventin...  ...             0.745406\n","21            coming out strongly against gujarat chief mini...  ...             0.694449\n","22            there one tool bjp can use their manifesto whi...  ...             0.623127\n","23                          jakiro spotted the middle top maybe  ...             0.575394\n","24            family mormon have never tried explain them th...  ...             0.606252\n","25            with these results would have grudgingly accep...  ...             0.736970\n","26            tea partier expresses support for namo after e...  ...             0.760564\n","27            politically would stupid move take stand right...  ...             0.674769\n","28                                                      wtf why  ...             0.635538\n","29            have actually seen lot users views change duri...  ...             0.662819\n","30            truth told there not insignificant percentage ...  ...             0.713786\n","31            was anti bjp and neutral cong became anti bjp ...  ...             0.732909\n","32            most religions have dogmatic orthodox well eso...  ...             0.625969\n","33            laureatte sen said christian schools are perfe...  ...             0.510249\n","34            need stop watching the garbage that you watch ...  ...             0.552975\n","35            gandhi mandela hitler mao plato chandragupt ma...  ...             0.719779\n","36            hate aap for the other thread points such the ...  ...             
0.756800\n","37            absolutely agree with you subsidies the worst ...  ...             0.655372\n","38            are you corrupt mind have you benefited throug...  ...             0.752354\n","39            congress needs bogeyman modi without the bad g...  ...             0.703794\n","40            protip don type uppercase text all caps harder...  ...             0.673459\n","41            brother trog very wrathful indeed but his will...  ...             0.537965\n","42            start off saying that the craftsmanship this p...  ...             0.723931\n","43            have made request unban namoarmy hell moron ho...  ...             0.718607\n","44            child modi worked his fatherâ tea shop and y...  ...             0.743616\n","45                          namo tea yuupea horrible rhyme know  ...             0.760347\n","46                   great agility from akpom cut back and bend  ...             0.670042\n","47            from undecided pro aap they are not perfect bu...  ...             0.647296\n","48            woah there don insane with pray mean you don w...  ...             0.711541\n","49            porngress wont announce their candidate cuz th...  ...             
0.671247\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":109},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609522047859,"user_tz":-300,"elapsed":222054,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a862e5e9-9580-4257-de54-dec3acfbdd6e"},"source":["fitted_pipe.predict(\"Indian prime minister was assinated!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>document</th>\n","      <th>sentiment</th>\n","      <th>default_name_embeddings</th>\n","      <th>sentiment_confidence</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Bitcoin is going to the moon!</td>\n","      <td>neutral</td>\n","      <td>[0.06468033790588379, -0.040837567299604416, -...</td>\n","      <td>0.524234</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                   document  ... sentiment_confidence\n","origin_index                                 ...                     \n","0             Bitcoin is going to the moon!  ...             
0.524234\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609522047861,"user_tz":-300,"elapsed":222040,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"cf81e598-13e9-40fd-e8bb-937b8a8933f3"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2)                 | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005)                    | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64)                | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5)                 | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True)       | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6)               | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral')    | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+')    | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"])  | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0)            | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999)        | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512)               | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use')      | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True)  | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True)       | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([])        | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False)  | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0)            | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999)        | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')  | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609522047863,"user_tz":-300,"elapsed":222021,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"fe05a28d-53de-4ec5-e7fc-1ec49eaeddd6"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5)  \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["              precision    recall  f1-score   support\n","\n","    negative       1.00      0.83      0.91        24\n","     neutral       0.00      0.00      0.00         0\n","    positive       1.00      1.00      1.00        26\n","\n","    accuracy                           0.92        50\n","   macro avg       0.67      0.61      0.64        50\n","weighted avg       1.00      0.92      0.96        50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>document</th>\n","      <th>y</th>\n","      <th>text</th>\n","      <th>sentiment</th>\n","      <th>default_name_embeddings</th>\n","      <th>sentiment_confidence</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>its true they had cut the power what douchebag...</td>\n","      <td>positive</td>\n","      <td>its true they had cut the power what douchebag...</td>\n","      <td>positive</td>\n","      <td>[0.033111296594142914, 0.053994592279195786, -...</td>\n","      <td>0.761194</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>fuck giroud better finishing like this month</td>\n","      <td>positive</td>\n","      <td>fuck giroud 
better finishing like this month</td>\n","      <td>positive</td>\n","      <td>[0.0678204670548439, 0.01411951333284378, -0.0...</td>\n","      <td>0.938677</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>looks shit now but still proud made</td>\n","      <td>positive</td>\n","      <td>looks shit now but still proud made</td>\n","      <td>positive</td>\n","      <td>[0.03247416764497757, -0.09844464808702469, -0...</td>\n","      <td>0.954937</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>pelor the burning hate the best evil god</td>\n","      <td>negative</td>\n","      <td>pelor the burning hate the best evil god</td>\n","      <td>negative</td>\n","      <td>[0.04032062739133835, 0.07666623592376709, -0....</td>\n","      <td>0.810980</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>can ask what you with something this powerful</td>\n","      <td>positive</td>\n","      <td>can ask what you with something this powerful</td>\n","      <td>positive</td>\n","      <td>[0.015518003143370152, -0.05116305500268936, -...</td>\n","      <td>0.956043</td>\n","    </tr>\n","    <tr>\n","      <th>5</th>\n","      <td>aapâ shazia ilmi from puram constituency lag...</td>\n","      <td>negative</td>\n","      <td>aapâ shazia ilmi from puram constituency lag...</td>\n","      <td>negative</td>\n","      <td>[0.02478150464594364, -0.06508146971464157, -0...</td>\n","      <td>0.708917</td>\n","    </tr>\n","    <tr>\n","      <th>6</th>\n","      <td>fuck yeah</td>\n","      <td>negative</td>\n","      <td>fuck yeah</td>\n","      <td>negative</td>\n","      <td>[0.04602408409118652, -0.025047995150089264, -...</td>\n","      <td>0.731940</td>\n","    </tr>\n","    <tr>\n","      <th>7</th>\n","      <td>honestly really surprised alice ranked that lo...</td>\n","      <td>positive</td>\n","      <td>honestly really surprised alice ranked that lo...</td>\n","      <td>positive</td>\n","      
<td>[-0.035716041922569275, -0.04127982258796692, ...</td>\n","      <td>0.966494</td>\n","    </tr>\n","    <tr>\n","      <th>8</th>\n","      <td>didn care about politics before now hate</td>\n","      <td>negative</td>\n","      <td>didn care about politics before now hate</td>\n","      <td>negative</td>\n","      <td>[-0.006816444452852011, 0.06221264973282814, -...</td>\n","      <td>0.672320</td>\n","    </tr>\n","    <tr>\n","      <th>9</th>\n","      <td>hard nips and goosebumps</td>\n","      <td>negative</td>\n","      <td>hard nips and goosebumps</td>\n","      <td>negative</td>\n","      <td>[-0.02919699251651764, -0.030449824407696724, ...</td>\n","      <td>0.604969</td>\n","    </tr>\n","    <tr>\n","      <th>10</th>\n","      <td>varadabhai ndtv trying too well dilute bjp tre...</td>\n","      <td>negative</td>\n","      <td>varadabhai ndtv trying too well dilute bjp tre...</td>\n","      <td>negative</td>\n","      <td>[0.04727796092629433, -0.06792476028203964, -0...</td>\n","      <td>0.639880</td>\n","    </tr>\n","    <tr>\n","      <th>11</th>\n","      <td>old man has lost his mind</td>\n","      <td>positive</td>\n","      <td>old man has lost his mind</td>\n","      <td>positive</td>\n","      <td>[0.039657335728406906, -0.04277808964252472, -...</td>\n","      <td>0.929136</td>\n","    </tr>\n","    <tr>\n","      <th>12</th>\n","      <td>why this being downvoted you might ask both mo...</td>\n","      <td>negative</td>\n","      <td>why this being downvoted you might ask both mo...</td>\n","      <td>neutral</td>\n","      <td>[0.06581216305494308, -0.06079106032848358, -0...</td>\n","      <td>0.546161</td>\n","    </tr>\n","    <tr>\n","      <th>13</th>\n","      <td>hasnt changed all apolitical before simply don...</td>\n","      <td>positive</td>\n","      <td>hasnt changed all apolitical before simply do...</td>\n","      <td>positive</td>\n","      <td>[0.03509754315018654, -0.004639611579477787, -...</td>\n","      
<td>0.883017</td>\n","    </tr>\n","    <tr>\n","      <th>14</th>\n","      <td>for one campaign pretty much just snatched the...</td>\n","      <td>negative</td>\n","      <td>for one campaign pretty much just snatched the...</td>\n","      <td>negative</td>\n","      <td>[0.017386479303240776, 0.0443551279604435, -0....</td>\n","      <td>0.636396</td>\n","    </tr>\n","    <tr>\n","      <th>15</th>\n","      <td>vajpayee managed forge much broader coalition ...</td>\n","      <td>positive</td>\n","      <td>vajpayee managed forge much broader coalition ...</td>\n","      <td>positive</td>\n","      <td>[0.0372871570289135, -0.051079731434583664, -0...</td>\n","      <td>0.848566</td>\n","    </tr>\n","    <tr>\n","      <th>16</th>\n","      <td>lol this only proves how desperate they are ge...</td>\n","      <td>positive</td>\n","      <td>lol this only proves how desperate they are ge...</td>\n","      <td>positive</td>\n","      <td>[0.05233633145689964, -0.03147873282432556, 0....</td>\n","      <td>0.819890</td>\n","    </tr>\n","    <tr>\n","      <th>17</th>\n","      <td>dont hate aap but your questions are example w...</td>\n","      <td>negative</td>\n","      <td>dont hate aap but your questions are example ...</td>\n","      <td>negative</td>\n","      <td>[0.026356497779488564, -0.04044198617339134, -...</td>\n","      <td>0.724538</td>\n","    </tr>\n","    <tr>\n","      <th>18</th>\n","      <td>what were the other policies you discussed not...</td>\n","      <td>negative</td>\n","      <td>what were the other policies you discussed not...</td>\n","      <td>negative</td>\n","      <td>[-0.07521010935306549, 0.008543566800653934, 0...</td>\n","      <td>0.732422</td>\n","    </tr>\n","    <tr>\n","      <th>19</th>\n","      <td>wow lots favorites this bracket haqua tsukushi...</td>\n","      <td>positive</td>\n","      <td>wow lots favorites this bracket haqua tsukushi...</td>\n","      <td>positive</td>\n","      <td>[-0.0693160742521286, 
-0.015458519570529461, -...</td>\n","      <td>0.971349</td>\n","    </tr>\n","    <tr>\n","      <th>20</th>\n","      <td>sorry know this isn what you asked just ventin...</td>\n","      <td>negative</td>\n","      <td>sorry know this isn what you asked just ventin...</td>\n","      <td>negative</td>\n","      <td>[0.016777772456407547, -0.05478338897228241, -...</td>\n","      <td>0.623325</td>\n","    </tr>\n","    <tr>\n","      <th>21</th>\n","      <td>coming out strongly against gujarat chief mini...</td>\n","      <td>positive</td>\n","      <td>coming out strongly against gujarat chief min...</td>\n","      <td>positive</td>\n","      <td>[0.06856723129749298, -0.019821858033537865, -...</td>\n","      <td>0.736283</td>\n","    </tr>\n","    <tr>\n","      <th>22</th>\n","      <td>there one tool bjp can use their manifesto whi...</td>\n","      <td>positive</td>\n","      <td>there one tool bjp can use their manifesto whi...</td>\n","      <td>positive</td>\n","      <td>[0.057847339659929276, -0.05365725979208946, -...</td>\n","      <td>0.870023</td>\n","    </tr>\n","    <tr>\n","      <th>23</th>\n","      <td>jakiro spotted the middle top maybe</td>\n","      <td>positive</td>\n","      <td>jakiro spotted the middle top maybe</td>\n","      <td>positive</td>\n","      <td>[-0.011690962128341198, -0.024473998695611954,...</td>\n","      <td>0.965604</td>\n","    </tr>\n","    <tr>\n","      <th>24</th>\n","      <td>family mormon have never tried explain them th...</td>\n","      <td>positive</td>\n","      <td>family mormon have never tried explain them t...</td>\n","      <td>positive</td>\n","      <td>[0.03987010195851326, -0.0009543427731841803, ...</td>\n","      <td>0.964053</td>\n","    </tr>\n","    <tr>\n","      <th>25</th>\n","      <td>with these results would have grudgingly accep...</td>\n","      <td>negative</td>\n","      <td>with these results would have grudgingly accep...</td>\n","      <td>neutral</td>\n","      
<td>[0.034668292850255966, -0.05392604321241379, -...</td>\n","      <td>0.521402</td>\n","    </tr>\n","    <tr>\n","      <th>26</th>\n","      <td>tea partier expresses support for namo after e...</td>\n","      <td>negative</td>\n","      <td>tea partier expresses support for namo after ...</td>\n","      <td>negative</td>\n","      <td>[0.032365716993808746, -0.056087080389261246, ...</td>\n","      <td>0.837552</td>\n","    </tr>\n","    <tr>\n","      <th>27</th>\n","      <td>politically would stupid move take stand right...</td>\n","      <td>negative</td>\n","      <td>politically would stupid move take stand right...</td>\n","      <td>neutral</td>\n","      <td>[-0.00040777752292342484, -0.01262842211872339...</td>\n","      <td>0.541656</td>\n","    </tr>\n","    <tr>\n","      <th>28</th>\n","      <td>wtf why</td>\n","      <td>negative</td>\n","      <td>wtf why</td>\n","      <td>negative</td>\n","      <td>[0.025807170197367668, -0.07080958038568497, -...</td>\n","      <td>0.747054</td>\n","    </tr>\n","    <tr>\n","      <th>29</th>\n","      <td>have actually seen lot users views change duri...</td>\n","      <td>positive</td>\n","      <td>have actually seen lot users views change dur...</td>\n","      <td>positive</td>\n","      <td>[-0.009333955124020576, 0.01388698909431696, -...</td>\n","      <td>0.818759</td>\n","    </tr>\n","    <tr>\n","      <th>30</th>\n","      <td>truth told there not insignificant percentage ...</td>\n","      <td>positive</td>\n","      <td>truth told there not insignificant percentage ...</td>\n","      <td>positive</td>\n","      <td>[0.03927519917488098, -0.05597652122378349, -0...</td>\n","      <td>0.776765</td>\n","    </tr>\n","    <tr>\n","      <th>31</th>\n","      <td>was anti bjp and neutral cong became anti bjp ...</td>\n","      <td>positive</td>\n","      <td>was anti bjp and neutral cong became anti bjp ...</td>\n","      <td>positive</td>\n","      <td>[0.03805134445428848, 
-0.030298737809062004, -...</td>\n","      <td>0.630857</td>\n","    </tr>\n","    <tr>\n","      <th>32</th>\n","      <td>most religions have dogmatic orthodox well eso...</td>\n","      <td>positive</td>\n","      <td>most religions have dogmatic orthodox well eso...</td>\n","      <td>positive</td>\n","      <td>[0.03939439728856087, -0.02040349319577217, -0...</td>\n","      <td>0.972607</td>\n","    </tr>\n","    <tr>\n","      <th>33</th>\n","      <td>laureatte sen said christian schools are perfe...</td>\n","      <td>positive</td>\n","      <td>laureatte sen said christian schools are perfe...</td>\n","      <td>positive</td>\n","      <td>[0.05267934128642082, 0.05836360529065132, 0.0...</td>\n","      <td>0.911020</td>\n","    </tr>\n","    <tr>\n","      <th>34</th>\n","      <td>need stop watching the garbage that you watch ...</td>\n","      <td>positive</td>\n","      <td>need stop watching the garbage that you watch...</td>\n","      <td>positive</td>\n","      <td>[-0.012382612563669682, 0.01988200470805168, 0...</td>\n","      <td>0.954440</td>\n","    </tr>\n","    <tr>\n","      <th>35</th>\n","      <td>gandhi mandela hitler mao plato chandragupt ma...</td>\n","      <td>negative</td>\n","      <td>gandhi mandela hitler mao plato chandragupt ma...</td>\n","      <td>negative</td>\n","      <td>[0.027552243322134018, 0.013075066730380058, 0...</td>\n","      <td>0.767667</td>\n","    </tr>\n","    <tr>\n","      <th>36</th>\n","      <td>hate aap for the other thread points such the ...</td>\n","      <td>negative</td>\n","      <td>hate aap for the other thread points such the...</td>\n","      <td>negative</td>\n","      <td>[0.01461736112833023, -0.038017574697732925, -...</td>\n","      <td>0.690414</td>\n","    </tr>\n","    <tr>\n","      <th>37</th>\n","      <td>absolutely agree with you subsidies the worst ...</td>\n","      <td>negative</td>\n","      <td>absolutely agree with you subsidies the worst ...</td>\n","      
<td>neutral</td>\n","      <td>[0.010974399745464325, 0.0033110962249338627, ...</td>\n","      <td>0.581476</td>\n","    </tr>\n","    <tr>\n","      <th>38</th>\n","      <td>are you corrupt mind have you benefited throug...</td>\n","      <td>negative</td>\n","      <td>are you corrupt mind have you benefited throu...</td>\n","      <td>negative</td>\n","      <td>[0.03834373503923416, -0.06521473079919815, -0...</td>\n","      <td>0.783217</td>\n","    </tr>\n","    <tr>\n","      <th>39</th>\n","      <td>congress needs bogeyman modi without the bad g...</td>\n","      <td>positive</td>\n","      <td>congress needs bogeyman modi without the bad g...</td>\n","      <td>positive</td>\n","      <td>[0.03138439729809761, -0.06221967190504074, -0...</td>\n","      <td>0.764358</td>\n","    </tr>\n","    <tr>\n","      <th>40</th>\n","      <td>protip don type uppercase text all caps harder...</td>\n","      <td>negative</td>\n","      <td>protip don type uppercase text all caps harder...</td>\n","      <td>negative</td>\n","      <td>[0.044019922614097595, 0.025341013446450233, 0...</td>\n","      <td>0.738550</td>\n","    </tr>\n","    <tr>\n","      <th>41</th>\n","      <td>brother trog very wrathful indeed but his will...</td>\n","      <td>positive</td>\n","      <td>brother trog very wrathful indeed but his wil...</td>\n","      <td>positive</td>\n","      <td>[-0.024625714868307114, 0.06193268671631813, 0...</td>\n","      <td>0.923871</td>\n","    </tr>\n","    <tr>\n","      <th>42</th>\n","      <td>start off saying that the craftsmanship this p...</td>\n","      <td>positive</td>\n","      <td>start off saying that the craftsmanship this ...</td>\n","      <td>positive</td>\n","      <td>[0.05780623108148575, -0.06291749328374863, -0...</td>\n","      <td>0.985073</td>\n","    </tr>\n","    <tr>\n","      <th>43</th>\n","      <td>have made request unban namoarmy hell moron ho...</td>\n","      <td>negative</td>\n","      <td>have made request unban 
namoarmy hell moron h...</td>\n","      <td>negative</td>\n","      <td>[0.015555822290480137, -0.012748800218105316, ...</td>\n","      <td>0.796430</td>\n","    </tr>\n","    <tr>\n","      <th>44</th>\n","      <td>child modi worked his fatherâ tea shop and y...</td>\n","      <td>negative</td>\n","      <td>child modi worked his fatherâ tea shop and ...</td>\n","      <td>negative</td>\n","      <td>[0.05774841830134392, -0.059567004442214966, -...</td>\n","      <td>0.709697</td>\n","    </tr>\n","    <tr>\n","      <th>45</th>\n","      <td>namo tea yuupea horrible rhyme know</td>\n","      <td>negative</td>\n","      <td>namo tea yuupea horrible rhyme know</td>\n","      <td>negative</td>\n","      <td>[0.025534288957715034, 0.004176765214651823, -...</td>\n","      <td>0.851523</td>\n","    </tr>\n","    <tr>\n","      <th>46</th>\n","      <td>great agility from akpom cut back and bend</td>\n","      <td>positive</td>\n","      <td>great agility from akpom cut back and bend</td>\n","      <td>positive</td>\n","      <td>[0.06865684688091278, -0.02164856530725956, -0...</td>\n","      <td>0.966416</td>\n","    </tr>\n","    <tr>\n","      <th>47</th>\n","      <td>from undecided pro aap they are not perfect bu...</td>\n","      <td>positive</td>\n","      <td>from undecided pro aap they are not perfect bu...</td>\n","      <td>positive</td>\n","      <td>[0.01590304635465145, -0.0683458000421524, -0....</td>\n","      <td>0.891286</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>woah there don insane with pray mean you don w...</td>\n","      <td>negative</td>\n","      <td>woah there don insane with pray mean you don w...</td>\n","      <td>negative</td>\n","      <td>[0.050547026097774506, -0.01725909113883972, 0...</td>\n","      <td>0.798072</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>porngress wont announce their candidate cuz th...</td>\n","      <td>positive</td>\n","      <td>porngress wont announce 
their candidate cuz th...</td>\n","      <td>positive</td>\n","      <td>[0.05935536324977875, -0.051609162241220474, -...</td>\n","      <td>0.858501</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                                       document  ... sentiment_confidence\n","origin_index                                                     ...                     \n","0             its true they had cut the power what douchebag...  ...             0.761194\n","1                  fuck giroud better finishing like this month  ...             0.938677\n","2                           looks shit now but still proud made  ...             0.954937\n","3                      pelor the burning hate the best evil god  ...             0.810980\n","4                 can ask what you with something this powerful  ...             0.956043\n","5             aapâ shazia ilmi from puram constituency lag...  ...             0.708917\n","6                                                     fuck yeah  ...             0.731940\n","7             honestly really surprised alice ranked that lo...  ...             0.966494\n","8                      didn care about politics before now hate  ...             0.672320\n","9                                      hard nips and goosebumps  ...             0.604969\n","10            varadabhai ndtv trying too well dilute bjp tre...  ...             0.639880\n","11                                    old man has lost his mind  ...             0.929136\n","12            why this being downvoted you might ask both mo...  ...             0.546161\n","13            hasnt changed all apolitical before simply don...  ...             0.883017\n","14            for one campaign pretty much just snatched the...  ...             0.636396\n","15            vajpayee managed forge much broader coalition ...  ...             0.848566\n","16            lol this only proves how desperate they are ge...  ...             
0.819890\n","17            dont hate aap but your questions are example w...  ...             0.724538\n","18            what were the other policies you discussed not...  ...             0.732422\n","19            wow lots favorites this bracket haqua tsukushi...  ...             0.971349\n","20            sorry know this isn what you asked just ventin...  ...             0.623325\n","21            coming out strongly against gujarat chief mini...  ...             0.736283\n","22            there one tool bjp can use their manifesto whi...  ...             0.870023\n","23                          jakiro spotted the middle top maybe  ...             0.965604\n","24            family mormon have never tried explain them th...  ...             0.964053\n","25            with these results would have grudgingly accep...  ...             0.521402\n","26            tea partier expresses support for namo after e...  ...             0.837552\n","27            politically would stupid move take stand right...  ...             0.541656\n","28                                                      wtf why  ...             0.747054\n","29            have actually seen lot users views change duri...  ...             0.818759\n","30            truth told there not insignificant percentage ...  ...             0.776765\n","31            was anti bjp and neutral cong became anti bjp ...  ...             0.630857\n","32            most religions have dogmatic orthodox well eso...  ...             0.972607\n","33            laureatte sen said christian schools are perfe...  ...             0.911020\n","34            need stop watching the garbage that you watch ...  ...             0.954440\n","35            gandhi mandela hitler mao plato chandragupt ma...  ...             0.767667\n","36            hate aap for the other thread points such the ...  ...             0.690414\n","37            absolutely agree with you subsidies the worst ...  ...             
0.581476\n","38            are you corrupt mind have you benefited throug...  ...             0.783217\n","39            congress needs bogeyman modi without the bad g...  ...             0.764358\n","40            protip don type uppercase text all caps harder...  ...             0.738550\n","41            brother trog very wrathful indeed but his will...  ...             0.923871\n","42            start off saying that the craftsmanship this p...  ...             0.985073\n","43            have made request unban namoarmy hell moron ho...  ...             0.796430\n","44            child modi worked his fatherâ tea shop and y...  ...             0.709697\n","45                          namo tea yuupea horrible rhyme know  ...             0.851523\n","46                   great agility from akpom cut back and bend  ...             0.966416\n","47            from undecided pro aap they are not perfect bu...  ...             0.891286\n","48            woah there don insane with pray mean you don w...  ...             0.798072\n","49            porngress wont announce their candidate cuz th...  ...             0.858501\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nxWFzQOhjWC8","executionInfo":{"status":"ok","timestamp":1609522047865,"user_tz":-300,"elapsed":221994,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"6f5e4138-03d4-495a-ce16-0be512588c81"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language <en> NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language <fi> NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language <xx> NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"IKK_Ii_gjJfF","executionInfo":{"status":"ok","timestamp":1609523959387,"user_tz":-300,"elapsed":476394,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1baf2085-bee8-48c3-fd11-401722536642"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train 
longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(70)  \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    negative       0.84      0.77      0.80       300\n","     neutral       0.00      0.00      0.00         0\n","    positive       0.91      0.72      0.80       300\n","\n","    accuracy                           0.74       600\n","   macro avg       0.59      0.50      0.54       600\n","weighted avg       0.88      0.74      0.80       600\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. 
Lets save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","executionInfo":{"status":"ok","timestamp":1609522704715,"user_tz":-300,"elapsed":161180,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"bc27a985-38bf-4b98-f3fe-8e4955cd83cc"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible!   \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":127},"id":"SO4uz45MoRgp","executionInfo":{"status":"ok","timestamp":1609522719523,"user_tz":-300,"elapsed":14825,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b14d7193-6b43-4c1d-eb42-326af88ffc0a"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Indian prime minister was assinated')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>en_embed_sentence_small_bert_L12_768_embeddings</th>\n","      
<th>document</th>\n","      <th>sentiment</th>\n","      <th>sentiment_confidence</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>[0.15737222135066986, 0.2598555386066437, 0.85...</td>\n","      <td>Tesla plans to invest 10M into the ML sector</td>\n","      <td>positive</td>\n","      <td>0.638827</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                en_embed_sentence_small_bert_L12_768_embeddings  ... sentiment_confidence\n","origin_index                                                     ...                     \n","0             [0.15737222135066986, 0.2598555386066437, 0.85...  ...             0.638827\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"e0CVlkk9v6Qi","executionInfo":{"status":"ok","timestamp":1609522719526,"user_tz":-300,"elapsed":24,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"acbf137f-60ff-4804-b903-bb88f00c78d0"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')            | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])                  | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)                 | Info: whether detect lists 
during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False)           | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)                  | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)                      | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)            | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)        | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)       | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')                | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)                    | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)                        | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32)                                 | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False)                          | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768)                                | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128)                        | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False)                                 | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768')         | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6)                         | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral')              | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative'])      | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768')  | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]}
\ No newline at end of file
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_reddit.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_reddit.ipynb)\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","## 2 class Reddit comment sentiment classifier training\n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator)  from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install  pyspark==2.4.7 \n","! pip install nlu > /dev/null    \n","\n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download Reddit  Sentiment dataset \n","https://www.kaggle.com/cosmos98/twitter-and-reddit-sentimental-analysis-dataset\n","#Context\n","\n","This dataset was created as part of a university project on sentiment analysis across multi-source social media platforms using PySpark."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610788117464,"user_tz":-300,"elapsed":1827,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"69719e54-e5be-4a26-bcde-b913531cbcd9"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/Reddit_Data.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:08:00--  http://ckl-it.de/wp-content/uploads/2021/01/Reddit_Data.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 153265 (150K) [text/csv]\n","Saving to: ‘Reddit_Data.csv’\n","\n","Reddit_Data.csv     100%[===================>] 149.67K   402KB/s    in 0.4s    \n","\n","2021-01-16 09:08:01 (402 KB/s) - ‘Reddit_Data.csv’ saved [153265/153265]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610788135165,"user_tz":-300,"elapsed":1025,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"cdd2f3e1-3f96-4a5b-9291-34bce078fbf0"},"source":["import pandas as pd\n","train_path = '/content/Reddit_Data.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","columns=['text','y']\n","train_df = train_df[columns]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>y</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>its true they had cut the power what douchebag...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>fuck giroud better finishing like this month</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>looks shit now but still proud made</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>pelor the burning hate the best evil god</td>\n","      
<td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>can ask what you with something this powerful</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>595</th>\n","      <td>bangali desh bechne main sabse aage</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>596</th>\n","      <td>national media channels were gaged not cover t...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>597</th>\n","      <td>been following these threads from the beginni...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>598</th>\n","      <td>pretty sure this sarcasm satire the news 1500...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>599</th>\n","      <td>much would love for namo our next hard imagin...</td>\n","      <td>positive</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>600 rows × 2 columns</p>\n","</div>"],"text/plain":["                                                  text         y\n","0    its true they had cut the power what douchebag...  positive\n","1        fuck giroud better finishing like this month   positive\n","2                 looks shit now but still proud made   positive\n","3            pelor the burning hate the best evil god   negative\n","4       can ask what you with something this powerful   positive\n","..                                                 ...       ...\n","595               bangali desh bechne main sabse aage   positive\n","596  national media channels were gaged not cover t...  positive\n","597   been following these threads from the beginni...  negative\n","598   pretty sure this sarcasm satire the news 1500...  positive\n","599   much would love for namo our next hard imagin...  
positive\n","\n","[600 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":2}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","Your dataset's label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609522047844,"user_tz":-300,"elapsed":222057,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"83383a78-d68b-43a0-a253-318696580942"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix  and fit it on a dataset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates some NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    negative       0.67      1.00      0.80        24\n","     neutral       0.00      0.00      0.00         0\n","    positive       1.00      0.23      0.38        26\n","\n","    accuracy                           0.60        50\n","   macro avg       0.56      0.41      0.39        50\n","weighted avg       0.84      0.60      0.58        50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>document</th>\n","      <th>y</th>\n","      <th>text</th>\n","      <th>sentiment</th>\n","      <th>default_name_embeddings</th>\n","      <th>sentiment_confidence</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>its true they had cut the power what douchebag...</td>\n","      <td>positive</td>\n","      <td>its true they had cut the power what douchebag...</td>\n","      <td>negative</td>\n","      <td>[0.033111296594142914, 0.053994592279195786, -...</td>\n","      <td>0.632922</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      
<td>fuck giroud better finishing like this month</td>\n","      <td>positive</td>\n","      <td>fuck giroud better finishing like this month</td>\n","      <td>neutral</td>\n","      <td>[0.0678204670548439, 0.01411951333284378, -0.0...</td>\n","      <td>0.558096</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>looks shit now but still proud made</td>\n","      <td>positive</td>\n","      <td>looks shit now but still proud made</td>\n","      <td>neutral</td>\n","      <td>[0.03247416764497757, -0.09844464808702469, -0...</td>\n","      <td>0.594104</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>pelor the burning hate the best evil god</td>\n","      <td>negative</td>\n","      <td>pelor the burning hate the best evil god</td>\n","      <td>negative</td>\n","      <td>[0.04032062739133835, 0.07666623592376709, -0....</td>\n","      <td>0.699286</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>can ask what you with something this powerful</td>\n","      <td>positive</td>\n","      <td>can ask what you with something this powerful</td>\n","      <td>positive</td>\n","      <td>[0.015518003143370152, -0.05116305500268936, -...</td>\n","      <td>0.615222</td>\n","    </tr>\n","    <tr>\n","      <th>5</th>\n","      <td>aapâ shazia ilmi from puram constituency lag...</td>\n","      <td>negative</td>\n","      <td>aapâ shazia ilmi from puram constituency lag...</td>\n","      <td>negative</td>\n","      <td>[0.02478150464594364, -0.06508146971464157, -0...</td>\n","      <td>0.751383</td>\n","    </tr>\n","    <tr>\n","      <th>6</th>\n","      <td>fuck yeah</td>\n","      <td>negative</td>\n","      <td>fuck yeah</td>\n","      <td>negative</td>\n","      <td>[0.04602408409118652, -0.025047995150089264, -...</td>\n","      <td>0.663185</td>\n","    </tr>\n","    <tr>\n","      <th>7</th>\n","      <td>honestly really surprised alice ranked that lo...</td>\n","      <td>positive</td>\n","      <td>honestly 
really surprised alice ranked that lo...</td>\n","      <td>positive</td>\n","      <td>[-0.035716041922569275, -0.04127982258796692, ...</td>\n","      <td>0.605483</td>\n","    </tr>\n","    <tr>\n","      <th>8</th>\n","      <td>didn care about politics before now hate</td>\n","      <td>negative</td>\n","      <td>didn care about politics before now hate</td>\n","      <td>negative</td>\n","      <td>[-0.006816444452852011, 0.06221264973282814, -...</td>\n","      <td>0.701191</td>\n","    </tr>\n","    <tr>\n","      <th>9</th>\n","      <td>hard nips and goosebumps</td>\n","      <td>negative</td>\n","      <td>hard nips and goosebumps</td>\n","      <td>negative</td>\n","      <td>[-0.02919699251651764, -0.030449824407696724, ...</td>\n","      <td>0.629745</td>\n","    </tr>\n","    <tr>\n","      <th>10</th>\n","      <td>varadabhai ndtv trying too well dilute bjp tre...</td>\n","      <td>negative</td>\n","      <td>varadabhai ndtv trying too well dilute bjp tre...</td>\n","      <td>negative</td>\n","      <td>[0.04727796092629433, -0.06792476028203964, -0...</td>\n","      <td>0.756238</td>\n","    </tr>\n","    <tr>\n","      <th>11</th>\n","      <td>old man has lost his mind</td>\n","      <td>positive</td>\n","      <td>old man has lost his mind</td>\n","      <td>neutral</td>\n","      <td>[0.039657335728406906, -0.04277808964252472, -...</td>\n","      <td>0.502476</td>\n","    </tr>\n","    <tr>\n","      <th>12</th>\n","      <td>why this being downvoted you might ask both mo...</td>\n","      <td>negative</td>\n","      <td>why this being downvoted you might ask both mo...</td>\n","      <td>negative</td>\n","      <td>[0.06581216305494308, -0.06079106032848358, -0...</td>\n","      <td>0.710366</td>\n","    </tr>\n","    <tr>\n","      <th>13</th>\n","      <td>hasnt changed all apolitical before simply don...</td>\n","      <td>positive</td>\n","      <td>hasnt changed all apolitical before simply do...</td>\n","      <td>negative</td>\n","  
    <td>[0.03509754315018654, -0.004639611579477787, -...</td>\n","      <td>0.603606</td>\n","    </tr>\n","    <tr>\n","      <th>14</th>\n","      <td>for one campaign pretty much just snatched the...</td>\n","      <td>negative</td>\n","      <td>for one campaign pretty much just snatched the...</td>\n","      <td>negative</td>\n","      <td>[0.017386479303240776, 0.0443551279604435, -0....</td>\n","      <td>0.631376</td>\n","    </tr>\n","    <tr>\n","      <th>15</th>\n","      <td>vajpayee managed forge much broader coalition ...</td>\n","      <td>positive</td>\n","      <td>vajpayee managed forge much broader coalition ...</td>\n","      <td>negative</td>\n","      <td>[0.0372871570289135, -0.051079731434583664, -0...</td>\n","      <td>0.685135</td>\n","    </tr>\n","    <tr>\n","      <th>16</th>\n","      <td>lol this only proves how desperate they are ge...</td>\n","      <td>positive</td>\n","      <td>lol this only proves how desperate they are ge...</td>\n","      <td>negative</td>\n","      <td>[0.05233633145689964, -0.03147873282432556, 0....</td>\n","      <td>0.624959</td>\n","    </tr>\n","    <tr>\n","      <th>17</th>\n","      <td>dont hate aap but your questions are example w...</td>\n","      <td>negative</td>\n","      <td>dont hate aap but your questions are example ...</td>\n","      <td>negative</td>\n","      <td>[0.026356497779488564, -0.04044198617339134, -...</td>\n","      <td>0.769971</td>\n","    </tr>\n","    <tr>\n","      <th>18</th>\n","      <td>what were the other policies you discussed not...</td>\n","      <td>negative</td>\n","      <td>what were the other policies you discussed not...</td>\n","      <td>negative</td>\n","      <td>[-0.07521010935306549, 0.008543566800653934, 0...</td>\n","      <td>0.669384</td>\n","    </tr>\n","    <tr>\n","      <th>19</th>\n","      <td>wow lots favorites this bracket haqua tsukushi...</td>\n","      <td>positive</td>\n","      <td>wow lots favorites this bracket haqua 
tsukushi...</td>\n","      <td>neutral</td>\n","      <td>[-0.0693160742521286, -0.015458519570529461, -...</td>\n","      <td>0.593471</td>\n","    </tr>\n","    <tr>\n","      <th>20</th>\n","      <td>sorry know this isn what you asked just ventin...</td>\n","      <td>negative</td>\n","      <td>sorry know this isn what you asked just ventin...</td>\n","      <td>negative</td>\n","      <td>[0.016777772456407547, -0.05478338897228241, -...</td>\n","      <td>0.745406</td>\n","    </tr>\n","    <tr>\n","      <th>21</th>\n","      <td>coming out strongly against gujarat chief mini...</td>\n","      <td>positive</td>\n","      <td>coming out strongly against gujarat chief min...</td>\n","      <td>negative</td>\n","      <td>[0.06856723129749298, -0.019821858033537865, -...</td>\n","      <td>0.694449</td>\n","    </tr>\n","    <tr>\n","      <th>22</th>\n","      <td>there one tool bjp can use their manifesto whi...</td>\n","      <td>positive</td>\n","      <td>there one tool bjp can use their manifesto whi...</td>\n","      <td>negative</td>\n","      <td>[0.057847339659929276, -0.05365725979208946, -...</td>\n","      <td>0.623127</td>\n","    </tr>\n","    <tr>\n","      <th>23</th>\n","      <td>jakiro spotted the middle top maybe</td>\n","      <td>positive</td>\n","      <td>jakiro spotted the middle top maybe</td>\n","      <td>neutral</td>\n","      <td>[-0.011690962128341198, -0.024473998695611954,...</td>\n","      <td>0.575394</td>\n","    </tr>\n","    <tr>\n","      <th>24</th>\n","      <td>family mormon have never tried explain them th...</td>\n","      <td>positive</td>\n","      <td>family mormon have never tried explain them t...</td>\n","      <td>positive</td>\n","      <td>[0.03987010195851326, -0.0009543427731841803, ...</td>\n","      <td>0.606252</td>\n","    </tr>\n","    <tr>\n","      <th>25</th>\n","      <td>with these results would have grudgingly accep...</td>\n","      <td>negative</td>\n","      <td>with these results would have 
grudgingly accep...</td>\n","      <td>negative</td>\n","      <td>[0.034668292850255966, -0.05392604321241379, -...</td>\n","      <td>0.736970</td>\n","    </tr>\n","    <tr>\n","      <th>26</th>\n","      <td>tea partier expresses support for namo after e...</td>\n","      <td>negative</td>\n","      <td>tea partier expresses support for namo after ...</td>\n","      <td>negative</td>\n","      <td>[0.032365716993808746, -0.056087080389261246, ...</td>\n","      <td>0.760564</td>\n","    </tr>\n","    <tr>\n","      <th>27</th>\n","      <td>politically would stupid move take stand right...</td>\n","      <td>negative</td>\n","      <td>politically would stupid move take stand right...</td>\n","      <td>negative</td>\n","      <td>[-0.00040777752292342484, -0.01262842211872339...</td>\n","      <td>0.674769</td>\n","    </tr>\n","    <tr>\n","      <th>28</th>\n","      <td>wtf why</td>\n","      <td>negative</td>\n","      <td>wtf why</td>\n","      <td>negative</td>\n","      <td>[0.025807170197367668, -0.07080958038568497, -...</td>\n","      <td>0.635538</td>\n","    </tr>\n","    <tr>\n","      <th>29</th>\n","      <td>have actually seen lot users views change duri...</td>\n","      <td>positive</td>\n","      <td>have actually seen lot users views change dur...</td>\n","      <td>negative</td>\n","      <td>[-0.009333955124020576, 0.01388698909431696, -...</td>\n","      <td>0.662819</td>\n","    </tr>\n","    <tr>\n","      <th>30</th>\n","      <td>truth told there not insignificant percentage ...</td>\n","      <td>positive</td>\n","      <td>truth told there not insignificant percentage ...</td>\n","      <td>negative</td>\n","      <td>[0.03927519917488098, -0.05597652122378349, -0...</td>\n","      <td>0.713786</td>\n","    </tr>\n","    <tr>\n","      <th>31</th>\n","      <td>was anti bjp and neutral cong became anti bjp ...</td>\n","      <td>positive</td>\n","      <td>was anti bjp and neutral cong became anti bjp ...</td>\n","      
<td>negative</td>\n","      <td>[0.03805134445428848, -0.030298737809062004, -...</td>\n","      <td>0.732909</td>\n","    </tr>\n","    <tr>\n","      <th>32</th>\n","      <td>most religions have dogmatic orthodox well eso...</td>\n","      <td>positive</td>\n","      <td>most religions have dogmatic orthodox well eso...</td>\n","      <td>positive</td>\n","      <td>[0.03939439728856087, -0.02040349319577217, -0...</td>\n","      <td>0.625969</td>\n","    </tr>\n","    <tr>\n","      <th>33</th>\n","      <td>laureatte sen said christian schools are perfe...</td>\n","      <td>positive</td>\n","      <td>laureatte sen said christian schools are perfe...</td>\n","      <td>neutral</td>\n","      <td>[0.05267934128642082, 0.05836360529065132, 0.0...</td>\n","      <td>0.510249</td>\n","    </tr>\n","    <tr>\n","      <th>34</th>\n","      <td>need stop watching the garbage that you watch ...</td>\n","      <td>positive</td>\n","      <td>need stop watching the garbage that you watch...</td>\n","      <td>neutral</td>\n","      <td>[-0.012382612563669682, 0.01988200470805168, 0...</td>\n","      <td>0.552975</td>\n","    </tr>\n","    <tr>\n","      <th>35</th>\n","      <td>gandhi mandela hitler mao plato chandragupt ma...</td>\n","      <td>negative</td>\n","      <td>gandhi mandela hitler mao plato chandragupt ma...</td>\n","      <td>negative</td>\n","      <td>[0.027552243322134018, 0.013075066730380058, 0...</td>\n","      <td>0.719779</td>\n","    </tr>\n","    <tr>\n","      <th>36</th>\n","      <td>hate aap for the other thread points such the ...</td>\n","      <td>negative</td>\n","      <td>hate aap for the other thread points such the...</td>\n","      <td>negative</td>\n","      <td>[0.01461736112833023, -0.038017574697732925, -...</td>\n","      <td>0.756800</td>\n","    </tr>\n","    <tr>\n","      <th>37</th>\n","      <td>absolutely agree with you subsidies the worst ...</td>\n","      <td>negative</td>\n","      <td>absolutely agree with you 
subsidies the worst ...</td>\n","      <td>negative</td>\n","      <td>[0.010974399745464325, 0.0033110962249338627, ...</td>\n","      <td>0.655372</td>\n","    </tr>\n","    <tr>\n","      <th>38</th>\n","      <td>are you corrupt mind have you benefited throug...</td>\n","      <td>negative</td>\n","      <td>are you corrupt mind have you benefited throu...</td>\n","      <td>negative</td>\n","      <td>[0.03834373503923416, -0.06521473079919815, -0...</td>\n","      <td>0.752354</td>\n","    </tr>\n","    <tr>\n","      <th>39</th>\n","      <td>congress needs bogeyman modi without the bad g...</td>\n","      <td>positive</td>\n","      <td>congress needs bogeyman modi without the bad g...</td>\n","      <td>negative</td>\n","      <td>[0.03138439729809761, -0.06221967190504074, -0...</td>\n","      <td>0.703794</td>\n","    </tr>\n","    <tr>\n","      <th>40</th>\n","      <td>protip don type uppercase text all caps harder...</td>\n","      <td>negative</td>\n","      <td>protip don type uppercase text all caps harder...</td>\n","      <td>negative</td>\n","      <td>[0.044019922614097595, 0.025341013446450233, 0...</td>\n","      <td>0.673459</td>\n","    </tr>\n","    <tr>\n","      <th>41</th>\n","      <td>brother trog very wrathful indeed but his will...</td>\n","      <td>positive</td>\n","      <td>brother trog very wrathful indeed but his wil...</td>\n","      <td>neutral</td>\n","      <td>[-0.024625714868307114, 0.06193268671631813, 0...</td>\n","      <td>0.537965</td>\n","    </tr>\n","    <tr>\n","      <th>42</th>\n","      <td>start off saying that the craftsmanship this p...</td>\n","      <td>positive</td>\n","      <td>start off saying that the craftsmanship this ...</td>\n","      <td>positive</td>\n","      <td>[0.05780623108148575, -0.06291749328374863, -0...</td>\n","      <td>0.723931</td>\n","    </tr>\n","    <tr>\n","      <th>43</th>\n","      <td>have made request unban namoarmy hell moron ho...</td>\n","      
<td>negative</td>\n","      <td>have made request unban namoarmy hell moron h...</td>\n","      <td>negative</td>\n","      <td>[0.015555822290480137, -0.012748800218105316, ...</td>\n","      <td>0.718607</td>\n","    </tr>\n","    <tr>\n","      <th>44</th>\n","      <td>child modi worked his fatherâ tea shop and y...</td>\n","      <td>negative</td>\n","      <td>child modi worked his fatherâ tea shop and ...</td>\n","      <td>negative</td>\n","      <td>[0.05774841830134392, -0.059567004442214966, -...</td>\n","      <td>0.743616</td>\n","    </tr>\n","    <tr>\n","      <th>45</th>\n","      <td>namo tea yuupea horrible rhyme know</td>\n","      <td>negative</td>\n","      <td>namo tea yuupea horrible rhyme know</td>\n","      <td>negative</td>\n","      <td>[0.025534288957715034, 0.004176765214651823, -...</td>\n","      <td>0.760347</td>\n","    </tr>\n","    <tr>\n","      <th>46</th>\n","      <td>great agility from akpom cut back and bend</td>\n","      <td>positive</td>\n","      <td>great agility from akpom cut back and bend</td>\n","      <td>positive</td>\n","      <td>[0.06865684688091278, -0.02164856530725956, -0...</td>\n","      <td>0.670042</td>\n","    </tr>\n","    <tr>\n","      <th>47</th>\n","      <td>from undecided pro aap they are not perfect bu...</td>\n","      <td>positive</td>\n","      <td>from undecided pro aap they are not perfect bu...</td>\n","      <td>negative</td>\n","      <td>[0.01590304635465145, -0.0683458000421524, -0....</td>\n","      <td>0.647296</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>woah there don insane with pray mean you don w...</td>\n","      <td>negative</td>\n","      <td>woah there don insane with pray mean you don w...</td>\n","      <td>negative</td>\n","      <td>[0.050547026097774506, -0.01725909113883972, 0...</td>\n","      <td>0.711541</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>porngress wont announce their candidate cuz th...</td>\n","      
<td>positive</td>\n","      <td>porngress wont announce their candidate cuz th...</td>\n","      <td>negative</td>\n","      <td>[0.05935536324977875, -0.051609162241220474, -...</td>\n","      <td>0.671247</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                                       document  ... sentiment_confidence\n","origin_index                                                     ...                     \n","0             its true they had cut the power what douchebag...  ...             0.632922\n","1                  fuck giroud better finishing like this month  ...             0.558096\n","2                           looks shit now but still proud made  ...             0.594104\n","3                      pelor the burning hate the best evil god  ...             0.699286\n","4                 can ask what you with something this powerful  ...             0.615222\n","5             aapâ shazia ilmi from puram constituency lag...  ...             0.751383\n","6                                                     fuck yeah  ...             0.663185\n","7             honestly really surprised alice ranked that lo...  ...             0.605483\n","8                      didn care about politics before now hate  ...             0.701191\n","9                                      hard nips and goosebumps  ...             0.629745\n","10            varadabhai ndtv trying too well dilute bjp tre...  ...             0.756238\n","11                                    old man has lost his mind  ...             0.502476\n","12            why this being downvoted you might ask both mo...  ...             0.710366\n","13            hasnt changed all apolitical before simply don...  ...             0.603606\n","14            for one campaign pretty much just snatched the...  ...             0.631376\n","15            vajpayee managed forge much broader coalition ...  ...             
0.685135\n","16            lol this only proves how desperate they are ge...  ...             0.624959\n","17            dont hate aap but your questions are example w...  ...             0.769971\n","18            what were the other policies you discussed not...  ...             0.669384\n","19            wow lots favorites this bracket haqua tsukushi...  ...             0.593471\n","20            sorry know this isn what you asked just ventin...  ...             0.745406\n","21            coming out strongly against gujarat chief mini...  ...             0.694449\n","22            there one tool bjp can use their manifesto whi...  ...             0.623127\n","23                          jakiro spotted the middle top maybe  ...             0.575394\n","24            family mormon have never tried explain them th...  ...             0.606252\n","25            with these results would have grudgingly accep...  ...             0.736970\n","26            tea partier expresses support for namo after e...  ...             0.760564\n","27            politically would stupid move take stand right...  ...             0.674769\n","28                                                      wtf why  ...             0.635538\n","29            have actually seen lot users views change duri...  ...             0.662819\n","30            truth told there not insignificant percentage ...  ...             0.713786\n","31            was anti bjp and neutral cong became anti bjp ...  ...             0.732909\n","32            most religions have dogmatic orthodox well eso...  ...             0.625969\n","33            laureatte sen said christian schools are perfe...  ...             0.510249\n","34            need stop watching the garbage that you watch ...  ...             0.552975\n","35            gandhi mandela hitler mao plato chandragupt ma...  ...             0.719779\n","36            hate aap for the other thread points such the ...  ...             
0.756800\n","37            absolutely agree with you subsidies the worst ...  ...             0.655372\n","38            are you corrupt mind have you benefited throug...  ...             0.752354\n","39            congress needs bogeyman modi without the bad g...  ...             0.703794\n","40            protip don type uppercase text all caps harder...  ...             0.673459\n","41            brother trog very wrathful indeed but his will...  ...             0.537965\n","42            start off saying that the craftsmanship this p...  ...             0.723931\n","43            have made request unban namoarmy hell moron ho...  ...             0.718607\n","44            child modi worked his fatherâ tea shop and y...  ...             0.743616\n","45                          namo tea yuupea horrible rhyme know  ...             0.760347\n","46                   great agility from akpom cut back and bend  ...             0.670042\n","47            from undecided pro aap they are not perfect bu...  ...             0.647296\n","48            woah there don insane with pray mean you don w...  ...             0.711541\n","49            porngress wont announce their candidate cuz th...  ...             
0.671247\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on a new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":109},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609522047859,"user_tz":-300,"elapsed":222054,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a862e5e9-9580-4257-de54-dec3acfbdd6e"},"source":["fitted_pipe.predict(\"Indian prime minister was assinated!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>document</th>\n","      <th>sentiment</th>\n","      <th>default_name_embeddings</th>\n","      <th>sentiment_confidence</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Bitcoin is going to the moon!</td>\n","      <td>neutral</td>\n","      <td>[0.06468033790588379, -0.040837567299604416, -...</td>\n","      <td>0.524234</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                   document  ... sentiment_confidence\n","origin_index                                 ...                     \n","0             Bitcoin is going to the moon!  ...             
0.524234\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609522047861,"user_tz":-300,"elapsed":222040,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"cf81e598-13e9-40fd-e8bb-937b8a8933f3"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2)                 | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005)                    | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64)                | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5)                 | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True)       | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6)               | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral')    | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+')    | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"])  | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0)            | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999)        | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512)               | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use')      | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True)  | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True)       | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([])        | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False)  | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0)            | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999)        | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')  | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609522047863,"user_tz":-300,"elapsed":222021,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"fe05a28d-53de-4ec5-e7fc-1ec49eaeddd6"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5)  \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["              precision    recall  f1-score   support\n","\n","    negative       1.00      0.83      0.91        24\n","     neutral       0.00      0.00      0.00         0\n","    positive       1.00      1.00      1.00        26\n","\n","    accuracy                           0.92        50\n","   macro avg       0.67      0.61      0.64        50\n","weighted avg       1.00      0.92      0.96        50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>document</th>\n","      <th>y</th>\n","      <th>text</th>\n","      <th>sentiment</th>\n","      <th>default_name_embeddings</th>\n","      <th>sentiment_confidence</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>its true they had cut the power what douchebag...</td>\n","      <td>positive</td>\n","      <td>its true they had cut the power what douchebag...</td>\n","      <td>positive</td>\n","      <td>[0.033111296594142914, 0.053994592279195786, -...</td>\n","      <td>0.761194</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>fuck giroud better finishing like this month</td>\n","      <td>positive</td>\n","      <td>fuck giroud 
better finishing like this month</td>\n","      <td>positive</td>\n","      <td>[0.0678204670548439, 0.01411951333284378, -0.0...</td>\n","      <td>0.938677</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>looks shit now but still proud made</td>\n","      <td>positive</td>\n","      <td>looks shit now but still proud made</td>\n","      <td>positive</td>\n","      <td>[0.03247416764497757, -0.09844464808702469, -0...</td>\n","      <td>0.954937</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>pelor the burning hate the best evil god</td>\n","      <td>negative</td>\n","      <td>pelor the burning hate the best evil god</td>\n","      <td>negative</td>\n","      <td>[0.04032062739133835, 0.07666623592376709, -0....</td>\n","      <td>0.810980</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>can ask what you with something this powerful</td>\n","      <td>positive</td>\n","      <td>can ask what you with something this powerful</td>\n","      <td>positive</td>\n","      <td>[0.015518003143370152, -0.05116305500268936, -...</td>\n","      <td>0.956043</td>\n","    </tr>\n","    <tr>\n","      <th>5</th>\n","      <td>aapâ shazia ilmi from puram constituency lag...</td>\n","      <td>negative</td>\n","      <td>aapâ shazia ilmi from puram constituency lag...</td>\n","      <td>negative</td>\n","      <td>[0.02478150464594364, -0.06508146971464157, -0...</td>\n","      <td>0.708917</td>\n","    </tr>\n","    <tr>\n","      <th>6</th>\n","      <td>fuck yeah</td>\n","      <td>negative</td>\n","      <td>fuck yeah</td>\n","      <td>negative</td>\n","      <td>[0.04602408409118652, -0.025047995150089264, -...</td>\n","      <td>0.731940</td>\n","    </tr>\n","    <tr>\n","      <th>7</th>\n","      <td>honestly really surprised alice ranked that lo...</td>\n","      <td>positive</td>\n","      <td>honestly really surprised alice ranked that lo...</td>\n","      <td>positive</td>\n","      
<td>[-0.035716041922569275, -0.04127982258796692, ...</td>\n","      <td>0.966494</td>\n","    </tr>\n","    <tr>\n","      <th>8</th>\n","      <td>didn care about politics before now hate</td>\n","      <td>negative</td>\n","      <td>didn care about politics before now hate</td>\n","      <td>negative</td>\n","      <td>[-0.006816444452852011, 0.06221264973282814, -...</td>\n","      <td>0.672320</td>\n","    </tr>\n","    <tr>\n","      <th>9</th>\n","      <td>hard nips and goosebumps</td>\n","      <td>negative</td>\n","      <td>hard nips and goosebumps</td>\n","      <td>negative</td>\n","      <td>[-0.02919699251651764, -0.030449824407696724, ...</td>\n","      <td>0.604969</td>\n","    </tr>\n","    <tr>\n","      <th>10</th>\n","      <td>varadabhai ndtv trying too well dilute bjp tre...</td>\n","      <td>negative</td>\n","      <td>varadabhai ndtv trying too well dilute bjp tre...</td>\n","      <td>negative</td>\n","      <td>[0.04727796092629433, -0.06792476028203964, -0...</td>\n","      <td>0.639880</td>\n","    </tr>\n","    <tr>\n","      <th>11</th>\n","      <td>old man has lost his mind</td>\n","      <td>positive</td>\n","      <td>old man has lost his mind</td>\n","      <td>positive</td>\n","      <td>[0.039657335728406906, -0.04277808964252472, -...</td>\n","      <td>0.929136</td>\n","    </tr>\n","    <tr>\n","      <th>12</th>\n","      <td>why this being downvoted you might ask both mo...</td>\n","      <td>negative</td>\n","      <td>why this being downvoted you might ask both mo...</td>\n","      <td>neutral</td>\n","      <td>[0.06581216305494308, -0.06079106032848358, -0...</td>\n","      <td>0.546161</td>\n","    </tr>\n","    <tr>\n","      <th>13</th>\n","      <td>hasnt changed all apolitical before simply don...</td>\n","      <td>positive</td>\n","      <td>hasnt changed all apolitical before simply do...</td>\n","      <td>positive</td>\n","      <td>[0.03509754315018654, -0.004639611579477787, -...</td>\n","      
<td>0.883017</td>\n","    </tr>\n","    <tr>\n","      <th>14</th>\n","      <td>for one campaign pretty much just snatched the...</td>\n","      <td>negative</td>\n","      <td>for one campaign pretty much just snatched the...</td>\n","      <td>negative</td>\n","      <td>[0.017386479303240776, 0.0443551279604435, -0....</td>\n","      <td>0.636396</td>\n","    </tr>\n","    <tr>\n","      <th>15</th>\n","      <td>vajpayee managed forge much broader coalition ...</td>\n","      <td>positive</td>\n","      <td>vajpayee managed forge much broader coalition ...</td>\n","      <td>positive</td>\n","      <td>[0.0372871570289135, -0.051079731434583664, -0...</td>\n","      <td>0.848566</td>\n","    </tr>\n","    <tr>\n","      <th>16</th>\n","      <td>lol this only proves how desperate they are ge...</td>\n","      <td>positive</td>\n","      <td>lol this only proves how desperate they are ge...</td>\n","      <td>positive</td>\n","      <td>[0.05233633145689964, -0.03147873282432556, 0....</td>\n","      <td>0.819890</td>\n","    </tr>\n","    <tr>\n","      <th>17</th>\n","      <td>dont hate aap but your questions are example w...</td>\n","      <td>negative</td>\n","      <td>dont hate aap but your questions are example ...</td>\n","      <td>negative</td>\n","      <td>[0.026356497779488564, -0.04044198617339134, -...</td>\n","      <td>0.724538</td>\n","    </tr>\n","    <tr>\n","      <th>18</th>\n","      <td>what were the other policies you discussed not...</td>\n","      <td>negative</td>\n","      <td>what were the other policies you discussed not...</td>\n","      <td>negative</td>\n","      <td>[-0.07521010935306549, 0.008543566800653934, 0...</td>\n","      <td>0.732422</td>\n","    </tr>\n","    <tr>\n","      <th>19</th>\n","      <td>wow lots favorites this bracket haqua tsukushi...</td>\n","      <td>positive</td>\n","      <td>wow lots favorites this bracket haqua tsukushi...</td>\n","      <td>positive</td>\n","      <td>[-0.0693160742521286, 
-0.015458519570529461, -...</td>\n","      <td>0.971349</td>\n","    </tr>\n","    <tr>\n","      <th>20</th>\n","      <td>sorry know this isn what you asked just ventin...</td>\n","      <td>negative</td>\n","      <td>sorry know this isn what you asked just ventin...</td>\n","      <td>negative</td>\n","      <td>[0.016777772456407547, -0.05478338897228241, -...</td>\n","      <td>0.623325</td>\n","    </tr>\n","    <tr>\n","      <th>21</th>\n","      <td>coming out strongly against gujarat chief mini...</td>\n","      <td>positive</td>\n","      <td>coming out strongly against gujarat chief min...</td>\n","      <td>positive</td>\n","      <td>[0.06856723129749298, -0.019821858033537865, -...</td>\n","      <td>0.736283</td>\n","    </tr>\n","    <tr>\n","      <th>22</th>\n","      <td>there one tool bjp can use their manifesto whi...</td>\n","      <td>positive</td>\n","      <td>there one tool bjp can use their manifesto whi...</td>\n","      <td>positive</td>\n","      <td>[0.057847339659929276, -0.05365725979208946, -...</td>\n","      <td>0.870023</td>\n","    </tr>\n","    <tr>\n","      <th>23</th>\n","      <td>jakiro spotted the middle top maybe</td>\n","      <td>positive</td>\n","      <td>jakiro spotted the middle top maybe</td>\n","      <td>positive</td>\n","      <td>[-0.011690962128341198, -0.024473998695611954,...</td>\n","      <td>0.965604</td>\n","    </tr>\n","    <tr>\n","      <th>24</th>\n","      <td>family mormon have never tried explain them th...</td>\n","      <td>positive</td>\n","      <td>family mormon have never tried explain them t...</td>\n","      <td>positive</td>\n","      <td>[0.03987010195851326, -0.0009543427731841803, ...</td>\n","      <td>0.964053</td>\n","    </tr>\n","    <tr>\n","      <th>25</th>\n","      <td>with these results would have grudgingly accep...</td>\n","      <td>negative</td>\n","      <td>with these results would have grudgingly accep...</td>\n","      <td>neutral</td>\n","      
<td>[0.034668292850255966, -0.05392604321241379, -...</td>\n","      <td>0.521402</td>\n","    </tr>\n","    <tr>\n","      <th>26</th>\n","      <td>tea partier expresses support for namo after e...</td>\n","      <td>negative</td>\n","      <td>tea partier expresses support for namo after ...</td>\n","      <td>negative</td>\n","      <td>[0.032365716993808746, -0.056087080389261246, ...</td>\n","      <td>0.837552</td>\n","    </tr>\n","    <tr>\n","      <th>27</th>\n","      <td>politically would stupid move take stand right...</td>\n","      <td>negative</td>\n","      <td>politically would stupid move take stand right...</td>\n","      <td>neutral</td>\n","      <td>[-0.00040777752292342484, -0.01262842211872339...</td>\n","      <td>0.541656</td>\n","    </tr>\n","    <tr>\n","      <th>28</th>\n","      <td>wtf why</td>\n","      <td>negative</td>\n","      <td>wtf why</td>\n","      <td>negative</td>\n","      <td>[0.025807170197367668, -0.07080958038568497, -...</td>\n","      <td>0.747054</td>\n","    </tr>\n","    <tr>\n","      <th>29</th>\n","      <td>have actually seen lot users views change duri...</td>\n","      <td>positive</td>\n","      <td>have actually seen lot users views change dur...</td>\n","      <td>positive</td>\n","      <td>[-0.009333955124020576, 0.01388698909431696, -...</td>\n","      <td>0.818759</td>\n","    </tr>\n","    <tr>\n","      <th>30</th>\n","      <td>truth told there not insignificant percentage ...</td>\n","      <td>positive</td>\n","      <td>truth told there not insignificant percentage ...</td>\n","      <td>positive</td>\n","      <td>[0.03927519917488098, -0.05597652122378349, -0...</td>\n","      <td>0.776765</td>\n","    </tr>\n","    <tr>\n","      <th>31</th>\n","      <td>was anti bjp and neutral cong became anti bjp ...</td>\n","      <td>positive</td>\n","      <td>was anti bjp and neutral cong became anti bjp ...</td>\n","      <td>positive</td>\n","      <td>[0.03805134445428848, 
-0.030298737809062004, -...</td>\n","      <td>0.630857</td>\n","    </tr>\n","    <tr>\n","      <th>32</th>\n","      <td>most religions have dogmatic orthodox well eso...</td>\n","      <td>positive</td>\n","      <td>most religions have dogmatic orthodox well eso...</td>\n","      <td>positive</td>\n","      <td>[0.03939439728856087, -0.02040349319577217, -0...</td>\n","      <td>0.972607</td>\n","    </tr>\n","    <tr>\n","      <th>33</th>\n","      <td>laureatte sen said christian schools are perfe...</td>\n","      <td>positive</td>\n","      <td>laureatte sen said christian schools are perfe...</td>\n","      <td>positive</td>\n","      <td>[0.05267934128642082, 0.05836360529065132, 0.0...</td>\n","      <td>0.911020</td>\n","    </tr>\n","    <tr>\n","      <th>34</th>\n","      <td>need stop watching the garbage that you watch ...</td>\n","      <td>positive</td>\n","      <td>need stop watching the garbage that you watch...</td>\n","      <td>positive</td>\n","      <td>[-0.012382612563669682, 0.01988200470805168, 0...</td>\n","      <td>0.954440</td>\n","    </tr>\n","    <tr>\n","      <th>35</th>\n","      <td>gandhi mandela hitler mao plato chandragupt ma...</td>\n","      <td>negative</td>\n","      <td>gandhi mandela hitler mao plato chandragupt ma...</td>\n","      <td>negative</td>\n","      <td>[0.027552243322134018, 0.013075066730380058, 0...</td>\n","      <td>0.767667</td>\n","    </tr>\n","    <tr>\n","      <th>36</th>\n","      <td>hate aap for the other thread points such the ...</td>\n","      <td>negative</td>\n","      <td>hate aap for the other thread points such the...</td>\n","      <td>negative</td>\n","      <td>[0.01461736112833023, -0.038017574697732925, -...</td>\n","      <td>0.690414</td>\n","    </tr>\n","    <tr>\n","      <th>37</th>\n","      <td>absolutely agree with you subsidies the worst ...</td>\n","      <td>negative</td>\n","      <td>absolutely agree with you subsidies the worst ...</td>\n","      
<td>neutral</td>\n","      <td>[0.010974399745464325, 0.0033110962249338627, ...</td>\n","      <td>0.581476</td>\n","    </tr>\n","    <tr>\n","      <th>38</th>\n","      <td>are you corrupt mind have you benefited throug...</td>\n","      <td>negative</td>\n","      <td>are you corrupt mind have you benefited throu...</td>\n","      <td>negative</td>\n","      <td>[0.03834373503923416, -0.06521473079919815, -0...</td>\n","      <td>0.783217</td>\n","    </tr>\n","    <tr>\n","      <th>39</th>\n","      <td>congress needs bogeyman modi without the bad g...</td>\n","      <td>positive</td>\n","      <td>congress needs bogeyman modi without the bad g...</td>\n","      <td>positive</td>\n","      <td>[0.03138439729809761, -0.06221967190504074, -0...</td>\n","      <td>0.764358</td>\n","    </tr>\n","    <tr>\n","      <th>40</th>\n","      <td>protip don type uppercase text all caps harder...</td>\n","      <td>negative</td>\n","      <td>protip don type uppercase text all caps harder...</td>\n","      <td>negative</td>\n","      <td>[0.044019922614097595, 0.025341013446450233, 0...</td>\n","      <td>0.738550</td>\n","    </tr>\n","    <tr>\n","      <th>41</th>\n","      <td>brother trog very wrathful indeed but his will...</td>\n","      <td>positive</td>\n","      <td>brother trog very wrathful indeed but his wil...</td>\n","      <td>positive</td>\n","      <td>[-0.024625714868307114, 0.06193268671631813, 0...</td>\n","      <td>0.923871</td>\n","    </tr>\n","    <tr>\n","      <th>42</th>\n","      <td>start off saying that the craftsmanship this p...</td>\n","      <td>positive</td>\n","      <td>start off saying that the craftsmanship this ...</td>\n","      <td>positive</td>\n","      <td>[0.05780623108148575, -0.06291749328374863, -0...</td>\n","      <td>0.985073</td>\n","    </tr>\n","    <tr>\n","      <th>43</th>\n","      <td>have made request unban namoarmy hell moron ho...</td>\n","      <td>negative</td>\n","      <td>have made request unban 
namoarmy hell moron h...</td>\n","      <td>negative</td>\n","      <td>[0.015555822290480137, -0.012748800218105316, ...</td>\n","      <td>0.796430</td>\n","    </tr>\n","    <tr>\n","      <th>44</th>\n","      <td>child modi worked his fatherâ tea shop and y...</td>\n","      <td>negative</td>\n","      <td>child modi worked his fatherâ tea shop and ...</td>\n","      <td>negative</td>\n","      <td>[0.05774841830134392, -0.059567004442214966, -...</td>\n","      <td>0.709697</td>\n","    </tr>\n","    <tr>\n","      <th>45</th>\n","      <td>namo tea yuupea horrible rhyme know</td>\n","      <td>negative</td>\n","      <td>namo tea yuupea horrible rhyme know</td>\n","      <td>negative</td>\n","      <td>[0.025534288957715034, 0.004176765214651823, -...</td>\n","      <td>0.851523</td>\n","    </tr>\n","    <tr>\n","      <th>46</th>\n","      <td>great agility from akpom cut back and bend</td>\n","      <td>positive</td>\n","      <td>great agility from akpom cut back and bend</td>\n","      <td>positive</td>\n","      <td>[0.06865684688091278, -0.02164856530725956, -0...</td>\n","      <td>0.966416</td>\n","    </tr>\n","    <tr>\n","      <th>47</th>\n","      <td>from undecided pro aap they are not perfect bu...</td>\n","      <td>positive</td>\n","      <td>from undecided pro aap they are not perfect bu...</td>\n","      <td>positive</td>\n","      <td>[0.01590304635465145, -0.0683458000421524, -0....</td>\n","      <td>0.891286</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>woah there don insane with pray mean you don w...</td>\n","      <td>negative</td>\n","      <td>woah there don insane with pray mean you don w...</td>\n","      <td>negative</td>\n","      <td>[0.050547026097774506, -0.01725909113883972, 0...</td>\n","      <td>0.798072</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>porngress wont announce their candidate cuz th...</td>\n","      <td>positive</td>\n","      <td>porngress wont announce 
their candidate cuz th...</td>\n","      <td>positive</td>\n","      <td>[0.05935536324977875, -0.051609162241220474, -...</td>\n","      <td>0.858501</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                                       document  ... sentiment_confidence\n","origin_index                                                     ...                     \n","0             its true they had cut the power what douchebag...  ...             0.761194\n","1                  fuck giroud better finishing like this month  ...             0.938677\n","2                           looks shit now but still proud made  ...             0.954937\n","3                      pelor the burning hate the best evil god  ...             0.810980\n","4                 can ask what you with something this powerful  ...             0.956043\n","5             aapâ shazia ilmi from puram constituency lag...  ...             0.708917\n","6                                                     fuck yeah  ...             0.731940\n","7             honestly really surprised alice ranked that lo...  ...             0.966494\n","8                      didn care about politics before now hate  ...             0.672320\n","9                                      hard nips and goosebumps  ...             0.604969\n","10            varadabhai ndtv trying too well dilute bjp tre...  ...             0.639880\n","11                                    old man has lost his mind  ...             0.929136\n","12            why this being downvoted you might ask both mo...  ...             0.546161\n","13            hasnt changed all apolitical before simply don...  ...             0.883017\n","14            for one campaign pretty much just snatched the...  ...             0.636396\n","15            vajpayee managed forge much broader coalition ...  ...             0.848566\n","16            lol this only proves how desperate they are ge...  ...             
0.819890\n","17            dont hate aap but your questions are example w...  ...             0.724538\n","18            what were the other policies you discussed not...  ...             0.732422\n","19            wow lots favorites this bracket haqua tsukushi...  ...             0.971349\n","20            sorry know this isn what you asked just ventin...  ...             0.623325\n","21            coming out strongly against gujarat chief mini...  ...             0.736283\n","22            there one tool bjp can use their manifesto whi...  ...             0.870023\n","23                          jakiro spotted the middle top maybe  ...             0.965604\n","24            family mormon have never tried explain them th...  ...             0.964053\n","25            with these results would have grudgingly accep...  ...             0.521402\n","26            tea partier expresses support for namo after e...  ...             0.837552\n","27            politically would stupid move take stand right...  ...             0.541656\n","28                                                      wtf why  ...             0.747054\n","29            have actually seen lot users views change duri...  ...             0.818759\n","30            truth told there not insignificant percentage ...  ...             0.776765\n","31            was anti bjp and neutral cong became anti bjp ...  ...             0.630857\n","32            most religions have dogmatic orthodox well eso...  ...             0.972607\n","33            laureatte sen said christian schools are perfe...  ...             0.911020\n","34            need stop watching the garbage that you watch ...  ...             0.954440\n","35            gandhi mandela hitler mao plato chandragupt ma...  ...             0.767667\n","36            hate aap for the other thread points such the ...  ...             0.690414\n","37            absolutely agree with you subsidies the worst ...  ...             
0.581476\n","38            are you corrupt mind have you benefited throug...  ...             0.783217\n","39            congress needs bogeyman modi without the bad g...  ...             0.764358\n","40            protip don type uppercase text all caps harder...  ...             0.738550\n","41            brother trog very wrathful indeed but his will...  ...             0.923871\n","42            start off saying that the craftsmanship this p...  ...             0.985073\n","43            have made request unban namoarmy hell moron ho...  ...             0.796430\n","44            child modi worked his fatherâ tea shop and y...  ...             0.709697\n","45                          namo tea yuupea horrible rhyme know  ...             0.851523\n","46                   great agility from akpom cut back and bend  ...             0.966416\n","47            from undecided pro aap they are not perfect bu...  ...             0.891286\n","48            woah there don insane with pray mean you don w...  ...             0.798072\n","49            porngress wont announce their candidate cuz th...  ...             0.858501\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nxWFzQOhjWC8","executionInfo":{"status":"ok","timestamp":1609522047865,"user_tz":-300,"elapsed":221994,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"6f5e4138-03d4-495a-ce16-0be512588c81"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language <en> NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language <fi> NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language <xx> NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"IKK_Ii_gjJfF","executionInfo":{"status":"ok","timestamp":1609523959387,"user_tz":-300,"elapsed":476394,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1baf2085-bee8-48c3-fd11-401722536642"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train 
longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(70)  \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    negative       0.84      0.77      0.80       300\n","     neutral       0.00      0.00      0.00         0\n","    positive       0.91      0.72      0.80       300\n","\n","    accuracy                           0.74       600\n","   macro avg       0.59      0.50      0.54       600\n","weighted avg       0.88      0.74      0.80       600\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. 
Lets save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","executionInfo":{"status":"ok","timestamp":1609522704715,"user_tz":-300,"elapsed":161180,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"bc27a985-38bf-4b98-f3fe-8e4955cd83cc"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible!   \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":127},"id":"SO4uz45MoRgp","executionInfo":{"status":"ok","timestamp":1609522719523,"user_tz":-300,"elapsed":14825,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b14d7193-6b43-4c1d-eb42-326af88ffc0a"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Indian prime minister was assinated')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>en_embed_sentence_small_bert_L12_768_embeddings</th>\n","      
<th>document</th>\n","      <th>sentiment</th>\n","      <th>sentiment_confidence</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>[0.15737222135066986, 0.2598555386066437, 0.85...</td>\n","      <td>Tesla plans to invest 10M into the ML sector</td>\n","      <td>positive</td>\n","      <td>0.638827</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                en_embed_sentence_small_bert_L12_768_embeddings  ... sentiment_confidence\n","origin_index                                                     ...                     \n","0             [0.15737222135066986, 0.2598555386066437, 0.85...  ...             0.638827\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"e0CVlkk9v6Qi","executionInfo":{"status":"ok","timestamp":1609522719526,"user_tz":-300,"elapsed":24,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"acbf137f-60ff-4804-b903-bb88f00c78d0"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')            | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])                  | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)                 | Info: whether detect lists 
during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False)           | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)                  | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)                      | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)            | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)        | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)       | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')                | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)                    | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)                        | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32)                                 | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False)                          | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768)                                | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128)                        | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False)                                 | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768')         | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6)                         | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral')              | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative'])      | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768')  | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]}
\ No newline at end of file
diff --git a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_twitter.ipynb b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_twitter.ipynb
index 3aca40f1..3f5c3d66 100644
--- a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_twitter.ipynb
+++ b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_twitter.ipynb
@@ -1 +1 @@
-{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_twitter.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_twitter.ipynb)\n","\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install  pyspark==2.4.7 \n","! pip install nlu > /dev/null    \n","\n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download twitter Sentiment dataset \n","https://www.kaggle.com/cosmos98/twitter-and-reddit-sentimental-analysis-dataset\n","#Context\n","\n","This is was a Dataset Created as a part of the university Project On Sentimental Analysis On Multi-Source Social Media Platforms using PySpark."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610788006096,"user_tz":-300,"elapsed":2486,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"610519c2-4cf5-4835-d1aa-8da2f83fadf7"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/Twitter_Data.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:06:08--  http://ckl-it.de/wp-content/uploads/2021/01/Twitter_Data.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 99657 (97K) [text/csv]\n","Saving to: ‘Twitter_Data.csv’\n","\n","Twitter_Data.csv    100%[===================>]  97.32K   122KB/s    in 0.8s    \n","\n","2021-01-16 09:06:10 (122 KB/s) - ‘Twitter_Data.csv’ saved [99657/99657]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610788009315,"user_tz":-300,"elapsed":1404,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b73ee311-b15e-4fc3-a5b5-5772f87dac99"},"source":["import pandas as pd\n","train_path = '/content/Twitter_Data.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must have name 'y' name be of type str\n","columns=['text','y']\n","train_df = train_df[columns]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>y</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>how narendra modi has almost killed the indian...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>you think was modi behind that accident</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>kamal haasan takes chowkidar modi kamal haasan...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      
<td>connected name with surname not bcz religion c...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>anyone better than modi when nehruji expired s...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>595</th>\n","      <td>perception makes fool some call âforeign inv...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>596</th>\n","      <td>when will see your tweet for justice for you a...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>597</th>\n","      <td>haha congress going gaga over this after looti...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>598</th>\n","      <td>this movie shows the life histiry narendra mod...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>599</th>\n","      <td>modi left his year old wife and returned her r...</td>\n","      <td>positive</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>600 rows × 2 columns</p>\n","</div>"],"text/plain":["                                                  text         y\n","0    how narendra modi has almost killed the indian...  negative\n","1              you think was modi behind that accident  negative\n","2    kamal haasan takes chowkidar modi kamal haasan...  negative\n","3    connected name with surname not bcz religion c...  negative\n","4    anyone better than modi when nehruji expired s...  positive\n","..                                                 ...       ...\n","595  perception makes fool some call âforeign inv...  negative\n","596  when will see your tweet for justice for you a...  negative\n","597  haha congress going gaga over this after looti...  positive\n","598  this movie shows the life histiry narendra mod...  negative\n","599  modi left his year old wife and returned her r...  
positive\n","\n","[600 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609553662416,"user_tz":-300,"elapsed":192414,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a077e55a-4805-43a2-fb11-46074b487e2e"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix  and fit it on a datset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    negative       0.00      0.00      0.00        27\n","     neutral       0.00      0.00      0.00         0\n","    positive       0.78      0.61      0.68        23\n","\n","    accuracy                           0.28        50\n","   macro avg       0.26      0.20      0.23        50\n","weighted avg       0.36      0.28      0.31        50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>default_name_embeddings</th>\n","      <th>sentiment</th>\n","      <th>sentiment_confidence</th>\n","      <th>document</th>\n","      <th>y</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>how narendra modi has almost killed the indian...</td>\n","      <td>[0.060062434524297714, -0.05557167902588844, -...</td>\n","      <td>neutral</td>\n","      <td>0.590739</td>\n","      <td>how narendra modi has almost killed the indian...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      
<td>you think was modi behind that accident</td>\n","      <td>[0.05362718179821968, -0.004547705873847008, -...</td>\n","      <td>neutral</td>\n","      <td>0.577544</td>\n","      <td>you think was modi behind that accident</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>kamal haasan takes chowkidar modi kamal haasan...</td>\n","      <td>[0.07274721562862396, -0.061593908816576004, -...</td>\n","      <td>neutral</td>\n","      <td>0.585589</td>\n","      <td>kamal haasan takes chowkidar modi kamal haasan...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>connected name with surname not bcz religion c...</td>\n","      <td>[0.06106054410338402, -0.060213156044483185, -...</td>\n","      <td>neutral</td>\n","      <td>0.554036</td>\n","      <td>connected name with surname not bcz religion c...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>anyone better than modi when nehruji expired s...</td>\n","      <td>[0.0737471655011177, 0.006071773823350668, -0....</td>\n","      <td>neutral</td>\n","      <td>0.595608</td>\n","      <td>anyone better than modi when nehruji expired s...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>5</th>\n","      <td>\\r\\nmodiji wont tired crying foul\\r\\nmain chow...</td>\n","      <td>[0.05888385698199272, -0.0646616593003273, -0....</td>\n","      <td>neutral</td>\n","      <td>0.583403</td>\n","      <td>modiji wont tired crying foul main chowkidar h...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>6</th>\n","      <td>poor chap modi hasnâ given him anything can ...</td>\n","      <td>[0.058948416262865067, -0.029682165011763573, ...</td>\n","      <td>neutral</td>\n","      <td>0.578272</td>\n","      <td>poor chap modi hasnâ given him anything can ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      
<th>7</th>\n","      <td>green underwear missing ive been doubting isi ...</td>\n","      <td>[0.05133155733346939, -0.06789954006671906, -0...</td>\n","      <td>neutral</td>\n","      <td>0.575918</td>\n","      <td>green underwear missing ive been doubting isi ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>8</th>\n","      <td>congress years wasnt able complete one rafale ...</td>\n","      <td>[0.044129759073257446, -0.06111813709139824, -...</td>\n","      <td>positive</td>\n","      <td>0.605829</td>\n","      <td>congress years wasnt able complete one rafale ...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>9</th>\n","      <td>asked learn from how treat minority well does ...</td>\n","      <td>[0.03665374591946602, -0.03695330768823624, -0...</td>\n","      <td>neutral</td>\n","      <td>0.534121</td>\n","      <td>asked learn from how treat minority well does ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>10</th>\n","      <td>stop bull shitting worry about criminal vivek ...</td>\n","      <td>[0.07035735249519348, -0.06952506303787231, -0...</td>\n","      <td>neutral</td>\n","      <td>0.539481</td>\n","      <td>stop bull shitting worry about criminal vivek ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>11</th>\n","      <td>drswamys timesnow last year debate nearly mill...</td>\n","      <td>[0.013958276249468327, -0.030759528279304504, ...</td>\n","      <td>positive</td>\n","      <td>0.613331</td>\n","      <td>drswamys timesnow last year debate nearly mill...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>12</th>\n","      <td>asshole bahujan radical marxist grow brain kno...</td>\n","      <td>[0.026277026161551476, -0.06238812580704689, -...</td>\n","      <td>neutral</td>\n","      <td>0.587796</td>\n","      <td>asshole bahujan radical marxist grow brain kno...</td>\n","      
<td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>13</th>\n","      <td>from selling dreams 2014 selling tshirts 2019 ...</td>\n","      <td>[0.07457270473241806, -0.058670494705438614, -...</td>\n","      <td>neutral</td>\n","      <td>0.584601</td>\n","      <td>from selling dreams 2014 selling tshirts 2019 ...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>14</th>\n","      <td>very true sir thats why they are against modi ...</td>\n","      <td>[0.061704088002443314, -0.04553354158997536, -...</td>\n","      <td>neutral</td>\n","      <td>0.584490</td>\n","      <td>very true sir thats why they are against modi ...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>15</th>\n","      <td>they are giving jobs citizen india what you ar...</td>\n","      <td>[0.05342026799917221, -0.003889711806550622, -...</td>\n","      <td>neutral</td>\n","      <td>0.574127</td>\n","      <td>they are giving jobs citizen india what you ar...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>16</th>\n","      <td>congress has always attempted empower people g...</td>\n","      <td>[0.027197618037462234, -0.036435648798942566, ...</td>\n","      <td>positive</td>\n","      <td>0.602392</td>\n","      <td>congress has always attempted empower people g...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>17</th>\n","      <td>have never said that modi succeed yet even als...</td>\n","      <td>[0.06601183861494064, -0.020045211538672447, -...</td>\n","      <td>positive</td>\n","      <td>0.606807</td>\n","      <td>have never said that modi succeed yet even als...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>18</th>\n","      <td>\\r\\nthe foundation for new india 2022 has alre...</td>\n","      <td>[0.04694363474845886, -0.06800008565187454, -0...</td>\n","      <td>neutral</td>\n","      <td>0.599807</td>\n","      <td>the foundation for new 
india 2022 has already ...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>19</th>\n","      <td>only rahul gandhis politics love can defeat th...</td>\n","      <td>[0.05615750327706337, -0.002462629694491625, -...</td>\n","      <td>positive</td>\n","      <td>0.602275</td>\n","      <td>only rahul gandhis politics love can defeat th...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>20</th>\n","      <td>one step time navigating thru looteyns when ev...</td>\n","      <td>[0.030352214351296425, -0.06195472553372383, 0...</td>\n","      <td>neutral</td>\n","      <td>0.570779</td>\n","      <td>one step time navigating thru looteyns when ev...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>21</th>\n","      <td>why sir mam shabana azami hate much that have ...</td>\n","      <td>[0.07535804808139801, -0.05643236264586449, -0...</td>\n","      <td>neutral</td>\n","      <td>0.571882</td>\n","      <td>why sir mam shabana azami hate much that have ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>22</th>\n","      <td>modi will remain for next 510 years and till t...</td>\n","      <td>[0.05986170098185539, -0.0674145296216011, -0....</td>\n","      <td>neutral</td>\n","      <td>0.591540</td>\n","      <td>modi will remain for next 510 years and till t...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>23</th>\n","      <td>pledge your first vote for modi</td>\n","      <td>[0.023959940299391747, -0.013972461223602295, ...</td>\n","      <td>positive</td>\n","      <td>0.606293</td>\n","      <td>pledge your first vote for modi</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>24</th>\n","      <td>why need modi lead bjp government again 2019 j...</td>\n","      <td>[0.04451165348291397, -0.06473662704229355, -0...</td>\n","      <td>positive</td>\n","      <td>0.609683</td>\n","      <td>why need modi lead 
bjp government again 2019 j...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>25</th>\n","      <td>raghuram rajan sent list high profile bank fra...</td>\n","      <td>[0.06561190634965897, -0.0614917054772377, -0....</td>\n","      <td>neutral</td>\n","      <td>0.578591</td>\n","      <td>raghuram rajan sent list high profile bank fra...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>26</th>\n","      <td>modi govts slashing indias education budget cl...</td>\n","      <td>[0.05217093601822853, -0.05785880982875824, -0...</td>\n","      <td>neutral</td>\n","      <td>0.594771</td>\n","      <td>modi govts slashing indias education budget cl...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>27</th>\n","      <td>why are you hell bent manoj tiwari just her ph...</td>\n","      <td>[0.04579753428697586, -0.05176748335361481, -0...</td>\n","      <td>positive</td>\n","      <td>0.600511</td>\n","      <td>why are you hell bent manoj tiwari just her ph...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>28</th>\n","      <td>know going into dirty details nehru family its...</td>\n","      <td>[0.047987841069698334, -0.050984784960746765, ...</td>\n","      <td>neutral</td>\n","      <td>0.533372</td>\n","      <td>know going into dirty details nehru family its...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>29</th>\n","      <td>momota begum will let her state become total s...</td>\n","      <td>[0.04509664326906204, -0.05019481107592583, -0...</td>\n","      <td>neutral</td>\n","      <td>0.593740</td>\n","      <td>momota begum will let her state become total s...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>30</th>\n","      <td>thanks anu sharma will vote and make sure peop...</td>\n","      <td>[0.04315190762281418, -0.04578147828578949, -0...</td>\n","      <td>positive</td>\n","      
<td>0.601758</td>\n","      <td>thanks anu sharma will vote and make sure peop...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>31</th>\n","      <td>those who themselves dont know how many father...</td>\n","      <td>[0.0144237345084548, -0.052222371101379395, -0...</td>\n","      <td>neutral</td>\n","      <td>0.589971</td>\n","      <td>those who themselves dont know how many father...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>32</th>\n","      <td>the star campaigner myth bjp lost more than as...</td>\n","      <td>[0.02492097206413746, -0.0531931146979332, -0....</td>\n","      <td>positive</td>\n","      <td>0.607886</td>\n","      <td>the star campaigner myth bjp lost more than as...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>33</th>\n","      <td>modi also live for few years only like you not...</td>\n","      <td>[0.040389616042375565, -0.06375984847545624, -...</td>\n","      <td>positive</td>\n","      <td>0.612952</td>\n","      <td>modi also live for few years only like you not...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>34</th>\n","      <td>narendra modi more brainy than all the drswamy...</td>\n","      <td>[0.06742898374795914, -0.060488566756248474, -...</td>\n","      <td>positive</td>\n","      <td>0.621238</td>\n","      <td>narendra modi more brainy than all the drswamy...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>35</th>\n","      <td>have started calling chowkidaar narendra modi ...</td>\n","      <td>[0.06360629200935364, -0.06786973774433136, -0...</td>\n","      <td>neutral</td>\n","      <td>0.593359</td>\n","      <td>have started calling chowkidaar narendra modi ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>36</th>\n","      <td>this the difference confident leaders call upo...</td>\n","      <td>[0.024233123287558556, -0.05243394151329994, 
-...</td>\n","      <td>positive</td>\n","      <td>0.610326</td>\n","      <td>this the difference confident leaders call upo...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>37</th>\n","      <td>jawans killed the border\\r\\ncrimes against wom...</td>\n","      <td>[0.03928006440401077, -0.051466524600982666, -...</td>\n","      <td>neutral</td>\n","      <td>0.582484</td>\n","      <td>jawans killed the border crimes against women ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>38</th>\n","      <td>tag this fast growing youtuber cared abt this ...</td>\n","      <td>[0.05051109194755554, -0.0660049319267273, 0.0...</td>\n","      <td>neutral</td>\n","      <td>0.584719</td>\n","      <td>tag this fast growing youtuber cared abt this ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>39</th>\n","      <td>think hindus should back off and let them suff...</td>\n","      <td>[-0.010975896380841732, -0.059168506413698196,...</td>\n","      <td>neutral</td>\n","      <td>0.597051</td>\n","      <td>think hindus should back off and let them suff...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>40</th>\n","      <td>yes cannot make any knee jerk moves drastic ac...</td>\n","      <td>[0.023108134046196938, -0.027600249275565147, ...</td>\n","      <td>positive</td>\n","      <td>0.618802</td>\n","      <td>yes cannot make any knee jerk moves drastic ac...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>41</th>\n","      <td>why picked chairman the devious aadhaar isnt h...</td>\n","      <td>[0.043231260031461716, -0.07101075351238251, -...</td>\n","      <td>neutral</td>\n","      <td>0.581575</td>\n","      <td>why picked chairman the devious aadhaar isnt h...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>42</th>\n","      <td>due automation and artificial intelligence fur...</td>\n","      
<td>[0.04160398617386818, -0.06572042405605316, -0...</td>\n","      <td>neutral</td>\n","      <td>0.594700</td>\n","      <td>due automation and artificial intelligence fur...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>43</th>\n","      <td>weak state capacity exacerbated excessive acco...</td>\n","      <td>[-0.00038854932063259184, -0.04599419981241226...</td>\n","      <td>neutral</td>\n","      <td>0.593749</td>\n","      <td>weak state capacity exacerbated excessive acco...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>44</th>\n","      <td>our narendra modi ordered indian air force tak...</td>\n","      <td>[-0.02063656784594059, -0.07548005133867264, -...</td>\n","      <td>positive</td>\n","      <td>0.601453</td>\n","      <td>our narendra modi ordered indian air force tak...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>45</th>\n","      <td>why vote modi dynasty visionary 3no high level...</td>\n","      <td>[0.01779576763510704, -0.06789527833461761, -0...</td>\n","      <td>neutral</td>\n","      <td>0.579034</td>\n","      <td>why vote modi dynasty visionary 3no high level...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>46</th>\n","      <td>its modi chor corrupt maha thugbandhan janta w...</td>\n","      <td>[0.065566785633564, -0.04119298234581947, -0.0...</td>\n","      <td>positive</td>\n","      <td>0.602544</td>\n","      <td>its modi chor corrupt maha thugbandhan janta w...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>47</th>\n","      <td>before modis arrival 2014 all supported him fo...</td>\n","      <td>[0.03988223522901535, -0.04965453967452049, -0...</td>\n","      <td>positive</td>\n","      <td>0.604502</td>\n","      <td>before modis arrival 2014 all supported him fo...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>think you forgot dollar 
india handled exceptio...</td>\n","      <td>[0.01084248349070549, 0.013633836060762405, -0...</td>\n","      <td>neutral</td>\n","      <td>0.598473</td>\n","      <td>think you forgot dollar india handled exceptio...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>tulsi gabbard rejected interviews with tyt but...</td>\n","      <td>[-0.01967957802116871, 0.05570048466324806, -0...</td>\n","      <td>positive</td>\n","      <td>0.621699</td>\n","      <td>tulsi gabbard rejected interviews with tyt but...</td>\n","      <td>positive</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                                           text  ...         y\n","origin_index                                                     ...          \n","0             how narendra modi has almost killed the indian...  ...  negative\n","1                       you think was modi behind that accident  ...  negative\n","2             kamal haasan takes chowkidar modi kamal haasan...  ...  negative\n","3             connected name with surname not bcz religion c...  ...  negative\n","4             anyone better than modi when nehruji expired s...  ...  positive\n","5             \\r\\nmodiji wont tired crying foul\\r\\nmain chow...  ...  negative\n","6             poor chap modi hasnâ given him anything can ...  ...  negative\n","7             green underwear missing ive been doubting isi ...  ...  negative\n","8             congress years wasnt able complete one rafale ...  ...  positive\n","9             asked learn from how treat minority well does ...  ...  negative\n","10            stop bull shitting worry about criminal vivek ...  ...  negative\n","11            drswamys timesnow last year debate nearly mill...  ...  positive\n","12            asshole bahujan radical marxist grow brain kno...  ...  negative\n","13            from selling dreams 2014 selling tshirts 2019 ...  ...  
positive\n","14            very true sir thats why they are against modi ...  ...  positive\n","15            they are giving jobs citizen india what you ar...  ...  negative\n","16            congress has always attempted empower people g...  ...  negative\n","17            have never said that modi succeed yet even als...  ...  positive\n","18            \\r\\nthe foundation for new india 2022 has alre...  ...  positive\n","19            only rahul gandhis politics love can defeat th...  ...  negative\n","20            one step time navigating thru looteyns when ev...  ...  negative\n","21            why sir mam shabana azami hate much that have ...  ...  negative\n","22            modi will remain for next 510 years and till t...  ...  negative\n","23                             pledge your first vote for modi   ...  positive\n","24            why need modi lead bjp government again 2019 j...  ...  positive\n","25            raghuram rajan sent list high profile bank fra...  ...  negative\n","26            modi govts slashing indias education budget cl...  ...  negative\n","27            why are you hell bent manoj tiwari just her ph...  ...  positive\n","28            know going into dirty details nehru family its...  ...  negative\n","29            momota begum will let her state become total s...  ...  negative\n","30            thanks anu sharma will vote and make sure peop...  ...  positive\n","31            those who themselves dont know how many father...  ...  positive\n","32            the star campaigner myth bjp lost more than as...  ...  positive\n","33            modi also live for few years only like you not...  ...  negative\n","34            narendra modi more brainy than all the drswamy...  ...  positive\n","35            have started calling chowkidaar narendra modi ...  ...  negative\n","36            this the difference confident leaders call upo...  ...  positive\n","37            jawans killed the border\\r\\ncrimes against wom...  ...  
negative\n","38            tag this fast growing youtuber cared abt this ...  ...  negative\n","39            think hindus should back off and let them suff...  ...  positive\n","40            yes cannot make any knee jerk moves drastic ac...  ...  positive\n","41            why picked chairman the devious aadhaar isnt h...  ...  negative\n","42            due automation and artificial intelligence fur...  ...  positive\n","43            weak state capacity exacerbated excessive acco...  ...  positive\n","44            our narendra modi ordered indian air force tak...  ...  positive\n","45            why vote modi dynasty visionary 3no high level...  ...  negative\n","46            its modi chor corrupt maha thugbandhan janta w...  ...  negative\n","47            before modis arrival 2014 all supported him fo...  ...  positive\n","48            think you forgot dollar india handled exceptio...  ...  positive\n","49            tulsi gabbard rejected interviews with tyt but...  ...  positive\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":109},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609553664952,"user_tz":-300,"elapsed":194919,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"8f463f08-944f-45dc-f463-e381c05f89db"},"source":["fitted_pipe.predict('the president of india just died')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" 
class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>default_name_embeddings</th>\n","      <th>sentiment</th>\n","      <th>sentiment_confidence</th>\n","      <th>document</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>[0.06468033790588379, -0.040837567299604416, -...</td>\n","      <td>neutral</td>\n","      <td>0.562996</td>\n","      <td>Bitcoin is going to the moon!</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                        default_name_embeddings  ...                       document\n","origin_index                                                     ...                               \n","0             [0.06468033790588379, -0.040837567299604416, -...  ...  Bitcoin is going to the moon!\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609553664954,"user_tz":-300,"elapsed":194907,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"7be62829-d712-4afd-900f-fd655e8282d7"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2)                 | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005)                    | Info: Learning Rate | Currently set to : 
0.005\n","pipe['sentiment_dl'].setBatchSize(64)                | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5)                 | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True)       | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6)               | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral')    | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512)               | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use')      | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True)  | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True)       | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([])        | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False)  | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0)            | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999)        | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+')    | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"])  | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0)            | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999)        | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')  | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609553671081,"user_tz":-300,"elapsed":201019,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"602947fd-13b8-438e-d5d1-64df15c2096b"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5)  \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = 
fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["              precision    recall  f1-score   support\n","\n","    negative       0.79      0.96      0.87        27\n","     neutral       0.00      0.00      0.00         0\n","    positive       1.00      0.09      0.16        23\n","\n","    accuracy                           0.56        50\n","   macro avg       0.60      0.35      0.34        50\n","weighted avg       0.89      0.56      0.54        50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>default_name_embeddings</th>\n","      <th>sentiment</th>\n","      <th>sentiment_confidence</th>\n","      <th>document</th>\n","      <th>y</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>how narendra modi has almost killed the indian...</td>\n","      <td>[0.060062434524297714, -0.05557167902588844, -...</td>\n","      <td>negative</td>\n","      <td>0.689142</td>\n","      <td>how narendra modi has almost killed the indian...</td>\n","      <td>negative</td>\n","    </tr>\n","    
<tr>\n","      <th>1</th>\n","      <td>you think was modi behind that accident</td>\n","      <td>[0.05362718179821968, -0.004547705873847008, -...</td>\n","      <td>negative</td>\n","      <td>0.689483</td>\n","      <td>you think was modi behind that accident</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>kamal haasan takes chowkidar modi kamal haasan...</td>\n","      <td>[0.07274721562862396, -0.061593908816576004, -...</td>\n","      <td>negative</td>\n","      <td>0.707988</td>\n","      <td>kamal haasan takes chowkidar modi kamal haasan...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>connected name with surname not bcz religion c...</td>\n","      <td>[0.06106054410338402, -0.060213156044483185, -...</td>\n","      <td>negative</td>\n","      <td>0.675382</td>\n","      <td>connected name with surname not bcz religion c...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>anyone better than modi when nehruji expired s...</td>\n","      <td>[0.0737471655011177, 0.006071773823350668, -0....</td>\n","      <td>negative</td>\n","      <td>0.638730</td>\n","      <td>anyone better than modi when nehruji expired s...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>5</th>\n","      <td>\\r\\nmodiji wont tired crying foul\\r\\nmain chow...</td>\n","      <td>[0.05888385698199272, -0.0646616593003273, -0....</td>\n","      <td>negative</td>\n","      <td>0.723110</td>\n","      <td>modiji wont tired crying foul main chowkidar h...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>6</th>\n","      <td>poor chap modi hasnâ given him anything can ...</td>\n","      <td>[0.058948416262865067, -0.029682165011763573, ...</td>\n","      <td>negative</td>\n","      <td>0.690602</td>\n","      <td>poor chap modi hasnâ given him anything can ...</td>\n","      
<td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>7</th>\n","      <td>green underwear missing ive been doubting isi ...</td>\n","      <td>[0.05133155733346939, -0.06789954006671906, -0...</td>\n","      <td>negative</td>\n","      <td>0.705077</td>\n","      <td>green underwear missing ive been doubting isi ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>8</th>\n","      <td>congress years wasnt able complete one rafale ...</td>\n","      <td>[0.044129759073257446, -0.06111813709139824, -...</td>\n","      <td>neutral</td>\n","      <td>0.561979</td>\n","      <td>congress years wasnt able complete one rafale ...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>9</th>\n","      <td>asked learn from how treat minority well does ...</td>\n","      <td>[0.03665374591946602, -0.03695330768823624, -0...</td>\n","      <td>negative</td>\n","      <td>0.746584</td>\n","      <td>asked learn from how treat minority well does ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>10</th>\n","      <td>stop bull shitting worry about criminal vivek ...</td>\n","      <td>[0.07035735249519348, -0.06952506303787231, -0...</td>\n","      <td>negative</td>\n","      <td>0.768111</td>\n","      <td>stop bull shitting worry about criminal vivek ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>11</th>\n","      <td>drswamys timesnow last year debate nearly mill...</td>\n","      <td>[0.013958276249468327, -0.030759528279304504, ...</td>\n","      <td>neutral</td>\n","      <td>0.511294</td>\n","      <td>drswamys timesnow last year debate nearly mill...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>12</th>\n","      <td>asshole bahujan radical marxist grow brain kno...</td>\n","      <td>[0.026277026161551476, -0.06238812580704689, -...</td>\n","      <td>negative</td>\n","      <td>0.689268</td>\n","      <td>asshole bahujan radical 
marxist grow brain kno...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>13</th>\n","      <td>from selling dreams 2014 selling tshirts 2019 ...</td>\n","      <td>[0.07457270473241806, -0.058670494705438614, -...</td>\n","      <td>negative</td>\n","      <td>0.641822</td>\n","      <td>from selling dreams 2014 selling tshirts 2019 ...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>14</th>\n","      <td>very true sir thats why they are against modi ...</td>\n","      <td>[0.061704088002443314, -0.04553354158997536, -...</td>\n","      <td>negative</td>\n","      <td>0.651231</td>\n","      <td>very true sir thats why they are against modi ...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>15</th>\n","      <td>they are giving jobs citizen india what you ar...</td>\n","      <td>[0.05342026799917221, -0.003889711806550622, -...</td>\n","      <td>negative</td>\n","      <td>0.706768</td>\n","      <td>they are giving jobs citizen india what you ar...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>16</th>\n","      <td>congress has always attempted empower people g...</td>\n","      <td>[0.027197618037462234, -0.036435648798942566, ...</td>\n","      <td>negative</td>\n","      <td>0.607062</td>\n","      <td>congress has always attempted empower people g...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>17</th>\n","      <td>have never said that modi succeed yet even als...</td>\n","      <td>[0.06601183861494064, -0.020045211538672447, -...</td>\n","      <td>negative</td>\n","      <td>0.628577</td>\n","      <td>have never said that modi succeed yet even als...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>18</th>\n","      <td>\\r\\nthe foundation for new india 2022 has alre...</td>\n","      <td>[0.04694363474845886, -0.06800008565187454, -0...</td>\n","      <td>neutral</td>\n","      
<td>0.547697</td>\n","      <td>the foundation for new india 2022 has already ...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>19</th>\n","      <td>only rahul gandhis politics love can defeat th...</td>\n","      <td>[0.05615750327706337, -0.002462629694491625, -...</td>\n","      <td>negative</td>\n","      <td>0.632572</td>\n","      <td>only rahul gandhis politics love can defeat th...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>20</th>\n","      <td>one step time navigating thru looteyns when ev...</td>\n","      <td>[0.030352214351296425, -0.06195472553372383, 0...</td>\n","      <td>negative</td>\n","      <td>0.635106</td>\n","      <td>one step time navigating thru looteyns when ev...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>21</th>\n","      <td>why sir mam shabana azami hate much that have ...</td>\n","      <td>[0.07535804808139801, -0.05643236264586449, -0...</td>\n","      <td>negative</td>\n","      <td>0.738669</td>\n","      <td>why sir mam shabana azami hate much that have ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>22</th>\n","      <td>modi will remain for next 510 years and till t...</td>\n","      <td>[0.05986170098185539, -0.0674145296216011, -0....</td>\n","      <td>negative</td>\n","      <td>0.659078</td>\n","      <td>modi will remain for next 510 years and till t...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>23</th>\n","      <td>pledge your first vote for modi</td>\n","      <td>[0.023959940299391747, -0.013972461223602295, ...</td>\n","      <td>neutral</td>\n","      <td>0.555447</td>\n","      <td>pledge your first vote for modi</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>24</th>\n","      <td>why need modi lead bjp government again 2019 j...</td>\n","      <td>[0.04451165348291397, -0.06473662704229355, -0...</td>\n","      <td>neutral</td>\n","    
  <td>0.578395</td>\n","      <td>why need modi lead bjp government again 2019 j...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>25</th>\n","      <td>raghuram rajan sent list high profile bank fra...</td>\n","      <td>[0.06561190634965897, -0.0614917054772377, -0....</td>\n","      <td>negative</td>\n","      <td>0.706507</td>\n","      <td>raghuram rajan sent list high profile bank fra...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>26</th>\n","      <td>modi govts slashing indias education budget cl...</td>\n","      <td>[0.05217093601822853, -0.05785880982875824, -0...</td>\n","      <td>negative</td>\n","      <td>0.607360</td>\n","      <td>modi govts slashing indias education budget cl...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>27</th>\n","      <td>why are you hell bent manoj tiwari just her ph...</td>\n","      <td>[0.04579753428697586, -0.05176748335361481, -0...</td>\n","      <td>neutral</td>\n","      <td>0.588993</td>\n","      <td>why are you hell bent manoj tiwari just her ph...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>28</th>\n","      <td>know going into dirty details nehru family its...</td>\n","      <td>[0.047987841069698334, -0.050984784960746765, ...</td>\n","      <td>negative</td>\n","      <td>0.753084</td>\n","      <td>know going into dirty details nehru family its...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>29</th>\n","      <td>momota begum will let her state become total s...</td>\n","      <td>[0.04509664326906204, -0.05019481107592583, -0...</td>\n","      <td>negative</td>\n","      <td>0.615988</td>\n","      <td>momota begum will let her state become total s...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>30</th>\n","      <td>thanks anu sharma will vote and make sure peop...</td>\n","      <td>[0.04315190762281418, -0.04578147828578949, 
-0...</td>\n","      <td>neutral</td>\n","      <td>0.555271</td>\n","      <td>thanks anu sharma will vote and make sure peop...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>31</th>\n","      <td>those who themselves dont know how many father...</td>\n","      <td>[0.0144237345084548, -0.052222371101379395, -0...</td>\n","      <td>negative</td>\n","      <td>0.631877</td>\n","      <td>those who themselves dont know how many father...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>32</th>\n","      <td>the star campaigner myth bjp lost more than as...</td>\n","      <td>[0.02492097206413746, -0.0531931146979332, -0....</td>\n","      <td>neutral</td>\n","      <td>0.586682</td>\n","      <td>the star campaigner myth bjp lost more than as...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>33</th>\n","      <td>modi also live for few years only like you not...</td>\n","      <td>[0.040389616042375565, -0.06375984847545624, -...</td>\n","      <td>neutral</td>\n","      <td>0.587196</td>\n","      <td>modi also live for few years only like you not...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>34</th>\n","      <td>narendra modi more brainy than all the drswamy...</td>\n","      <td>[0.06742898374795914, -0.060488566756248474, -...</td>\n","      <td>neutral</td>\n","      <td>0.533663</td>\n","      <td>narendra modi more brainy than all the drswamy...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>35</th>\n","      <td>have started calling chowkidaar narendra modi ...</td>\n","      <td>[0.06360629200935364, -0.06786973774433136, -0...</td>\n","      <td>negative</td>\n","      <td>0.672972</td>\n","      <td>have started calling chowkidaar narendra modi ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>36</th>\n","      <td>this the difference confident leaders call upo...</td>\n","      
<td>[0.024233123287558556, -0.05243394151329994, -...</td>\n","      <td>neutral</td>\n","      <td>0.510922</td>\n","      <td>this the difference confident leaders call upo...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>37</th>\n","      <td>jawans killed the border\\r\\ncrimes against wom...</td>\n","      <td>[0.03928006440401077, -0.051466524600982666, -...</td>\n","      <td>negative</td>\n","      <td>0.701794</td>\n","      <td>jawans killed the border crimes against women ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>38</th>\n","      <td>tag this fast growing youtuber cared abt this ...</td>\n","      <td>[0.05051109194755554, -0.0660049319267273, 0.0...</td>\n","      <td>negative</td>\n","      <td>0.714883</td>\n","      <td>tag this fast growing youtuber cared abt this ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>39</th>\n","      <td>think hindus should back off and let them suff...</td>\n","      <td>[-0.010975896380841732, -0.059168506413698196,...</td>\n","      <td>neutral</td>\n","      <td>0.553189</td>\n","      <td>think hindus should back off and let them suff...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>40</th>\n","      <td>yes cannot make any knee jerk moves drastic ac...</td>\n","      <td>[0.023108134046196938, -0.027600249275565147, ...</td>\n","      <td>positive</td>\n","      <td>0.671809</td>\n","      <td>yes cannot make any knee jerk moves drastic ac...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>41</th>\n","      <td>why picked chairman the devious aadhaar isnt h...</td>\n","      <td>[0.043231260031461716, -0.07101075351238251, -...</td>\n","      <td>negative</td>\n","      <td>0.709371</td>\n","      <td>why picked chairman the devious aadhaar isnt h...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>42</th>\n","      <td>due automation and 
artificial intelligence fur...</td>\n","      <td>[0.04160398617386818, -0.06572042405605316, -0...</td>\n","      <td>neutral</td>\n","      <td>0.553482</td>\n","      <td>due automation and artificial intelligence fur...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>43</th>\n","      <td>weak state capacity exacerbated excessive acco...</td>\n","      <td>[-0.00038854932063259184, -0.04599419981241226...</td>\n","      <td>negative</td>\n","      <td>0.609747</td>\n","      <td>weak state capacity exacerbated excessive acco...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>44</th>\n","      <td>our narendra modi ordered indian air force tak...</td>\n","      <td>[-0.02063656784594059, -0.07548005133867264, -...</td>\n","      <td>neutral</td>\n","      <td>0.513191</td>\n","      <td>our narendra modi ordered indian air force tak...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>45</th>\n","      <td>why vote modi dynasty visionary 3no high level...</td>\n","      <td>[0.01779576763510704, -0.06789527833461761, -0...</td>\n","      <td>negative</td>\n","      <td>0.635148</td>\n","      <td>why vote modi dynasty visionary 3no high level...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>46</th>\n","      <td>its modi chor corrupt maha thugbandhan janta w...</td>\n","      <td>[0.065566785633564, -0.04119298234581947, -0.0...</td>\n","      <td>negative</td>\n","      <td>0.687171</td>\n","      <td>its modi chor corrupt maha thugbandhan janta w...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>47</th>\n","      <td>before modis arrival 2014 all supported him fo...</td>\n","      <td>[0.03988223522901535, -0.04965453967452049, -0...</td>\n","      <td>neutral</td>\n","      <td>0.557571</td>\n","      <td>before modis arrival 2014 all supported him fo...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      
<th>48</th>\n","      <td>think you forgot dollar india handled exceptio...</td>\n","      <td>[0.01084248349070549, 0.013633836060762405, -0...</td>\n","      <td>negative</td>\n","      <td>0.615532</td>\n","      <td>think you forgot dollar india handled exceptio...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>tulsi gabbard rejected interviews with tyt but...</td>\n","      <td>[-0.01967957802116871, 0.05570048466324806, -0...</td>\n","      <td>positive</td>\n","      <td>0.604604</td>\n","      <td>tulsi gabbard rejected interviews with tyt but...</td>\n","      <td>positive</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                                           text  ...         y\n","origin_index                                                     ...          \n","0             how narendra modi has almost killed the indian...  ...  negative\n","1                       you think was modi behind that accident  ...  negative\n","2             kamal haasan takes chowkidar modi kamal haasan...  ...  negative\n","3             connected name with surname not bcz religion c...  ...  negative\n","4             anyone better than modi when nehruji expired s...  ...  positive\n","5             \\r\\nmodiji wont tired crying foul\\r\\nmain chow...  ...  negative\n","6             poor chap modi hasnâ given him anything can ...  ...  negative\n","7             green underwear missing ive been doubting isi ...  ...  negative\n","8             congress years wasnt able complete one rafale ...  ...  positive\n","9             asked learn from how treat minority well does ...  ...  negative\n","10            stop bull shitting worry about criminal vivek ...  ...  negative\n","11            drswamys timesnow last year debate nearly mill...  ...  positive\n","12            asshole bahujan radical marxist grow brain kno...  ...  
negative\n","13            from selling dreams 2014 selling tshirts 2019 ...  ...  positive\n","14            very true sir thats why they are against modi ...  ...  positive\n","15            they are giving jobs citizen india what you ar...  ...  negative\n","16            congress has always attempted empower people g...  ...  negative\n","17            have never said that modi succeed yet even als...  ...  positive\n","18            \\r\\nthe foundation for new india 2022 has alre...  ...  positive\n","19            only rahul gandhis politics love can defeat th...  ...  negative\n","20            one step time navigating thru looteyns when ev...  ...  negative\n","21            why sir mam shabana azami hate much that have ...  ...  negative\n","22            modi will remain for next 510 years and till t...  ...  negative\n","23                             pledge your first vote for modi   ...  positive\n","24            why need modi lead bjp government again 2019 j...  ...  positive\n","25            raghuram rajan sent list high profile bank fra...  ...  negative\n","26            modi govts slashing indias education budget cl...  ...  negative\n","27            why are you hell bent manoj tiwari just her ph...  ...  positive\n","28            know going into dirty details nehru family its...  ...  negative\n","29            momota begum will let her state become total s...  ...  negative\n","30            thanks anu sharma will vote and make sure peop...  ...  positive\n","31            those who themselves dont know how many father...  ...  positive\n","32            the star campaigner myth bjp lost more than as...  ...  positive\n","33            modi also live for few years only like you not...  ...  negative\n","34            narendra modi more brainy than all the drswamy...  ...  positive\n","35            have started calling chowkidaar narendra modi ...  ...  negative\n","36            this the difference confident leaders call upo...  ...  
positive\n","37            jawans killed the border\\r\\ncrimes against wom...  ...  negative\n","38            tag this fast growing youtuber cared abt this ...  ...  negative\n","39            think hindus should back off and let them suff...  ...  positive\n","40            yes cannot make any knee jerk moves drastic ac...  ...  positive\n","41            why picked chairman the devious aadhaar isnt h...  ...  negative\n","42            due automation and artificial intelligence fur...  ...  positive\n","43            weak state capacity exacerbated excessive acco...  ...  positive\n","44            our narendra modi ordered indian air force tak...  ...  positive\n","45            why vote modi dynasty visionary 3no high level...  ...  negative\n","46            its modi chor corrupt maha thugbandhan janta w...  ...  negative\n","47            before modis arrival 2014 all supported him fo...  ...  positive\n","48            think you forgot dollar india handled exceptio...  ...  positive\n","49            tulsi gabbard rejected interviews with tyt but...  ...  positive\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nxWFzQOhjWC8","executionInfo":{"status":"ok","timestamp":1609553671091,"user_tz":-300,"elapsed":200991,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"2f071682-e615-4556-b813-a56f405ff9c3"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language <en> NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language <fi> NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language <xx> NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"IKK_Ii_gjJfF","executionInfo":{"status":"ok","timestamp":1609554113187,"user_tz":-300,"elapsed":140893,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"413eef4f-f423-439b-ad57-2ccfcf4bbe62"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train 
longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(100)  \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    negative       0.81      0.66      0.73       300\n","     neutral       0.00      0.00      0.00         0\n","    positive       0.85      0.59      0.69       300\n","\n","    accuracy                           0.62       600\n","   macro avg       0.55      0.42      0.47       600\n","weighted avg       0.83      0.62      0.71       600\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. 
Lets save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","executionInfo":{"status":"ok","timestamp":1609554302650,"user_tz":-300,"elapsed":189472,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"858c8cf2-ba4d-48fc-b333-e4b2819dadb2"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible!   \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('the president of india just died')\n","preds"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"e0CVlkk9v6Qi","executionInfo":{"status":"ok","timestamp":1609554388428,"user_tz":-300,"elapsed":879,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"212c87f7-8200-4646-cfcd-5bae608b3848"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')            | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)       | Info: Whether to care for case 
sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')                | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)                    | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)                        | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])                  | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)                 | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False)           | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)                  | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)                      | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)            | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)        | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32)                                 | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False)                          | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768)                                | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128)                        | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False)                                 | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768')         | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6)                         | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral')              | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative'])      | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768')  | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]}
\ No newline at end of file
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_twitter.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_twitter.ipynb)\n","\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","## 2 class twitter classifier training\n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install  pyspark==2.4.7 \n","! pip install nlu > /dev/null    \n","\n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download Twitter Sentiment dataset \n","https://www.kaggle.com/cosmos98/twitter-and-reddit-sentimental-analysis-dataset\n","## Context\n","\n","This dataset was created as part of a university project on sentiment analysis on multi-source social media platforms using PySpark."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610788006096,"user_tz":-300,"elapsed":2486,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"610519c2-4cf5-4835-d1aa-8da2f83fadf7"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/Twitter_Data.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:06:08--  http://ckl-it.de/wp-content/uploads/2021/01/Twitter_Data.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 99657 (97K) [text/csv]\n","Saving to: ‘Twitter_Data.csv’\n","\n","Twitter_Data.csv    100%[===================>]  97.32K   122KB/s    in 0.8s    \n","\n","2021-01-16 09:06:10 (122 KB/s) - ‘Twitter_Data.csv’ saved [99657/99657]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610788009315,"user_tz":-300,"elapsed":1404,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b73ee311-b15e-4fc3-a5b5-5772f87dac99"},"source":["import pandas as pd\n","train_path = '/content/Twitter_Data.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must have name 'y' name be of type str\n","columns=['text','y']\n","train_df = train_df[columns]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>y</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>how narendra modi has almost killed the indian...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>you think was modi behind that accident</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>kamal haasan takes chowkidar modi kamal haasan...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      
<td>connected name with surname not bcz religion c...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>anyone better than modi when nehruji expired s...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>595</th>\n","      <td>perception makes fool some call âforeign inv...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>596</th>\n","      <td>when will see your tweet for justice for you a...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>597</th>\n","      <td>haha congress going gaga over this after looti...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>598</th>\n","      <td>this movie shows the life histiry narendra mod...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>599</th>\n","      <td>modi left his year old wife and returned her r...</td>\n","      <td>positive</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>600 rows × 2 columns</p>\n","</div>"],"text/plain":["                                                  text         y\n","0    how narendra modi has almost killed the indian...  negative\n","1              you think was modi behind that accident  negative\n","2    kamal haasan takes chowkidar modi kamal haasan...  negative\n","3    connected name with surname not bcz religion c...  negative\n","4    anyone better than modi when nehruji expired s...  positive\n","..                                                 ...       ...\n","595  perception makes fool some call âforeign inv...  negative\n","596  when will see your tweet for justice for you a...  negative\n","597  haha congress going gaga over this after looti...  positive\n","598  this movie shows the life histiry narendra mod...  negative\n","599  modi left his year old wife and returned her r...  
positive\n","\n","[600 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609553662416,"user_tz":-300,"elapsed":192414,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a077e55a-4805-43a2-fb11-46074b487e2e"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix  and fit it on a dataset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates some NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    negative       0.00      0.00      0.00        27\n","     neutral       0.00      0.00      0.00         0\n","    positive       0.78      0.61      0.68        23\n","\n","    accuracy                           0.28        50\n","   macro avg       0.26      0.20      0.23        50\n","weighted avg       0.36      0.28      0.31        50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>default_name_embeddings</th>\n","      <th>sentiment</th>\n","      <th>sentiment_confidence</th>\n","      <th>document</th>\n","      <th>y</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>how narendra modi has almost killed the indian...</td>\n","      <td>[0.060062434524297714, -0.05557167902588844, -...</td>\n","      <td>neutral</td>\n","      <td>0.590739</td>\n","      <td>how narendra modi has almost killed the indian...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      
<td>you think was modi behind that accident</td>\n","      <td>[0.05362718179821968, -0.004547705873847008, -...</td>\n","      <td>neutral</td>\n","      <td>0.577544</td>\n","      <td>you think was modi behind that accident</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>kamal haasan takes chowkidar modi kamal haasan...</td>\n","      <td>[0.07274721562862396, -0.061593908816576004, -...</td>\n","      <td>neutral</td>\n","      <td>0.585589</td>\n","      <td>kamal haasan takes chowkidar modi kamal haasan...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>connected name with surname not bcz religion c...</td>\n","      <td>[0.06106054410338402, -0.060213156044483185, -...</td>\n","      <td>neutral</td>\n","      <td>0.554036</td>\n","      <td>connected name with surname not bcz religion c...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>anyone better than modi when nehruji expired s...</td>\n","      <td>[0.0737471655011177, 0.006071773823350668, -0....</td>\n","      <td>neutral</td>\n","      <td>0.595608</td>\n","      <td>anyone better than modi when nehruji expired s...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>5</th>\n","      <td>\\r\\nmodiji wont tired crying foul\\r\\nmain chow...</td>\n","      <td>[0.05888385698199272, -0.0646616593003273, -0....</td>\n","      <td>neutral</td>\n","      <td>0.583403</td>\n","      <td>modiji wont tired crying foul main chowkidar h...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>6</th>\n","      <td>poor chap modi hasnâ given him anything can ...</td>\n","      <td>[0.058948416262865067, -0.029682165011763573, ...</td>\n","      <td>neutral</td>\n","      <td>0.578272</td>\n","      <td>poor chap modi hasnâ given him anything can ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      
<th>7</th>\n","      <td>green underwear missing ive been doubting isi ...</td>\n","      <td>[0.05133155733346939, -0.06789954006671906, -0...</td>\n","      <td>neutral</td>\n","      <td>0.575918</td>\n","      <td>green underwear missing ive been doubting isi ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>8</th>\n","      <td>congress years wasnt able complete one rafale ...</td>\n","      <td>[0.044129759073257446, -0.06111813709139824, -...</td>\n","      <td>positive</td>\n","      <td>0.605829</td>\n","      <td>congress years wasnt able complete one rafale ...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>9</th>\n","      <td>asked learn from how treat minority well does ...</td>\n","      <td>[0.03665374591946602, -0.03695330768823624, -0...</td>\n","      <td>neutral</td>\n","      <td>0.534121</td>\n","      <td>asked learn from how treat minority well does ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>10</th>\n","      <td>stop bull shitting worry about criminal vivek ...</td>\n","      <td>[0.07035735249519348, -0.06952506303787231, -0...</td>\n","      <td>neutral</td>\n","      <td>0.539481</td>\n","      <td>stop bull shitting worry about criminal vivek ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>11</th>\n","      <td>drswamys timesnow last year debate nearly mill...</td>\n","      <td>[0.013958276249468327, -0.030759528279304504, ...</td>\n","      <td>positive</td>\n","      <td>0.613331</td>\n","      <td>drswamys timesnow last year debate nearly mill...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>12</th>\n","      <td>asshole bahujan radical marxist grow brain kno...</td>\n","      <td>[0.026277026161551476, -0.06238812580704689, -...</td>\n","      <td>neutral</td>\n","      <td>0.587796</td>\n","      <td>asshole bahujan radical marxist grow brain kno...</td>\n","      
<td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>13</th>\n","      <td>from selling dreams 2014 selling tshirts 2019 ...</td>\n","      <td>[0.07457270473241806, -0.058670494705438614, -...</td>\n","      <td>neutral</td>\n","      <td>0.584601</td>\n","      <td>from selling dreams 2014 selling tshirts 2019 ...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>14</th>\n","      <td>very true sir thats why they are against modi ...</td>\n","      <td>[0.061704088002443314, -0.04553354158997536, -...</td>\n","      <td>neutral</td>\n","      <td>0.584490</td>\n","      <td>very true sir thats why they are against modi ...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>15</th>\n","      <td>they are giving jobs citizen india what you ar...</td>\n","      <td>[0.05342026799917221, -0.003889711806550622, -...</td>\n","      <td>neutral</td>\n","      <td>0.574127</td>\n","      <td>they are giving jobs citizen india what you ar...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>16</th>\n","      <td>congress has always attempted empower people g...</td>\n","      <td>[0.027197618037462234, -0.036435648798942566, ...</td>\n","      <td>positive</td>\n","      <td>0.602392</td>\n","      <td>congress has always attempted empower people g...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>17</th>\n","      <td>have never said that modi succeed yet even als...</td>\n","      <td>[0.06601183861494064, -0.020045211538672447, -...</td>\n","      <td>positive</td>\n","      <td>0.606807</td>\n","      <td>have never said that modi succeed yet even als...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>18</th>\n","      <td>\\r\\nthe foundation for new india 2022 has alre...</td>\n","      <td>[0.04694363474845886, -0.06800008565187454, -0...</td>\n","      <td>neutral</td>\n","      <td>0.599807</td>\n","      <td>the foundation for new 
india 2022 has already ...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>19</th>\n","      <td>only rahul gandhis politics love can defeat th...</td>\n","      <td>[0.05615750327706337, -0.002462629694491625, -...</td>\n","      <td>positive</td>\n","      <td>0.602275</td>\n","      <td>only rahul gandhis politics love can defeat th...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>20</th>\n","      <td>one step time navigating thru looteyns when ev...</td>\n","      <td>[0.030352214351296425, -0.06195472553372383, 0...</td>\n","      <td>neutral</td>\n","      <td>0.570779</td>\n","      <td>one step time navigating thru looteyns when ev...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>21</th>\n","      <td>why sir mam shabana azami hate much that have ...</td>\n","      <td>[0.07535804808139801, -0.05643236264586449, -0...</td>\n","      <td>neutral</td>\n","      <td>0.571882</td>\n","      <td>why sir mam shabana azami hate much that have ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>22</th>\n","      <td>modi will remain for next 510 years and till t...</td>\n","      <td>[0.05986170098185539, -0.0674145296216011, -0....</td>\n","      <td>neutral</td>\n","      <td>0.591540</td>\n","      <td>modi will remain for next 510 years and till t...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>23</th>\n","      <td>pledge your first vote for modi</td>\n","      <td>[0.023959940299391747, -0.013972461223602295, ...</td>\n","      <td>positive</td>\n","      <td>0.606293</td>\n","      <td>pledge your first vote for modi</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>24</th>\n","      <td>why need modi lead bjp government again 2019 j...</td>\n","      <td>[0.04451165348291397, -0.06473662704229355, -0...</td>\n","      <td>positive</td>\n","      <td>0.609683</td>\n","      <td>why need modi lead 
bjp government again 2019 j...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>25</th>\n","      <td>raghuram rajan sent list high profile bank fra...</td>\n","      <td>[0.06561190634965897, -0.0614917054772377, -0....</td>\n","      <td>neutral</td>\n","      <td>0.578591</td>\n","      <td>raghuram rajan sent list high profile bank fra...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>26</th>\n","      <td>modi govts slashing indias education budget cl...</td>\n","      <td>[0.05217093601822853, -0.05785880982875824, -0...</td>\n","      <td>neutral</td>\n","      <td>0.594771</td>\n","      <td>modi govts slashing indias education budget cl...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>27</th>\n","      <td>why are you hell bent manoj tiwari just her ph...</td>\n","      <td>[0.04579753428697586, -0.05176748335361481, -0...</td>\n","      <td>positive</td>\n","      <td>0.600511</td>\n","      <td>why are you hell bent manoj tiwari just her ph...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>28</th>\n","      <td>know going into dirty details nehru family its...</td>\n","      <td>[0.047987841069698334, -0.050984784960746765, ...</td>\n","      <td>neutral</td>\n","      <td>0.533372</td>\n","      <td>know going into dirty details nehru family its...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>29</th>\n","      <td>momota begum will let her state become total s...</td>\n","      <td>[0.04509664326906204, -0.05019481107592583, -0...</td>\n","      <td>neutral</td>\n","      <td>0.593740</td>\n","      <td>momota begum will let her state become total s...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>30</th>\n","      <td>thanks anu sharma will vote and make sure peop...</td>\n","      <td>[0.04315190762281418, -0.04578147828578949, -0...</td>\n","      <td>positive</td>\n","      
<td>0.601758</td>\n","      <td>thanks anu sharma will vote and make sure peop...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>31</th>\n","      <td>those who themselves dont know how many father...</td>\n","      <td>[0.0144237345084548, -0.052222371101379395, -0...</td>\n","      <td>neutral</td>\n","      <td>0.589971</td>\n","      <td>those who themselves dont know how many father...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>32</th>\n","      <td>the star campaigner myth bjp lost more than as...</td>\n","      <td>[0.02492097206413746, -0.0531931146979332, -0....</td>\n","      <td>positive</td>\n","      <td>0.607886</td>\n","      <td>the star campaigner myth bjp lost more than as...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>33</th>\n","      <td>modi also live for few years only like you not...</td>\n","      <td>[0.040389616042375565, -0.06375984847545624, -...</td>\n","      <td>positive</td>\n","      <td>0.612952</td>\n","      <td>modi also live for few years only like you not...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>34</th>\n","      <td>narendra modi more brainy than all the drswamy...</td>\n","      <td>[0.06742898374795914, -0.060488566756248474, -...</td>\n","      <td>positive</td>\n","      <td>0.621238</td>\n","      <td>narendra modi more brainy than all the drswamy...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>35</th>\n","      <td>have started calling chowkidaar narendra modi ...</td>\n","      <td>[0.06360629200935364, -0.06786973774433136, -0...</td>\n","      <td>neutral</td>\n","      <td>0.593359</td>\n","      <td>have started calling chowkidaar narendra modi ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>36</th>\n","      <td>this the difference confident leaders call upo...</td>\n","      <td>[0.024233123287558556, -0.05243394151329994, 
-...</td>\n","      <td>positive</td>\n","      <td>0.610326</td>\n","      <td>this the difference confident leaders call upo...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>37</th>\n","      <td>jawans killed the border\\r\\ncrimes against wom...</td>\n","      <td>[0.03928006440401077, -0.051466524600982666, -...</td>\n","      <td>neutral</td>\n","      <td>0.582484</td>\n","      <td>jawans killed the border crimes against women ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>38</th>\n","      <td>tag this fast growing youtuber cared abt this ...</td>\n","      <td>[0.05051109194755554, -0.0660049319267273, 0.0...</td>\n","      <td>neutral</td>\n","      <td>0.584719</td>\n","      <td>tag this fast growing youtuber cared abt this ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>39</th>\n","      <td>think hindus should back off and let them suff...</td>\n","      <td>[-0.010975896380841732, -0.059168506413698196,...</td>\n","      <td>neutral</td>\n","      <td>0.597051</td>\n","      <td>think hindus should back off and let them suff...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>40</th>\n","      <td>yes cannot make any knee jerk moves drastic ac...</td>\n","      <td>[0.023108134046196938, -0.027600249275565147, ...</td>\n","      <td>positive</td>\n","      <td>0.618802</td>\n","      <td>yes cannot make any knee jerk moves drastic ac...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>41</th>\n","      <td>why picked chairman the devious aadhaar isnt h...</td>\n","      <td>[0.043231260031461716, -0.07101075351238251, -...</td>\n","      <td>neutral</td>\n","      <td>0.581575</td>\n","      <td>why picked chairman the devious aadhaar isnt h...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>42</th>\n","      <td>due automation and artificial intelligence fur...</td>\n","      
<td>[0.04160398617386818, -0.06572042405605316, -0...</td>\n","      <td>neutral</td>\n","      <td>0.594700</td>\n","      <td>due automation and artificial intelligence fur...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>43</th>\n","      <td>weak state capacity exacerbated excessive acco...</td>\n","      <td>[-0.00038854932063259184, -0.04599419981241226...</td>\n","      <td>neutral</td>\n","      <td>0.593749</td>\n","      <td>weak state capacity exacerbated excessive acco...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>44</th>\n","      <td>our narendra modi ordered indian air force tak...</td>\n","      <td>[-0.02063656784594059, -0.07548005133867264, -...</td>\n","      <td>positive</td>\n","      <td>0.601453</td>\n","      <td>our narendra modi ordered indian air force tak...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>45</th>\n","      <td>why vote modi dynasty visionary 3no high level...</td>\n","      <td>[0.01779576763510704, -0.06789527833461761, -0...</td>\n","      <td>neutral</td>\n","      <td>0.579034</td>\n","      <td>why vote modi dynasty visionary 3no high level...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>46</th>\n","      <td>its modi chor corrupt maha thugbandhan janta w...</td>\n","      <td>[0.065566785633564, -0.04119298234581947, -0.0...</td>\n","      <td>positive</td>\n","      <td>0.602544</td>\n","      <td>its modi chor corrupt maha thugbandhan janta w...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>47</th>\n","      <td>before modis arrival 2014 all supported him fo...</td>\n","      <td>[0.03988223522901535, -0.04965453967452049, -0...</td>\n","      <td>positive</td>\n","      <td>0.604502</td>\n","      <td>before modis arrival 2014 all supported him fo...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>think you forgot dollar 
india handled exceptio...</td>\n","      <td>[0.01084248349070549, 0.013633836060762405, -0...</td>\n","      <td>neutral</td>\n","      <td>0.598473</td>\n","      <td>think you forgot dollar india handled exceptio...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>tulsi gabbard rejected interviews with tyt but...</td>\n","      <td>[-0.01967957802116871, 0.05570048466324806, -0...</td>\n","      <td>positive</td>\n","      <td>0.621699</td>\n","      <td>tulsi gabbard rejected interviews with tyt but...</td>\n","      <td>positive</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                                           text  ...         y\n","origin_index                                                     ...          \n","0             how narendra modi has almost killed the indian...  ...  negative\n","1                       you think was modi behind that accident  ...  negative\n","2             kamal haasan takes chowkidar modi kamal haasan...  ...  negative\n","3             connected name with surname not bcz religion c...  ...  negative\n","4             anyone better than modi when nehruji expired s...  ...  positive\n","5             \\r\\nmodiji wont tired crying foul\\r\\nmain chow...  ...  negative\n","6             poor chap modi hasnâ given him anything can ...  ...  negative\n","7             green underwear missing ive been doubting isi ...  ...  negative\n","8             congress years wasnt able complete one rafale ...  ...  positive\n","9             asked learn from how treat minority well does ...  ...  negative\n","10            stop bull shitting worry about criminal vivek ...  ...  negative\n","11            drswamys timesnow last year debate nearly mill...  ...  positive\n","12            asshole bahujan radical marxist grow brain kno...  ...  negative\n","13            from selling dreams 2014 selling tshirts 2019 ...  ...  
positive\n","14            very true sir thats why they are against modi ...  ...  positive\n","15            they are giving jobs citizen india what you ar...  ...  negative\n","16            congress has always attempted empower people g...  ...  negative\n","17            have never said that modi succeed yet even als...  ...  positive\n","18            \\r\\nthe foundation for new india 2022 has alre...  ...  positive\n","19            only rahul gandhis politics love can defeat th...  ...  negative\n","20            one step time navigating thru looteyns when ev...  ...  negative\n","21            why sir mam shabana azami hate much that have ...  ...  negative\n","22            modi will remain for next 510 years and till t...  ...  negative\n","23                             pledge your first vote for modi   ...  positive\n","24            why need modi lead bjp government again 2019 j...  ...  positive\n","25            raghuram rajan sent list high profile bank fra...  ...  negative\n","26            modi govts slashing indias education budget cl...  ...  negative\n","27            why are you hell bent manoj tiwari just her ph...  ...  positive\n","28            know going into dirty details nehru family its...  ...  negative\n","29            momota begum will let her state become total s...  ...  negative\n","30            thanks anu sharma will vote and make sure peop...  ...  positive\n","31            those who themselves dont know how many father...  ...  positive\n","32            the star campaigner myth bjp lost more than as...  ...  positive\n","33            modi also live for few years only like you not...  ...  negative\n","34            narendra modi more brainy than all the drswamy...  ...  positive\n","35            have started calling chowkidaar narendra modi ...  ...  negative\n","36            this the difference confident leaders call upo...  ...  positive\n","37            jawans killed the border\\r\\ncrimes against wom...  ...  
negative\n","38            tag this fast growing youtuber cared abt this ...  ...  negative\n","39            think hindus should back off and let them suff...  ...  positive\n","40            yes cannot make any knee jerk moves drastic ac...  ...  positive\n","41            why picked chairman the devious aadhaar isnt h...  ...  negative\n","42            due automation and artificial intelligence fur...  ...  positive\n","43            weak state capacity exacerbated excessive acco...  ...  positive\n","44            our narendra modi ordered indian air force tak...  ...  positive\n","45            why vote modi dynasty visionary 3no high level...  ...  negative\n","46            its modi chor corrupt maha thugbandhan janta w...  ...  negative\n","47            before modis arrival 2014 all supported him fo...  ...  positive\n","48            think you forgot dollar india handled exceptio...  ...  positive\n","49            tulsi gabbard rejected interviews with tyt but...  ...  positive\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":109},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609553664952,"user_tz":-300,"elapsed":194919,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"8f463f08-944f-45dc-f463-e381c05f89db"},"source":["fitted_pipe.predict('the president of india just died')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" 
class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>default_name_embeddings</th>\n","      <th>sentiment</th>\n","      <th>sentiment_confidence</th>\n","      <th>document</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>[0.06468033790588379, -0.040837567299604416, -...</td>\n","      <td>neutral</td>\n","      <td>0.562996</td>\n","      <td>Bitcoin is going to the moon!</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                        default_name_embeddings  ...                       document\n","origin_index                                                     ...                               \n","0             [0.06468033790588379, -0.040837567299604416, -...  ...  Bitcoin is going to the moon!\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609553664954,"user_tz":-300,"elapsed":194907,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"7be62829-d712-4afd-900f-fd655e8282d7"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2)                 | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005)                    | Info: Learning Rate | Currently set to : 
0.005\n","pipe['sentiment_dl'].setBatchSize(64)                | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5)                 | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True)       | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6)               | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral')    | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512)               | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use')      | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True)  | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True)       | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([])        | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False)  | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0)            | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999)        | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+')    | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"])  | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0)            | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999)        | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')  | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609553671081,"user_tz":-300,"elapsed":201019,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"602947fd-13b8-438e-d5d1-64df15c2096b"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5)  \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = 
fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","\n","# The sentence detector that is part of the pipe generates some NaNs; let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["              precision    recall  f1-score   support\n","\n","    negative       0.79      0.96      0.87        27\n","     neutral       0.00      0.00      0.00         0\n","    positive       1.00      0.09      0.16        23\n","\n","    accuracy                           0.56        50\n","   macro avg       0.60      0.35      0.34        50\n","weighted avg       0.89      0.56      0.54        50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>default_name_embeddings</th>\n","      <th>sentiment</th>\n","      <th>sentiment_confidence</th>\n","      <th>document</th>\n","      <th>y</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>how narendra modi has almost killed the indian...</td>\n","      <td>[0.060062434524297714, -0.05557167902588844, -...</td>\n","      <td>negative</td>\n","      <td>0.689142</td>\n","      <td>how narendra modi has almost killed the indian...</td>\n","      <td>negative</td>\n","    </tr>\n","    
<tr>\n","      <th>1</th>\n","      <td>you think was modi behind that accident</td>\n","      <td>[0.05362718179821968, -0.004547705873847008, -...</td>\n","      <td>negative</td>\n","      <td>0.689483</td>\n","      <td>you think was modi behind that accident</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>kamal haasan takes chowkidar modi kamal haasan...</td>\n","      <td>[0.07274721562862396, -0.061593908816576004, -...</td>\n","      <td>negative</td>\n","      <td>0.707988</td>\n","      <td>kamal haasan takes chowkidar modi kamal haasan...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>connected name with surname not bcz religion c...</td>\n","      <td>[0.06106054410338402, -0.060213156044483185, -...</td>\n","      <td>negative</td>\n","      <td>0.675382</td>\n","      <td>connected name with surname not bcz religion c...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>anyone better than modi when nehruji expired s...</td>\n","      <td>[0.0737471655011177, 0.006071773823350668, -0....</td>\n","      <td>negative</td>\n","      <td>0.638730</td>\n","      <td>anyone better than modi when nehruji expired s...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>5</th>\n","      <td>\\r\\nmodiji wont tired crying foul\\r\\nmain chow...</td>\n","      <td>[0.05888385698199272, -0.0646616593003273, -0....</td>\n","      <td>negative</td>\n","      <td>0.723110</td>\n","      <td>modiji wont tired crying foul main chowkidar h...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>6</th>\n","      <td>poor chap modi hasnâ given him anything can ...</td>\n","      <td>[0.058948416262865067, -0.029682165011763573, ...</td>\n","      <td>negative</td>\n","      <td>0.690602</td>\n","      <td>poor chap modi hasnâ given him anything can ...</td>\n","      
<td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>7</th>\n","      <td>green underwear missing ive been doubting isi ...</td>\n","      <td>[0.05133155733346939, -0.06789954006671906, -0...</td>\n","      <td>negative</td>\n","      <td>0.705077</td>\n","      <td>green underwear missing ive been doubting isi ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>8</th>\n","      <td>congress years wasnt able complete one rafale ...</td>\n","      <td>[0.044129759073257446, -0.06111813709139824, -...</td>\n","      <td>neutral</td>\n","      <td>0.561979</td>\n","      <td>congress years wasnt able complete one rafale ...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>9</th>\n","      <td>asked learn from how treat minority well does ...</td>\n","      <td>[0.03665374591946602, -0.03695330768823624, -0...</td>\n","      <td>negative</td>\n","      <td>0.746584</td>\n","      <td>asked learn from how treat minority well does ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>10</th>\n","      <td>stop bull shitting worry about criminal vivek ...</td>\n","      <td>[0.07035735249519348, -0.06952506303787231, -0...</td>\n","      <td>negative</td>\n","      <td>0.768111</td>\n","      <td>stop bull shitting worry about criminal vivek ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>11</th>\n","      <td>drswamys timesnow last year debate nearly mill...</td>\n","      <td>[0.013958276249468327, -0.030759528279304504, ...</td>\n","      <td>neutral</td>\n","      <td>0.511294</td>\n","      <td>drswamys timesnow last year debate nearly mill...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>12</th>\n","      <td>asshole bahujan radical marxist grow brain kno...</td>\n","      <td>[0.026277026161551476, -0.06238812580704689, -...</td>\n","      <td>negative</td>\n","      <td>0.689268</td>\n","      <td>asshole bahujan radical 
marxist grow brain kno...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>13</th>\n","      <td>from selling dreams 2014 selling tshirts 2019 ...</td>\n","      <td>[0.07457270473241806, -0.058670494705438614, -...</td>\n","      <td>negative</td>\n","      <td>0.641822</td>\n","      <td>from selling dreams 2014 selling tshirts 2019 ...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>14</th>\n","      <td>very true sir thats why they are against modi ...</td>\n","      <td>[0.061704088002443314, -0.04553354158997536, -...</td>\n","      <td>negative</td>\n","      <td>0.651231</td>\n","      <td>very true sir thats why they are against modi ...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>15</th>\n","      <td>they are giving jobs citizen india what you ar...</td>\n","      <td>[0.05342026799917221, -0.003889711806550622, -...</td>\n","      <td>negative</td>\n","      <td>0.706768</td>\n","      <td>they are giving jobs citizen india what you ar...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>16</th>\n","      <td>congress has always attempted empower people g...</td>\n","      <td>[0.027197618037462234, -0.036435648798942566, ...</td>\n","      <td>negative</td>\n","      <td>0.607062</td>\n","      <td>congress has always attempted empower people g...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>17</th>\n","      <td>have never said that modi succeed yet even als...</td>\n","      <td>[0.06601183861494064, -0.020045211538672447, -...</td>\n","      <td>negative</td>\n","      <td>0.628577</td>\n","      <td>have never said that modi succeed yet even als...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>18</th>\n","      <td>\\r\\nthe foundation for new india 2022 has alre...</td>\n","      <td>[0.04694363474845886, -0.06800008565187454, -0...</td>\n","      <td>neutral</td>\n","      
<td>0.547697</td>\n","      <td>the foundation for new india 2022 has already ...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>19</th>\n","      <td>only rahul gandhis politics love can defeat th...</td>\n","      <td>[0.05615750327706337, -0.002462629694491625, -...</td>\n","      <td>negative</td>\n","      <td>0.632572</td>\n","      <td>only rahul gandhis politics love can defeat th...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>20</th>\n","      <td>one step time navigating thru looteyns when ev...</td>\n","      <td>[0.030352214351296425, -0.06195472553372383, 0...</td>\n","      <td>negative</td>\n","      <td>0.635106</td>\n","      <td>one step time navigating thru looteyns when ev...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>21</th>\n","      <td>why sir mam shabana azami hate much that have ...</td>\n","      <td>[0.07535804808139801, -0.05643236264586449, -0...</td>\n","      <td>negative</td>\n","      <td>0.738669</td>\n","      <td>why sir mam shabana azami hate much that have ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>22</th>\n","      <td>modi will remain for next 510 years and till t...</td>\n","      <td>[0.05986170098185539, -0.0674145296216011, -0....</td>\n","      <td>negative</td>\n","      <td>0.659078</td>\n","      <td>modi will remain for next 510 years and till t...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>23</th>\n","      <td>pledge your first vote for modi</td>\n","      <td>[0.023959940299391747, -0.013972461223602295, ...</td>\n","      <td>neutral</td>\n","      <td>0.555447</td>\n","      <td>pledge your first vote for modi</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>24</th>\n","      <td>why need modi lead bjp government again 2019 j...</td>\n","      <td>[0.04451165348291397, -0.06473662704229355, -0...</td>\n","      <td>neutral</td>\n","    
  <td>0.578395</td>\n","      <td>why need modi lead bjp government again 2019 j...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>25</th>\n","      <td>raghuram rajan sent list high profile bank fra...</td>\n","      <td>[0.06561190634965897, -0.0614917054772377, -0....</td>\n","      <td>negative</td>\n","      <td>0.706507</td>\n","      <td>raghuram rajan sent list high profile bank fra...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>26</th>\n","      <td>modi govts slashing indias education budget cl...</td>\n","      <td>[0.05217093601822853, -0.05785880982875824, -0...</td>\n","      <td>negative</td>\n","      <td>0.607360</td>\n","      <td>modi govts slashing indias education budget cl...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>27</th>\n","      <td>why are you hell bent manoj tiwari just her ph...</td>\n","      <td>[0.04579753428697586, -0.05176748335361481, -0...</td>\n","      <td>neutral</td>\n","      <td>0.588993</td>\n","      <td>why are you hell bent manoj tiwari just her ph...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>28</th>\n","      <td>know going into dirty details nehru family its...</td>\n","      <td>[0.047987841069698334, -0.050984784960746765, ...</td>\n","      <td>negative</td>\n","      <td>0.753084</td>\n","      <td>know going into dirty details nehru family its...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>29</th>\n","      <td>momota begum will let her state become total s...</td>\n","      <td>[0.04509664326906204, -0.05019481107592583, -0...</td>\n","      <td>negative</td>\n","      <td>0.615988</td>\n","      <td>momota begum will let her state become total s...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>30</th>\n","      <td>thanks anu sharma will vote and make sure peop...</td>\n","      <td>[0.04315190762281418, -0.04578147828578949, 
-0...</td>\n","      <td>neutral</td>\n","      <td>0.555271</td>\n","      <td>thanks anu sharma will vote and make sure peop...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>31</th>\n","      <td>those who themselves dont know how many father...</td>\n","      <td>[0.0144237345084548, -0.052222371101379395, -0...</td>\n","      <td>negative</td>\n","      <td>0.631877</td>\n","      <td>those who themselves dont know how many father...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>32</th>\n","      <td>the star campaigner myth bjp lost more than as...</td>\n","      <td>[0.02492097206413746, -0.0531931146979332, -0....</td>\n","      <td>neutral</td>\n","      <td>0.586682</td>\n","      <td>the star campaigner myth bjp lost more than as...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>33</th>\n","      <td>modi also live for few years only like you not...</td>\n","      <td>[0.040389616042375565, -0.06375984847545624, -...</td>\n","      <td>neutral</td>\n","      <td>0.587196</td>\n","      <td>modi also live for few years only like you not...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>34</th>\n","      <td>narendra modi more brainy than all the drswamy...</td>\n","      <td>[0.06742898374795914, -0.060488566756248474, -...</td>\n","      <td>neutral</td>\n","      <td>0.533663</td>\n","      <td>narendra modi more brainy than all the drswamy...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>35</th>\n","      <td>have started calling chowkidaar narendra modi ...</td>\n","      <td>[0.06360629200935364, -0.06786973774433136, -0...</td>\n","      <td>negative</td>\n","      <td>0.672972</td>\n","      <td>have started calling chowkidaar narendra modi ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>36</th>\n","      <td>this the difference confident leaders call upo...</td>\n","      
<td>[0.024233123287558556, -0.05243394151329994, -...</td>\n","      <td>neutral</td>\n","      <td>0.510922</td>\n","      <td>this the difference confident leaders call upo...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>37</th>\n","      <td>jawans killed the border\\r\\ncrimes against wom...</td>\n","      <td>[0.03928006440401077, -0.051466524600982666, -...</td>\n","      <td>negative</td>\n","      <td>0.701794</td>\n","      <td>jawans killed the border crimes against women ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>38</th>\n","      <td>tag this fast growing youtuber cared abt this ...</td>\n","      <td>[0.05051109194755554, -0.0660049319267273, 0.0...</td>\n","      <td>negative</td>\n","      <td>0.714883</td>\n","      <td>tag this fast growing youtuber cared abt this ...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>39</th>\n","      <td>think hindus should back off and let them suff...</td>\n","      <td>[-0.010975896380841732, -0.059168506413698196,...</td>\n","      <td>neutral</td>\n","      <td>0.553189</td>\n","      <td>think hindus should back off and let them suff...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>40</th>\n","      <td>yes cannot make any knee jerk moves drastic ac...</td>\n","      <td>[0.023108134046196938, -0.027600249275565147, ...</td>\n","      <td>positive</td>\n","      <td>0.671809</td>\n","      <td>yes cannot make any knee jerk moves drastic ac...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>41</th>\n","      <td>why picked chairman the devious aadhaar isnt h...</td>\n","      <td>[0.043231260031461716, -0.07101075351238251, -...</td>\n","      <td>negative</td>\n","      <td>0.709371</td>\n","      <td>why picked chairman the devious aadhaar isnt h...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>42</th>\n","      <td>due automation and 
artificial intelligence fur...</td>\n","      <td>[0.04160398617386818, -0.06572042405605316, -0...</td>\n","      <td>neutral</td>\n","      <td>0.553482</td>\n","      <td>due automation and artificial intelligence fur...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>43</th>\n","      <td>weak state capacity exacerbated excessive acco...</td>\n","      <td>[-0.00038854932063259184, -0.04599419981241226...</td>\n","      <td>negative</td>\n","      <td>0.609747</td>\n","      <td>weak state capacity exacerbated excessive acco...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>44</th>\n","      <td>our narendra modi ordered indian air force tak...</td>\n","      <td>[-0.02063656784594059, -0.07548005133867264, -...</td>\n","      <td>neutral</td>\n","      <td>0.513191</td>\n","      <td>our narendra modi ordered indian air force tak...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>45</th>\n","      <td>why vote modi dynasty visionary 3no high level...</td>\n","      <td>[0.01779576763510704, -0.06789527833461761, -0...</td>\n","      <td>negative</td>\n","      <td>0.635148</td>\n","      <td>why vote modi dynasty visionary 3no high level...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>46</th>\n","      <td>its modi chor corrupt maha thugbandhan janta w...</td>\n","      <td>[0.065566785633564, -0.04119298234581947, -0.0...</td>\n","      <td>negative</td>\n","      <td>0.687171</td>\n","      <td>its modi chor corrupt maha thugbandhan janta w...</td>\n","      <td>negative</td>\n","    </tr>\n","    <tr>\n","      <th>47</th>\n","      <td>before modis arrival 2014 all supported him fo...</td>\n","      <td>[0.03988223522901535, -0.04965453967452049, -0...</td>\n","      <td>neutral</td>\n","      <td>0.557571</td>\n","      <td>before modis arrival 2014 all supported him fo...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      
<th>48</th>\n","      <td>think you forgot dollar india handled exceptio...</td>\n","      <td>[0.01084248349070549, 0.013633836060762405, -0...</td>\n","      <td>negative</td>\n","      <td>0.615532</td>\n","      <td>think you forgot dollar india handled exceptio...</td>\n","      <td>positive</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>tulsi gabbard rejected interviews with tyt but...</td>\n","      <td>[-0.01967957802116871, 0.05570048466324806, -0...</td>\n","      <td>positive</td>\n","      <td>0.604604</td>\n","      <td>tulsi gabbard rejected interviews with tyt but...</td>\n","      <td>positive</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                                           text  ...         y\n","origin_index                                                     ...          \n","0             how narendra modi has almost killed the indian...  ...  negative\n","1                       you think was modi behind that accident  ...  negative\n","2             kamal haasan takes chowkidar modi kamal haasan...  ...  negative\n","3             connected name with surname not bcz religion c...  ...  negative\n","4             anyone better than modi when nehruji expired s...  ...  positive\n","5             \\r\\nmodiji wont tired crying foul\\r\\nmain chow...  ...  negative\n","6             poor chap modi hasnâ given him anything can ...  ...  negative\n","7             green underwear missing ive been doubting isi ...  ...  negative\n","8             congress years wasnt able complete one rafale ...  ...  positive\n","9             asked learn from how treat minority well does ...  ...  negative\n","10            stop bull shitting worry about criminal vivek ...  ...  negative\n","11            drswamys timesnow last year debate nearly mill...  ...  positive\n","12            asshole bahujan radical marxist grow brain kno...  ...  
negative\n","13            from selling dreams 2014 selling tshirts 2019 ...  ...  positive\n","14            very true sir thats why they are against modi ...  ...  positive\n","15            they are giving jobs citizen india what you ar...  ...  negative\n","16            congress has always attempted empower people g...  ...  negative\n","17            have never said that modi succeed yet even als...  ...  positive\n","18            \\r\\nthe foundation for new india 2022 has alre...  ...  positive\n","19            only rahul gandhis politics love can defeat th...  ...  negative\n","20            one step time navigating thru looteyns when ev...  ...  negative\n","21            why sir mam shabana azami hate much that have ...  ...  negative\n","22            modi will remain for next 510 years and till t...  ...  negative\n","23                             pledge your first vote for modi   ...  positive\n","24            why need modi lead bjp government again 2019 j...  ...  positive\n","25            raghuram rajan sent list high profile bank fra...  ...  negative\n","26            modi govts slashing indias education budget cl...  ...  negative\n","27            why are you hell bent manoj tiwari just her ph...  ...  positive\n","28            know going into dirty details nehru family its...  ...  negative\n","29            momota begum will let her state become total s...  ...  negative\n","30            thanks anu sharma will vote and make sure peop...  ...  positive\n","31            those who themselves dont know how many father...  ...  positive\n","32            the star campaigner myth bjp lost more than as...  ...  positive\n","33            modi also live for few years only like you not...  ...  negative\n","34            narendra modi more brainy than all the drswamy...  ...  positive\n","35            have started calling chowkidaar narendra modi ...  ...  negative\n","36            this the difference confident leaders call upo...  ...  
positive\n","37            jawans killed the border\\r\\ncrimes against wom...  ...  negative\n","38            tag this fast growing youtuber cared abt this ...  ...  negative\n","39            think hindus should back off and let them suff...  ...  positive\n","40            yes cannot make any knee jerk moves drastic ac...  ...  positive\n","41            why picked chairman the devious aadhaar isnt h...  ...  negative\n","42            due automation and artificial intelligence fur...  ...  positive\n","43            weak state capacity exacerbated excessive acco...  ...  positive\n","44            our narendra modi ordered indian air force tak...  ...  positive\n","45            why vote modi dynasty visionary 3no high level...  ...  negative\n","46            its modi chor corrupt maha thugbandhan janta w...  ...  negative\n","47            before modis arrival 2014 all supported him fo...  ...  positive\n","48            think you forgot dollar india handled exceptio...  ...  positive\n","49            tulsi gabbard rejected interviews with tyt but...  ...  positive\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nxWFzQOhjWC8","executionInfo":{"status":"ok","timestamp":1609553671091,"user_tz":-300,"elapsed":200991,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"2f071682-e615-4556-b813-a56f405ff9c3"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language <en> NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language <fi> NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language <xx> NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"IKK_Ii_gjJfF","executionInfo":{"status":"ok","timestamp":1609554113187,"user_tz":-300,"elapsed":140893,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"413eef4f-f423-439b-ad57-2ccfcf4bbe62"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train 
longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(100)  \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    negative       0.81      0.66      0.73       300\n","     neutral       0.00      0.00      0.00         0\n","    positive       0.85      0.59      0.69       300\n","\n","    accuracy                           0.62       600\n","   macro avg       0.55      0.42      0.47       600\n","weighted avg       0.83      0.62      0.71       600\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. 
Lets save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","executionInfo":{"status":"ok","timestamp":1609554302650,"user_tz":-300,"elapsed":189472,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"858c8cf2-ba4d-48fc-b333-e4b2819dadb2"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible!   \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('the president of india just died')\n","preds"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"e0CVlkk9v6Qi","executionInfo":{"status":"ok","timestamp":1609554388428,"user_tz":-300,"elapsed":879,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"212c87f7-8200-4646-cfcd-5bae608b3848"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')            | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)       | Info: Whether to care for case 
sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')                | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)                    | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)                        | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])                  | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)                 | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False)           | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)                  | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)                      | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)            | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)        | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32)                                 | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False)                          | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768)                                | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128)                        | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False)                                 | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768')         | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6)                         | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral')              | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative'])      | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768')  | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]}
\ No newline at end of file
diff --git a/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo.ipynb b/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo.ipynb
index 600d3ee9..d259a24b 100644
--- a/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo.ipynb
+++ b/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo.ipynb
@@ -1 +1 @@
-{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_class_text_classifier_demo.ipynb","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyM9f3LyT6TSckfAZm2wYkjU"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/collab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier with NLU \n","## ClassifierDL (Multi-class Text Classification)\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu > /dev/null pyspark==2.4.7\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download news classification dataset"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1607912618662,"user_tz":-60,"elapsed":94251,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"4fe5c4cb-76ff-44a0-9936-dfbddfeb5140"},"source":["! wget https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_train.csv\n","! wget https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_test.csv"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2020-12-14 02:23:36--  https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_train.csv\n","Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.154.38\n","Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.154.38|:443... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 24032125 (23M) [text/csv]\n","Saving to: ‘news_category_train.csv’\n","\n","news_category_train 100%[===================>]  22.92M  21.7MB/s    in 1.1s    \n","\n","2020-12-14 02:23:37 (21.7 MB/s) - ‘news_category_train.csv’ saved [24032125/24032125]\n","\n","--2020-12-14 02:23:37--  https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_test.csv\n","Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.217.74.118\n","Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.217.74.118|:443... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 1504408 (1.4M) [text/csv]\n","Saving to: ‘news_category_test.csv’\n","\n","news_category_test. 
100%[===================>]   1.43M  2.77MB/s    in 0.5s    \n","\n","2020-12-14 02:23:38 (2.77 MB/s) - ‘news_category_test.csv’ saved [1504408/1504408]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":419},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1607912619037,"user_tz":-60,"elapsed":94620,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"1cf7867f-21ab-4ba1-9ab3-c95a191b0286"},"source":["import pandas as pd\n","test_path = '/content/news_category_test.csv'\n","train_df = pd.read_csv(test_path)\n","train_df.columns=['y','text']\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>y</th>\n","      <th>text</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Business</td>\n","      <td>Unions representing workers at Turner   Newall...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>Sci/Tech</td>\n","      <td>TORONTO, Canada    A second team of rocketeer...</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>Sci/Tech</td>\n","      <td>A company founded by a chemistry researcher a...</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>Sci/Tech</td>\n","      <td>It's barely dawn when Mike Fitzpatrick starts...</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      
<td>Sci/Tech</td>\n","      <td>Southern California's smog fighting agency we...</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>7595</th>\n","      <td>World</td>\n","      <td>Ukrainian presidential candidate Viktor Yushch...</td>\n","    </tr>\n","    <tr>\n","      <th>7596</th>\n","      <td>Sports</td>\n","      <td>With the supply of attractive pitching options...</td>\n","    </tr>\n","    <tr>\n","      <th>7597</th>\n","      <td>Sports</td>\n","      <td>Like Roger Clemens did almost exactly eight ye...</td>\n","    </tr>\n","    <tr>\n","      <th>7598</th>\n","      <td>Business</td>\n","      <td>SINGAPORE : Doctors in the United States have ...</td>\n","    </tr>\n","    <tr>\n","      <th>7599</th>\n","      <td>Business</td>\n","      <td>EBay plans to buy the apartment and home renta...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>7600 rows × 2 columns</p>\n","</div>"],"text/plain":["             y                                               text\n","0     Business  Unions representing workers at Turner   Newall...\n","1     Sci/Tech   TORONTO, Canada    A second team of rocketeer...\n","2     Sci/Tech   A company founded by a chemistry researcher a...\n","3     Sci/Tech   It's barely dawn when Mike Fitzpatrick starts...\n","4     Sci/Tech   Southern California's smog fighting agency we...\n","...        ...                                                
...\n","7595     World  Ukrainian presidential candidate Viktor Yushch...\n","7596    Sports  With the supply of attractive pitching options...\n","7597    Sports  Like Roger Clemens did almost exactly eight ye...\n","7598  Business  SINGAPORE : Doctors in the United States have ...\n","7599  Business  EBay plans to buy the apartment and home renta...\n","\n","[7600 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.classifier')\n","\n","By default, the Universal Sentence Encoder Embeddings (USE) are beeing downloaded to provide embeddings for the classifier. You can use any of the 50+ other sentence Emeddings in NLU tough!\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":501},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1607912857369,"user_tz":-60,"elapsed":332946,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"8bce881e-edb7-4d2b-cf61-b9f26a05ea4b"},"source":["# load a trainable pipeline by specifying the train. 
prefix  and fit it on a datset with label and text columns\n","# Since there are no\n","fitted_pipe = nlu.load('train.classifier').fit(train_df)\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df)\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>y</th>\n","      <th>default_name_embeddings</th>\n","      <th>text</th>\n","      <th>sentence</th>\n","      <th>category_confidence</th>\n","      <th>category</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Business</td>\n","      <td>[0.012997539713978767, 0.019844762980937958, -...</td>\n","      <td>Unions representing workers at Turner   Newall...</td>\n","      <td>Unions representing workers at Turner Newall s...</td>\n","      <td>0.999985</td>\n","      <td>Business</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>Sci/Tech</td>\n","      <td>[0.023022323846817017, -0.01595703884959221, -...</td>\n","      <td>TORONTO, Canada    A second team of rocketeer...</td>\n","      <td>TORONTO, Canada A second team of rocketeers co...</td>\n","      <td>1.000000</td>\n","      
<td>Sports</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>Sci/Tech</td>\n","      <td>[-0.010587693192064762, 0.011531050316989422, ...</td>\n","      <td>TORONTO, Canada    A second team of rocketeer...</td>\n","      <td>10 million Ansari X Prize, a contest for priva...</td>\n","      <td>1.000000</td>\n","      <td>Sports</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>Sci/Tech</td>\n","      <td>[0.038641855120658875, 0.02322080172598362, -0...</td>\n","      <td>A company founded by a chemistry researcher a...</td>\n","      <td>A company founded by a chemistry researcher at...</td>\n","      <td>0.744563</td>\n","      <td>Business</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>Sci/Tech</td>\n","      <td>[-0.006857294123619795, 0.01967567577958107, -...</td>\n","      <td>It's barely dawn when Mike Fitzpatrick starts...</td>\n","      <td>It's barely dawn when Mike Fitzpatrick starts ...</td>\n","      <td>0.999360</td>\n","      <td>Sci/Tech</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>7596</th>\n","      <td>Sports</td>\n","      <td>[0.005107458680868149, -0.011805553920567036, ...</td>\n","      <td>With the supply of attractive pitching options...</td>\n","      <td>.</td>\n","      <td>1.000000</td>\n","      <td>Sports</td>\n","    </tr>\n","    <tr>\n","      <th>7596</th>\n","      <td>Sports</td>\n","      <td>[0.005107458680868149, -0.011805553920567036, ...</td>\n","      <td>With the supply of attractive pitching options...</td>\n","      <td>.</td>\n","      <td>2.000000</td>\n","      <td>Sports</td>\n","    </tr>\n","    <tr>\n","      <th>7597</th>\n","      <td>Sports</td>\n","      <td>[0.044696468859910965, 0.0015660696662962437, ...</td>\n","      <td>Like Roger Clemens did almost exactly eight 
ye...</td>\n","      <td>Like Roger Clemens did almost exactly eight ye...</td>\n","      <td>1.000000</td>\n","      <td>Sports</td>\n","    </tr>\n","    <tr>\n","      <th>7598</th>\n","      <td>Business</td>\n","      <td>[0.05564942583441734, -0.021285761147737503, -...</td>\n","      <td>SINGAPORE : Doctors in the United States have ...</td>\n","      <td>SINGAPORE : Doctors in the United States have ...</td>\n","      <td>0.999433</td>\n","      <td>Business</td>\n","    </tr>\n","    <tr>\n","      <th>7599</th>\n","      <td>Business</td>\n","      <td>[0.08172684907913208, -0.013251541182398796, -...</td>\n","      <td>EBay plans to buy the apartment and home renta...</td>\n","      <td>EBay plans to buy the apartment and home renta...</td>\n","      <td>0.820492</td>\n","      <td>Business</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>14399 rows × 6 columns</p>\n","</div>"],"text/plain":["                     y  ...  category\n","origin_index            ...          \n","0             Business  ...  Business\n","1             Sci/Tech  ...    Sports\n","1             Sci/Tech  ...    Sports\n","2             Sci/Tech  ...  Business\n","3             Sci/Tech  ...  Sci/Tech\n","...                ...  ...       ...\n","7596            Sports  ...    Sports\n","7596            Sports  ...    Sports\n","7597            Sports  ...    Sports\n","7598          Business  ...  Business\n","7599          Business  ...  Business\n","\n","[14399 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"DL_5aY9b3jSd"},"source":["# 4. 
Evaluate the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"djtoZVKBw2WU","executionInfo":{"status":"ok","timestamp":1607912858793,"user_tz":-60,"elapsed":334365,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"c41b52d9-2a4b-47ee-92e8-758399ef45cc"},"source":["from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["              precision    recall  f1-score   support\n","\n","    Business       0.76      0.81      0.78      3671\n","    Sci/Tech       0.80      0.79      0.79      3983\n","      Sports       0.86      0.92      0.89      3687\n","       World       0.89      0.77      0.83      3058\n","\n","    accuracy                           0.82     14399\n","   macro avg       0.83      0.82      0.82     14399\n","weighted avg       0.82      0.82      0.82     14399\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"mhFKVN93o1ZO"},"source":["# 5. Lets try different Sentence Emebddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"CzJd8omao0gt","executionInfo":{"status":"ok","timestamp":1607912858794,"user_tz":-60,"elapsed":334358,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"8021f8c3-d711-4d06-d184-88df1a29441e"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language <en> NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language <fi> NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language <xx> NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ABHLgirmG1n9","executionInfo":{"status":"ok","timestamp":1607918642391,"user_tz":-60,"elapsed":6117950,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"fcc6f823-4332-471f-c2dc-201916ef1b97"},"source":["# Load 
pipe with bert embeds\n","# using large embeddings can take a few hours..\n","# fitted_pipe = nlu.load('en.embed_sentence.bert_large_uncased train.classifier').fit(train_df)\n","fitted_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.classifier').fit(train_df)\n","\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df)\n","from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    Business       0.00      0.00      0.00      1900\n","    Sci/Tech       0.25      1.00      0.40      1900\n","      Sports       0.00      0.00      0.00      1900\n","       World       0.00      0.00      0.00      1900\n","\n","    accuracy                           0.25      7600\n","   macro avg       0.06      0.25      0.10      7600\n","weighted avg       0.06      0.25      0.10      7600\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nbpdZGoZPslz","executionInfo":{"status":"ok","timestamp":1607918778139,"user_tz":-60,"elapsed":6253693,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"1dcc8aa9-fd89-4b7a-d78d-c641c09f67d6"},"source":["# Load pipe with bert embeds\n","fitted_pipe = nlu.load('embed_sentence.bert train.classifier').fit(train_df)\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df)\n","from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], 
preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L2_128 download started this may take some time.\n","Approximate size to download 16.1 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    Business       0.81      0.74      0.77      1900\n","    Sci/Tech       0.74      0.87      0.80      1900\n","      Sports       0.92      0.94      0.93      1900\n","       World       0.91      0.81      0.86      1900\n","\n","    accuracy                           0.84      7600\n","   macro avg       0.85      0.84      0.84      7600\n","weighted avg       0.85      0.84      0.84      7600\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607918802363,"user_tz":-60,"elapsed":6277910,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"79c442f9-959e-4b14-ae85-6ef9f654f297"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible!   
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":128},"executionInfo":{"status":"ok","timestamp":1607918809822,"user_tz":-60,"elapsed":6285365,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"f755aaa0-974c-4c6f-c079-0f3d681dbc82"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Tesla plans to invest 10M into the ML sector')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>classifier_confidence</th>\n","      <th>document</th>\n","      <th>classifier</th>\n","      <th>embed_sentence_bert_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0.997592</td>\n","      <td>Tesla plans to invest 10M into the ML sector</td>\n","      <td>Business</td>\n","      <td>[-0.07111635059118271, 0.9532930850982666, -1....</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["             classifier_confidence  ...                     
embed_sentence_bert_embeddings\n","origin_index                        ...                                                   \n","0                         0.997592  ...  [-0.07111635059118271, 0.9532930850982666, -1....\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":10}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607918809824,"user_tz":-60,"elapsed":6285363,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"5e8b8c8a-5cd1-4d20-bde2-a4003d5687d0"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')                            | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)                       | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')                                | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)                                    | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)                                        | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])                                  | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)                                 | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False)                           | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)                                  | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)                                      | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)                            | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)                        | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32)                                                 | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False)                                          | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(128)                                                | Info: Number of embedding dimensions | Currently set to : 128\n","pipe['glove'].setMaxSentenceLength(128)                                        | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False)                                                 | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L2_128')                          | Info: unique reference name for identification | Currently set to : sent_small_bert_L2_128\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setClasses(['World', 'Sci/Tech', 'Sports', 'Business'])  | Info: get the tags used to trained this NerDLModel | Currently set to : ['World', 'Sci/Tech', 'Sports', 'Business']\n","pipe['classifier_dl'].setStorageRef('sent_small_bert_L2_128')                  | Info: unique reference name for identification | Currently set to : sent_small_bert_L2_128\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"M1LjAwJVJxun"},"source":[""],"execution_count":null,"outputs":[]}]}
\ No newline at end of file
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_class_text_classifier_demo.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier with NLU \n","## ClassifierDL (Multi-class Text Classification)\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu > /dev/null pyspark==2.4.7\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download news classification dataset"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1607912618662,"user_tz":-60,"elapsed":94251,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"4fe5c4cb-76ff-44a0-9936-dfbddfeb5140"},"source":["! wget https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_train.csv\n","! wget https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_test.csv"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2020-12-14 02:23:36--  https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_train.csv\n","Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.154.38\n","Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.154.38|:443... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 24032125 (23M) [text/csv]\n","Saving to: ‘news_category_train.csv’\n","\n","news_category_train 100%[===================>]  22.92M  21.7MB/s    in 1.1s    \n","\n","2020-12-14 02:23:37 (21.7 MB/s) - ‘news_category_train.csv’ saved [24032125/24032125]\n","\n","--2020-12-14 02:23:37--  https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_test.csv\n","Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.217.74.118\n","Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.217.74.118|:443... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 1504408 (1.4M) [text/csv]\n","Saving to: ‘news_category_test.csv’\n","\n","news_category_test. 
100%[===================>]   1.43M  2.77MB/s    in 0.5s    \n","\n","2020-12-14 02:23:38 (2.77 MB/s) - ‘news_category_test.csv’ saved [1504408/1504408]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":419},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1607912619037,"user_tz":-60,"elapsed":94620,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"1cf7867f-21ab-4ba1-9ab3-c95a191b0286"},"source":["import pandas as pd\n","test_path = '/content/news_category_test.csv'\n","train_df = pd.read_csv(test_path)\n","train_df.columns=['y','text']\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>y</th>\n","      <th>text</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Business</td>\n","      <td>Unions representing workers at Turner   Newall...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>Sci/Tech</td>\n","      <td>TORONTO, Canada    A second team of rocketeer...</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>Sci/Tech</td>\n","      <td>A company founded by a chemistry researcher a...</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>Sci/Tech</td>\n","      <td>It's barely dawn when Mike Fitzpatrick starts...</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      
<td>Sci/Tech</td>\n","      <td>Southern California's smog fighting agency we...</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>7595</th>\n","      <td>World</td>\n","      <td>Ukrainian presidential candidate Viktor Yushch...</td>\n","    </tr>\n","    <tr>\n","      <th>7596</th>\n","      <td>Sports</td>\n","      <td>With the supply of attractive pitching options...</td>\n","    </tr>\n","    <tr>\n","      <th>7597</th>\n","      <td>Sports</td>\n","      <td>Like Roger Clemens did almost exactly eight ye...</td>\n","    </tr>\n","    <tr>\n","      <th>7598</th>\n","      <td>Business</td>\n","      <td>SINGAPORE : Doctors in the United States have ...</td>\n","    </tr>\n","    <tr>\n","      <th>7599</th>\n","      <td>Business</td>\n","      <td>EBay plans to buy the apartment and home renta...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>7600 rows × 2 columns</p>\n","</div>"],"text/plain":["             y                                               text\n","0     Business  Unions representing workers at Turner   Newall...\n","1     Sci/Tech   TORONTO, Canada    A second team of rocketeer...\n","2     Sci/Tech   A company founded by a chemistry researcher a...\n","3     Sci/Tech   It's barely dawn when Mike Fitzpatrick starts...\n","4     Sci/Tech   Southern California's smog fighting agency we...\n","...        ...                                                
...\n","7595     World  Ukrainian presidential candidate Viktor Yushch...\n","7596    Sports  With the supply of attractive pitching options...\n","7597    Sports  Like Roger Clemens did almost exactly eight ye...\n","7598  Business  SINGAPORE : Doctors in the United States have ...\n","7599  Business  EBay plans to buy the apartment and home renta...\n","\n","[7600 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.classifier')\n","\n","By default, the Universal Sentence Encoder Embeddings (USE) are being downloaded to provide embeddings for the classifier. You can use any of the 50+ other sentence Embeddings in NLU though!\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":501},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1607912857369,"user_tz":-60,"elapsed":332946,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"8bce881e-edb7-4d2b-cf61-b9f26a05ea4b"},"source":["# load a trainable pipeline by specifying the train. 
prefix  and fit it on a datset with label and text columns\n","# Since there are no\n","fitted_pipe = nlu.load('train.classifier').fit(train_df)\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df)\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>y</th>\n","      <th>default_name_embeddings</th>\n","      <th>text</th>\n","      <th>sentence</th>\n","      <th>category_confidence</th>\n","      <th>category</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Business</td>\n","      <td>[0.012997539713978767, 0.019844762980937958, -...</td>\n","      <td>Unions representing workers at Turner   Newall...</td>\n","      <td>Unions representing workers at Turner Newall s...</td>\n","      <td>0.999985</td>\n","      <td>Business</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>Sci/Tech</td>\n","      <td>[0.023022323846817017, -0.01595703884959221, -...</td>\n","      <td>TORONTO, Canada    A second team of rocketeer...</td>\n","      <td>TORONTO, Canada A second team of rocketeers co...</td>\n","      <td>1.000000</td>\n","      
<td>Sports</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>Sci/Tech</td>\n","      <td>[-0.010587693192064762, 0.011531050316989422, ...</td>\n","      <td>TORONTO, Canada    A second team of rocketeer...</td>\n","      <td>10 million Ansari X Prize, a contest for priva...</td>\n","      <td>1.000000</td>\n","      <td>Sports</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>Sci/Tech</td>\n","      <td>[0.038641855120658875, 0.02322080172598362, -0...</td>\n","      <td>A company founded by a chemistry researcher a...</td>\n","      <td>A company founded by a chemistry researcher at...</td>\n","      <td>0.744563</td>\n","      <td>Business</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>Sci/Tech</td>\n","      <td>[-0.006857294123619795, 0.01967567577958107, -...</td>\n","      <td>It's barely dawn when Mike Fitzpatrick starts...</td>\n","      <td>It's barely dawn when Mike Fitzpatrick starts ...</td>\n","      <td>0.999360</td>\n","      <td>Sci/Tech</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>7596</th>\n","      <td>Sports</td>\n","      <td>[0.005107458680868149, -0.011805553920567036, ...</td>\n","      <td>With the supply of attractive pitching options...</td>\n","      <td>.</td>\n","      <td>1.000000</td>\n","      <td>Sports</td>\n","    </tr>\n","    <tr>\n","      <th>7596</th>\n","      <td>Sports</td>\n","      <td>[0.005107458680868149, -0.011805553920567036, ...</td>\n","      <td>With the supply of attractive pitching options...</td>\n","      <td>.</td>\n","      <td>2.000000</td>\n","      <td>Sports</td>\n","    </tr>\n","    <tr>\n","      <th>7597</th>\n","      <td>Sports</td>\n","      <td>[0.044696468859910965, 0.0015660696662962437, ...</td>\n","      <td>Like Roger Clemens did almost exactly eight 
ye...</td>\n","      <td>Like Roger Clemens did almost exactly eight ye...</td>\n","      <td>1.000000</td>\n","      <td>Sports</td>\n","    </tr>\n","    <tr>\n","      <th>7598</th>\n","      <td>Business</td>\n","      <td>[0.05564942583441734, -0.021285761147737503, -...</td>\n","      <td>SINGAPORE : Doctors in the United States have ...</td>\n","      <td>SINGAPORE : Doctors in the United States have ...</td>\n","      <td>0.999433</td>\n","      <td>Business</td>\n","    </tr>\n","    <tr>\n","      <th>7599</th>\n","      <td>Business</td>\n","      <td>[0.08172684907913208, -0.013251541182398796, -...</td>\n","      <td>EBay plans to buy the apartment and home renta...</td>\n","      <td>EBay plans to buy the apartment and home renta...</td>\n","      <td>0.820492</td>\n","      <td>Business</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>14399 rows × 6 columns</p>\n","</div>"],"text/plain":["                     y  ...  category\n","origin_index            ...          \n","0             Business  ...  Business\n","1             Sci/Tech  ...    Sports\n","1             Sci/Tech  ...    Sports\n","2             Sci/Tech  ...  Business\n","3             Sci/Tech  ...  Sci/Tech\n","...                ...  ...       ...\n","7596            Sports  ...    Sports\n","7596            Sports  ...    Sports\n","7597            Sports  ...    Sports\n","7598          Business  ...  Business\n","7599          Business  ...  Business\n","\n","[14399 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"DL_5aY9b3jSd"},"source":["# 4. 
Evaluate the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"djtoZVKBw2WU","executionInfo":{"status":"ok","timestamp":1607912858793,"user_tz":-60,"elapsed":334365,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"c41b52d9-2a4b-47ee-92e8-758399ef45cc"},"source":["from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["              precision    recall  f1-score   support\n","\n","    Business       0.76      0.81      0.78      3671\n","    Sci/Tech       0.80      0.79      0.79      3983\n","      Sports       0.86      0.92      0.89      3687\n","       World       0.89      0.77      0.83      3058\n","\n","    accuracy                           0.82     14399\n","   macro avg       0.83      0.82      0.82     14399\n","weighted avg       0.82      0.82      0.82     14399\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"mhFKVN93o1ZO"},"source":["# 5. Let's try different Sentence Embeddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"CzJd8omao0gt","executionInfo":{"status":"ok","timestamp":1607912858794,"user_tz":-60,"elapsed":334358,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"8021f8c3-d711-4d06-d184-88df1a29441e"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possible sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language <en> NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language <fi> NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language <xx> NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ABHLgirmG1n9","executionInfo":{"status":"ok","timestamp":1607918642391,"user_tz":-60,"elapsed":6117950,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"fcc6f823-4332-471f-c2dc-201916ef1b97"},"source":["# Load 
pipe with bert embeds\n","# using large embeddings can take a few hours..\n","# fitted_pipe = nlu.load('en.embed_sentence.bert_large_uncased train.classifier').fit(train_df)\n","fitted_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.classifier').fit(train_df)\n","\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df)\n","from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    Business       0.00      0.00      0.00      1900\n","    Sci/Tech       0.25      1.00      0.40      1900\n","      Sports       0.00      0.00      0.00      1900\n","       World       0.00      0.00      0.00      1900\n","\n","    accuracy                           0.25      7600\n","   macro avg       0.06      0.25      0.10      7600\n","weighted avg       0.06      0.25      0.10      7600\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nbpdZGoZPslz","executionInfo":{"status":"ok","timestamp":1607918778139,"user_tz":-60,"elapsed":6253693,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"1dcc8aa9-fd89-4b7a-d78d-c641c09f67d6"},"source":["# Load pipe with bert embeds\n","fitted_pipe = nlu.load('embed_sentence.bert train.classifier').fit(train_df)\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df)\n","from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], 
preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L2_128 download started this may take some time.\n","Approximate size to download 16.1 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","    Business       0.81      0.74      0.77      1900\n","    Sci/Tech       0.74      0.87      0.80      1900\n","      Sports       0.92      0.94      0.93      1900\n","       World       0.91      0.81      0.86      1900\n","\n","    accuracy                           0.84      7600\n","   macro avg       0.85      0.84      0.84      7600\n","weighted avg       0.85      0.84      0.84      7600\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 6. Let's save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607918802363,"user_tz":-60,"elapsed":6277910,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"79c442f9-959e-4b14-ae85-6ef9f654f297"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 7. Let's load the model from HDD.\n","This makes Offline NLU usage possible!   
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":128},"executionInfo":{"status":"ok","timestamp":1607918809822,"user_tz":-60,"elapsed":6285365,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"f755aaa0-974c-4c6f-c079-0f3d681dbc82"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Tesla plans to invest 10M into the ML sector')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>classifier_confidence</th>\n","      <th>document</th>\n","      <th>classifier</th>\n","      <th>embed_sentence_bert_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0.997592</td>\n","      <td>Tesla plans to invest 10M into the ML sector</td>\n","      <td>Business</td>\n","      <td>[-0.07111635059118271, 0.9532930850982666, -1....</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["             classifier_confidence  ...                     
embed_sentence_bert_embeddings\n","origin_index                        ...                                                   \n","0                         0.997592  ...  [-0.07111635059118271, 0.9532930850982666, -1....\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":10}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607918809824,"user_tz":-60,"elapsed":6285363,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"5e8b8c8a-5cd1-4d20-bde2-a4003d5687d0"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')                            | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)                       | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')                                | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)                                    | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)                                        | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])                                  | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)                                 | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False)                           | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)                                  | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)                                      | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)                            | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)                        | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32)                                                 | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False)                                          | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(128)                                                | Info: Number of embedding dimensions | Currently set to : 128\n","pipe['glove'].setMaxSentenceLength(128)                                        | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False)                                                 | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L2_128')                          | Info: unique reference name for identification | Currently set to : sent_small_bert_L2_128\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setClasses(['World', 'Sci/Tech', 'Sports', 'Business'])  | Info: get the tags used to trained this NerDLModel | Currently set to : ['World', 'Sci/Tech', 'Sports', 'Business']\n","pipe['classifier_dl'].setStorageRef('sent_small_bert_L2_128')                  | Info: unique reference name for identification | Currently set to : sent_small_bert_L2_128\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"M1LjAwJVJxun"},"source":[""],"execution_count":null,"outputs":[]}]}
\ No newline at end of file
diff --git a/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_amazon.ipynb b/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_amazon.ipynb
index 03ca9530..8255e3ed 100644
--- a/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_amazon.ipynb
+++ b/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_amazon.ipynb
@@ -1 +1 @@
-{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_class_text_classifier_demo_amazon.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_amazon.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier with NLU \n","## ClassifierDL (Multi-class Text Classification)\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install  pyspark==2.4.7 \n","! pip install nlu > /dev/null    \n","\n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download Amazon Unlocked mobile phones dataset \n","https://www.kaggle.com/PromptCloudHQ/amazon-reviews-unlocked-mobile-phones\n","\n","dataset with unlocked mobile phone reviews in 5 review classes\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610787546042,"user_tz":-300,"elapsed":3459,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ca2d6419-7d62-400b-d3d7-9b16fa9bce2c"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/Amazon_Unlocked_Mobile.csv"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 08:58:27--  http://ckl-it.de/wp-content/uploads/2021/01/Amazon_Unlocked_Mobile.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 452621 (442K) [text/csv]\n","Saving to: ‘Amazon_Unlocked_Mobile.csv’\n","\n","Amazon_Unlocked_Mob 100%[===================>] 442.01K   308KB/s    in 1.4s    \n","\n","2021-01-16 08:58:29 (308 KB/s) - ‘Amazon_Unlocked_Mobile.csv’ saved [452621/452621]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610787551525,"user_tz":-300,"elapsed":1188,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"dfe55b6f-f33a-4bd2-a2ba-5b1a306e1ab4"},"source":["import pandas as pd\n","test_path = '/content/Amazon_Unlocked_Mobile.csv'\n","train_df = pd.read_csv(test_path,sep=\",\")\n","cols = [\"y\",\"text\"]\n","train_df = train_df[cols]\n","train_df\n","\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        
vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>y</th>\n","      <th>text</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>poor</td>\n","      <td>Bought it, turned it on, did not work. Opened ...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>average</td>\n","      <td>Currently it is 2014, the 3gs is discontinued....</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>good</td>\n","      <td>100% recomendado</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>average</td>\n","      <td>It's a good phone but if you use it to browse ...</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>average</td>\n","      <td>It's nice that this phone has LTE and it funct...</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>1495</th>\n","      <td>poor</td>\n","      <td>Not happy with this phone. 
Not able to get but...</td>\n","    </tr>\n","    <tr>\n","      <th>1496</th>\n","      <td>good</td>\n","      <td>great phablet for all general uses</td>\n","    </tr>\n","    <tr>\n","      <th>1497</th>\n","      <td>poor</td>\n","      <td>Hate this phone had it for one day</td>\n","    </tr>\n","    <tr>\n","      <th>1498</th>\n","      <td>good</td>\n","      <td>Great cheap phone.</td>\n","    </tr>\n","    <tr>\n","      <th>1499</th>\n","      <td>good</td>\n","      <td>Very good</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>1500 rows × 2 columns</p>\n","</div>"],"text/plain":["            y                                               text\n","0        poor  Bought it, turned it on, did not work. Opened ...\n","1     average  Currently it is 2014, the 3gs is discontinued....\n","2        good                                   100% recomendado\n","3     average  It's a good phone but if you use it to browse ...\n","4     average  It's nice that this phone has LTE and it funct...\n","...       ...                                                ...\n","1495     poor  Not happy with this phone. Not able to get but...\n","1496     good                 great phablet for all general uses\n","1497     poor                 Hate this phone had it for one day\n","1498     good                                 Great cheap phone.\n","1499     good                                          Very good\n","\n","[1500 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. 
Train Deep Learning Classifier using nlu.load('train.classifier')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":501},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609621542716,"user_tz":-300,"elapsed":207913,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d0eb19cc-8849-43f7-9cdf-a88fd8f11676"},"source":["# load a trainable pipeline by specifying the train. prefix  and fit it on a datset with label and text columns\n","# Since there are no\n","\n","trainable_pipe = nlu.load('train.classifier')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50] )\n","\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50] )\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>y</th>\n","      <th>text</th>\n","      <th>category</th>\n","      <th>default_name_embeddings</th>\n","      <th>category_confidence</th>\n","      <th>sentence</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    
<tr>\n","      <th>0</th>\n","      <td>poor</td>\n","      <td>Bought it, turned it on, did not work. Opened ...</td>\n","      <td>average</td>\n","      <td>[0.020834514871239662, 0.03326118737459183, -0...</td>\n","      <td>0.763940</td>\n","      <td>Bought it, turned it on, did not work.</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>poor</td>\n","      <td>Bought it, turned it on, did not work. Opened ...</td>\n","      <td>average</td>\n","      <td>[0.030574046075344086, -0.009678893722593784, ...</td>\n","      <td>1.000000</td>\n","      <td>Opened up the back, made sure it was in right,...</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>poor</td>\n","      <td>Bought it, turned it on, did not work. Opened ...</td>\n","      <td>average</td>\n","      <td>[0.023421283811330795, 0.02294657751917839, -0...</td>\n","      <td>2.000000</td>\n","      <td>It was supposed to be new, but i it was used.</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>poor</td>\n","      <td>Bought it, turned it on, did not work. Opened ...</td>\n","      <td>average</td>\n","      <td>[0.06009713560342789, 0.046434734016656876, -0...</td>\n","      <td>3.000000</td>\n","      <td>Found scratches on cover.</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>average</td>\n","      <td>Currently it is 2014, the 3gs is discontinued....</td>\n","      <td>average</td>\n","      <td>[0.04893391206860542, -0.010221654549241066, -...</td>\n","      <td>0.631228</td>\n","      <td>Currently it is 2014, the 3gs is discontinued.</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>47</th>\n","      <td>good</td>\n","      <td>Bought for my mom! 
She loves it!</td>\n","      <td>good</td>\n","      <td>[0.021471485495567322, -0.027823669835925102, ...</td>\n","      <td>0.656713</td>\n","      <td>Bought for my mom!</td>\n","    </tr>\n","    <tr>\n","      <th>47</th>\n","      <td>good</td>\n","      <td>Bought for my mom! She loves it!</td>\n","      <td>good</td>\n","      <td>[0.0001737327256705612, -0.014630521647632122,...</td>\n","      <td>1.000000</td>\n","      <td>She loves it!</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>good</td>\n","      <td>Gave the phone as a birthday gift. My friend s...</td>\n","      <td>good</td>\n","      <td>[0.03572574257850647, 0.013357092626392841, -0...</td>\n","      <td>0.701626</td>\n","      <td>Gave the phone as a birthday gift.</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>good</td>\n","      <td>Gave the phone as a birthday gift. My friend s...</td>\n","      <td>good</td>\n","      <td>[0.08371475338935852, -0.01581401191651821, -0...</td>\n","      <td>1.000000</td>\n","      <td>My friend seems happy with it so far.</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>good</td>\n","      <td>Great Product</td>\n","      <td>good</td>\n","      <td>[0.03334435820579529, -0.05353177338838577, -0...</td>\n","      <td>0.593622</td>\n","      <td>Great Product</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>215 rows × 6 columns</p>\n","</div>"],"text/plain":["                    y  ...                                           sentence\n","origin_index           ...                                                   \n","0                poor  ...             Bought it, turned it on, did not work.\n","0                poor  ...  Opened up the back, made sure it was in right,...\n","0                poor  ...      It was supposed to be new, but i it was used.\n","0                poor  ...                          Found scratches on cover.\n","1             average  ...     
Currently it is 2014, the 3gs is discontinued.\n","...               ...  ...                                                ...\n","47               good  ...                                 Bought for my mom!\n","47               good  ...                                      She loves it!\n","48               good  ...                 Gave the phone as a birthday gift.\n","48               good  ...              My friend seems happy with it so far.\n","49               good  ...                                      Great Product\n","\n","[215 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609621546162,"user_tz":-300,"elapsed":211344,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"5dc268e6-e97f-4378-85d1-8319d3f7893f"},"source":["fitted_pipe.predict(\"It worked perfectly .\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>category</th>\n","      <th>default_name_embeddings</th>\n","      <th>category_confidence</th>\n","      <th>sentence</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      
<td>average</td>\n","      <td>[0.06468033790588379, -0.040837567299604416, -...</td>\n","      <td>0.460187</td>\n","      <td>Bitcoin is going to the moon!</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["             category  ...                       sentence\n","origin_index           ...                               \n","0             average  ...  Bitcoin is going to the moon!\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609621546165,"user_tz":-300,"elapsed":211336,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c0633c00-9bfd-412b-ee55-0f6e5b150f39"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setMaxEpochs(3)                | Info: Maximum number of epochs to train | Currently set to : 3\n","pipe['classifier_dl'].setLr(0.005)                   | Info: Learning Rate | Currently set to : 0.005\n","pipe['classifier_dl'].setBatchSize(64)               | Info: Batch size | Currently set to : 64\n","pipe['classifier_dl'].setDropout(0.5)                | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['classifier_dl'].setEnableOutputLogs(True)      | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+')    | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"])  | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0)            | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999)        | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512)               | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use')      | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True)  | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True)       | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([])        | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False)  | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0)            | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999)        | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')  | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":620},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609621557024,"user_tz":-300,"elapsed":222179,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"12b53152-fcdf-4180-91b8-cc150e5bb23a"},"source":["# Train longer!\n","trainable_pipe['classifier_dl'].setMaxEpochs(5)  \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:100])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["              precision    recall  f1-score   support\n","\n","     average       0.00      0.00      0.00        29\n","        good       0.65      0.94      0.77        32\n","        poor       0.69      0.95      0.80        39\n","\n","    accuracy                           0.67       100\n","   macro avg       0.45      0.63      0.52       100\n","weighted avg       0.48      0.67      0.56       100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>y</th>\n","      <th>text</th>\n","      <th>document</th>\n","      <th>category</th>\n","      <th>default_name_embeddings</th>\n","      <th>category_confidence</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>poor</td>\n","      <td>Bought it, turned it on, did not work. Opened ...</td>\n","      <td>Bought it, turned it on, did not work. 
Opened ...</td>\n","      <td>poor</td>\n","      <td>[0.059367865324020386, 0.05043933913111687, -0...</td>\n","      <td>0.952295</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>average</td>\n","      <td>Currently it is 2014, the 3gs is discontinued....</td>\n","      <td>Currently it is 2014, the 3gs is discontinued....</td>\n","      <td>good</td>\n","      <td>[0.0046275281347334385, 0.012452688068151474, ...</td>\n","      <td>0.396265</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>good</td>\n","      <td>100% recomendado</td>\n","      <td>100% recomendado</td>\n","      <td>good</td>\n","      <td>[0.008266163989901543, 0.00396152026951313, -0...</td>\n","      <td>0.773682</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>average</td>\n","      <td>It's a good phone but if you use it to browse ...</td>\n","      <td>It's a good phone but if you use it to browse ...</td>\n","      <td>poor</td>\n","      <td>[0.05291805788874626, 0.002292224671691656, -0...</td>\n","      <td>0.506015</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>average</td>\n","      <td>It's nice that this phone has LTE and it funct...</td>\n","      <td>It's nice that this phone has LTE and it funct...</td>\n","      <td>good</td>\n","      <td>[0.03426238149404526, -0.024366019293665886, -...</td>\n","      <td>0.648859</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>95</th>\n","      <td>poor</td>\n","      <td>Hola, compramos dos telÃ©fonos y vienieron tot...</td>\n","      <td>Hola, compramos dos telÃ©fonos y vienieron tot...</td>\n","      <td>poor</td>\n","      <td>[0.06324272602796555, -0.06387951225042343, -0...</td>\n","      <td>0.790492</td>\n","    </tr>\n","    <tr>\n","      <th>96</th>\n","      
<td>good</td>\n","      <td>Excelente</td>\n","      <td>Excelente</td>\n","      <td>good</td>\n","      <td>[0.03246314451098442, -0.01719777286052704, -0...</td>\n","      <td>0.813424</td>\n","    </tr>\n","    <tr>\n","      <th>97</th>\n","      <td>poor</td>\n","      <td>the product is good but the English language s...</td>\n","      <td>the product is good but the English language s...</td>\n","      <td>poor</td>\n","      <td>[0.056343767791986465, -0.016822000965476036, ...</td>\n","      <td>0.940151</td>\n","    </tr>\n","    <tr>\n","      <th>98</th>\n","      <td>poor</td>\n","      <td>Supposed to be a brand new unlock phone. The p...</td>\n","      <td>Supposed to be a brand new unlock phone. The p...</td>\n","      <td>poor</td>\n","      <td>[0.03210984170436859, 0.018154876306653023, -0...</td>\n","      <td>0.984983</td>\n","    </tr>\n","    <tr>\n","      <th>99</th>\n","      <td>average</td>\n","      <td>Minor, very annoying glitch when texting. Not ...</td>\n","      <td>Minor, very annoying glitch when texting. Not ...</td>\n","      <td>poor</td>\n","      <td>[-0.026854539290070534, 0.03769969940185547, 0...</td>\n","      <td>0.969512</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>100 rows × 6 columns</p>\n","</div>"],"text/plain":["                    y  ... category_confidence\n","origin_index           ...                    \n","0                poor  ...            0.952295\n","1             average  ...            0.396265\n","2                good  ...            0.773682\n","3             average  ...            0.506015\n","4             average  ...            0.648859\n","...               ...  ...                 ...\n","95               poor  ...            0.790492\n","96               good  ...            0.813424\n","97               poor  ...            0.940151\n","98               poor  ...            0.984983\n","99            average  ...            
0.969512\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609621557034,"user_tz":-300,"elapsed":222174,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1a690639-c397-4ced-c222-981776472766"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language <en> NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model 
sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP 
model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language <fi> NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language <xx> NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model 
sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"IKK_Ii_gjJfF","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609622725629,"user_tz":-300,"elapsed":1390760,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"9e8f7464-0bca-4a03-9212-2ab8ccb8f319"},"source":["from sklearn.metrics import classification_report\n","trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.classifier')\n","# We need to train longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['classifier_dl'].setMaxEpochs(90)  \n","trainable_pipe['classifier_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","     average       0.72      0.67      0.69       500\n","        good       0.85      0.87      0.86       500\n","        poor       0.78      0.83      0.80       500\n","\n","    accuracy                           0.79      1500\n","   macro avg       0.78      0.79      0.79      1500\n","weighted avg       0.78      0.79      0.79      1500\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609622897186,"user_tz":-300,"elapsed":1562308,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a3175762-9ea0-472e-a8bf-0a64fd1176c9"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible!   
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":111},"executionInfo":{"status":"ok","timestamp":1609622933158,"user_tz":-300,"elapsed":1598267,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a40c74fc-c2f1-4a58-ba4e-5d1e21e39da3"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('It worked perfectly.')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>classifier_confidence</th>\n","      <th>document</th>\n","      <th>en_embed_sentence_small_bert_L12_768_embeddings</th>\n","      <th>classifier</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0.950214</td>\n","      <td>It worked perfectly.</td>\n","      <td>[0.275971919298172, 0.4924655854701996, 0.2755...</td>\n","      <td>good</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["             classifier_confidence  ... classifier\n","origin_index                        ...           \n","0                         0.950214  ...       
good\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609622963569,"user_tz":-300,"elapsed":903,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"662a1dc1-b3fc-4137-b95a-8d7f38326fd5"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')             | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])                   | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)                  | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False)            | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)                   | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)                       | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)             | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)         | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)        | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')                 | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)                     | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)                         | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32)                                  | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False)                           | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768)                                 | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128)                         | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False)                                  | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768')          | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setClasses(['average', 'poor', 'good'])   | Info: get the tags used to trained this NerDLModel | Currently set to : ['average', 'poor', 'good']\n","pipe['classifier_dl'].setStorageRef('sent_small_bert_L12_768')  | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]}
\ No newline at end of file
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_class_text_classifier_demo_amazon.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_amazon.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier with NLU \n","## ClassifierDL (Multi-class Text Classification)\n","## 3 class Amazon Phone review classifier training\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install  pyspark==2.4.7 \n","! 
pip install nlu > /dev/null    \n","\n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download Amazon Unlocked mobile phones dataset \n","https://www.kaggle.com/PromptCloudHQ/amazon-reviews-unlocked-mobile-phones\n","\n","dataset with unlocked mobile phone reviews in 5 review classes\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610787546042,"user_tz":-300,"elapsed":3459,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ca2d6419-7d62-400b-d3d7-9b16fa9bce2c"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/Amazon_Unlocked_Mobile.csv"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 08:58:27--  http://ckl-it.de/wp-content/uploads/2021/01/Amazon_Unlocked_Mobile.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 452621 (442K) [text/csv]\n","Saving to: ‘Amazon_Unlocked_Mobile.csv’\n","\n","Amazon_Unlocked_Mob 100%[===================>] 442.01K   308KB/s    in 1.4s    \n","\n","2021-01-16 08:58:29 (308 KB/s) - ‘Amazon_Unlocked_Mobile.csv’ saved [452621/452621]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610787551525,"user_tz":-300,"elapsed":1188,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"dfe55b6f-f33a-4bd2-a2ba-5b1a306e1ab4"},"source":["import pandas as pd\n","test_path = '/content/Amazon_Unlocked_Mobile.csv'\n","train_df = pd.read_csv(test_path,sep=\",\")\n","cols = [\"y\",\"text\"]\n","train_df = train_df[cols]\n","train_df\n","\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>y</th>\n","      <th>text</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>poor</td>\n","      <td>Bought it, turned it on, did not work. 
Opened ...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>average</td>\n","      <td>Currently it is 2014, the 3gs is discontinued....</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>good</td>\n","      <td>100% recomendado</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>average</td>\n","      <td>It's a good phone but if you use it to browse ...</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>average</td>\n","      <td>It's nice that this phone has LTE and it funct...</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>1495</th>\n","      <td>poor</td>\n","      <td>Not happy with this phone. Not able to get but...</td>\n","    </tr>\n","    <tr>\n","      <th>1496</th>\n","      <td>good</td>\n","      <td>great phablet for all general uses</td>\n","    </tr>\n","    <tr>\n","      <th>1497</th>\n","      <td>poor</td>\n","      <td>Hate this phone had it for one day</td>\n","    </tr>\n","    <tr>\n","      <th>1498</th>\n","      <td>good</td>\n","      <td>Great cheap phone.</td>\n","    </tr>\n","    <tr>\n","      <th>1499</th>\n","      <td>good</td>\n","      <td>Very good</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>1500 rows × 2 columns</p>\n","</div>"],"text/plain":["            y                                               text\n","0        poor  Bought it, turned it on, did not work. Opened ...\n","1     average  Currently it is 2014, the 3gs is discontinued....\n","2        good                                   100% recomendado\n","3     average  It's a good phone but if you use it to browse ...\n","4     average  It's nice that this phone has LTE and it funct...\n","...       ...                                                ...\n","1495     poor  Not happy with this phone. 
Not able to get but...\n","1496     good                 great phablet for all general uses\n","1497     poor                 Hate this phone had it for one day\n","1498     good                                 Great cheap phone.\n","1499     good                                          Very good\n","\n","[1500 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.classifier')\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":501},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609621542716,"user_tz":-300,"elapsed":207913,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d0eb19cc-8849-43f7-9cdf-a88fd8f11676"},"source":["# load a trainable pipeline by specifying the train. 
prefix  and fit it on a datset with label and text columns\n","# Since there are no\n","\n","trainable_pipe = nlu.load('train.classifier')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50] )\n","\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50] )\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>y</th>\n","      <th>text</th>\n","      <th>category</th>\n","      <th>default_name_embeddings</th>\n","      <th>category_confidence</th>\n","      <th>sentence</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>poor</td>\n","      <td>Bought it, turned it on, did not work. Opened ...</td>\n","      <td>average</td>\n","      <td>[0.020834514871239662, 0.03326118737459183, -0...</td>\n","      <td>0.763940</td>\n","      <td>Bought it, turned it on, did not work.</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>poor</td>\n","      <td>Bought it, turned it on, did not work. 
Opened ...</td>\n","      <td>average</td>\n","      <td>[0.030574046075344086, -0.009678893722593784, ...</td>\n","      <td>1.000000</td>\n","      <td>Opened up the back, made sure it was in right,...</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>poor</td>\n","      <td>Bought it, turned it on, did not work. Opened ...</td>\n","      <td>average</td>\n","      <td>[0.023421283811330795, 0.02294657751917839, -0...</td>\n","      <td>2.000000</td>\n","      <td>It was supposed to be new, but i it was used.</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>poor</td>\n","      <td>Bought it, turned it on, did not work. Opened ...</td>\n","      <td>average</td>\n","      <td>[0.06009713560342789, 0.046434734016656876, -0...</td>\n","      <td>3.000000</td>\n","      <td>Found scratches on cover.</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>average</td>\n","      <td>Currently it is 2014, the 3gs is discontinued....</td>\n","      <td>average</td>\n","      <td>[0.04893391206860542, -0.010221654549241066, -...</td>\n","      <td>0.631228</td>\n","      <td>Currently it is 2014, the 3gs is discontinued.</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>47</th>\n","      <td>good</td>\n","      <td>Bought for my mom! She loves it!</td>\n","      <td>good</td>\n","      <td>[0.021471485495567322, -0.027823669835925102, ...</td>\n","      <td>0.656713</td>\n","      <td>Bought for my mom!</td>\n","    </tr>\n","    <tr>\n","      <th>47</th>\n","      <td>good</td>\n","      <td>Bought for my mom! 
She loves it!</td>\n","      <td>good</td>\n","      <td>[0.0001737327256705612, -0.014630521647632122,...</td>\n","      <td>1.000000</td>\n","      <td>She loves it!</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>good</td>\n","      <td>Gave the phone as a birthday gift. My friend s...</td>\n","      <td>good</td>\n","      <td>[0.03572574257850647, 0.013357092626392841, -0...</td>\n","      <td>0.701626</td>\n","      <td>Gave the phone as a birthday gift.</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>good</td>\n","      <td>Gave the phone as a birthday gift. My friend s...</td>\n","      <td>good</td>\n","      <td>[0.08371475338935852, -0.01581401191651821, -0...</td>\n","      <td>1.000000</td>\n","      <td>My friend seems happy with it so far.</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>good</td>\n","      <td>Great Product</td>\n","      <td>good</td>\n","      <td>[0.03334435820579529, -0.05353177338838577, -0...</td>\n","      <td>0.593622</td>\n","      <td>Great Product</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>215 rows × 6 columns</p>\n","</div>"],"text/plain":["                    y  ...                                           sentence\n","origin_index           ...                                                   \n","0                poor  ...             Bought it, turned it on, did not work.\n","0                poor  ...  Opened up the back, made sure it was in right,...\n","0                poor  ...      It was supposed to be new, but i it was used.\n","0                poor  ...                          Found scratches on cover.\n","1             average  ...     Currently it is 2014, the 3gs is discontinued.\n","...               ...  ...                                                ...\n","47               good  ...                                 Bought for my mom!\n","47               good  ...                                      
She loves it!\n","48               good  ...                 Gave the phone as a birthday gift.\n","48               good  ...              My friend seems happy with it so far.\n","49               good  ...                                      Great Product\n","\n","[215 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609621546162,"user_tz":-300,"elapsed":211344,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"5dc268e6-e97f-4378-85d1-8319d3f7893f"},"source":["fitted_pipe.predict(\"It worked perfectly .\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>category</th>\n","      <th>default_name_embeddings</th>\n","      <th>category_confidence</th>\n","      <th>sentence</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>average</td>\n","      <td>[0.06468033790588379, -0.040837567299604416, -...</td>\n","      <td>0.460187</td>\n","      <td>Bitcoin is going to the moon!</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["             category  ...                       
sentence\n","origin_index           ...                               \n","0             average  ...  Bitcoin is going to the moon!\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609621546165,"user_tz":-300,"elapsed":211336,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c0633c00-9bfd-412b-ee55-0f6e5b150f39"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setMaxEpochs(3)                | Info: Maximum number of epochs to train | Currently set to : 3\n","pipe['classifier_dl'].setLr(0.005)                   | Info: Learning Rate | Currently set to : 0.005\n","pipe['classifier_dl'].setBatchSize(64)               | Info: Batch size | Currently set to : 64\n","pipe['classifier_dl'].setDropout(0.5)                | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['classifier_dl'].setEnableOutputLogs(True)      | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+')    | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"])  | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0)            | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999)        | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512)               | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use')      | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True)  | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True)       | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([])        | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False)  | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0)            | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999)        | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')  | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":620},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609621557024,"user_tz":-300,"elapsed":222179,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"12b53152-fcdf-4180-91b8-cc150e5bb23a"},"source":["# Train longer!\n","trainable_pipe['classifier_dl'].setMaxEpochs(5)  \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:100])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["              precision    recall  f1-score   support\n","\n","     average       0.00      0.00      0.00        29\n","        good       0.65      0.94      0.77        32\n","        poor       0.69      0.95      0.80        39\n","\n","    accuracy                           0.67       100\n","   macro avg       0.45      0.63      0.52       100\n","weighted avg       0.48      0.67      0.56       100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>y</th>\n","      <th>text</th>\n","      <th>document</th>\n","      <th>category</th>\n","      <th>default_name_embeddings</th>\n","      <th>category_confidence</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>poor</td>\n","      <td>Bought it, turned it on, did not work. Opened ...</td>\n","      <td>Bought it, turned it on, did not work. 
Opened ...</td>\n","      <td>poor</td>\n","      <td>[0.059367865324020386, 0.05043933913111687, -0...</td>\n","      <td>0.952295</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>average</td>\n","      <td>Currently it is 2014, the 3gs is discontinued....</td>\n","      <td>Currently it is 2014, the 3gs is discontinued....</td>\n","      <td>good</td>\n","      <td>[0.0046275281347334385, 0.012452688068151474, ...</td>\n","      <td>0.396265</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>good</td>\n","      <td>100% recomendado</td>\n","      <td>100% recomendado</td>\n","      <td>good</td>\n","      <td>[0.008266163989901543, 0.00396152026951313, -0...</td>\n","      <td>0.773682</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>average</td>\n","      <td>It's a good phone but if you use it to browse ...</td>\n","      <td>It's a good phone but if you use it to browse ...</td>\n","      <td>poor</td>\n","      <td>[0.05291805788874626, 0.002292224671691656, -0...</td>\n","      <td>0.506015</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>average</td>\n","      <td>It's nice that this phone has LTE and it funct...</td>\n","      <td>It's nice that this phone has LTE and it funct...</td>\n","      <td>good</td>\n","      <td>[0.03426238149404526, -0.024366019293665886, -...</td>\n","      <td>0.648859</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>95</th>\n","      <td>poor</td>\n","      <td>Hola, compramos dos telÃ©fonos y vienieron tot...</td>\n","      <td>Hola, compramos dos telÃ©fonos y vienieron tot...</td>\n","      <td>poor</td>\n","      <td>[0.06324272602796555, -0.06387951225042343, -0...</td>\n","      <td>0.790492</td>\n","    </tr>\n","    <tr>\n","      <th>96</th>\n","      
<td>good</td>\n","      <td>Excelente</td>\n","      <td>Excelente</td>\n","      <td>good</td>\n","      <td>[0.03246314451098442, -0.01719777286052704, -0...</td>\n","      <td>0.813424</td>\n","    </tr>\n","    <tr>\n","      <th>97</th>\n","      <td>poor</td>\n","      <td>the product is good but the English language s...</td>\n","      <td>the product is good but the English language s...</td>\n","      <td>poor</td>\n","      <td>[0.056343767791986465, -0.016822000965476036, ...</td>\n","      <td>0.940151</td>\n","    </tr>\n","    <tr>\n","      <th>98</th>\n","      <td>poor</td>\n","      <td>Supposed to be a brand new unlock phone. The p...</td>\n","      <td>Supposed to be a brand new unlock phone. The p...</td>\n","      <td>poor</td>\n","      <td>[0.03210984170436859, 0.018154876306653023, -0...</td>\n","      <td>0.984983</td>\n","    </tr>\n","    <tr>\n","      <th>99</th>\n","      <td>average</td>\n","      <td>Minor, very annoying glitch when texting. Not ...</td>\n","      <td>Minor, very annoying glitch when texting. Not ...</td>\n","      <td>poor</td>\n","      <td>[-0.026854539290070534, 0.03769969940185547, 0...</td>\n","      <td>0.969512</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>100 rows × 6 columns</p>\n","</div>"],"text/plain":["                    y  ... category_confidence\n","origin_index           ...                    \n","0                poor  ...            0.952295\n","1             average  ...            0.396265\n","2                good  ...            0.773682\n","3             average  ...            0.506015\n","4             average  ...            0.648859\n","...               ...  ...                 ...\n","95               poor  ...            0.790492\n","96               good  ...            0.813424\n","97               poor  ...            0.940151\n","98               poor  ...            0.984983\n","99            average  ...            
0.969512\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609621557034,"user_tz":-300,"elapsed":222174,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1a690639-c397-4ced-c222-981776472766"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language <en> NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model 
sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP 
model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language <fi> NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language <xx> NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model 
sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"IKK_Ii_gjJfF","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609622725629,"user_tz":-300,"elapsed":1390760,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"9e8f7464-0bca-4a03-9212-2ab8ccb8f319"},"source":["from sklearn.metrics import classification_report\n","trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.classifier')\n","# We need to train longer and use a smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['classifier_dl'].setMaxEpochs(90)  \n","trainable_pipe['classifier_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","     average       0.72      0.67      0.69       500\n","        good       0.85      0.87      0.86       500\n","        poor       0.78      0.83      0.80       500\n","\n","    accuracy                           0.79      1500\n","   macro avg       0.78      0.79      0.79      1500\n","weighted avg       0.78      0.79      0.79      1500\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Let's save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609622897186,"user_tz":-300,"elapsed":1562308,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a3175762-9ea0-472e-a8bf-0a64fd1176c9"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Let's load the model from HDD.\n","This makes Offline NLU usage possible!   
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":111},"executionInfo":{"status":"ok","timestamp":1609622933158,"user_tz":-300,"elapsed":1598267,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a40c74fc-c2f1-4a58-ba4e-5d1e21e39da3"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('It worked perfectly.')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>classifier_confidence</th>\n","      <th>document</th>\n","      <th>en_embed_sentence_small_bert_L12_768_embeddings</th>\n","      <th>classifier</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0.950214</td>\n","      <td>It worked perfectly.</td>\n","      <td>[0.275971919298172, 0.4924655854701996, 0.2755...</td>\n","      <td>good</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["             classifier_confidence  ... classifier\n","origin_index                        ...           \n","0                         0.950214  ...       
good\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609622963569,"user_tz":-300,"elapsed":903,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"662a1dc1-b3fc-4137-b95a-8d7f38326fd5"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')             | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])                   | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)                  | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False)            | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)                   | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)                       | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)             | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)         | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)        | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')                 | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)                     | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)                         | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32)                                  | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False)                           | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768)                                 | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128)                         | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False)                                  | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768')          | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setClasses(['average', 'poor', 'good'])   | Info: get the tags used to trained this NerDLModel | Currently set to : ['average', 'poor', 'good']\n","pipe['classifier_dl'].setStorageRef('sent_small_bert_L12_768')  | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]}
\ No newline at end of file
diff --git a/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_hotel_reviews.ipynb b/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_hotel_reviews.ipynb
index a62e0ab3..35d328ee 100644
--- a/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_hotel_reviews.ipynb
+++ b/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_hotel_reviews.ipynb
@@ -1 +1 @@
-{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_class_text_classifier_demo_hotel_reviews.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_hotel_reviews.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier with NLU \n","## ClassifierDL (Multi-class Text Classification)\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install  pyspark==2.4.7 \n","! pip install nlu > /dev/null    \n","\n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download hotel reviews  dataset \n","https://www.kaggle.com/andrewmvd/trip-advisor-hotel-reviews\n","\n","Hotels play a crucial role in traveling and with the increased access to information new pathways of selecting the best ones emerged.\n","With this dataset, consisting of 20k reviews crawled from Tripadvisor, you can explore what makes a great hotel and maybe even use this model in your travels!\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610787917402,"user_tz":-300,"elapsed":5153,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"0d1c17b6-555c-4df6-cfb6-08af37c3f9ef"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/tripadvisor_hotel_reviews.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:04:37--  http://ckl-it.de/wp-content/uploads/2021/01/tripadvisor_hotel_reviews.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 5160790 (4.9M) [text/csv]\n","Saving to: ‘tripadvisor_hotel_reviews.csv’\n","\n","tripadvisor_hotel_r 100%[===================>]   4.92M  1.46MB/s    in 3.4s    \n","\n","2021-01-16 09:04:41 (1.46 MB/s) - ‘tripadvisor_hotel_reviews.csv’ saved [5160790/5160790]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610787919775,"user_tz":-300,"elapsed":1300,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"f579f26c-2a41-47ec-fe20-989c3ec16643"},"source":["import pandas as pd\n","test_path = '/content/tripadvisor_hotel_reviews.csv'\n","train_df = pd.read_csv(test_path,sep=\",\")\n","cols = [\"y\",\"text\"]\n","train_df = train_df[cols]\n","train_df\n","\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>y</th>\n","      <th>text</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>great</td>\n","      <td>great stayed hotel 5 nights end august 2005. 
r...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>poor</td>\n","      <td>watch bait-and-switch room rates, rooms accept...</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>average</td>\n","      <td>good check liked hotel good location friendly ...</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>great</td>\n","      <td>best location value properties waikiki head ho...</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>poor</td>\n","      <td>botel not recommended little disappointed hone...</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>6547</th>\n","      <td>great</td>\n","      <td>big bang buck st. charles great new orleans st...</td>\n","    </tr>\n","    <tr>\n","      <th>6548</th>\n","      <td>great</td>\n","      <td>loved minute, reading reviews hotel bit worrie...</td>\n","    </tr>\n","    <tr>\n","      <th>6549</th>\n","      <td>great</td>\n","      <td>wonderful, let tell place, 3 friends stayed ap...</td>\n","    </tr>\n","    <tr>\n","      <th>6550</th>\n","      <td>average</td>\n","      <td>small bathroom clean hmmm ok let stay used tra...</td>\n","    </tr>\n","    <tr>\n","      <th>6551</th>\n","      <td>poor</td>\n","      <td>vvvv bad went hotel valantine day weekend, hot...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>6552 rows × 2 columns</p>\n","</div>"],"text/plain":["            y                                               text\n","0       great  great stayed hotel 5 nights end august 2005. r...\n","1        poor  watch bait-and-switch room rates, rooms accept...\n","2     average  good check liked hotel good location friendly ...\n","3       great  best location value properties waikiki head ho...\n","4        poor  botel not recommended little disappointed hone...\n","...       ...                                                
...\n","6547    great  big bang buck st. charles great new orleans st...\n","6548    great  loved minute, reading reviews hotel bit worrie...\n","6549    great  wonderful, let tell place, 3 friends stayed ap...\n","6550  average  small bathroom clean hmmm ok let stay used tra...\n","6551     poor  vvvv bad went hotel valantine day weekend, hot...\n","\n","[6552 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.classifier')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":501},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609621945982,"user_tz":-300,"elapsed":194629,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ea741862-2923-441b-ed64-bb5da1eb5e3e"},"source":["# load a trainable pipeline by specifying the train. 
prefix  and fit it on a datset with label and text columns\n","# Since there are no\n","\n","trainable_pipe = nlu.load('train.classifier')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50] )\n","\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50] )\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>y</th>\n","      <th>text</th>\n","      <th>category_confidence</th>\n","      <th>token</th>\n","      <th>category</th>\n","      <th>default_name_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>great</td>\n","      <td>great stayed hotel 5 nights end august 2005. r...</td>\n","      <td>0.496030</td>\n","      <td>great</td>\n","      <td>great</td>\n","      <td>[[0.03609783574938774, 0.05106373876333237, 0....</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>great</td>\n","      <td>great stayed hotel 5 nights end august 2005. 
r...</td>\n","      <td>0.496030</td>\n","      <td>stayed</td>\n","      <td>great</td>\n","      <td>[[0.03609783574938774, 0.05106373876333237, 0....</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>great</td>\n","      <td>great stayed hotel 5 nights end august 2005. r...</td>\n","      <td>0.496030</td>\n","      <td>hotel</td>\n","      <td>great</td>\n","      <td>[[0.03609783574938774, 0.05106373876333237, 0....</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>great</td>\n","      <td>great stayed hotel 5 nights end august 2005. r...</td>\n","      <td>0.496030</td>\n","      <td>5</td>\n","      <td>great</td>\n","      <td>[[0.03609783574938774, 0.05106373876333237, 0....</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>great</td>\n","      <td>great stayed hotel 5 nights end august 2005. r...</td>\n","      <td>0.496030</td>\n","      <td>nights</td>\n","      <td>great</td>\n","      <td>[[0.03609783574938774, 0.05106373876333237, 0....</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>poor</td>\n","      <td>kidding, arrived riu palace macao punta cana w...</td>\n","      <td>0.476485</td>\n","      <td>recommend</td>\n","      <td>average</td>\n","      <td>[[-0.017401963472366333, 0.04562698304653168, ...</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>poor</td>\n","      <td>kidding, arrived riu palace macao punta cana w...</td>\n","      <td>0.476485</td>\n","      <td>riu</td>\n","      <td>average</td>\n","      <td>[[-0.017401963472366333, 0.04562698304653168, ...</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>poor</td>\n","      <td>kidding, arrived riu palace macao punta cana w...</td>\n","      <td>0.476485</td>\n","      <td>palace</td>\n","      
<td>average</td>\n","      <td>[[-0.017401963472366333, 0.04562698304653168, ...</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>poor</td>\n","      <td>kidding, arrived riu palace macao punta cana w...</td>\n","      <td>0.476485</td>\n","      <td>macao</td>\n","      <td>average</td>\n","      <td>[[-0.017401963472366333, 0.04562698304653168, ...</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>poor</td>\n","      <td>kidding, arrived riu palace macao punta cana w...</td>\n","      <td>0.476485</td>\n","      <td>,</td>\n","      <td>average</td>\n","      <td>[[-0.017401963472366333, 0.04562698304653168, ...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>7014 rows × 6 columns</p>\n","</div>"],"text/plain":["                  y  ...                            default_name_embeddings\n","origin_index         ...                                                   \n","0             great  ...  [[0.03609783574938774, 0.05106373876333237, 0....\n","0             great  ...  [[0.03609783574938774, 0.05106373876333237, 0....\n","0             great  ...  [[0.03609783574938774, 0.05106373876333237, 0....\n","0             great  ...  [[0.03609783574938774, 0.05106373876333237, 0....\n","0             great  ...  [[0.03609783574938774, 0.05106373876333237, 0....\n","...             ...  ...                                                ...\n","49             poor  ...  [[-0.017401963472366333, 0.04562698304653168, ...\n","49             poor  ...  [[-0.017401963472366333, 0.04562698304653168, ...\n","49             poor  ...  [[-0.017401963472366333, 0.04562698304653168, ...\n","49             poor  ...  [[-0.017401963472366333, 0.04562698304653168, ...\n","49             poor  ...  
[[-0.017401963472366333, 0.04562698304653168, ...\n","\n","[7014 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":297},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609621948873,"user_tz":-300,"elapsed":197503,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"7c8077e8-b95b-40e5-839b-29e738884851"},"source":["fitted_pipe.predict(\"It was a good experince!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>category_confidence</th>\n","      <th>token</th>\n","      <th>category</th>\n","      <th>default_name_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0.739900</td>\n","      <td>Bitcoin</td>\n","      <td>average</td>\n","      <td>[[0.06468033790588379, -0.040837567299604416, ...</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>0.739900</td>\n","      <td>is</td>\n","      <td>average</td>\n","      <td>[[0.06468033790588379, -0.040837567299604416, ...</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>0.739900</td>\n","      <td>going</td>\n","      <td>average</td>\n","      <td>[[0.06468033790588379, 
-0.040837567299604416, ...</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>0.739900</td>\n","      <td>to</td>\n","      <td>average</td>\n","      <td>[[0.06468033790588379, -0.040837567299604416, ...</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>0.739900</td>\n","      <td>the</td>\n","      <td>average</td>\n","      <td>[[0.06468033790588379, -0.040837567299604416, ...</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>0.739900</td>\n","      <td>moon</td>\n","      <td>average</td>\n","      <td>[[0.06468033790588379, -0.040837567299604416, ...</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>0.739900</td>\n","      <td>!</td>\n","      <td>average</td>\n","      <td>[[0.06468033790588379, -0.040837567299604416, ...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["             category_confidence  ...                            default_name_embeddings\n","origin_index                      ...                                                   \n","0                       0.739900  ...  [[0.06468033790588379, -0.040837567299604416, ...\n","0                       0.739900  ...  [[0.06468033790588379, -0.040837567299604416, ...\n","0                       0.739900  ...  [[0.06468033790588379, -0.040837567299604416, ...\n","0                       0.739900  ...  [[0.06468033790588379, -0.040837567299604416, ...\n","0                       0.739900  ...  [[0.06468033790588379, -0.040837567299604416, ...\n","0                       0.739900  ...  [[0.06468033790588379, -0.040837567299604416, ...\n","0                       0.739900  ...  
[[0.06468033790588379, -0.040837567299604416, ...\n","\n","[7 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609621948879,"user_tz":-300,"elapsed":197499,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3b2e8fac-92a8-436c-93b7-d548f39f95a1"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setMaxEpochs(3)                | Info: Maximum number of epochs to train | Currently set to : 3\n","pipe['classifier_dl'].setLr(0.005)                   | Info: Learning Rate | Currently set to : 0.005\n","pipe['classifier_dl'].setBatchSize(64)               | Info: Batch size | Currently set to : 64\n","pipe['classifier_dl'].setDropout(0.5)                | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['classifier_dl'].setEnableOutputLogs(True)      | Info: Whether to use stdout in addition to Spark logs. 
| Currently set to : True\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True)  | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True)       | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([])        | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False)  | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0)            | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999)        | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+')    | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"])  | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0)            | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999)        | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512)               | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use')      | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')  | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":620},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609621958501,"user_tz":-300,"elapsed":207107,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e86e37d9-df7d-4c70-fafe-f1b297860fa9"},"source":["# Train longer!\n","trainable_pipe['classifier_dl'].setMaxEpochs(5)  \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:100])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = 
fitted_pipe.predict(train_df.iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["              precision    recall  f1-score   support\n","\n","     average       0.48      0.76      0.59        33\n","       great       0.86      0.51      0.64        35\n","        poor       0.74      0.62      0.68        32\n","\n","    accuracy                           0.63       100\n","   macro avg       0.69      0.63      0.64       100\n","weighted avg       0.70      0.63      0.64       100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>y</th>\n","      <th>text</th>\n","      <th>document</th>\n","      <th>category_confidence</th>\n","      <th>category</th>\n","      <th>default_name_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>great</td>\n","      <td>great stayed hotel 5 nights end august 2005. r...</td>\n","      <td>great stayed hotel 5 nights end august 2005. 
r...</td>\n","      <td>0.595822</td>\n","      <td>average</td>\n","      <td>[0.06212242692708969, 0.04104098677635193, 0.0...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>poor</td>\n","      <td>watch bait-and-switch room rates, rooms accept...</td>\n","      <td>watch bait-and-switch room rates, rooms accept...</td>\n","      <td>0.498284</td>\n","      <td>poor</td>\n","      <td>[0.0546528585255146, 0.02160552889108658, -0.0...</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>average</td>\n","      <td>good check liked hotel good location friendly ...</td>\n","      <td>good check liked hotel good location friendly ...</td>\n","      <td>0.557739</td>\n","      <td>average</td>\n","      <td>[0.008103911764919758, 0.02573486790060997, 0....</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>great</td>\n","      <td>best location value properties waikiki head ho...</td>\n","      <td>best location value properties waikiki head ho...</td>\n","      <td>0.418274</td>\n","      <td>average</td>\n","      <td>[0.05095028877258301, -0.003614993067458272, 0...</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>poor</td>\n","      <td>botel not recommended little disappointed hone...</td>\n","      <td>botel not recommended little disappointed hone...</td>\n","      <td>0.491956</td>\n","      <td>average</td>\n","      <td>[0.03620055690407753, 0.010797196999192238, 0....</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>95</th>\n","      <td>great</td>\n","      <td>great location spent 7 days castle inn beginni...</td>\n","      <td>great location spent 7 days castle inn beginni...</td>\n","      <td>0.402236</td>\n","      <td>average</td>\n","      <td>[0.03295842185616493, 0.04682551696896553, 0.0...</td>\n"," 
   </tr>\n","    <tr>\n","      <th>96</th>\n","      <td>average</td>\n","      <td>great location hard beds really liked hotel si...</td>\n","      <td>great location hard beds really liked hotel si...</td>\n","      <td>0.598560</td>\n","      <td>average</td>\n","      <td>[0.02258184179663658, 0.0432007722556591, -0.0...</td>\n","    </tr>\n","    <tr>\n","      <th>97</th>\n","      <td>great</td>\n","      <td>great location location hotel perfect right mi...</td>\n","      <td>great location location hotel perfect right mi...</td>\n","      <td>0.552369</td>\n","      <td>average</td>\n","      <td>[0.06024744734168053, 0.05366133153438568, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>98</th>\n","      <td>great</td>\n","      <td>just starting lose lustre stayed chancellor co...</td>\n","      <td>just starting lose lustre stayed chancellor co...</td>\n","      <td>0.374642</td>\n","      <td>poor</td>\n","      <td>[0.0255410298705101, 0.0401645191013813, 0.003...</td>\n","    </tr>\n","    <tr>\n","      <th>99</th>\n","      <td>poor</td>\n","      <td>bittersweet memories glorious past recent stay...</td>\n","      <td>bittersweet memories glorious past recent stay...</td>\n","      <td>0.415380</td>\n","      <td>poor</td>\n","      <td>[0.03259000554680824, 0.049256037920713425, 0....</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>100 rows × 6 columns</p>\n","</div>"],"text/plain":["                    y  ...                            default_name_embeddings\n","origin_index           ...                                                   \n","0               great  ...  [0.06212242692708969, 0.04104098677635193, 0.0...\n","1                poor  ...  [0.0546528585255146, 0.02160552889108658, -0.0...\n","2             average  ...  [0.008103911764919758, 0.02573486790060997, 0....\n","3               great  ...  [0.05095028877258301, -0.003614993067458272, 0...\n","4                poor  ...  
[0.03620055690407753, 0.010797196999192238, 0....\n","...               ...  ...                                                ...\n","95              great  ...  [0.03295842185616493, 0.04682551696896553, 0.0...\n","96            average  ...  [0.02258184179663658, 0.0432007722556591, -0.0...\n","97              great  ...  [0.06024744734168053, 0.05366133153438568, -0....\n","98              great  ...  [0.0255410298705101, 0.0401645191013813, 0.003...\n","99               poor  ...  [0.03259000554680824, 0.049256037920713425, 0....\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609621958515,"user_tz":-300,"elapsed":207110,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"de28c144-5456-4998-ffad-2d5046d5efc4"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language <en> NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language <fi> NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language <xx> NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"IKK_Ii_gjJfF","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609631124604,"user_tz":-300,"elapsed":7463638,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"49f0d684-0253-441f-d322-2e286b89fa24"},"source":["from sklearn.metrics import classification_report\n","trainable_pipe = 
nlu.load('en.embed_sentence.small_bert_L12_768 train.classifier')\n","# We need to train longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['classifier_dl'].setMaxEpochs(90)  \n","trainable_pipe['classifier_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","     average       0.66      0.65      0.65      2184\n","       great       0.79      0.81      0.80      2184\n","        poor       0.77      0.78      0.78      2184\n","\n","    accuracy                           0.74      6552\n","   macro avg       0.74      0.74      0.74      6552\n","weighted avg       0.74      0.74      0.74      6552\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. 
Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609631288714,"user_tz":-300,"elapsed":164136,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"42ce7ad9-b16c-404f-a717-b6d98651af95"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible!   \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":111},"executionInfo":{"status":"ok","timestamp":1609631304458,"user_tz":-300,"elapsed":15754,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a0f4d6e7-e607-41f2-91ae-7b170b03b40a"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('It was a good experince!')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>classifier</th>\n","      <th>en_embed_sentence_small_bert_L12_768_embeddings</th>\n","      <th>document</th>\n","      <th>classifier_confidence</th>\n","    </tr>\n","    <tr>\n","      
<th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>great</td>\n","      <td>[-0.07878006249666214, 0.1528550535440445, 0.1...</td>\n","      <td>It was one of the best wines i ever tasted .</td>\n","      <td>0.865597</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["             classifier  ... classifier_confidence\n","origin_index             ...                      \n","0                 great  ...              0.865597\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609631312511,"user_tz":-300,"elapsed":2776,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3de0f51e-3fd0-4dae-ee05-81459d162c42"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')             | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)        | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')                 | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)                     | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)                         | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])                   | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)                  | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False)            | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)                   | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)                       | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)             | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)         | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32)                                  | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False)                           | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768)                                 | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128)                         | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False)                                  | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768')          | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setClasses(['average', 'great', 'poor'])  | Info: get the tags used to trained this NerDLModel | Currently set to : ['average', 'great', 'poor']\n","pipe['classifier_dl'].setStorageRef('sent_small_bert_L12_768')  | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]}
\ No newline at end of file
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_class_text_classifier_demo_hotel_reviews.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_hotel_reviews.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier with NLU \n","## ClassifierDL (Multi-class Text Classification)\n","## 3 class Tripadvisor Hotel review classifier training\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install  pyspark==2.4.7 \n","! 
pip install nlu > /dev/null    \n","\n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download hotel reviews  dataset \n","https://www.kaggle.com/andrewmvd/trip-advisor-hotel-reviews\n","\n","Hotels play a crucial role in traveling and with the increased access to information new pathways of selecting the best ones emerged.\n","With this dataset, consisting of 20k reviews crawled from Tripadvisor, you can explore what makes a great hotel and maybe even use this model in your travels!\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610787917402,"user_tz":-300,"elapsed":5153,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"0d1c17b6-555c-4df6-cfb6-08af37c3f9ef"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/tripadvisor_hotel_reviews.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:04:37--  http://ckl-it.de/wp-content/uploads/2021/01/tripadvisor_hotel_reviews.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 5160790 (4.9M) [text/csv]\n","Saving to: ‘tripadvisor_hotel_reviews.csv’\n","\n","tripadvisor_hotel_r 100%[===================>]   4.92M  1.46MB/s    in 3.4s    \n","\n","2021-01-16 09:04:41 (1.46 MB/s) - ‘tripadvisor_hotel_reviews.csv’ saved [5160790/5160790]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610787919775,"user_tz":-300,"elapsed":1300,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"f579f26c-2a41-47ec-fe20-989c3ec16643"},"source":["import pandas as pd\n","test_path = '/content/tripadvisor_hotel_reviews.csv'\n","train_df = pd.read_csv(test_path,sep=\",\")\n","cols = [\"y\",\"text\"]\n","train_df = train_df[cols]\n","train_df\n","\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>y</th>\n","      <th>text</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>great</td>\n","      <td>great stayed hotel 5 nights end august 2005. 
r...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>poor</td>\n","      <td>watch bait-and-switch room rates, rooms accept...</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>average</td>\n","      <td>good check liked hotel good location friendly ...</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>great</td>\n","      <td>best location value properties waikiki head ho...</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>poor</td>\n","      <td>botel not recommended little disappointed hone...</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>6547</th>\n","      <td>great</td>\n","      <td>big bang buck st. charles great new orleans st...</td>\n","    </tr>\n","    <tr>\n","      <th>6548</th>\n","      <td>great</td>\n","      <td>loved minute, reading reviews hotel bit worrie...</td>\n","    </tr>\n","    <tr>\n","      <th>6549</th>\n","      <td>great</td>\n","      <td>wonderful, let tell place, 3 friends stayed ap...</td>\n","    </tr>\n","    <tr>\n","      <th>6550</th>\n","      <td>average</td>\n","      <td>small bathroom clean hmmm ok let stay used tra...</td>\n","    </tr>\n","    <tr>\n","      <th>6551</th>\n","      <td>poor</td>\n","      <td>vvvv bad went hotel valantine day weekend, hot...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>6552 rows × 2 columns</p>\n","</div>"],"text/plain":["            y                                               text\n","0       great  great stayed hotel 5 nights end august 2005. r...\n","1        poor  watch bait-and-switch room rates, rooms accept...\n","2     average  good check liked hotel good location friendly ...\n","3       great  best location value properties waikiki head ho...\n","4        poor  botel not recommended little disappointed hone...\n","...       ...                                                
...\n","6547    great  big bang buck st. charles great new orleans st...\n","6548    great  loved minute, reading reviews hotel bit worrie...\n","6549    great  wonderful, let tell place, 3 friends stayed ap...\n","6550  average  small bathroom clean hmmm ok let stay used tra...\n","6551     poor  vvvv bad went hotel valantine day weekend, hot...\n","\n","[6552 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.classifier')\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":501},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609621945982,"user_tz":-300,"elapsed":194629,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ea741862-2923-441b-ed64-bb5da1eb5e3e"},"source":["# load a trainable pipeline by specifying the train. 
prefix  and fit it on a datset with label and text columns\n","# Since there are no\n","\n","trainable_pipe = nlu.load('train.classifier')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50] )\n","\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50] )\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>y</th>\n","      <th>text</th>\n","      <th>category_confidence</th>\n","      <th>token</th>\n","      <th>category</th>\n","      <th>default_name_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>great</td>\n","      <td>great stayed hotel 5 nights end august 2005. r...</td>\n","      <td>0.496030</td>\n","      <td>great</td>\n","      <td>great</td>\n","      <td>[[0.03609783574938774, 0.05106373876333237, 0....</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>great</td>\n","      <td>great stayed hotel 5 nights end august 2005. 
r...</td>\n","      <td>0.496030</td>\n","      <td>stayed</td>\n","      <td>great</td>\n","      <td>[[0.03609783574938774, 0.05106373876333237, 0....</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>great</td>\n","      <td>great stayed hotel 5 nights end august 2005. r...</td>\n","      <td>0.496030</td>\n","      <td>hotel</td>\n","      <td>great</td>\n","      <td>[[0.03609783574938774, 0.05106373876333237, 0....</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>great</td>\n","      <td>great stayed hotel 5 nights end august 2005. r...</td>\n","      <td>0.496030</td>\n","      <td>5</td>\n","      <td>great</td>\n","      <td>[[0.03609783574938774, 0.05106373876333237, 0....</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>great</td>\n","      <td>great stayed hotel 5 nights end august 2005. r...</td>\n","      <td>0.496030</td>\n","      <td>nights</td>\n","      <td>great</td>\n","      <td>[[0.03609783574938774, 0.05106373876333237, 0....</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>poor</td>\n","      <td>kidding, arrived riu palace macao punta cana w...</td>\n","      <td>0.476485</td>\n","      <td>recommend</td>\n","      <td>average</td>\n","      <td>[[-0.017401963472366333, 0.04562698304653168, ...</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>poor</td>\n","      <td>kidding, arrived riu palace macao punta cana w...</td>\n","      <td>0.476485</td>\n","      <td>riu</td>\n","      <td>average</td>\n","      <td>[[-0.017401963472366333, 0.04562698304653168, ...</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>poor</td>\n","      <td>kidding, arrived riu palace macao punta cana w...</td>\n","      <td>0.476485</td>\n","      <td>palace</td>\n","      
<td>average</td>\n","      <td>[[-0.017401963472366333, 0.04562698304653168, ...</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>poor</td>\n","      <td>kidding, arrived riu palace macao punta cana w...</td>\n","      <td>0.476485</td>\n","      <td>macao</td>\n","      <td>average</td>\n","      <td>[[-0.017401963472366333, 0.04562698304653168, ...</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>poor</td>\n","      <td>kidding, arrived riu palace macao punta cana w...</td>\n","      <td>0.476485</td>\n","      <td>,</td>\n","      <td>average</td>\n","      <td>[[-0.017401963472366333, 0.04562698304653168, ...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>7014 rows × 6 columns</p>\n","</div>"],"text/plain":["                  y  ...                            default_name_embeddings\n","origin_index         ...                                                   \n","0             great  ...  [[0.03609783574938774, 0.05106373876333237, 0....\n","0             great  ...  [[0.03609783574938774, 0.05106373876333237, 0....\n","0             great  ...  [[0.03609783574938774, 0.05106373876333237, 0....\n","0             great  ...  [[0.03609783574938774, 0.05106373876333237, 0....\n","0             great  ...  [[0.03609783574938774, 0.05106373876333237, 0....\n","...             ...  ...                                                ...\n","49             poor  ...  [[-0.017401963472366333, 0.04562698304653168, ...\n","49             poor  ...  [[-0.017401963472366333, 0.04562698304653168, ...\n","49             poor  ...  [[-0.017401963472366333, 0.04562698304653168, ...\n","49             poor  ...  [[-0.017401963472366333, 0.04562698304653168, ...\n","49             poor  ...  
[[-0.017401963472366333, 0.04562698304653168, ...\n","\n","[7014 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":297},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609621948873,"user_tz":-300,"elapsed":197503,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"7c8077e8-b95b-40e5-839b-29e738884851"},"source":["fitted_pipe.predict(\"It was a good experince!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>category_confidence</th>\n","      <th>token</th>\n","      <th>category</th>\n","      <th>default_name_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0.739900</td>\n","      <td>Bitcoin</td>\n","      <td>average</td>\n","      <td>[[0.06468033790588379, -0.040837567299604416, ...</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>0.739900</td>\n","      <td>is</td>\n","      <td>average</td>\n","      <td>[[0.06468033790588379, -0.040837567299604416, ...</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>0.739900</td>\n","      <td>going</td>\n","      <td>average</td>\n","      <td>[[0.06468033790588379, 
-0.040837567299604416, ...</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>0.739900</td>\n","      <td>to</td>\n","      <td>average</td>\n","      <td>[[0.06468033790588379, -0.040837567299604416, ...</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>0.739900</td>\n","      <td>the</td>\n","      <td>average</td>\n","      <td>[[0.06468033790588379, -0.040837567299604416, ...</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>0.739900</td>\n","      <td>moon</td>\n","      <td>average</td>\n","      <td>[[0.06468033790588379, -0.040837567299604416, ...</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>0.739900</td>\n","      <td>!</td>\n","      <td>average</td>\n","      <td>[[0.06468033790588379, -0.040837567299604416, ...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["             category_confidence  ...                            default_name_embeddings\n","origin_index                      ...                                                   \n","0                       0.739900  ...  [[0.06468033790588379, -0.040837567299604416, ...\n","0                       0.739900  ...  [[0.06468033790588379, -0.040837567299604416, ...\n","0                       0.739900  ...  [[0.06468033790588379, -0.040837567299604416, ...\n","0                       0.739900  ...  [[0.06468033790588379, -0.040837567299604416, ...\n","0                       0.739900  ...  [[0.06468033790588379, -0.040837567299604416, ...\n","0                       0.739900  ...  [[0.06468033790588379, -0.040837567299604416, ...\n","0                       0.739900  ...  
[[0.06468033790588379, -0.040837567299604416, ...\n","\n","[7 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609621948879,"user_tz":-300,"elapsed":197499,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3b2e8fac-92a8-436c-93b7-d548f39f95a1"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setMaxEpochs(3)                | Info: Maximum number of epochs to train | Currently set to : 3\n","pipe['classifier_dl'].setLr(0.005)                   | Info: Learning Rate | Currently set to : 0.005\n","pipe['classifier_dl'].setBatchSize(64)               | Info: Batch size | Currently set to : 64\n","pipe['classifier_dl'].setDropout(0.5)                | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['classifier_dl'].setEnableOutputLogs(True)      | Info: Whether to use stdout in addition to Spark logs. 
| Currently set to : True\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True)  | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True)       | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([])        | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False)  | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0)            | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999)        | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+')    | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"])  | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0)            | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999)        | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512)               | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use')      | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')  | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":620},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609621958501,"user_tz":-300,"elapsed":207107,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e86e37d9-df7d-4c70-fafe-f1b297860fa9"},"source":["# Train longer!\n","trainable_pipe['classifier_dl'].setMaxEpochs(5)  \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:100])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = 
fitted_pipe.predict(train_df.iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["              precision    recall  f1-score   support\n","\n","     average       0.48      0.76      0.59        33\n","       great       0.86      0.51      0.64        35\n","        poor       0.74      0.62      0.68        32\n","\n","    accuracy                           0.63       100\n","   macro avg       0.69      0.63      0.64       100\n","weighted avg       0.70      0.63      0.64       100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>y</th>\n","      <th>text</th>\n","      <th>document</th>\n","      <th>category_confidence</th>\n","      <th>category</th>\n","      <th>default_name_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>great</td>\n","      <td>great stayed hotel 5 nights end august 2005. r...</td>\n","      <td>great stayed hotel 5 nights end august 2005. 
r...</td>\n","      <td>0.595822</td>\n","      <td>average</td>\n","      <td>[0.06212242692708969, 0.04104098677635193, 0.0...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>poor</td>\n","      <td>watch bait-and-switch room rates, rooms accept...</td>\n","      <td>watch bait-and-switch room rates, rooms accept...</td>\n","      <td>0.498284</td>\n","      <td>poor</td>\n","      <td>[0.0546528585255146, 0.02160552889108658, -0.0...</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>average</td>\n","      <td>good check liked hotel good location friendly ...</td>\n","      <td>good check liked hotel good location friendly ...</td>\n","      <td>0.557739</td>\n","      <td>average</td>\n","      <td>[0.008103911764919758, 0.02573486790060997, 0....</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>great</td>\n","      <td>best location value properties waikiki head ho...</td>\n","      <td>best location value properties waikiki head ho...</td>\n","      <td>0.418274</td>\n","      <td>average</td>\n","      <td>[0.05095028877258301, -0.003614993067458272, 0...</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>poor</td>\n","      <td>botel not recommended little disappointed hone...</td>\n","      <td>botel not recommended little disappointed hone...</td>\n","      <td>0.491956</td>\n","      <td>average</td>\n","      <td>[0.03620055690407753, 0.010797196999192238, 0....</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>95</th>\n","      <td>great</td>\n","      <td>great location spent 7 days castle inn beginni...</td>\n","      <td>great location spent 7 days castle inn beginni...</td>\n","      <td>0.402236</td>\n","      <td>average</td>\n","      <td>[0.03295842185616493, 0.04682551696896553, 0.0...</td>\n"," 
   </tr>\n","    <tr>\n","      <th>96</th>\n","      <td>average</td>\n","      <td>great location hard beds really liked hotel si...</td>\n","      <td>great location hard beds really liked hotel si...</td>\n","      <td>0.598560</td>\n","      <td>average</td>\n","      <td>[0.02258184179663658, 0.0432007722556591, -0.0...</td>\n","    </tr>\n","    <tr>\n","      <th>97</th>\n","      <td>great</td>\n","      <td>great location location hotel perfect right mi...</td>\n","      <td>great location location hotel perfect right mi...</td>\n","      <td>0.552369</td>\n","      <td>average</td>\n","      <td>[0.06024744734168053, 0.05366133153438568, -0....</td>\n","    </tr>\n","    <tr>\n","      <th>98</th>\n","      <td>great</td>\n","      <td>just starting lose lustre stayed chancellor co...</td>\n","      <td>just starting lose lustre stayed chancellor co...</td>\n","      <td>0.374642</td>\n","      <td>poor</td>\n","      <td>[0.0255410298705101, 0.0401645191013813, 0.003...</td>\n","    </tr>\n","    <tr>\n","      <th>99</th>\n","      <td>poor</td>\n","      <td>bittersweet memories glorious past recent stay...</td>\n","      <td>bittersweet memories glorious past recent stay...</td>\n","      <td>0.415380</td>\n","      <td>poor</td>\n","      <td>[0.03259000554680824, 0.049256037920713425, 0....</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>100 rows × 6 columns</p>\n","</div>"],"text/plain":["                    y  ...                            default_name_embeddings\n","origin_index           ...                                                   \n","0               great  ...  [0.06212242692708969, 0.04104098677635193, 0.0...\n","1                poor  ...  [0.0546528585255146, 0.02160552889108658, -0.0...\n","2             average  ...  [0.008103911764919758, 0.02573486790060997, 0....\n","3               great  ...  [0.05095028877258301, -0.003614993067458272, 0...\n","4                poor  ...  
[0.03620055690407753, 0.010797196999192238, 0....\n","...               ...  ...                                                ...\n","95              great  ...  [0.03295842185616493, 0.04682551696896553, 0.0...\n","96            average  ...  [0.02258184179663658, 0.0432007722556591, -0.0...\n","97              great  ...  [0.06024744734168053, 0.05366133153438568, -0....\n","98              great  ...  [0.0255410298705101, 0.0401645191013813, 0.003...\n","99               poor  ...  [0.03259000554680824, 0.049256037920713425, 0....\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609621958515,"user_tz":-300,"elapsed":207110,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"de28c144-5456-4998-ffad-2d5046d5efc4"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possible sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language <en> NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language <fi> NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language <xx> NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"IKK_Ii_gjJfF","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609631124604,"user_tz":-300,"elapsed":7463638,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"49f0d684-0253-441f-d322-2e286b89fa24"},"source":["from sklearn.metrics import classification_report\n","trainable_pipe = 
nlu.load('en.embed_sentence.small_bert_L12_768 train.classifier')\n","# We need to train longer and use a smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['classifier_dl'].setMaxEpochs(90)  \n","trainable_pipe['classifier_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","     average       0.66      0.65      0.65      2184\n","       great       0.79      0.81      0.80      2184\n","        poor       0.77      0.78      0.78      2184\n","\n","    accuracy                           0.74      6552\n","   macro avg       0.74      0.74      0.74      6552\n","weighted avg       0.74      0.74      0.74      6552\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. 
Let's save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609631288714,"user_tz":-300,"elapsed":164136,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"42ce7ad9-b16c-404f-a717-b6d98651af95"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Let's load the model from HDD.\n","This makes offline NLU usage possible!   \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":111},"executionInfo":{"status":"ok","timestamp":1609631304458,"user_tz":-300,"elapsed":15754,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a0f4d6e7-e607-41f2-91ae-7b170b03b40a"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('It was a good experince!')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>classifier</th>\n","      <th>en_embed_sentence_small_bert_L12_768_embeddings</th>\n","      <th>document</th>\n","      <th>classifier_confidence</th>\n","    </tr>\n","    <tr>\n","      
<th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>great</td>\n","      <td>[-0.07878006249666214, 0.1528550535440445, 0.1...</td>\n","      <td>It was one of the best wines i ever tasted .</td>\n","      <td>0.865597</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["             classifier  ... classifier_confidence\n","origin_index             ...                      \n","0                 great  ...              0.865597\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609631312511,"user_tz":-300,"elapsed":2776,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3de0f51e-3fd0-4dae-ee05-81459d162c42"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')             | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)        | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')                 | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)                     | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)                         | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])                   | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)                  | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False)            | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)                   | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)                       | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)             | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)         | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32)                                  | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False)                           | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768)                                 | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128)                         | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False)                                  | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768')          | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setClasses(['average', 'great', 'poor'])  | Info: get the tags used to trained this NerDLModel | Currently set to : ['average', 'great', 'poor']\n","pipe['classifier_dl'].setStorageRef('sent_small_bert_L12_768')  | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]}
\ No newline at end of file
diff --git a/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_musical_instruments.ipynb b/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_musical_instruments.ipynb
index 3e474fa9..046560b8 100644
--- a/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_musical_instruments.ipynb
+++ b/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_musical_instruments.ipynb
@@ -1 +1 @@
-{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_class_text_classifier_demo_musical_instruments.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_musical_instruments.ipynb)\n","\n","\n","\n","\n","# Training a Deep Learning Classifier with NLU \n","## ClassifierDL (Multi-class Text Classification)\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu > /dev/null\n","! pip install  pyspark==2.4.7 > /dev/null\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download musical instruments  classification dataset\r\n","\r\n","https://www.kaggle.com/eswarchandt/amazon-music-reviews\r\n","\r\n","dataset with products rated between 5 classes"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610787881309,"user_tz":-300,"elapsed":1350,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c7f2277f-e7a9-484a-cf3b-457bdc65e457"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/Musical_instruments_reviews.csv"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:04:04--  http://ckl-it.de/wp-content/uploads/2021/01/Musical_instruments_reviews.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 51708 (50K) [text/csv]\n","Saving to: ‘Musical_instruments_reviews.csv’\n","\n","Musical_instruments 100%[===================>]  50.50K   241KB/s    in 0.2s    \n","\n","2021-01-16 09:04:05 (241 KB/s) - ‘Musical_instruments_reviews.csv’ saved [51708/51708]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610787895917,"user_tz":-300,"elapsed":1017,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1c4ca0c9-9489-47b9-a85e-3a8a3cc092a6"},"source":["import pandas as pd\n","test_path = '/content/Musical_instruments_reviews.csv'\n","train_df = pd.read_csv(test_path,sep=\",\")\n","cols = [\"y\",\"text\"]\n","train_df = train_df[cols]\n","train_df\n","\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: 
middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>y</th>\n","      <th>text</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>good</td>\n","      <td>Hosa products are a good bang for the buck. I ...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>average</td>\n","      <td>I now use this cable to run from the output of...</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>good</td>\n","      <td>Cheap and good texture rubber that does not ge...</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>average</td>\n","      <td>These cables are a little thin compared to hos...</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>average</td>\n","      <td>It is a decent cable. 
It does its job, but it ...</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>115</th>\n","      <td>very poor</td>\n","      <td>It just randomly pops off my bass, it's so sli...</td>\n","    </tr>\n","    <tr>\n","      <th>116</th>\n","      <td>very good</td>\n","      <td>The primary job of this device is to block the...</td>\n","    </tr>\n","    <tr>\n","      <th>117</th>\n","      <td>good</td>\n","      <td>The Hosa XLR cables are affordable and very he...</td>\n","    </tr>\n","    <tr>\n","      <th>118</th>\n","      <td>average</td>\n","      <td>It's a cable, no frills, tangles pretty easy a...</td>\n","    </tr>\n","    <tr>\n","      <th>119</th>\n","      <td>very poor</td>\n","      <td>It hums, crackles, and I think I'm having prob...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>120 rows × 2 columns</p>\n","</div>"],"text/plain":["             y                                               text\n","0         good  Hosa products are a good bang for the buck. I ...\n","1      average  I now use this cable to run from the output of...\n","2         good  Cheap and good texture rubber that does not ge...\n","3      average  These cables are a little thin compared to hos...\n","4      average  It is a decent cable. It does its job, but it ...\n","..         ...                                                ...\n","115  very poor  It just randomly pops off my bass, it's so sli...\n","116  very good  The primary job of this device is to block the...\n","117       good  The Hosa XLR cables are affordable and very he...\n","118    average  It's a cable, no frills, tangles pretty easy a...\n","119  very poor  It hums, crackles, and I think I'm having prob...\n","\n","[120 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":2}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. 
Train Deep Learning Classifier using nlu.load('train.classifier')\n","\n","By default, the Universal Sentence Encoder Embeddings (USE) are beeing downloaded to provide embeddings for the classifier. You can use any of the 50+ other sentence Emeddings in NLU tough!\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":671},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609472199891,"user_tz":-300,"elapsed":191855,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"bd493b9c-fa33-44af-e941-1000f0aa137d"},"source":["# load a trainable pipeline by specifying the train. prefix  and fit it on a datset with label and text columns\n","# Since there are no\n","\n","trainable_pipe = nlu.load('train.classifier')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50] )\n","\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50] )\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>category_confidence</th>\n","      <th>text</th>\n","      <th>y</th>\n","      <th>category</th>\n","      <th>default_name_embeddings</th>\n","      <th>sentence</th>\n","    </tr>\n","    <tr>\n","     
 <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0.304148</td>\n","      <td>Hosa products are a good bang for the buck. I ...</td>\n","      <td>good</td>\n","      <td>average</td>\n","      <td>[0.07208353281021118, 0.028736615553498268, -0...</td>\n","      <td>Hosa products are a good bang for the buck.</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>1.000000</td>\n","      <td>Hosa products are a good bang for the buck. I ...</td>\n","      <td>good</td>\n","      <td>average</td>\n","      <td>[0.056614313274621964, -0.04707420617341995, -...</td>\n","      <td>I haven't looked up the specifications, but I'...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>0.956961</td>\n","      <td>I now use this cable to run from the output of...</td>\n","      <td>average</td>\n","      <td>average</td>\n","      <td>[0.06778458505868912, -0.0052166287787258625, ...</td>\n","      <td>I now use this cable to run from the output of...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>1.000000</td>\n","      <td>I now use this cable to run from the output of...</td>\n","      <td>average</td>\n","      <td>average</td>\n","      <td>[0.06371542811393738, -0.022252758964896202, -...</td>\n","      <td>After I bought Monster Cable to hook up my ped...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>2.000000</td>\n","      <td>I now use this cable to run from the output of...</td>\n","      <td>average</td>\n","      <td>average</td>\n","      <td>[0.018308864906430244, 0.0024022769648581743, ...</td>\n","      <td>I had been using a high end Planet Waves cable...</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      
<td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>47</th>\n","      <td>0.841045</td>\n","      <td>Update: The right angle switched end started d...</td>\n","      <td>average</td>\n","      <td>average</td>\n","      <td>[-0.013615701347589493, -0.04160430282354355, ...</td>\n","      <td>I like knowing that.</td>\n","    </tr>\n","    <tr>\n","      <th>47</th>\n","      <td>0.841045</td>\n","      <td>Update: The right angle switched end started d...</td>\n","      <td>average</td>\n","      <td>average</td>\n","      <td>[0.02372647449374199, 0.04573449119925499, -0....</td>\n","      <td>** EDIT: AS STATED ABOVE, YOU WILL NOT BE ABLE...</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>0.997217</td>\n","      <td>Doe's not stay on to well, moves to much even ...</td>\n","      <td>average</td>\n","      <td>average</td>\n","      <td>[0.08493339270353317, 0.047714825719594955, -0...</td>\n","      <td>Doe's not stay on to well, moves to much even ...</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>0.401975</td>\n","      <td>These are not the greatest but they're cheap a...</td>\n","      <td>good</td>\n","      <td>very poor</td>\n","      <td>[0.03083745203912258, 0.01701708696782589, -0....</td>\n","      <td>These are not the greatest but they're cheap a...</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>1.000000</td>\n","      <td>These are not the greatest but they're cheap a...</td>\n","      <td>good</td>\n","      <td>very poor</td>\n","      <td>[0.06084448844194412, 0.0020018713548779488, 0...</td>\n","      <td>I've only had one fail and I've bought many of...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>297 rows × 6 columns</p>\n","</div>"],"text/plain":["             category_confidence  ...                                           sentence\n","origin_index                      ...                                                   
\n","0                       0.304148  ...        Hosa products are a good bang for the buck.\n","0                       1.000000  ...  I haven't looked up the specifications, but I'...\n","1                       0.956961  ...  I now use this cable to run from the output of...\n","1                       1.000000  ...  After I bought Monster Cable to hook up my ped...\n","1                       2.000000  ...  I had been using a high end Planet Waves cable...\n","...                          ...  ...                                                ...\n","47                      0.841045  ...                               I like knowing that.\n","47                      0.841045  ...  ** EDIT: AS STATED ABOVE, YOU WILL NOT BE ABLE...\n","48                      0.997217  ...  Doe's not stay on to well, moves to much even ...\n","49                      0.401975  ...  These are not the greatest but they're cheap a...\n","49                      1.000000  ...  I've only had one fail and I've bought many of...\n","\n","[297 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"DL_5aY9b3jSd"},"source":["# 4. 
Evaluate the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"djtoZVKBw2WU","executionInfo":{"status":"ok","timestamp":1609472199894,"user_tz":-300,"elapsed":191838,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c3c86659-f624-486c-bb48-f514ac1e8fc0"},"source":["from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["              precision    recall  f1-score   support\n","\n","     average       0.63      0.76      0.69       123\n","        good       0.00      0.00      0.00        51\n","   very good       0.00      0.00      0.00        39\n","   very poor       0.50      0.87      0.63        84\n","\n","    accuracy                           0.56       297\n","   macro avg       0.28      0.41      0.33       297\n","weighted avg       0.40      0.56      0.46       297\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"mhFKVN93o1ZO"},"source":["# 5. Lets try different Sentence Emebddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"CzJd8omao0gt","executionInfo":{"status":"ok","timestamp":1609472199895,"user_tz":-300,"elapsed":191822,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"0257d8c7-ce4a-4ac4-837c-5513639da2d4"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language <en> NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language <fi> NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language <xx> NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ABHLgirmG1n9","executionInfo":{"status":"ok","timestamp":1609472351316,"user_tz":-300,"elapsed":343219,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b4823a3d-fcf8-4e40-e6dd-00051347b3a8"},"source":["# Load pipe with bert embeds\n","# using large embeddings can take a few hours..\n","# fitted_pipe = 
nlu.load('en.embed_sentence.bert_large_uncased train.classifier').fit(train_df)\n","fitted_pipe = nlu.load('en.embed_sentence.bert train.classifier').fit(train_df.iloc[:100])\n","\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:100])\n","from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_bert_base_uncased download started this may take some time.\n","Approximate size to download 392.5 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","     average       0.29      1.00      0.45        27\n","        good       0.00      0.00      0.00        25\n","   very good       0.00      0.00      0.00        25\n","   very poor       1.00      0.30      0.47        23\n","\n","    accuracy                           0.34       100\n","   macro avg       0.32      0.33      0.23       100\n","weighted avg       0.31      0.34      0.23       100\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nbpdZGoZPslz","executionInfo":{"status":"ok","timestamp":1609472368869,"user_tz":-300,"elapsed":360758,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a9574559-a655-464a-f159-b85c2c64b5b0"},"source":["# Load pipe with bert embeds\n","fitted_pipe = nlu.load('embed_sentence.bert train.classifier').fit(train_df.iloc[:100])\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:100])\n","from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L2_128 download started this may take some time.\n","Approximate size to download 16.1 
MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","     average       0.00      0.00      0.00        27\n","        good       0.00      0.00      0.00        25\n","   very good       0.25      1.00      0.40        25\n","   very poor       0.00      0.00      0.00        23\n","\n","    accuracy                           0.25       100\n","   macro avg       0.06      0.25      0.10       100\n","weighted avg       0.06      0.25      0.10       100\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"wYV7ivdsQY8Z","executionInfo":{"status":"ok","timestamp":1609475397624,"user_tz":-300,"elapsed":155002,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"759ff4c2-dcf3-4f65-bf3b-95f1f32e0a39"},"source":["from sklearn.metrics import classification_report\r\n","trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.classifier')\r\n","# We need to train longer and user smaller LR for NON-USE based sentence embeddings usually\r\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\r\n","# Also longer training gives more accuracy\r\n","trainable_pipe['classifier_dl'].setMaxEpochs(90)  \r\n","trainable_pipe['classifier_dl'].setLr(0.0005) \r\n","fitted_pipe = trainable_pipe.fit(train_df)\r\n","# predict with the trainable pipeline on dataset and get predictions\r\n","preds = fitted_pipe.predict(train_df,output_level='document')\r\n","\r\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\r\n","preds.dropna(inplace=True)\r\n","print(classification_report(preds['y'], preds['category']))\r\n","\r\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","     average       0.89      0.53      0.67        30\n","        good       0.62      0.83      0.71        30\n","   very good       0.93      0.47      0.62        30\n","   very poor       0.62      0.97      0.75        30\n","\n","    accuracy                           0.70       120\n","   macro avg       0.77      0.70      0.69       120\n","weighted avg       0.77      0.70      0.69       120\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","executionInfo":{"status":"ok","timestamp":1609472722793,"user_tz":-300,"elapsed":714653,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"4b052d0c-f581-4c96-a7d5-91885525e96e"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"qeuzjy2IJTif"},"source":[""]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible!   
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":127},"id":"SO4uz45MoRgp","executionInfo":{"status":"ok","timestamp":1609472740229,"user_tz":-300,"elapsed":732057,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c57f3bd7-4590-4a82-9d01-99b7dd1e7a34"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('It was really good ')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>document</th>\n","      <th>en_embed_sentence_small_bert_L12_768_embeddings</th>\n","      <th>classifier_confidence</th>\n","      <th>classifier</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>It was really good</td>\n","      <td>[-0.034663598984479904, 0.3307220935821533, 0....</td>\n","      <td>0.529977</td>\n","      <td>very good</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                        document  ... classifier\n","origin_index                      ...           \n","0             It was really good  ...  
very good\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"e0CVlkk9v6Qi","executionInfo":{"status":"ok","timestamp":1609472740233,"user_tz":-300,"elapsed":732044,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a85b252c-2f3a-401d-8579-de7c2c9acbc1"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')                              | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])                                    | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)                                   | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False)                             | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)                                    | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)                                        | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)                              | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)                          | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)                         | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')                                  | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)                                      | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)                                          | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32)                                                   | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False)                                            | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768)                                                  | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128)                                          | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False)                                                   | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768')                           | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setClasses(['very good', 'very poor', 'average', 'good'])  | Info: get the tags used to trained this NerDLModel | Currently set to : ['very good', 'very poor', 'average', 'good']\n","pipe['classifier_dl'].setStorageRef('sent_small_bert_L12_768')                   | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]}
\ No newline at end of file
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_class_text_classifier_demo_musical_instruments.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_musical_instruments.ipynb)\n","\n","\n","\n","\n","# Training a Deep Learning Classifier with NLU \n","## ClassifierDL (Multi-class Text Classification)\n","## 4 class Amazon Musical Instruments review classifier training\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu > /dev/null\n","! pip install  pyspark==2.4.7 > /dev/null\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download musical instruments  classification dataset\r\n","\r\n","https://www.kaggle.com/eswarchandt/amazon-music-reviews\r\n","\r\n","dataset with products rated between 5 classes"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610787881309,"user_tz":-300,"elapsed":1350,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c7f2277f-e7a9-484a-cf3b-457bdc65e457"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/Musical_instruments_reviews.csv"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:04:04--  http://ckl-it.de/wp-content/uploads/2021/01/Musical_instruments_reviews.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 51708 (50K) [text/csv]\n","Saving to: ‘Musical_instruments_reviews.csv’\n","\n","Musical_instruments 100%[===================>]  50.50K   241KB/s    in 0.2s    \n","\n","2021-01-16 09:04:05 (241 KB/s) - ‘Musical_instruments_reviews.csv’ saved [51708/51708]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610787895917,"user_tz":-300,"elapsed":1017,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1c4ca0c9-9489-47b9-a85e-3a8a3cc092a6"},"source":["import pandas as pd\n","test_path = '/content/Musical_instruments_reviews.csv'\n","train_df = pd.read_csv(test_path,sep=\",\")\n","cols = [\"y\",\"text\"]\n","train_df = train_df[cols]\n","train_df\n","\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: 
middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>y</th>\n","      <th>text</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>good</td>\n","      <td>Hosa products are a good bang for the buck. I ...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>average</td>\n","      <td>I now use this cable to run from the output of...</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>good</td>\n","      <td>Cheap and good texture rubber that does not ge...</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>average</td>\n","      <td>These cables are a little thin compared to hos...</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>average</td>\n","      <td>It is a decent cable. 
It does its job, but it ...</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>115</th>\n","      <td>very poor</td>\n","      <td>It just randomly pops off my bass, it's so sli...</td>\n","    </tr>\n","    <tr>\n","      <th>116</th>\n","      <td>very good</td>\n","      <td>The primary job of this device is to block the...</td>\n","    </tr>\n","    <tr>\n","      <th>117</th>\n","      <td>good</td>\n","      <td>The Hosa XLR cables are affordable and very he...</td>\n","    </tr>\n","    <tr>\n","      <th>118</th>\n","      <td>average</td>\n","      <td>It's a cable, no frills, tangles pretty easy a...</td>\n","    </tr>\n","    <tr>\n","      <th>119</th>\n","      <td>very poor</td>\n","      <td>It hums, crackles, and I think I'm having prob...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>120 rows × 2 columns</p>\n","</div>"],"text/plain":["             y                                               text\n","0         good  Hosa products are a good bang for the buck. I ...\n","1      average  I now use this cable to run from the output of...\n","2         good  Cheap and good texture rubber that does not ge...\n","3      average  These cables are a little thin compared to hos...\n","4      average  It is a decent cable. It does its job, but it ...\n","..         ...                                                ...\n","115  very poor  It just randomly pops off my bass, it's so sli...\n","116  very good  The primary job of this device is to block the...\n","117       good  The Hosa XLR cables are affordable and very he...\n","118    average  It's a cable, no frills, tangles pretty easy a...\n","119  very poor  It hums, crackles, and I think I'm having prob...\n","\n","[120 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":2}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. 
Train Deep Learning Classifier using nlu.load('train.classifier')\n","\n","By default, the Universal Sentence Encoder Embeddings (USE) are being downloaded to provide embeddings for the classifier. You can use any of the 50+ other sentence Embeddings in NLU though!\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":671},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609472199891,"user_tz":-300,"elapsed":191855,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"bd493b9c-fa33-44af-e941-1000f0aa137d"},"source":["# load a trainable pipeline by specifying the train. prefix  and fit it on a dataset with label and text columns\n","# Since there are no\n","\n","trainable_pipe = nlu.load('train.classifier')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50] )\n","\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50] )\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>category_confidence</th>\n","      <th>text</th>\n","      <th>y</th>\n","      <th>category</th>\n","      <th>default_name_embeddings</th>\n","      <th>sentence</th>\n","    </tr>\n","    <tr>\n","     
 <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0.304148</td>\n","      <td>Hosa products are a good bang for the buck. I ...</td>\n","      <td>good</td>\n","      <td>average</td>\n","      <td>[0.07208353281021118, 0.028736615553498268, -0...</td>\n","      <td>Hosa products are a good bang for the buck.</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>1.000000</td>\n","      <td>Hosa products are a good bang for the buck. I ...</td>\n","      <td>good</td>\n","      <td>average</td>\n","      <td>[0.056614313274621964, -0.04707420617341995, -...</td>\n","      <td>I haven't looked up the specifications, but I'...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>0.956961</td>\n","      <td>I now use this cable to run from the output of...</td>\n","      <td>average</td>\n","      <td>average</td>\n","      <td>[0.06778458505868912, -0.0052166287787258625, ...</td>\n","      <td>I now use this cable to run from the output of...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>1.000000</td>\n","      <td>I now use this cable to run from the output of...</td>\n","      <td>average</td>\n","      <td>average</td>\n","      <td>[0.06371542811393738, -0.022252758964896202, -...</td>\n","      <td>After I bought Monster Cable to hook up my ped...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>2.000000</td>\n","      <td>I now use this cable to run from the output of...</td>\n","      <td>average</td>\n","      <td>average</td>\n","      <td>[0.018308864906430244, 0.0024022769648581743, ...</td>\n","      <td>I had been using a high end Planet Waves cable...</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      
<td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>47</th>\n","      <td>0.841045</td>\n","      <td>Update: The right angle switched end started d...</td>\n","      <td>average</td>\n","      <td>average</td>\n","      <td>[-0.013615701347589493, -0.04160430282354355, ...</td>\n","      <td>I like knowing that.</td>\n","    </tr>\n","    <tr>\n","      <th>47</th>\n","      <td>0.841045</td>\n","      <td>Update: The right angle switched end started d...</td>\n","      <td>average</td>\n","      <td>average</td>\n","      <td>[0.02372647449374199, 0.04573449119925499, -0....</td>\n","      <td>** EDIT: AS STATED ABOVE, YOU WILL NOT BE ABLE...</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>0.997217</td>\n","      <td>Doe's not stay on to well, moves to much even ...</td>\n","      <td>average</td>\n","      <td>average</td>\n","      <td>[0.08493339270353317, 0.047714825719594955, -0...</td>\n","      <td>Doe's not stay on to well, moves to much even ...</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>0.401975</td>\n","      <td>These are not the greatest but they're cheap a...</td>\n","      <td>good</td>\n","      <td>very poor</td>\n","      <td>[0.03083745203912258, 0.01701708696782589, -0....</td>\n","      <td>These are not the greatest but they're cheap a...</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>1.000000</td>\n","      <td>These are not the greatest but they're cheap a...</td>\n","      <td>good</td>\n","      <td>very poor</td>\n","      <td>[0.06084448844194412, 0.0020018713548779488, 0...</td>\n","      <td>I've only had one fail and I've bought many of...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>297 rows × 6 columns</p>\n","</div>"],"text/plain":["             category_confidence  ...                                           sentence\n","origin_index                      ...                                                   
\n","0                       0.304148  ...        Hosa products are a good bang for the buck.\n","0                       1.000000  ...  I haven't looked up the specifications, but I'...\n","1                       0.956961  ...  I now use this cable to run from the output of...\n","1                       1.000000  ...  After I bought Monster Cable to hook up my ped...\n","1                       2.000000  ...  I had been using a high end Planet Waves cable...\n","...                          ...  ...                                                ...\n","47                      0.841045  ...                               I like knowing that.\n","47                      0.841045  ...  ** EDIT: AS STATED ABOVE, YOU WILL NOT BE ABLE...\n","48                      0.997217  ...  Doe's not stay on to well, moves to much even ...\n","49                      0.401975  ...  These are not the greatest but they're cheap a...\n","49                      1.000000  ...  I've only had one fail and I've bought many of...\n","\n","[297 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"DL_5aY9b3jSd"},"source":["# 4. 
Evaluate the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"djtoZVKBw2WU","executionInfo":{"status":"ok","timestamp":1609472199894,"user_tz":-300,"elapsed":191838,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c3c86659-f624-486c-bb48-f514ac1e8fc0"},"source":["from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["              precision    recall  f1-score   support\n","\n","     average       0.63      0.76      0.69       123\n","        good       0.00      0.00      0.00        51\n","   very good       0.00      0.00      0.00        39\n","   very poor       0.50      0.87      0.63        84\n","\n","    accuracy                           0.56       297\n","   macro avg       0.28      0.41      0.33       297\n","weighted avg       0.40      0.56      0.46       297\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"mhFKVN93o1ZO"},"source":["# 5. Let's try different Sentence Embeddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"CzJd8omao0gt","executionInfo":{"status":"ok","timestamp":1609472199895,"user_tz":-300,"elapsed":191822,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"0257d8c7-ce4a-4ac4-837c-5513639da2d4"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possible sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language <en> NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language <fi> NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language <xx> NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ABHLgirmG1n9","executionInfo":{"status":"ok","timestamp":1609472351316,"user_tz":-300,"elapsed":343219,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b4823a3d-fcf8-4e40-e6dd-00051347b3a8"},"source":["# Load pipe with bert embeds\n","# using large embeddings can take a few hours..\n","# fitted_pipe = 
nlu.load('en.embed_sentence.bert_large_uncased train.classifier').fit(train_df)\n","fitted_pipe = nlu.load('en.embed_sentence.bert train.classifier').fit(train_df.iloc[:100])\n","\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:100])\n","from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_bert_base_uncased download started this may take some time.\n","Approximate size to download 392.5 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","     average       0.29      1.00      0.45        27\n","        good       0.00      0.00      0.00        25\n","   very good       0.00      0.00      0.00        25\n","   very poor       1.00      0.30      0.47        23\n","\n","    accuracy                           0.34       100\n","   macro avg       0.32      0.33      0.23       100\n","weighted avg       0.31      0.34      0.23       100\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nbpdZGoZPslz","executionInfo":{"status":"ok","timestamp":1609472368869,"user_tz":-300,"elapsed":360758,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a9574559-a655-464a-f159-b85c2c64b5b0"},"source":["# Load pipe with bert embeds\n","fitted_pipe = nlu.load('embed_sentence.bert train.classifier').fit(train_df.iloc[:100])\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:100])\n","from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L2_128 download started this may take some time.\n","Approximate size to download 16.1 
MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","     average       0.00      0.00      0.00        27\n","        good       0.00      0.00      0.00        25\n","   very good       0.25      1.00      0.40        25\n","   very poor       0.00      0.00      0.00        23\n","\n","    accuracy                           0.25       100\n","   macro avg       0.06      0.25      0.10       100\n","weighted avg       0.06      0.25      0.10       100\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"wYV7ivdsQY8Z","executionInfo":{"status":"ok","timestamp":1609475397624,"user_tz":-300,"elapsed":155002,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"759ff4c2-dcf3-4f65-bf3b-95f1f32e0a39"},"source":["from sklearn.metrics import classification_report\r\n","trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.classifier')\r\n","# We need to train longer and use a smaller LR for NON-USE based sentence embeddings usually\r\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\r\n","# Also longer training gives more accuracy\r\n","trainable_pipe['classifier_dl'].setMaxEpochs(90)  \r\n","trainable_pipe['classifier_dl'].setLr(0.0005) \r\n","fitted_pipe = trainable_pipe.fit(train_df)\r\n","# predict with the trainable pipeline on dataset and get predictions\r\n","preds = fitted_pipe.predict(train_df,output_level='document')\r\n","\r\n","#sentence detector that is part of the pipe generates some NaNs. 
lets drop them first\r\n","preds.dropna(inplace=True)\r\n","print(classification_report(preds['y'], preds['category']))\r\n","\r\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","     average       0.89      0.53      0.67        30\n","        good       0.62      0.83      0.71        30\n","   very good       0.93      0.47      0.62        30\n","   very poor       0.62      0.97      0.75        30\n","\n","    accuracy                           0.70       120\n","   macro avg       0.77      0.70      0.69       120\n","weighted avg       0.77      0.70      0.69       120\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 6. Let's save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","executionInfo":{"status":"ok","timestamp":1609472722793,"user_tz":-300,"elapsed":714653,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"4b052d0c-f581-4c96-a7d5-91885525e96e"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"qeuzjy2IJTif"},"source":[""]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 7. Let's load the model from HDD.\n","This makes Offline NLU usage possible!   
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":127},"id":"SO4uz45MoRgp","executionInfo":{"status":"ok","timestamp":1609472740229,"user_tz":-300,"elapsed":732057,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c57f3bd7-4590-4a82-9d01-99b7dd1e7a34"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('It was really good ')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>document</th>\n","      <th>en_embed_sentence_small_bert_L12_768_embeddings</th>\n","      <th>classifier_confidence</th>\n","      <th>classifier</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>It was really good</td>\n","      <td>[-0.034663598984479904, 0.3307220935821533, 0....</td>\n","      <td>0.529977</td>\n","      <td>very good</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                        document  ... classifier\n","origin_index                      ...           \n","0             It was really good  ...  
very good\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"e0CVlkk9v6Qi","executionInfo":{"status":"ok","timestamp":1609472740233,"user_tz":-300,"elapsed":732044,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a85b252c-2f3a-401d-8579-de7c2c9acbc1"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')                              | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])                                    | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)                                   | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False)                             | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)                                    | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)                                        | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)                              | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)                          | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)                         | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')                                  | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)                                      | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)                                          | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32)                                                   | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False)                                            | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768)                                                  | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128)                                          | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False)                                                   | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768')                           | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setClasses(['very good', 'very poor', 'average', 'good'])  | Info: get the tags used to trained this NerDLModel | Currently set to : ['very good', 'very poor', 'average', 'good']\n","pipe['classifier_dl'].setStorageRef('sent_small_bert_L12_768')                   | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]}
\ No newline at end of file
diff --git a/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_wine.ipynb b/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_wine.ipynb
index b0cd3dbf..6447d424 100644
--- a/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_wine.ipynb
+++ b/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_wine.ipynb
@@ -1 +1 @@
-{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_class_text_classifier_demo_wine.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_wine.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier with NLU \n","## ClassifierDL (Multi-class Text Classification)\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","\n","! pip install nlu pyspark==2.4.7   > /dev/null    \n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download wine review dataset \n","https://www.kaggle.com/zynicide/wine-reviews\n","dataset with products between 5 review classes"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610787966036,"user_tz":-300,"elapsed":2003,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d4fe7b73-eebc-4c11-8e58-de7ebeb1a556"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/winemag-data_first150k.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:05:28--  http://ckl-it.de/wp-content/uploads/2021/01/winemag-data_first150k.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 1447273 (1.4M) [text/csv]\n","Saving to: ‘winemag-data_first150k.csv’\n","\n","winemag-data_first1 100%[===================>]   1.38M  1.74MB/s    in 0.8s    \n","\n","2021-01-16 09:05:30 (1.74 MB/s) - ‘winemag-data_first150k.csv’ saved [1447273/1447273]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610787968921,"user_tz":-300,"elapsed":925,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"393355a3-3aea-4805-a0ef-87ad8be6bf8e"},"source":["import pandas as pd\n","test_path = '/content/winemag-data_first150k.csv'\n","train_df = pd.read_csv(test_path,sep=\",\")\n","cols = [\"y\",\"text\"]\n","train_df = train_df[cols]\n","train_df\n","\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","   
     vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>y</th>\n","      <th>text</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>acceptable</td>\n","      <td>This wine is closed, tight and possibly still ...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>best</td>\n","      <td>This wine shows growing intensity the longer i...</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>good</td>\n","      <td>This moderately aromatic wine conveys Red Hots...</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>best</td>\n","      <td>This feels slightly softer in the mouth than t...</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>best</td>\n","      <td>A terrific Pinot, and one of the few that abso...</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>5055</th>\n","      <td>very good</td>\n","      <td>A classic Napa Valley Chardonnay, this is smoo...</td>\n","    </tr>\n","    <tr>\n","      <th>5056</th>\n","      <td>very good</td>\n","      <td>The wine from this estate perched high above C...</td>\n","    </tr>\n","    <tr>\n","      <th>5057</th>\n","      <td>very good</td>\n","      <td>Distinct and delicious aromas of crÃ¨me brÃ»lÃ...</td>\n","    </tr>\n","    <tr>\n","      <th>5058</th>\n","      <td>good</td>\n","      <td>Smooth, deep aromas of licorice and blackberry...</td>\n","    </tr>\n","    <tr>\n","      <th>5059</th>\n","      <td>very good</td>\n","      <td>Wonderfully aromatic fruit rises from the glas...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5060 rows × 2 columns</p>\n","</div>"],"text/plain":["               y              
                                 text\n","0     acceptable  This wine is closed, tight and possibly still ...\n","1           best  This wine shows growing intensity the longer i...\n","2           good  This moderately aromatic wine conveys Red Hots...\n","3           best  This feels slightly softer in the mouth than t...\n","4           best  A terrific Pinot, and one of the few that abso...\n","...          ...                                                ...\n","5055   very good  A classic Napa Valley Chardonnay, this is smoo...\n","5056   very good  The wine from this estate perched high above C...\n","5057   very good  Distinct and delicious aromas of crÃ¨me brÃ»lÃ...\n","5058        good  Smooth, deep aromas of licorice and blackberry...\n","5059   very good  Wonderfully aromatic fruit rises from the glas...\n","\n","[5060 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":2}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.classifier')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":487},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609603533911,"user_tz":-300,"elapsed":208298,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"bbd738d4-b241-4994-979d-c5ca0989dc4b"},"source":["# load a trainable pipeline by specifying the train. 
prefix  and fit it on a datset with label and text columns\n","# Since there are no\n","\n","trainable_pipe = nlu.load('train.classifier')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50] )\n","\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50] )\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>category_confidence</th>\n","      <th>default_name_embeddings</th>\n","      <th>y</th>\n","      <th>category</th>\n","      <th>sentence</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>This wine is closed, tight and possibly still ...</td>\n","      <td>0.386967</td>\n","      <td>[-0.00495561771094799, -0.07129219174385071, -...</td>\n","      <td>acceptable</td>\n","      <td>very good</td>\n","      <td>This wine is closed, tight and possibly still ...</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>This wine is closed, tight and possibly still ...</td>\n","      <td>1.000000</td>\n","      <td>[0.06035454571247101, 0.041439250111579895, -0...</td>\n","      <td>acceptable</td>\n","      <td>very 
good</td>\n","      <td>There's also a cheesy character that is less a...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>This wine shows growing intensity the longer i...</td>\n","      <td>0.454979</td>\n","      <td>[0.0541062131524086, -0.0517219714820385, -0.0...</td>\n","      <td>best</td>\n","      <td>best</td>\n","      <td>This wine shows growing intensity the longer i...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>This wine shows growing intensity the longer i...</td>\n","      <td>1.000000</td>\n","      <td>[-0.026120899245142937, -0.0751243457198143, -...</td>\n","      <td>best</td>\n","      <td>best</td>\n","      <td>Aromas include red fruit, spice and rosemary: ...</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>This moderately aromatic wine conveys Red Hots...</td>\n","      <td>0.433734</td>\n","      <td>[-0.0444738008081913, -0.05501846224069595, 0....</td>\n","      <td>good</td>\n","      <td>very good</td>\n","      <td>This moderately aromatic wine conveys Red Hots...</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>Bright sparks of red currant, black cherry and...</td>\n","      <td>0.439928</td>\n","      <td>[-0.001167353126220405, -0.062205277383327484,...</td>\n","      <td>very good</td>\n","      <td>very good</td>\n","      <td>Bright sparks of red currant, black cherry and...</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>Bright sparks of red currant, black cherry and...</td>\n","      <td>1.000000</td>\n","      <td>[0.001156042329967022, -0.041525647044181824, ...</td>\n","      <td>very good</td>\n","      <td>very good</td>\n","      <td>Bold tannins frame its dense layers of fruit, ...</td>\n","    </tr>\n","    <tr>\n","      
<th>49</th>\n","      <td>Based in the Jura, this producer blends grapes...</td>\n","      <td>0.730394</td>\n","      <td>[-0.012110762298107147, -0.06961353123188019, ...</td>\n","      <td>acceptable</td>\n","      <td>best</td>\n","      <td>Based in the Jura, this producer blends grapes...</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>Based in the Jura, this producer blends grapes...</td>\n","      <td>1.000000</td>\n","      <td>[0.05220193415880203, 0.04676426202058792, -0....</td>\n","      <td>acceptable</td>\n","      <td>best</td>\n","      <td>It's light, bright and just off dry, with attr...</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>Based in the Jura, this producer blends grapes...</td>\n","      <td>2.000000</td>\n","      <td>[0.09586171805858612, 0.029351763427257538, -0...</td>\n","      <td>acceptable</td>\n","      <td>best</td>\n","      <td>Like it's rosÃ© partner, it is really for apÃ©...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>158 rows × 6 columns</p>\n","</div>"],"text/plain":["                                                           text  ...                                           sentence\n","origin_index                                                     ...                                                   \n","0             This wine is closed, tight and possibly still ...  ...  This wine is closed, tight and possibly still ...\n","0             This wine is closed, tight and possibly still ...  ...  There's also a cheesy character that is less a...\n","1             This wine shows growing intensity the longer i...  ...  This wine shows growing intensity the longer i...\n","1             This wine shows growing intensity the longer i...  ...  Aromas include red fruit, spice and rosemary: ...\n","2             This moderately aromatic wine conveys Red Hots...  ...  This moderately aromatic wine conveys Red Hots...\n","...                                                  
       ...  ...                                                ...\n","48            Bright sparks of red currant, black cherry and...  ...  Bright sparks of red currant, black cherry and...\n","48            Bright sparks of red currant, black cherry and...  ...  Bold tannins frame its dense layers of fruit, ...\n","49            Based in the Jura, this producer blends grapes...  ...  Based in the Jura, this producer blends grapes...\n","49            Based in the Jura, this producer blends grapes...  ...  It's light, bright and just off dry, with attr...\n","49            Based in the Jura, this producer blends grapes...  ...  Like it's rosÃ© partner, it is really for apÃ©...\n","\n","[158 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":107},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609603536901,"user_tz":-300,"elapsed":211278,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d50adb08-b82b-4edc-e473-d273f153fa62"},"source":["fitted_pipe.predict('It was one of the best wines i ever tasted .')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>category_confidence</th>\n","      <th>default_name_embeddings</th>\n","      <th>category</th>\n","      <th>sentence</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      
<th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0.842125</td>\n","      <td>[0.06468033790588379, -0.040837567299604416, -...</td>\n","      <td>best</td>\n","      <td>Bitcoin is going to the moon!</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["             category_confidence  ...                       sentence\n","origin_index                      ...                               \n","0                       0.842125  ...  Bitcoin is going to the moon!\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609603536903,"user_tz":-300,"elapsed":211274,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e9ed8d20-ed9d-4522-bf9e-ed7414f7a686"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setMaxEpochs(3)                | Info: Maximum number of epochs to train | Currently set to : 3\n","pipe['classifier_dl'].setLr(0.005)                   | Info: Learning Rate | Currently set to : 0.005\n","pipe['classifier_dl'].setBatchSize(64)               | Info: Batch size | Currently set to : 64\n","pipe['classifier_dl'].setDropout(0.5)                | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['classifier_dl'].setEnableOutputLogs(True)      | Info: Whether to use stdout in addition to Spark logs. 
| Currently set to : True\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512)               | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use')      | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+')    | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"])  | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0)            | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999)        | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True)  | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True)       | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([])        | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False)  | Info: whether to explode each sentence into a different row, for better parallelization. 
Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0)            | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999)        | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')  | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":620},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609603618345,"user_tz":-300,"elapsed":11545,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"be93ca05-7e98-484b-9c24-a09976430afc"},"source":["# Train longer!\n","trainable_pipe['classifier_dl'].setMaxEpochs(5)  \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:100])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["              precision    recall  f1-score   support\n","\n","  acceptable       0.00      0.00      0.00        22\n","        best       0.71      0.89      0.79        28\n","        good       0.42      0.96      0.58        28\n","   very good       0.00      0.00      0.00        22\n","\n","    accuracy                           0.52       100\n","   macro avg       0.28      0.46      0.34       100\n","weighted avg       0.32      0.52      0.38       100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>document</th>\n","      <th>default_name_embeddings</th>\n","      <th>category_confidence</th>\n","      <th>y</th>\n","      <th>category</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>This wine is closed, tight and possibly still ...</td>\n","      <td>This wine is closed, tight and possibly still ...</td>\n","      <td>[0.02915436401963234, -0.0378003790974617, -0....</td>\n","      <td>0.584848</td>\n","      <td>acceptable</td>\n","      <td>good</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>This wine shows growing intensity the longer 
i...</td>\n","      <td>This wine shows growing intensity the longer i...</td>\n","      <td>[0.019120197743177414, -0.06991834938526154, 0...</td>\n","      <td>0.875611</td>\n","      <td>best</td>\n","      <td>best</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>This moderately aromatic wine conveys Red Hots...</td>\n","      <td>This moderately aromatic wine conveys Red Hots...</td>\n","      <td>[-0.025461390614509583, -0.02650509588420391, ...</td>\n","      <td>0.783311</td>\n","      <td>good</td>\n","      <td>good</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>This feels slightly softer in the mouth than t...</td>\n","      <td>This feels slightly softer in the mouth than t...</td>\n","      <td>[0.011777156963944435, 0.008188367821276188, -...</td>\n","      <td>0.711578</td>\n","      <td>best</td>\n","      <td>good</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>A terrific Pinot, and one of the few that abso...</td>\n","      <td>A terrific Pinot, and one of the few that abso...</td>\n","      <td>[0.014174058102071285, -0.057778846472501755, ...</td>\n","      <td>0.794139</td>\n","      <td>best</td>\n","      <td>best</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>95</th>\n","      <td>Radiator dust, lees and vanilla cookie aromas ...</td>\n","      <td>Radiator dust, lees and vanilla cookie aromas ...</td>\n","      <td>[-0.009873664006590843, 0.0033919725101441145,...</td>\n","      <td>0.792627</td>\n","      <td>acceptable</td>\n","      <td>good</td>\n","    </tr>\n","    <tr>\n","      <th>96</th>\n","      <td>You'll detect aromas reminiscent of wood shop ...</td>\n","      <td>You'll detect aromas reminiscent of wood shop ...</td>\n","      <td>[0.03787693753838539, -0.030119985342025757, 
-...</td>\n","      <td>0.573790</td>\n","      <td>acceptable</td>\n","      <td>good</td>\n","    </tr>\n","    <tr>\n","      <th>97</th>\n","      <td>The old vines on the steep slopes of the Heili...</td>\n","      <td>The old vines on the steep slopes of the Heili...</td>\n","      <td>[0.020556319504976273, -0.059675734490156174, ...</td>\n","      <td>0.919109</td>\n","      <td>best</td>\n","      <td>best</td>\n","    </tr>\n","    <tr>\n","      <th>98</th>\n","      <td>This wine takes time to unravel and reveal its...</td>\n","      <td>This wine takes time to unravel and reveal its...</td>\n","      <td>[-0.00832163542509079, -0.029637429863214493, ...</td>\n","      <td>0.485587</td>\n","      <td>very good</td>\n","      <td>best</td>\n","    </tr>\n","    <tr>\n","      <th>99</th>\n","      <td>Buttery oak aromas cover up any white-fruit ch...</td>\n","      <td>Buttery oak aromas cover up any white-fruit ch...</td>\n","      <td>[0.02920656092464924, -0.05507100373506546, -0...</td>\n","      <td>0.768109</td>\n","      <td>acceptable</td>\n","      <td>good</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>100 rows × 6 columns</p>\n","</div>"],"text/plain":["                                                           text  ... category\n","origin_index                                                     ...         \n","0             This wine is closed, tight and possibly still ...  ...     good\n","1             This wine shows growing intensity the longer i...  ...     best\n","2             This moderately aromatic wine conveys Red Hots...  ...     good\n","3             This feels slightly softer in the mouth than t...  ...     good\n","4             A terrific Pinot, and one of the few that abso...  ...     best\n","...                                                         ...  ...      ...\n","95            Radiator dust, lees and vanilla cookie aromas ...  ...     
good\n","96            You'll detect aromas reminiscent of wood shop ...  ...     good\n","97            The old vines on the steep slopes of the Heili...  ...     best\n","98            This wine takes time to unravel and reveal its...  ...     best\n","99            Buttery oak aromas cover up any white-fruit ch...  ...     good\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609603620060,"user_tz":-300,"elapsed":1698,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"977caf3a-f20a-4f44-fd09-15069e2a6ef0"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language <en> NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns 
Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark 
NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language <fi> NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model 
sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language <xx> NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"IKK_Ii_gjJfF","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609609274404,"user_tz":-300,"elapsed":92614,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"31f4bb74-2906-4d84-f353-2cb946407c63"},"source":["from sklearn.metrics import classification_report\n","trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.classifier')\n","# We need to train longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['classifier_dl'].setMaxEpochs(90)  \n","trainable_pipe['classifier_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","  acceptable       0.78      0.84      0.81      1265\n","        best       0.87      0.90      0.88      1265\n","        good       0.59      0.54      0.56      1265\n","   very good       0.62      0.60      0.61      1265\n","\n","    accuracy                           0.72      5060\n","   macro avg       0.71      0.72      0.72      5060\n","weighted avg       0.71      0.72      0.72      5060\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609609549407,"user_tz":-300,"elapsed":275012,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b307a3f0-a9eb-4332-eb86-c17cfb97aaf1"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible!   
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":107},"executionInfo":{"status":"ok","timestamp":1609609567537,"user_tz":-300,"elapsed":18138,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"511b976f-b1cd-41a4-d425-555fe38c0e0a"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('It was one of the best wines i ever tasted .')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>classifier</th>\n","      <th>classifier_confidence</th>\n","      <th>document</th>\n","      <th>en_embed_sentence_small_bert_L12_768_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>good</td>\n","      <td>0.515783</td>\n","      <td>Tesla plans to invest 10M into the ML sector</td>\n","      <td>[0.15737222135066986, 0.2598555386066437, 0.85...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["             classifier  ...    en_embed_sentence_small_bert_L12_768_embeddings\n","origin_index             ...                                                   \n","0                  good  ...  
[0.15737222135066986, 0.2598555386066437, 0.85...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":12}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609609567540,"user_tz":-300,"elapsed":99,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c05f0c7f-b826-45eb-a038-b9d9f1b12f7b"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')                            | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])                                  | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)                                 | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False)                           | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)                                  | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)                                      | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)                            | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)                        | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)                       | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')                                | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)                                    | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)                                        | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32)                                                 | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False)                                          | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768)                                                | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128)                                        | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False)                                                 | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768')                         | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setClasses(['very good', 'acceptable', 'best', 'good'])  | Info: get the tags used to trained this NerDLModel | Currently set to : ['very good', 'acceptable', 'best', 'good']\n","pipe['classifier_dl'].setStorageRef('sent_small_bert_L12_768')                 | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]}
\ No newline at end of file
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_class_text_classifier_demo_wine.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_wine.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier with NLU \n","## ClassifierDL (Multi-class Text Classification)\n","## 4 class WineEnthusiast Wine review classifier training\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","\n","! pip install nlu pyspark==2.4.7   > /dev/null    \n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download wine review dataset \n","https://www.kaggle.com/zynicide/wine-reviews\n","Dataset of wine reviews, each labeled with one of 4 review classes"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610787966036,"user_tz":-300,"elapsed":2003,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d4fe7b73-eebc-4c11-8e58-de7ebeb1a556"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/winemag-data_first150k.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:05:28--  http://ckl-it.de/wp-content/uploads/2021/01/winemag-data_first150k.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 1447273 (1.4M) [text/csv]\n","Saving to: ‘winemag-data_first150k.csv’\n","\n","winemag-data_first1 100%[===================>]   1.38M  1.74MB/s    in 0.8s    \n","\n","2021-01-16 09:05:30 (1.74 MB/s) - ‘winemag-data_first150k.csv’ saved [1447273/1447273]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610787968921,"user_tz":-300,"elapsed":925,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"393355a3-3aea-4805-a0ef-87ad8be6bf8e"},"source":["import pandas as pd\n","test_path = '/content/winemag-data_first150k.csv'\n","train_df = pd.read_csv(test_path,sep=\",\")\n","cols = [\"y\",\"text\"]\n","train_df = train_df[cols]\n","train_df\n","\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","   
     vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>y</th>\n","      <th>text</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>acceptable</td>\n","      <td>This wine is closed, tight and possibly still ...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>best</td>\n","      <td>This wine shows growing intensity the longer i...</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>good</td>\n","      <td>This moderately aromatic wine conveys Red Hots...</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>best</td>\n","      <td>This feels slightly softer in the mouth than t...</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>best</td>\n","      <td>A terrific Pinot, and one of the few that abso...</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>5055</th>\n","      <td>very good</td>\n","      <td>A classic Napa Valley Chardonnay, this is smoo...</td>\n","    </tr>\n","    <tr>\n","      <th>5056</th>\n","      <td>very good</td>\n","      <td>The wine from this estate perched high above C...</td>\n","    </tr>\n","    <tr>\n","      <th>5057</th>\n","      <td>very good</td>\n","      <td>Distinct and delicious aromas of crÃ¨me brÃ»lÃ...</td>\n","    </tr>\n","    <tr>\n","      <th>5058</th>\n","      <td>good</td>\n","      <td>Smooth, deep aromas of licorice and blackberry...</td>\n","    </tr>\n","    <tr>\n","      <th>5059</th>\n","      <td>very good</td>\n","      <td>Wonderfully aromatic fruit rises from the glas...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5060 rows × 2 columns</p>\n","</div>"],"text/plain":["               y              
                                 text\n","0     acceptable  This wine is closed, tight and possibly still ...\n","1           best  This wine shows growing intensity the longer i...\n","2           good  This moderately aromatic wine conveys Red Hots...\n","3           best  This feels slightly softer in the mouth than t...\n","4           best  A terrific Pinot, and one of the few that abso...\n","...          ...                                                ...\n","5055   very good  A classic Napa Valley Chardonnay, this is smoo...\n","5056   very good  The wine from this estate perched high above C...\n","5057   very good  Distinct and delicious aromas of crÃ¨me brÃ»lÃ...\n","5058        good  Smooth, deep aromas of licorice and blackberry...\n","5059   very good  Wonderfully aromatic fruit rises from the glas...\n","\n","[5060 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":2}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.classifier')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":487},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609603533911,"user_tz":-300,"elapsed":208298,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"bbd738d4-b241-4994-979d-c5ca0989dc4b"},"source":["# load a trainable pipeline by specifying the train. 
prefix  and fit it on a datset with label and text columns\n","# Since there are no\n","\n","trainable_pipe = nlu.load('train.classifier')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50] )\n","\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50] )\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>category_confidence</th>\n","      <th>default_name_embeddings</th>\n","      <th>y</th>\n","      <th>category</th>\n","      <th>sentence</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>This wine is closed, tight and possibly still ...</td>\n","      <td>0.386967</td>\n","      <td>[-0.00495561771094799, -0.07129219174385071, -...</td>\n","      <td>acceptable</td>\n","      <td>very good</td>\n","      <td>This wine is closed, tight and possibly still ...</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>This wine is closed, tight and possibly still ...</td>\n","      <td>1.000000</td>\n","      <td>[0.06035454571247101, 0.041439250111579895, -0...</td>\n","      <td>acceptable</td>\n","      <td>very 
good</td>\n","      <td>There's also a cheesy character that is less a...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>This wine shows growing intensity the longer i...</td>\n","      <td>0.454979</td>\n","      <td>[0.0541062131524086, -0.0517219714820385, -0.0...</td>\n","      <td>best</td>\n","      <td>best</td>\n","      <td>This wine shows growing intensity the longer i...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>This wine shows growing intensity the longer i...</td>\n","      <td>1.000000</td>\n","      <td>[-0.026120899245142937, -0.0751243457198143, -...</td>\n","      <td>best</td>\n","      <td>best</td>\n","      <td>Aromas include red fruit, spice and rosemary: ...</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>This moderately aromatic wine conveys Red Hots...</td>\n","      <td>0.433734</td>\n","      <td>[-0.0444738008081913, -0.05501846224069595, 0....</td>\n","      <td>good</td>\n","      <td>very good</td>\n","      <td>This moderately aromatic wine conveys Red Hots...</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>Bright sparks of red currant, black cherry and...</td>\n","      <td>0.439928</td>\n","      <td>[-0.001167353126220405, -0.062205277383327484,...</td>\n","      <td>very good</td>\n","      <td>very good</td>\n","      <td>Bright sparks of red currant, black cherry and...</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>Bright sparks of red currant, black cherry and...</td>\n","      <td>1.000000</td>\n","      <td>[0.001156042329967022, -0.041525647044181824, ...</td>\n","      <td>very good</td>\n","      <td>very good</td>\n","      <td>Bold tannins frame its dense layers of fruit, ...</td>\n","    </tr>\n","    <tr>\n","      
<th>49</th>\n","      <td>Based in the Jura, this producer blends grapes...</td>\n","      <td>0.730394</td>\n","      <td>[-0.012110762298107147, -0.06961353123188019, ...</td>\n","      <td>acceptable</td>\n","      <td>best</td>\n","      <td>Based in the Jura, this producer blends grapes...</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>Based in the Jura, this producer blends grapes...</td>\n","      <td>1.000000</td>\n","      <td>[0.05220193415880203, 0.04676426202058792, -0....</td>\n","      <td>acceptable</td>\n","      <td>best</td>\n","      <td>It's light, bright and just off dry, with attr...</td>\n","    </tr>\n","    <tr>\n","      <th>49</th>\n","      <td>Based in the Jura, this producer blends grapes...</td>\n","      <td>2.000000</td>\n","      <td>[0.09586171805858612, 0.029351763427257538, -0...</td>\n","      <td>acceptable</td>\n","      <td>best</td>\n","      <td>Like it's rosÃ© partner, it is really for apÃ©...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>158 rows × 6 columns</p>\n","</div>"],"text/plain":["                                                           text  ...                                           sentence\n","origin_index                                                     ...                                                   \n","0             This wine is closed, tight and possibly still ...  ...  This wine is closed, tight and possibly still ...\n","0             This wine is closed, tight and possibly still ...  ...  There's also a cheesy character that is less a...\n","1             This wine shows growing intensity the longer i...  ...  This wine shows growing intensity the longer i...\n","1             This wine shows growing intensity the longer i...  ...  Aromas include red fruit, spice and rosemary: ...\n","2             This moderately aromatic wine conveys Red Hots...  ...  This moderately aromatic wine conveys Red Hots...\n","...                                                  
       ...  ...                                                ...\n","48            Bright sparks of red currant, black cherry and...  ...  Bright sparks of red currant, black cherry and...\n","48            Bright sparks of red currant, black cherry and...  ...  Bold tannins frame its dense layers of fruit, ...\n","49            Based in the Jura, this producer blends grapes...  ...  Based in the Jura, this producer blends grapes...\n","49            Based in the Jura, this producer blends grapes...  ...  It's light, bright and just off dry, with attr...\n","49            Based in the Jura, this producer blends grapes...  ...  Like it's rosÃ© partner, it is really for apÃ©...\n","\n","[158 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":107},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609603536901,"user_tz":-300,"elapsed":211278,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d50adb08-b82b-4edc-e473-d273f153fa62"},"source":["fitted_pipe.predict('It was one of the best wines i ever tasted .')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>category_confidence</th>\n","      <th>default_name_embeddings</th>\n","      <th>category</th>\n","      <th>sentence</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      
<th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0.842125</td>\n","      <td>[0.06468033790588379, -0.040837567299604416, -...</td>\n","      <td>best</td>\n","      <td>Bitcoin is going to the moon!</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["             category_confidence  ...                       sentence\n","origin_index                      ...                               \n","0                       0.842125  ...  Bitcoin is going to the moon!\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609603536903,"user_tz":-300,"elapsed":211274,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e9ed8d20-ed9d-4522-bf9e-ed7414f7a686"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setMaxEpochs(3)                | Info: Maximum number of epochs to train | Currently set to : 3\n","pipe['classifier_dl'].setLr(0.005)                   | Info: Learning Rate | Currently set to : 0.005\n","pipe['classifier_dl'].setBatchSize(64)               | Info: Batch size | Currently set to : 64\n","pipe['classifier_dl'].setDropout(0.5)                | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['classifier_dl'].setEnableOutputLogs(True)      | Info: Whether to use stdout in addition to Spark logs. 
| Currently set to : True\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512)               | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use')      | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+')    | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"])  | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0)            | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999)        | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True)  | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True)       | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([])        | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False)  | Info: whether to explode each sentence into a different row, for better parallelization. 
Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0)            | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999)        | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')  | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":620},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609603618345,"user_tz":-300,"elapsed":11545,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"be93ca05-7e98-484b-9c24-a09976430afc"},"source":["# Train longer!\n","trainable_pipe['classifier_dl'].setMaxEpochs(5)  \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:100])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["              precision    recall  f1-score   support\n","\n","  acceptable       0.00      0.00      0.00        22\n","        best       0.71      0.89      0.79        28\n","        good       0.42      0.96      0.58        28\n","   very good       0.00      0.00      0.00        22\n","\n","    accuracy                           0.52       100\n","   macro avg       0.28      0.46      0.34       100\n","weighted avg       0.32      0.52      0.38       100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>document</th>\n","      <th>default_name_embeddings</th>\n","      <th>category_confidence</th>\n","      <th>y</th>\n","      <th>category</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>This wine is closed, tight and possibly still ...</td>\n","      <td>This wine is closed, tight and possibly still ...</td>\n","      <td>[0.02915436401963234, -0.0378003790974617, -0....</td>\n","      <td>0.584848</td>\n","      <td>acceptable</td>\n","      <td>good</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>This wine shows growing intensity the longer 
i...</td>\n","      <td>This wine shows growing intensity the longer i...</td>\n","      <td>[0.019120197743177414, -0.06991834938526154, 0...</td>\n","      <td>0.875611</td>\n","      <td>best</td>\n","      <td>best</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>This moderately aromatic wine conveys Red Hots...</td>\n","      <td>This moderately aromatic wine conveys Red Hots...</td>\n","      <td>[-0.025461390614509583, -0.02650509588420391, ...</td>\n","      <td>0.783311</td>\n","      <td>good</td>\n","      <td>good</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>This feels slightly softer in the mouth than t...</td>\n","      <td>This feels slightly softer in the mouth than t...</td>\n","      <td>[0.011777156963944435, 0.008188367821276188, -...</td>\n","      <td>0.711578</td>\n","      <td>best</td>\n","      <td>good</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>A terrific Pinot, and one of the few that abso...</td>\n","      <td>A terrific Pinot, and one of the few that abso...</td>\n","      <td>[0.014174058102071285, -0.057778846472501755, ...</td>\n","      <td>0.794139</td>\n","      <td>best</td>\n","      <td>best</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>95</th>\n","      <td>Radiator dust, lees and vanilla cookie aromas ...</td>\n","      <td>Radiator dust, lees and vanilla cookie aromas ...</td>\n","      <td>[-0.009873664006590843, 0.0033919725101441145,...</td>\n","      <td>0.792627</td>\n","      <td>acceptable</td>\n","      <td>good</td>\n","    </tr>\n","    <tr>\n","      <th>96</th>\n","      <td>You'll detect aromas reminiscent of wood shop ...</td>\n","      <td>You'll detect aromas reminiscent of wood shop ...</td>\n","      <td>[0.03787693753838539, -0.030119985342025757, 
-...</td>\n","      <td>0.573790</td>\n","      <td>acceptable</td>\n","      <td>good</td>\n","    </tr>\n","    <tr>\n","      <th>97</th>\n","      <td>The old vines on the steep slopes of the Heili...</td>\n","      <td>The old vines on the steep slopes of the Heili...</td>\n","      <td>[0.020556319504976273, -0.059675734490156174, ...</td>\n","      <td>0.919109</td>\n","      <td>best</td>\n","      <td>best</td>\n","    </tr>\n","    <tr>\n","      <th>98</th>\n","      <td>This wine takes time to unravel and reveal its...</td>\n","      <td>This wine takes time to unravel and reveal its...</td>\n","      <td>[-0.00832163542509079, -0.029637429863214493, ...</td>\n","      <td>0.485587</td>\n","      <td>very good</td>\n","      <td>best</td>\n","    </tr>\n","    <tr>\n","      <th>99</th>\n","      <td>Buttery oak aromas cover up any white-fruit ch...</td>\n","      <td>Buttery oak aromas cover up any white-fruit ch...</td>\n","      <td>[0.02920656092464924, -0.05507100373506546, -0...</td>\n","      <td>0.768109</td>\n","      <td>acceptable</td>\n","      <td>good</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>100 rows × 6 columns</p>\n","</div>"],"text/plain":["                                                           text  ... category\n","origin_index                                                     ...         \n","0             This wine is closed, tight and possibly still ...  ...     good\n","1             This wine shows growing intensity the longer i...  ...     best\n","2             This moderately aromatic wine conveys Red Hots...  ...     good\n","3             This feels slightly softer in the mouth than t...  ...     good\n","4             A terrific Pinot, and one of the few that abso...  ...     best\n","...                                                         ...  ...      ...\n","95            Radiator dust, lees and vanilla cookie aromas ...  ...     
good\n","96            You'll detect aromas reminiscent of wood shop ...  ...     good\n","97            The old vines on the steep slopes of the Heili...  ...     best\n","98            This wine takes time to unravel and reveal its...  ...     best\n","99            Buttery oak aromas cover up any white-fruit ch...  ...     good\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609603620060,"user_tz":-300,"elapsed":1698,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"977caf3a-f20a-4f44-fd09-15069e2a6ef0"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language <en> NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns 
Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark 
NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language <fi> NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model 
sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language <xx> NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"IKK_Ii_gjJfF","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609609274404,"user_tz":-300,"elapsed":92614,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"31f4bb74-2906-4d84-f353-2cb946407c63"},"source":["from sklearn.metrics import classification_report\n","trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.classifier')\n","# We need to train longer and use a smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['classifier_dl'].setMaxEpochs(90)  \n","trainable_pipe['classifier_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the fitted pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. 
let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","              precision    recall  f1-score   support\n","\n","  acceptable       0.78      0.84      0.81      1265\n","        best       0.87      0.90      0.88      1265\n","        good       0.59      0.54      0.56      1265\n","   very good       0.62      0.60      0.61      1265\n","\n","    accuracy                           0.72      5060\n","   macro avg       0.71      0.72      0.72      5060\n","weighted avg       0.71      0.72      0.72      5060\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Let's save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609609549407,"user_tz":-300,"elapsed":275012,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b307a3f0-a9eb-4332-eb86-c17cfb97aaf1"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Let's load the model from HDD.\n","This makes Offline NLU usage possible!   
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":107},"executionInfo":{"status":"ok","timestamp":1609609567537,"user_tz":-300,"elapsed":18138,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"511b976f-b1cd-41a4-d425-555fe38c0e0a"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('It was one of the best wines i ever tasted .')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>classifier</th>\n","      <th>classifier_confidence</th>\n","      <th>document</th>\n","      <th>en_embed_sentence_small_bert_L12_768_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>good</td>\n","      <td>0.515783</td>\n","      <td>Tesla plans to invest 10M into the ML sector</td>\n","      <td>[0.15737222135066986, 0.2598555386066437, 0.85...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["             classifier  ...    en_embed_sentence_small_bert_L12_768_embeddings\n","origin_index             ...                                                   \n","0                  good  ...  
[0.15737222135066986, 0.2598555386066437, 0.85...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":12}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609609567540,"user_tz":-300,"elapsed":99,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c05f0c7f-b826-45eb-a038-b9d9f1b12f7b"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')                            | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])                                  | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)                                 | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False)                           | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)                                  | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)                                      | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)                            | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)                        | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)                       | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')                                | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)                                    | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)                                        | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32)                                                 | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False)                                          | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768)                                                | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128)                                        | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False)                                                 | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768')                         | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setClasses(['very good', 'acceptable', 'best', 'good'])  | Info: get the tags used to trained this NerDLModel | Currently set to : ['very good', 'acceptable', 'best', 'good']\n","pipe['classifier_dl'].setStorageRef('sent_small_bert_L12_768')                 | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]}
\ No newline at end of file
diff --git a/examples/colab/Training/multi_label_text_classification/NLU_traing_multi_label_classifier_E2e.ipynb b/examples/colab/Training/multi_label_text_classification/NLU_traing_multi_label_classifier_E2e.ipynb
index 8da274a9..4da08e1d 100644
--- a/examples/colab/Training/multi_label_text_classification/NLU_traing_multi_label_classifier_E2e.ipynb
+++ b/examples/colab/Training/multi_label_text_classification/NLU_traing_multi_label_classifier_E2e.ipynb
@@ -1 +1 @@
-{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_traing_multi_label_classifier_E2e.ipynb","provenance":[],"collapsed_sections":[],"toc_visible":true,"authorship_tag":"ABX9TyPWs1vEzUhNrsIX3nR13R72"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/collab/Training/multi_label_text_classification/NLU_traing_multi_label_classifier_E2e.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier for multi label prediction\n","MultiClassifierDL is a Multi-label Text Classification. MultiClassifierDL uses a Bidirectional GRU with Convolution model that we have built inside TensorFlow and supports up to 100 classes. The input to MultiClassifierDL is Sentence Embeddings such as state-of-the-art UniversalSentenceEncoder, BertSentenceEmbeddings, or SentenceEmbeddings\n","\n","\n","\n","### Multi ClassifierDL (Multi-class Text Classification with multiple classes per sentence)\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#multiclassifierdl-multi-label-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! 
apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu pyspark==2.4.7 > /dev/null \n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download E2E Challenge multi token label classification dataset\n","\n","http://www.macs.hw.ac.uk/InteractionLab/E2E/"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":586},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1609529840956,"user_tz":-60,"elapsed":160088,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"39519c61-f3a4-4369-f72a-1f0590d9bb2e"},"source":["import pandas as pd\n","!wget http://ckl-it.de/wp-content/uploads/2020/12/e2e.csv\n","test_path = '/content/e2e.csv'\n","train_df = pd.read_csv(test_path)\n","train_df = train_df.iloc[:3000]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-01 19:37:17--  http://ckl-it.de/wp-content/uploads/2020/12/e2e.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 1322591 (1.3M) [text/csv]\n","Saving to: ‘e2e.csv’\n","\n","e2e.csv             100%[===================>]   1.26M   715KB/s    in 1.8s    \n","\n","2021-01-01 19:37:20 (715 KB/s) - ‘e2e.csv’ saved [1322591/1322591]\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Unnamed: 0</th>\n","      <th>y</th>\n","      <th>text</th>\n","      <th>origin_index</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0</td>\n","      <td>name[Blue Spice],eatType[coffee shop],area[cit...</td>\n","      <td>A coffee shop in the city centre area called B...</td>\n","      <td>0</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>1</td>\n","      <td>name[Blue Spice],eatType[coffee shop],area[cit...</td>\n","      <td>Blue Spice is a coffee shop in city centre.</td>\n","      <td>1</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>2</td>\n","      <td>name[Blue Spice],eatType[coffee shop],area[riv...</td>\n","      <td>There is a coffee shop Blue Spice in the river...</td>\n","      <td>2</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>3</td>\n","      <td>name[Blue Spice],eatType[coffee shop],area[riv...</td>\n","      <td>At the riverside, there is a coffee shop calle...</td>\n","      <td>3</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>4</td>\n","      <td>name[Blue Spice],eatType[coffee shop],customer...</td>\n","      <td>The coffee shop Blue Spice is based near Crown...</td>\n","      <td>4</td>\n","   
 </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>2995</th>\n","      <td>2995</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>Near Express by Holiday Inn, in the riverside ...</td>\n","      <td>2995</td>\n","    </tr>\n","    <tr>\n","      <th>2996</th>\n","      <td>2996</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>In the riverside area, near Express by Holiday...</td>\n","      <td>2996</td>\n","    </tr>\n","    <tr>\n","      <th>2997</th>\n","      <td>2997</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>The Punter is a restaurant with Indian food in...</td>\n","      <td>2997</td>\n","    </tr>\n","    <tr>\n","      <th>2998</th>\n","      <td>2998</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>The Punter is a low rated restaurant that serv...</td>\n","      <td>2998</td>\n","    </tr>\n","    <tr>\n","      <th>2999</th>\n","      <td>2999</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>The Punter is a restaurant providing Indian fo...</td>\n","      <td>2999</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>3000 rows × 4 columns</p>\n","</div>"],"text/plain":["      Unnamed: 0  ... origin_index\n","0              0  ...            0\n","1              1  ...            1\n","2              2  ...            2\n","3              3  ...            3\n","4              4  ...            4\n","...          ...  ...          ...\n","2995        2995  ...         2995\n","2996        2996  ...         2996\n","2997        2997  ...         2997\n","2998        2998  ...         2998\n","2999        2999  ...         
2999\n","\n","[3000 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":2}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.multi_classifier')\n","\n","By default, the Universal Sentence Encoder Embeddings (USE) are beeing downloaded to provide embeddings for the classifier. You can use any of the 50+ other sentence Emeddings in NLU tough!\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":471},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609522208492,"user_tz":-60,"elapsed":410284,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"bda58bd4-d56e-471c-deea-37fe6e06af5e"},"source":["import nlu\n","# load a trainable pipeline by specifying the train  prefix \n","unfitted_pipe = nlu.load('train.multi_classifier')\n","#configure epochs\n","unfitted_pipe['multi_classifier'].setMaxEpochs(25)\n","#  fit it on a datset with label='y' and text columns. 
Labels seperated by ','\n","fitted_pipe = unfitted_pipe.fit(train_df[['y','text']], label_seperator=',')\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df[['y','text']])\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>multi_classifier_classes</th>\n","      <th>multi_classifier_confidences</th>\n","      <th>default_name_embeddings</th>\n","      <th>y</th>\n","      <th>sentence</th>\n","      <th>text</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>[near[Café Rouge], name[Blue Spice], near[Rain...</td>\n","      <td>[0.8555223, 0.99276984, 0.87128675, 0.9852337,...</td>\n","      <td>[0.026563657447695732, -0.058662936091423035, ...</td>\n","      <td>name[Blue Spice],eatType[coffee shop],area[cit...</td>\n","      <td>A coffee shop in the city centre area called B...</td>\n","      <td>A coffee shop in the city centre area called B...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>[near[Café Rouge], name[Blue Spice], near[Rain...</td>\n","      <td>[0.8142674, 0.99920505, 0.93413615, 0.98056525...</td>\n","      
<td>[0.040952689945697784, -0.04276810586452484, -...</td>\n","      <td>name[Blue Spice],eatType[coffee shop],area[cit...</td>\n","      <td>Blue Spice is a coffee shop in city centre.</td>\n","      <td>Blue Spice is a coffee shop in city centre.</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>[name[Blue Spice], near[Rainbow Vegetarian Caf...</td>\n","      <td>[0.9966337, 0.9044244, 0.904881, 0.56231284, 0...</td>\n","      <td>[0.03141527622938156, -0.05154882371425629, 0....</td>\n","      <td>name[Blue Spice],eatType[coffee shop],area[riv...</td>\n","      <td>There is a coffee shop Blue Spice in the river...</td>\n","      <td>There is a coffee shop Blue Spice in the river...</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>[near[Café Rouge], name[Blue Spice], near[Rain...</td>\n","      <td>[0.5227911, 0.99917483, 0.9394022, 0.8839797, ...</td>\n","      <td>[0.03584946319460869, -0.036898739635944366, -...</td>\n","      <td>name[Blue Spice],eatType[coffee shop],area[riv...</td>\n","      <td>At the riverside, there is a coffee shop calle...</td>\n","      <td>At the riverside, there is a coffee shop calle...</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>[near[Café Rouge], name[Blue Spice], near[Crow...</td>\n","      <td>[0.5985904, 0.7892299, 0.8222753, 0.9378743, 0...</td>\n","      <td>[0.0405426099896431, -0.0243277158588171, 0.00...</td>\n","      <td>name[Blue Spice],eatType[coffee shop],customer...</td>\n","      <td>The coffee shop Blue Spice is based near Crown...</td>\n","      <td>The coffee shop Blue Spice is based near Crown...</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>2998</th>\n","      <td>[near[Express by Holiday Inn], priceRange[high...</td>\n","      <td>[0.9999982, 0.8146039, 0.99978125, 
0.8511795, ...</td>\n","      <td>[0.05956212058663368, 0.019028551876544952, -0...</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>The Punter has a price range of less than £20,...</td>\n","      <td>The Punter is a low rated restaurant that serv...</td>\n","    </tr>\n","    <tr>\n","      <th>2999</th>\n","      <td>[near[Express by Holiday Inn], food[Indian], c...</td>\n","      <td>[0.99992794, 0.99981034, 0.5099642, 0.9994041,...</td>\n","      <td>[0.04296032711863518, -0.0015949805965647101, ...</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>The Punter is a restaurant providing Indian fo...</td>\n","      <td>The Punter is a restaurant providing Indian fo...</td>\n","    </tr>\n","    <tr>\n","      <th>2999</th>\n","      <td>[near[Express by Holiday Inn], food[Indian], c...</td>\n","      <td>[0.99992794, 0.99981034, 0.5099642, 0.9994041,...</td>\n","      <td>[0.023289771750569344, 0.056861914694309235, -...</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>It is located in the riverside.</td>\n","      <td>The Punter is a restaurant providing Indian fo...</td>\n","    </tr>\n","    <tr>\n","      <th>2999</th>\n","      <td>[near[Express by Holiday Inn], food[Indian], c...</td>\n","      <td>[0.99992794, 0.99981034, 0.5099642, 0.9994041,...</td>\n","      <td>[0.033101629465818405, 0.06402800232172012, 0....</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>It is near Express by Holiday Inn.</td>\n","      <td>The Punter is a restaurant providing Indian fo...</td>\n","    </tr>\n","    <tr>\n","      <th>2999</th>\n","      <td>[near[Express by Holiday Inn], food[Indian], c...</td>\n","      <td>[0.99992794, 0.99981034, 0.5099642, 0.9994041,...</td>\n","      <td>[0.01677701249718666, 0.04876527190208435, -0....</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      
<td>Its customer rating is low.</td>\n","      <td>The Punter is a restaurant providing Indian fo...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5266 rows × 6 columns</p>\n","</div>"],"text/plain":["                                       multi_classifier_classes  ...                                               text\n","origin_index                                                     ...                                                   \n","0             [near[Café Rouge], name[Blue Spice], near[Rain...  ...  A coffee shop in the city centre area called B...\n","1             [near[Café Rouge], name[Blue Spice], near[Rain...  ...        Blue Spice is a coffee shop in city centre.\n","2             [name[Blue Spice], near[Rainbow Vegetarian Caf...  ...  There is a coffee shop Blue Spice in the river...\n","3             [near[Café Rouge], name[Blue Spice], near[Rain...  ...  At the riverside, there is a coffee shop calle...\n","4             [near[Café Rouge], name[Blue Spice], near[Crow...  ...  The coffee shop Blue Spice is based near Crown...\n","...                                                         ...  ...                                                ...\n","2998          [near[Express by Holiday Inn], priceRange[high...  ...  The Punter is a low rated restaurant that serv...\n","2999          [near[Express by Holiday Inn], food[Indian], c...  ...  The Punter is a restaurant providing Indian fo...\n","2999          [near[Express by Holiday Inn], food[Indian], c...  ...  The Punter is a restaurant providing Indian fo...\n","2999          [near[Express by Holiday Inn], food[Indian], c...  ...  The Punter is a restaurant providing Indian fo...\n","2999          [near[Express by Holiday Inn], food[Indian], c...  ...  The Punter is a restaurant providing Indian fo...\n","\n","[5266 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"DL_5aY9b3jSd"},"source":["# 4. 
Evaluate the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"0YDA2KunCeqQ","executionInfo":{"status":"ok","timestamp":1609522209572,"user_tz":-60,"elapsed":411343,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"37539c88-d18c-425d-a28d-4127dc9bbb99"},"source":["from sklearn.preprocessing import MultiLabelBinarizer\n","from sklearn.metrics import classification_report\n","from sklearn.metrics import f1_score\n","from sklearn.metrics import roc_auc_score\n","mlb = MultiLabelBinarizer()\n","mlb = mlb.fit(preds.y.str.split(','))\n","y_true = mlb.transform(preds['y'].str.split(','))\n","y_pred = mlb.transform(preds.multi_classifier_classes.str.join(',').str.split(','))\n","print(\"Classification report: \\n\", (classification_report(y_true, y_pred)))\n","print(\"F1 micro averaging:\",(f1_score(y_true, y_pred, average='micro')))\n","print(\"ROC: \",(roc_auc_score(y_true, y_pred, average=\"micro\")))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Classification report: \n","               precision    recall  f1-score   support\n","\n","           0       0.78      0.97      0.86      1700\n","           1       0.95      0.83      0.89      2914\n","           2       0.56      0.64      0.60       576\n","           3       0.33      0.28      0.30       367\n","           4       0.38      0.55      0.45       455\n","           5       0.30      0.76      0.42       599\n","           6       0.37      0.77      0.50       550\n","           7       0.69      0.44      0.54       457\n","           8       0.99      0.72      0.84       337\n","           9       0.91      0.98      0.95      2211\n","          10       0.89      0.99      0.94      2718\n","          11       0.53      0.89      0.67      1914\n","          12       0.88      0.79      0.84    
  3154\n","          13       0.79      0.98      0.87      1087\n","          14       0.69      0.97      0.81      1118\n","          15       0.98      0.64      0.78      1077\n","          16       0.82      0.96      0.88       671\n","          17       0.71      1.00      0.83       323\n","          18       0.57      0.65      0.61       130\n","          19       0.96      0.80      0.87       186\n","          20       0.77      0.99      0.87       366\n","          21       0.57      0.20      0.30        40\n","          22       0.36      0.10      0.15        42\n","          23       0.00      0.00      0.00         4\n","          24       0.97      0.97      0.97       322\n","          25       0.99      0.83      0.91       338\n","          26       0.00      0.00      0.00         6\n","          27       0.00      0.00      0.00        34\n","          28       0.94      0.99      0.96      1273\n","          29       0.96      1.00      0.98       987\n","          30       0.90      0.99      0.95      1140\n","          31       0.74      0.85      0.79       186\n","          32       0.45      0.98      0.62       528\n","          33       0.91      0.97      0.93       662\n","          34       0.90      0.60      0.72       116\n","          35       0.67      0.09      0.16        22\n","          36       0.58      0.98      0.73       484\n","          37       0.88      0.77      0.82       601\n","          38       0.94      0.97      0.96       711\n","          39       0.99      0.96      0.97       620\n","          40       0.96      0.99      0.98       526\n","          41       0.98      1.00      0.99      1410\n","          42       1.00      0.28      0.43        72\n","          43       0.00      0.00      0.00         8\n","          44       0.00      0.00      0.00         8\n","          45       0.00      0.00      0.00         4\n","          46       0.35      0.42      0.38       595\n","          47     
  0.34      0.66      0.45       849\n","          48       0.57      0.44      0.50       627\n","          49       0.69      0.53      0.60       767\n","          50       0.31      0.32      0.32       347\n","          51       0.25      0.53      0.34       453\n","\n","   micro avg       0.73      0.84      0.78     36692\n","   macro avg       0.64      0.65      0.62     36692\n","weighted avg       0.78      0.84      0.80     36692\n"," samples avg       0.76      0.84      0.79     36692\n","\n","F1 micro averaging: 0.7831856729396004\n","ROC:  0.8980818453315285\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"mhFKVN93o1ZO"},"source":["# 5. Lets try different Sentence Emebddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"CzJd8omao0gt","executionInfo":{"status":"ok","timestamp":1609522209573,"user_tz":-60,"elapsed":411328,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"ce35ce12-fbc8-4e0f-c9a1-6feaf68da7b0"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language <en> NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language <fi> NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language <xx> NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"0ofYHpu7sloS","executionInfo":{"status":"ok","timestamp":1609529895586,"user_tz":-60,"elapsed":54621,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"44154b28-c1db-4f58-bab1-7ac185fa40b8"},"source":["# You 
might need to restart your notebook to clear RAM, or you might run out of Memory when fitting\n","import nlu\n","pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.multi_classifier')\n","pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['en_embed_sentence_small_bert_L12_768'] has settable params:\n","pipe['en_embed_sentence_small_bert_L12_768'].setBatchSize(32)  | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['en_embed_sentence_small_bert_L12_768'].setIsLong(False)  | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['en_embed_sentence_small_bert_L12_768'].setMaxSentenceLength(128)  | Info: Max sentence length to process | Currently set to : 128\n","pipe['en_embed_sentence_small_bert_L12_768'].setDimension(768)  | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['en_embed_sentence_small_bert_L12_768'].setCaseSensitive(False)  | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['en_embed_sentence_small_bert_L12_768'].setStorageRef('sent_small_bert_L12_768')  | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True)  | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True)      | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize 
custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([])       | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False)  | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0)           | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999)       | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+')   | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"])  | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0)           | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999)       | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')  | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['multi_classifier'] has settable params:\n","pipe['multi_classifier'].setMaxEpochs(2)            | Info: Maximum number of epochs to train | Currently set to : 
2\n","pipe['multi_classifier'].setLr(0.001)               | Info: Learning Rate | Currently set to : 0.001\n","pipe['multi_classifier'].setBatchSize(64)           | Info: Batch size | Currently set to : 64\n","pipe['multi_classifier'].setValidationSplit(0.0)    | Info: Choose the proportion of training dataset to be validated against the model on each Epoch. The value should be between 0.0 and 1.0 and by default it is 0.0 and off. | Currently set to : 0.0\n","pipe['multi_classifier'].setThreshold(0.5)          | Info: The minimum threshold for each label to be accepted. Default is 0.5 | Currently set to : 0.5\n","pipe['multi_classifier'].setRandomSeed(44)          | Info: Random seed | Currently set to : 44\n","pipe['multi_classifier'].setShufflePerEpoch(False)  | Info: whether to shuffle the training data on each Epoch | Currently set to : False\n","pipe['multi_classifier'].setEnableOutputLogs(True)  | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"ABHLgirmG1n9","colab":{"base_uri":"https://localhost:8080/","height":417},"executionInfo":{"status":"ok","timestamp":1609531977887,"user_tz":-60,"elapsed":2136903,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"d312277d-3826-46e2-c67e-4a10a7116c4f"},"source":["\n","# Load pipe with bert embeds and configure hyper parameters\n","# using large embeddings can take a few hours..\n","pipe['multi_classifier'].setMaxEpochs(100)            \n","pipe['multi_classifier'].setLr(0.0005)  \n","fitted_pipe = pipe.fit(train_df[['y','text']],label_seperator=',')\n","preds = fitted_pipe.predict(train_df)\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: 
middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>multi_classifier_classes</th>\n","      <th>Unnamed: 0</th>\n","      <th>document</th>\n","      <th>y</th>\n","      <th>multi_classifier_confidences</th>\n","      <th>en_embed_sentence_small_bert_L12_768_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>A coffee shop in the city centre area called B...</td>\n","      <td>[name[Blue Spice], eatType[coffee shop], area[...</td>\n","      <td>0</td>\n","      <td>A coffee shop in the city centre area called B...</td>\n","      <td>name[Blue Spice],eatType[coffee shop],area[cit...</td>\n","      <td>[0.9740321, 0.99538183, 0.92562413]</td>\n","      <td>[-0.1427491158246994, 0.5036071538925171, 0.07...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>Blue Spice is a coffee shop in city centre.</td>\n","      <td>[name[Blue Spice], eatType[coffee shop], area[...</td>\n","      <td>1</td>\n","      <td>Blue Spice is a coffee shop in city centre.</td>\n","      <td>name[Blue Spice],eatType[coffee shop],area[cit...</td>\n","      <td>[0.9950888, 0.9989519, 0.8684354]</td>\n","      <td>[-0.20697341859340668, 0.5286431312561035, 0.2...</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>There is a coffee shop Blue Spice in the river...</td>\n","      <td>[name[Blue Spice], eatType[coffee shop], area[...</td>\n","      <td>2</td>\n","      <td>There is a coffee shop Blue Spice in the 
river...</td>\n","      <td>name[Blue Spice],eatType[coffee shop],area[riv...</td>\n","      <td>[0.95310336, 0.9655487, 0.9785502]</td>\n","      <td>[0.005826675333082676, 0.49930453300476074, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>At the riverside, there is a coffee shop calle...</td>\n","      <td>[name[Blue Spice], eatType[coffee shop], area[...</td>\n","      <td>3</td>\n","      <td>At the riverside, there is a coffee shop calle...</td>\n","      <td>name[Blue Spice],eatType[coffee shop],area[riv...</td>\n","      <td>[0.8858954, 0.931189, 0.9990605]</td>\n","      <td>[0.12191159278154373, 0.37966835498809814, 0.0...</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>The coffee shop Blue Spice is based near Crown...</td>\n","      <td>[near[Crowne Plaza Hotel], customer rating[5 o...</td>\n","      <td>4</td>\n","      <td>The coffee shop Blue Spice is based near Crown...</td>\n","      <td>name[Blue Spice],eatType[coffee shop],customer...</td>\n","      <td>[0.99912286, 0.7930833, 0.9730882]</td>\n","      <td>[-0.37350592017173767, 0.1885937601327896, 0.1...</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>2995</th>\n","      <td>Near Express by Holiday Inn, in the riverside ...</td>\n","      <td>[near[Express by Holiday Inn], customer rating...</td>\n","      <td>2995</td>\n","      <td>Near Express by Holiday Inn, in the riverside ...</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>[0.9476669, 0.9914391, 0.8395983, 0.98047745, ...</td>\n","      <td>[0.0485222227871418, 0.2381688505411148, 0.227...</td>\n","    </tr>\n","    <tr>\n","      <th>2996</th>\n","      <td>In the riverside area, near Express by Holiday...</td>\n","      <td>[near[Express by Holiday 
Inn], food[Indian], c...</td>\n","      <td>2996</td>\n","      <td>In the riverside area, near Express by Holiday...</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>[0.94435394, 0.6119035, 0.7891044, 0.9885667, ...</td>\n","      <td>[0.06879807263612747, 0.23580998182296753, 0.1...</td>\n","    </tr>\n","    <tr>\n","      <th>2997</th>\n","      <td>The Punter is a restaurant with Indian food in...</td>\n","      <td>[near[Express by Holiday Inn], food[Indian], c...</td>\n","      <td>2997</td>\n","      <td>The Punter is a restaurant with Indian food in...</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>[0.99509084, 0.9424925, 0.7625178, 0.9907007, ...</td>\n","      <td>[-0.12667560577392578, 0.22056235373020172, 0....</td>\n","    </tr>\n","    <tr>\n","      <th>2998</th>\n","      <td>The Punter is a low rated restaurant that serv...</td>\n","      <td>[near[Express by Holiday Inn], food[Indian], c...</td>\n","      <td>2998</td>\n","      <td>The Punter is a low rated restaurant that serv...</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>[0.99541605, 0.9715836, 0.87202764, 0.99880993...</td>\n","      <td>[-0.13057495653629303, 0.21937601268291473, 0....</td>\n","    </tr>\n","    <tr>\n","      <th>2999</th>\n","      <td>The Punter is a restaurant providing Indian fo...</td>\n","      <td>[near[Express by Holiday Inn], food[Indian], c...</td>\n","      <td>2999</td>\n","      <td>The Punter is a restaurant providing Indian fo...</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>[0.98941034, 0.99086845, 0.82358456, 0.985973,...</td>\n","      <td>[-0.10767646133899689, 0.2529870569705963, 0.2...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>3000 rows × 7 columns</p>\n","</div>"],"text/plain":["                                                           text  ...    
en_embed_sentence_small_bert_L12_768_embeddings\n","origin_index                                                     ...                                                   \n","0             A coffee shop in the city centre area called B...  ...  [-0.1427491158246994, 0.5036071538925171, 0.07...\n","1                   Blue Spice is a coffee shop in city centre.  ...  [-0.20697341859340668, 0.5286431312561035, 0.2...\n","2             There is a coffee shop Blue Spice in the river...  ...  [0.005826675333082676, 0.49930453300476074, -0...\n","3             At the riverside, there is a coffee shop calle...  ...  [0.12191159278154373, 0.37966835498809814, 0.0...\n","4             The coffee shop Blue Spice is based near Crown...  ...  [-0.37350592017173767, 0.1885937601327896, 0.1...\n","...                                                         ...  ...                                                ...\n","2995          Near Express by Holiday Inn, in the riverside ...  ...  [0.0485222227871418, 0.2381688505411148, 0.227...\n","2996          In the riverside area, near Express by Holiday...  ...  [0.06879807263612747, 0.23580998182296753, 0.1...\n","2997          The Punter is a restaurant with Indian food in...  ...  [-0.12667560577392578, 0.22056235373020172, 0....\n","2998          The Punter is a low rated restaurant that serv...  ...  [-0.13057495653629303, 0.21937601268291473, 0....\n","2999          The Punter is a restaurant providing Indian fo...  ...  
[-0.10767646133899689, 0.2529870569705963, 0.2...\n","\n","[3000 rows x 7 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"code","metadata":{"id":"E7ah2LM6tIhG","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609531978935,"user_tz":-60,"elapsed":2137934,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"2636e995-5ef1-4457-895e-adcdf34f40c1"},"source":["from sklearn.preprocessing import MultiLabelBinarizer\n","from sklearn.metrics import classification_report\n","from sklearn.metrics import f1_score\n","from sklearn.metrics import roc_auc_score\n","mlb = MultiLabelBinarizer()\n","mlb = mlb.fit(preds.y.str.split(','))\n","y_true = mlb.transform(preds['y'].str.split(','))\n","y_pred = mlb.transform(preds.multi_classifier_classes.str.join(',').str.split(','))\n","print(\"Classification report: \\n\", (classification_report(y_true, y_pred)))\n","print(\"F1 micro averaging:\",(f1_score(y_true, y_pred, average='micro')))\n","print(\"ROC: \",(roc_auc_score(y_true, y_pred, average=\"micro\")))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Classification report: \n","               precision    recall  f1-score   support\n","\n","           0       0.97      0.98      0.97       846\n","           1       0.99      0.98      0.98      1642\n","           2       0.93      0.70      0.80       300\n","           3       0.90      0.56      0.69       209\n","           4       0.91      0.72      0.81       246\n","           5       0.91      0.79      0.85       333\n","           6       0.95      0.84      0.90       288\n","           7       0.91      0.82      0.86       260\n","           8       0.99      0.99      0.99       267\n","           9       1.00      0.99      0.99      1275\n","          10       0.99      0.99      0.99      
1458\n","          11       0.96      0.90      0.93       976\n","          12       0.95      0.97      0.96      1844\n","          13       1.00      0.99      0.99       492\n","          14       0.99      0.98      0.99       613\n","          15       0.97      0.98      0.98       632\n","          16       0.99      0.97      0.98       365\n","          17       1.00      0.97      0.99       145\n","          18       1.00      0.93      0.96        83\n","          19       1.00      0.98      0.99       136\n","          20       1.00      0.99      0.99       228\n","          21       1.00      0.69      0.82        36\n","          22       1.00      0.95      0.97        38\n","          23       1.00      0.50      0.67         4\n","          24       1.00      1.00      1.00       222\n","          25       0.99      1.00      0.99       240\n","          26       1.00      0.67      0.80         6\n","          27       1.00      0.94      0.97        32\n","          28       0.99      1.00      0.99       703\n","          29       1.00      1.00      1.00       524\n","          30       1.00      1.00      1.00       612\n","          31       1.00      0.94      0.97        88\n","          32       1.00      0.97      0.98       267\n","          33       1.00      1.00      1.00       297\n","          34       1.00      0.98      0.99        82\n","          35       1.00      0.89      0.94        18\n","          36       1.00      0.97      0.98       251\n","          37       1.00      1.00      1.00       348\n","          38       1.00      1.00      1.00       393\n","          39       1.00      0.99      1.00       390\n","          40       1.00      0.98      0.99       333\n","          41       1.00      1.00      1.00       794\n","          42       1.00      0.98      0.99        52\n","          43       1.00      0.50      0.67         8\n","          44       1.00      0.88      0.93         8\n","          45       
0.00      0.00      0.00         4\n","          46       0.90      0.78      0.83       303\n","          47       0.89      0.70      0.78       425\n","          48       0.89      0.78      0.83       349\n","          49       0.93      0.80      0.86       373\n","          50       0.82      0.42      0.56       170\n","          51       0.95      0.67      0.79       220\n","\n","   micro avg       0.98      0.94      0.95     20228\n","   macro avg       0.96      0.86      0.90     20228\n","weighted avg       0.97      0.94      0.95     20228\n"," samples avg       0.98      0.94      0.96     20228\n","\n","F1 micro averaging: 0.9549113112810033\n","ROC:  0.9659676982287029\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 6. Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609535641300,"user_tz":-60,"elapsed":243837,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"458863e7-50f4-4cfe-dfdd-1b3edde4e8d8"},"source":["stored_model_path = './models/multi_classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/multi_classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 7. Lets load the model from HDD.\n","This makes Offline NLU usage possible!   
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":103},"executionInfo":{"status":"ok","timestamp":1609535674624,"user_tz":-60,"elapsed":274401,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"589912b1-32b5-4333-fe84-46cf40658451"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Tesla plans to invest 10M into the ML sector')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>multi_classifier_classes</th>\n","      <th>document</th>\n","      <th>multi_classifier_confidences</th>\n","      <th>en_embed_sentence_small_bert_L12_768_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>[customer rating[high], customer rating[low], ...</td>\n","      <td>Tesla plans to invest 10M into the ML sector</td>\n","      <td>[0.9597453, 0.6497742, 0.986845, 0.5315694, 0....</td>\n","      <td>[0.15737222135066986, 0.2598555386066437, 0.85...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                       multi_classifier_classes  ...    
en_embed_sentence_small_bert_L12_768_embeddings\n","origin_index                                                     ...                                                   \n","0             [customer rating[high], customer rating[low], ...  ...  [0.15737222135066986, 0.2598555386066437, 0.85...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609535674627,"user_tz":-60,"elapsed":273679,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"926c0a81-339a-49b8-e9ea-7f3ce049ca01"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              
                                                                                                                                                                                                                           | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')                                                                                                                                                                                                                                                                                             
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
                                                                                               | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              | Info: whether detect lists during sentence detection | Currently set to : 
True\n","pipe['sentence_detector'].setExplodeSentences(False)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. 
| Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    
                                                                                                                                                                                                                                                                                                                                                                                                                                                               | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)                                                                                                                                                              
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        
                                                                                                                                                                                                                                                      | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768)                                                                                                                                                                                                                                                                                                                                                               
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       
              | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768')                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['multi_classifier'] has settable params:\n","pipe['multi_classifier'].setThreshold(0.5)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               | Info: The minimum threshold for each label to be accepted. Default is 0.5 | Currently set to : 0.5\n","pipe['multi_classifier'].setClasses(['name[Clowns]', 'name[Cotto]', 'near[Burger King]', 'near[Crowne Plaza Hotel]', 'customer rating[high]', 'near[Avalon]', 'near[The Bakers]', 'near[Ranch]', 'eatType[restaurant]', 'near[All Bar One]', 'customer rating[low]', 'near[Café Sicilia]', 'food[Indian]', 'eatType[pub]', 'name[Green Man]', 'name[Strada]', 'eatType[coffee shop]', 'name[Loch Fyne]', 'customer rating[5 out of 5]', 'near[Express by Holiday Inn]', 'food[French]', 'name[The Mill]', 'food[Japanese]', 'name[The Plough]', 'name[Cocum]', 'name[The Phoenix]', 'priceRange[cheap]', 'near[Rainbow Vegetarian Café]', 'near[The Rice Boat]', 'customer rating[3 out of 5]', 'customer rating[1 out of 5]', 'name[The Cricketers]', 'area[riverside]', 'name[Blue Spice]', 'priceRange[£20-25]', 'priceRange[less than £20]', 'priceRange[moderate]', 'priceRange[high]', 'name[Giraffe]', 'customer rating[average]', 'food[Fast food]', 'near[Café Rouge]', 'area[city centre]', 'familyFriendly[no]', 'food[Chinese]', 'food[Italian]', 'near[Raja Indian Cuisine]', 'priceRange[more than £30]', 'name[The Punter]', 'food[English]', 'near[The Sorrento]', 'familyFriendly[yes]'])  | Info: get the tags used to trained this NerDLModel | Currently set to : ['name[Clowns]', 'name[Cotto]', 'near[Burger King]', 'near[Crowne Plaza Hotel]', 'customer rating[high]', 'near[Avalon]', 'near[The Bakers]', 
'near[Ranch]', 'eatType[restaurant]', 'near[All Bar One]', 'customer rating[low]', 'near[Café Sicilia]', 'food[Indian]', 'eatType[pub]', 'name[Green Man]', 'name[Strada]', 'eatType[coffee shop]', 'name[Loch Fyne]', 'customer rating[5 out of 5]', 'near[Express by Holiday Inn]', 'food[French]', 'name[The Mill]', 'food[Japanese]', 'name[The Plough]', 'name[Cocum]', 'name[The Phoenix]', 'priceRange[cheap]', 'near[Rainbow Vegetarian Café]', 'near[The Rice Boat]', 'customer rating[3 out of 5]', 'customer rating[1 out of 5]', 'name[The Cricketers]', 'area[riverside]', 'name[Blue Spice]', 'priceRange[£20-25]', 'priceRange[less than £20]', 'priceRange[moderate]', 'priceRange[high]', 'name[Giraffe]', 'customer rating[average]', 'food[Fast food]', 'near[Café Rouge]', 'area[city centre]', 'familyFriendly[no]', 'food[Chinese]', 'food[Italian]', 'near[Raja Indian Cuisine]', 'priceRange[more than £30]', 'name[The Punter]', 'food[English]', 'near[The Sorrento]', 'familyFriendly[yes]']\n","pipe['multi_classifier'].setStorageRef('sent_small_bert_L12_768')                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   
                                                                                                                                                        | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"M1LjAwJVJxun"},"source":[" "],"execution_count":null,"outputs":[]}]}
\ No newline at end of file
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_traing_multi_label_classifier_E2e.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_label_text_classification/NLU_traing_multi_label_classifier_E2e.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier for multi label prediction\n","MultiClassifierDL is a multi-label text classifier. MultiClassifierDL uses a Bidirectional GRU with Convolution model that we have built inside TensorFlow and supports up to 100 classes. The input to MultiClassifierDL is sentence embeddings, such as the state-of-the-art UniversalSentenceEncoder, BertSentenceEmbeddings, or SentenceEmbeddings.\n","\n","\n","\n","### MultiClassifierDL (multi-label text classification: multiple classes per sentence)\n","With the [MultiClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#multiclassifierdl-multi-label-text-classification) from Spark NLP you can achieve State Of the Art results on any multi-label text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! 
apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu pyspark==2.4.7 > /dev/null \n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download E2E Challenge multi token label classification dataset\n","\n","http://www.macs.hw.ac.uk/InteractionLab/E2E/"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":586},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1609529840956,"user_tz":-60,"elapsed":160088,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"39519c61-f3a4-4369-f72a-1f0590d9bb2e"},"source":["import pandas as pd\n","!wget http://ckl-it.de/wp-content/uploads/2020/12/e2e.csv\n","test_path = '/content/e2e.csv'\n","train_df = pd.read_csv(test_path)\n","train_df = train_df.iloc[:3000]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-01 19:37:17--  http://ckl-it.de/wp-content/uploads/2020/12/e2e.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 1322591 (1.3M) [text/csv]\n","Saving to: ‘e2e.csv’\n","\n","e2e.csv             100%[===================>]   1.26M   715KB/s    in 1.8s    \n","\n","2021-01-01 19:37:20 (715 KB/s) - ‘e2e.csv’ saved [1322591/1322591]\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Unnamed: 0</th>\n","      <th>y</th>\n","      <th>text</th>\n","      <th>origin_index</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0</td>\n","      <td>name[Blue Spice],eatType[coffee shop],area[cit...</td>\n","      <td>A coffee shop in the city centre area called B...</td>\n","      <td>0</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>1</td>\n","      <td>name[Blue Spice],eatType[coffee shop],area[cit...</td>\n","      <td>Blue Spice is a coffee shop in city centre.</td>\n","      <td>1</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>2</td>\n","      <td>name[Blue Spice],eatType[coffee shop],area[riv...</td>\n","      <td>There is a coffee shop Blue Spice in the river...</td>\n","      <td>2</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>3</td>\n","      <td>name[Blue Spice],eatType[coffee shop],area[riv...</td>\n","      <td>At the riverside, there is a coffee shop calle...</td>\n","      <td>3</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>4</td>\n","      <td>name[Blue Spice],eatType[coffee shop],customer...</td>\n","      <td>The coffee shop Blue Spice is based near Crown...</td>\n","      <td>4</td>\n","   
 </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>2995</th>\n","      <td>2995</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>Near Express by Holiday Inn, in the riverside ...</td>\n","      <td>2995</td>\n","    </tr>\n","    <tr>\n","      <th>2996</th>\n","      <td>2996</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>In the riverside area, near Express by Holiday...</td>\n","      <td>2996</td>\n","    </tr>\n","    <tr>\n","      <th>2997</th>\n","      <td>2997</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>The Punter is a restaurant with Indian food in...</td>\n","      <td>2997</td>\n","    </tr>\n","    <tr>\n","      <th>2998</th>\n","      <td>2998</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>The Punter is a low rated restaurant that serv...</td>\n","      <td>2998</td>\n","    </tr>\n","    <tr>\n","      <th>2999</th>\n","      <td>2999</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>The Punter is a restaurant providing Indian fo...</td>\n","      <td>2999</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>3000 rows × 4 columns</p>\n","</div>"],"text/plain":["      Unnamed: 0  ... origin_index\n","0              0  ...            0\n","1              1  ...            1\n","2              2  ...            2\n","3              3  ...            3\n","4              4  ...            4\n","...          ...  ...          ...\n","2995        2995  ...         2995\n","2996        2996  ...         2996\n","2997        2997  ...         2997\n","2998        2998  ...         2998\n","2999        2999  ...         
2999\n","\n","[3000 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":2}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.multi_classifier')\n","\n","By default, the Universal Sentence Encoder Embeddings (USE) are downloaded to provide embeddings for the classifier. You can use any of the 50+ other sentence embeddings in NLU, though!\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":471},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609522208492,"user_tz":-60,"elapsed":410284,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"bda58bd4-d56e-471c-deea-37fe6e06af5e"},"source":["import nlu\n","# load a trainable pipeline by specifying the train  prefix \n","unfitted_pipe = nlu.load('train.multi_classifier')\n","#configure epochs\n","unfitted_pipe['multi_classifier'].setMaxEpochs(25)\n","#  fit it on a dataset with label='y' and text columns. 
Labels seperated by ','\n","fitted_pipe = unfitted_pipe.fit(train_df[['y','text']], label_seperator=',')\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df[['y','text']])\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>multi_classifier_classes</th>\n","      <th>multi_classifier_confidences</th>\n","      <th>default_name_embeddings</th>\n","      <th>y</th>\n","      <th>sentence</th>\n","      <th>text</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>[near[Café Rouge], name[Blue Spice], near[Rain...</td>\n","      <td>[0.8555223, 0.99276984, 0.87128675, 0.9852337,...</td>\n","      <td>[0.026563657447695732, -0.058662936091423035, ...</td>\n","      <td>name[Blue Spice],eatType[coffee shop],area[cit...</td>\n","      <td>A coffee shop in the city centre area called B...</td>\n","      <td>A coffee shop in the city centre area called B...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>[near[Café Rouge], name[Blue Spice], near[Rain...</td>\n","      <td>[0.8142674, 0.99920505, 0.93413615, 0.98056525...</td>\n","      
<td>[0.040952689945697784, -0.04276810586452484, -...</td>\n","      <td>name[Blue Spice],eatType[coffee shop],area[cit...</td>\n","      <td>Blue Spice is a coffee shop in city centre.</td>\n","      <td>Blue Spice is a coffee shop in city centre.</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>[name[Blue Spice], near[Rainbow Vegetarian Caf...</td>\n","      <td>[0.9966337, 0.9044244, 0.904881, 0.56231284, 0...</td>\n","      <td>[0.03141527622938156, -0.05154882371425629, 0....</td>\n","      <td>name[Blue Spice],eatType[coffee shop],area[riv...</td>\n","      <td>There is a coffee shop Blue Spice in the river...</td>\n","      <td>There is a coffee shop Blue Spice in the river...</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>[near[Café Rouge], name[Blue Spice], near[Rain...</td>\n","      <td>[0.5227911, 0.99917483, 0.9394022, 0.8839797, ...</td>\n","      <td>[0.03584946319460869, -0.036898739635944366, -...</td>\n","      <td>name[Blue Spice],eatType[coffee shop],area[riv...</td>\n","      <td>At the riverside, there is a coffee shop calle...</td>\n","      <td>At the riverside, there is a coffee shop calle...</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>[near[Café Rouge], name[Blue Spice], near[Crow...</td>\n","      <td>[0.5985904, 0.7892299, 0.8222753, 0.9378743, 0...</td>\n","      <td>[0.0405426099896431, -0.0243277158588171, 0.00...</td>\n","      <td>name[Blue Spice],eatType[coffee shop],customer...</td>\n","      <td>The coffee shop Blue Spice is based near Crown...</td>\n","      <td>The coffee shop Blue Spice is based near Crown...</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>2998</th>\n","      <td>[near[Express by Holiday Inn], priceRange[high...</td>\n","      <td>[0.9999982, 0.8146039, 0.99978125, 
0.8511795, ...</td>\n","      <td>[0.05956212058663368, 0.019028551876544952, -0...</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>The Punter has a price range of less than £20,...</td>\n","      <td>The Punter is a low rated restaurant that serv...</td>\n","    </tr>\n","    <tr>\n","      <th>2999</th>\n","      <td>[near[Express by Holiday Inn], food[Indian], c...</td>\n","      <td>[0.99992794, 0.99981034, 0.5099642, 0.9994041,...</td>\n","      <td>[0.04296032711863518, -0.0015949805965647101, ...</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>The Punter is a restaurant providing Indian fo...</td>\n","      <td>The Punter is a restaurant providing Indian fo...</td>\n","    </tr>\n","    <tr>\n","      <th>2999</th>\n","      <td>[near[Express by Holiday Inn], food[Indian], c...</td>\n","      <td>[0.99992794, 0.99981034, 0.5099642, 0.9994041,...</td>\n","      <td>[0.023289771750569344, 0.056861914694309235, -...</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>It is located in the riverside.</td>\n","      <td>The Punter is a restaurant providing Indian fo...</td>\n","    </tr>\n","    <tr>\n","      <th>2999</th>\n","      <td>[near[Express by Holiday Inn], food[Indian], c...</td>\n","      <td>[0.99992794, 0.99981034, 0.5099642, 0.9994041,...</td>\n","      <td>[0.033101629465818405, 0.06402800232172012, 0....</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>It is near Express by Holiday Inn.</td>\n","      <td>The Punter is a restaurant providing Indian fo...</td>\n","    </tr>\n","    <tr>\n","      <th>2999</th>\n","      <td>[near[Express by Holiday Inn], food[Indian], c...</td>\n","      <td>[0.99992794, 0.99981034, 0.5099642, 0.9994041,...</td>\n","      <td>[0.01677701249718666, 0.04876527190208435, -0....</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      
<td>Its customer rating is low.</td>\n","      <td>The Punter is a restaurant providing Indian fo...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5266 rows × 6 columns</p>\n","</div>"],"text/plain":["                                       multi_classifier_classes  ...                                               text\n","origin_index                                                     ...                                                   \n","0             [near[Café Rouge], name[Blue Spice], near[Rain...  ...  A coffee shop in the city centre area called B...\n","1             [near[Café Rouge], name[Blue Spice], near[Rain...  ...        Blue Spice is a coffee shop in city centre.\n","2             [name[Blue Spice], near[Rainbow Vegetarian Caf...  ...  There is a coffee shop Blue Spice in the river...\n","3             [near[Café Rouge], name[Blue Spice], near[Rain...  ...  At the riverside, there is a coffee shop calle...\n","4             [near[Café Rouge], name[Blue Spice], near[Crow...  ...  The coffee shop Blue Spice is based near Crown...\n","...                                                         ...  ...                                                ...\n","2998          [near[Express by Holiday Inn], priceRange[high...  ...  The Punter is a low rated restaurant that serv...\n","2999          [near[Express by Holiday Inn], food[Indian], c...  ...  The Punter is a restaurant providing Indian fo...\n","2999          [near[Express by Holiday Inn], food[Indian], c...  ...  The Punter is a restaurant providing Indian fo...\n","2999          [near[Express by Holiday Inn], food[Indian], c...  ...  The Punter is a restaurant providing Indian fo...\n","2999          [near[Express by Holiday Inn], food[Indian], c...  ...  The Punter is a restaurant providing Indian fo...\n","\n","[5266 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"DL_5aY9b3jSd"},"source":["# 4. 
Evaluate the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"0YDA2KunCeqQ","executionInfo":{"status":"ok","timestamp":1609522209572,"user_tz":-60,"elapsed":411343,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"37539c88-d18c-425d-a28d-4127dc9bbb99"},"source":["from sklearn.preprocessing import MultiLabelBinarizer\n","from sklearn.metrics import classification_report\n","from sklearn.metrics import f1_score\n","from sklearn.metrics import roc_auc_score\n","mlb = MultiLabelBinarizer()\n","mlb = mlb.fit(preds.y.str.split(','))\n","y_true = mlb.transform(preds['y'].str.split(','))\n","y_pred = mlb.transform(preds.multi_classifier_classes.str.join(',').str.split(','))\n","print(\"Classification report: \\n\", (classification_report(y_true, y_pred)))\n","print(\"F1 micro averaging:\",(f1_score(y_true, y_pred, average='micro')))\n","print(\"ROC: \",(roc_auc_score(y_true, y_pred, average=\"micro\")))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Classification report: \n","               precision    recall  f1-score   support\n","\n","           0       0.78      0.97      0.86      1700\n","           1       0.95      0.83      0.89      2914\n","           2       0.56      0.64      0.60       576\n","           3       0.33      0.28      0.30       367\n","           4       0.38      0.55      0.45       455\n","           5       0.30      0.76      0.42       599\n","           6       0.37      0.77      0.50       550\n","           7       0.69      0.44      0.54       457\n","           8       0.99      0.72      0.84       337\n","           9       0.91      0.98      0.95      2211\n","          10       0.89      0.99      0.94      2718\n","          11       0.53      0.89      0.67      1914\n","          12       0.88      0.79      0.84    
  3154\n","          13       0.79      0.98      0.87      1087\n","          14       0.69      0.97      0.81      1118\n","          15       0.98      0.64      0.78      1077\n","          16       0.82      0.96      0.88       671\n","          17       0.71      1.00      0.83       323\n","          18       0.57      0.65      0.61       130\n","          19       0.96      0.80      0.87       186\n","          20       0.77      0.99      0.87       366\n","          21       0.57      0.20      0.30        40\n","          22       0.36      0.10      0.15        42\n","          23       0.00      0.00      0.00         4\n","          24       0.97      0.97      0.97       322\n","          25       0.99      0.83      0.91       338\n","          26       0.00      0.00      0.00         6\n","          27       0.00      0.00      0.00        34\n","          28       0.94      0.99      0.96      1273\n","          29       0.96      1.00      0.98       987\n","          30       0.90      0.99      0.95      1140\n","          31       0.74      0.85      0.79       186\n","          32       0.45      0.98      0.62       528\n","          33       0.91      0.97      0.93       662\n","          34       0.90      0.60      0.72       116\n","          35       0.67      0.09      0.16        22\n","          36       0.58      0.98      0.73       484\n","          37       0.88      0.77      0.82       601\n","          38       0.94      0.97      0.96       711\n","          39       0.99      0.96      0.97       620\n","          40       0.96      0.99      0.98       526\n","          41       0.98      1.00      0.99      1410\n","          42       1.00      0.28      0.43        72\n","          43       0.00      0.00      0.00         8\n","          44       0.00      0.00      0.00         8\n","          45       0.00      0.00      0.00         4\n","          46       0.35      0.42      0.38       595\n","          47     
  0.34      0.66      0.45       849\n","          48       0.57      0.44      0.50       627\n","          49       0.69      0.53      0.60       767\n","          50       0.31      0.32      0.32       347\n","          51       0.25      0.53      0.34       453\n","\n","   micro avg       0.73      0.84      0.78     36692\n","   macro avg       0.64      0.65      0.62     36692\n","weighted avg       0.78      0.84      0.80     36692\n"," samples avg       0.76      0.84      0.79     36692\n","\n","F1 micro averaging: 0.7831856729396004\n","ROC:  0.8980818453315285\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"mhFKVN93o1ZO"},"source":["# 5. Let's try different Sentence Embeddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"CzJd8omao0gt","executionInfo":{"status":"ok","timestamp":1609522209573,"user_tz":-60,"elapsed":411328,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"ce35ce12-fbc8-4e0f-c9a1-6feaf68da7b0"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possible sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language <en> NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language <fi> NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language <xx> NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"0ofYHpu7sloS","executionInfo":{"status":"ok","timestamp":1609529895586,"user_tz":-60,"elapsed":54621,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"44154b28-c1db-4f58-bab1-7ac185fa40b8"},"source":["# You 
might need to restart your notebook to clear RAM, or you might run out of Memory when fitting\n","import nlu\n","pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.multi_classifier')\n","pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['en_embed_sentence_small_bert_L12_768'] has settable params:\n","pipe['en_embed_sentence_small_bert_L12_768'].setBatchSize(32)  | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['en_embed_sentence_small_bert_L12_768'].setIsLong(False)  | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['en_embed_sentence_small_bert_L12_768'].setMaxSentenceLength(128)  | Info: Max sentence length to process | Currently set to : 128\n","pipe['en_embed_sentence_small_bert_L12_768'].setDimension(768)  | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['en_embed_sentence_small_bert_L12_768'].setCaseSensitive(False)  | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['en_embed_sentence_small_bert_L12_768'].setStorageRef('sent_small_bert_L12_768')  | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True)  | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True)      | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize 
custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([])       | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False)  | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0)           | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999)       | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+')   | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"])  | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0)           | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999)       | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')  | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['multi_classifier'] has settable params:\n","pipe['multi_classifier'].setMaxEpochs(2)            | Info: Maximum number of epochs to train | Currently set to : 
2\n","pipe['multi_classifier'].setLr(0.001)               | Info: Learning Rate | Currently set to : 0.001\n","pipe['multi_classifier'].setBatchSize(64)           | Info: Batch size | Currently set to : 64\n","pipe['multi_classifier'].setValidationSplit(0.0)    | Info: Choose the proportion of training dataset to be validated against the model on each Epoch. The value should be between 0.0 and 1.0 and by default it is 0.0 and off. | Currently set to : 0.0\n","pipe['multi_classifier'].setThreshold(0.5)          | Info: The minimum threshold for each label to be accepted. Default is 0.5 | Currently set to : 0.5\n","pipe['multi_classifier'].setRandomSeed(44)          | Info: Random seed | Currently set to : 44\n","pipe['multi_classifier'].setShufflePerEpoch(False)  | Info: whether to shuffle the training data on each Epoch | Currently set to : False\n","pipe['multi_classifier'].setEnableOutputLogs(True)  | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"ABHLgirmG1n9","colab":{"base_uri":"https://localhost:8080/","height":417},"executionInfo":{"status":"ok","timestamp":1609531977887,"user_tz":-60,"elapsed":2136903,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"d312277d-3826-46e2-c67e-4a10a7116c4f"},"source":["\n","# Load pipe with bert embeds and configure hyper parameters\n","# using large embeddings can take a few hours..\n","pipe['multi_classifier'].setMaxEpochs(100)            \n","pipe['multi_classifier'].setLr(0.0005)  \n","fitted_pipe = pipe.fit(train_df[['y','text']],label_seperator=',')\n","preds = fitted_pipe.predict(train_df)\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: 
middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>text</th>\n","      <th>multi_classifier_classes</th>\n","      <th>Unnamed: 0</th>\n","      <th>document</th>\n","      <th>y</th>\n","      <th>multi_classifier_confidences</th>\n","      <th>en_embed_sentence_small_bert_L12_768_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>A coffee shop in the city centre area called B...</td>\n","      <td>[name[Blue Spice], eatType[coffee shop], area[...</td>\n","      <td>0</td>\n","      <td>A coffee shop in the city centre area called B...</td>\n","      <td>name[Blue Spice],eatType[coffee shop],area[cit...</td>\n","      <td>[0.9740321, 0.99538183, 0.92562413]</td>\n","      <td>[-0.1427491158246994, 0.5036071538925171, 0.07...</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>Blue Spice is a coffee shop in city centre.</td>\n","      <td>[name[Blue Spice], eatType[coffee shop], area[...</td>\n","      <td>1</td>\n","      <td>Blue Spice is a coffee shop in city centre.</td>\n","      <td>name[Blue Spice],eatType[coffee shop],area[cit...</td>\n","      <td>[0.9950888, 0.9989519, 0.8684354]</td>\n","      <td>[-0.20697341859340668, 0.5286431312561035, 0.2...</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>There is a coffee shop Blue Spice in the river...</td>\n","      <td>[name[Blue Spice], eatType[coffee shop], area[...</td>\n","      <td>2</td>\n","      <td>There is a coffee shop Blue Spice in the 
river...</td>\n","      <td>name[Blue Spice],eatType[coffee shop],area[riv...</td>\n","      <td>[0.95310336, 0.9655487, 0.9785502]</td>\n","      <td>[0.005826675333082676, 0.49930453300476074, -0...</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>At the riverside, there is a coffee shop calle...</td>\n","      <td>[name[Blue Spice], eatType[coffee shop], area[...</td>\n","      <td>3</td>\n","      <td>At the riverside, there is a coffee shop calle...</td>\n","      <td>name[Blue Spice],eatType[coffee shop],area[riv...</td>\n","      <td>[0.8858954, 0.931189, 0.9990605]</td>\n","      <td>[0.12191159278154373, 0.37966835498809814, 0.0...</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>The coffee shop Blue Spice is based near Crown...</td>\n","      <td>[near[Crowne Plaza Hotel], customer rating[5 o...</td>\n","      <td>4</td>\n","      <td>The coffee shop Blue Spice is based near Crown...</td>\n","      <td>name[Blue Spice],eatType[coffee shop],customer...</td>\n","      <td>[0.99912286, 0.7930833, 0.9730882]</td>\n","      <td>[-0.37350592017173767, 0.1885937601327896, 0.1...</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>2995</th>\n","      <td>Near Express by Holiday Inn, in the riverside ...</td>\n","      <td>[near[Express by Holiday Inn], customer rating...</td>\n","      <td>2995</td>\n","      <td>Near Express by Holiday Inn, in the riverside ...</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>[0.9476669, 0.9914391, 0.8395983, 0.98047745, ...</td>\n","      <td>[0.0485222227871418, 0.2381688505411148, 0.227...</td>\n","    </tr>\n","    <tr>\n","      <th>2996</th>\n","      <td>In the riverside area, near Express by Holiday...</td>\n","      <td>[near[Express by Holiday 
Inn], food[Indian], c...</td>\n","      <td>2996</td>\n","      <td>In the riverside area, near Express by Holiday...</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>[0.94435394, 0.6119035, 0.7891044, 0.9885667, ...</td>\n","      <td>[0.06879807263612747, 0.23580998182296753, 0.1...</td>\n","    </tr>\n","    <tr>\n","      <th>2997</th>\n","      <td>The Punter is a restaurant with Indian food in...</td>\n","      <td>[near[Express by Holiday Inn], food[Indian], c...</td>\n","      <td>2997</td>\n","      <td>The Punter is a restaurant with Indian food in...</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>[0.99509084, 0.9424925, 0.7625178, 0.9907007, ...</td>\n","      <td>[-0.12667560577392578, 0.22056235373020172, 0....</td>\n","    </tr>\n","    <tr>\n","      <th>2998</th>\n","      <td>The Punter is a low rated restaurant that serv...</td>\n","      <td>[near[Express by Holiday Inn], food[Indian], c...</td>\n","      <td>2998</td>\n","      <td>The Punter is a low rated restaurant that serv...</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>[0.99541605, 0.9715836, 0.87202764, 0.99880993...</td>\n","      <td>[-0.13057495653629303, 0.21937601268291473, 0....</td>\n","    </tr>\n","    <tr>\n","      <th>2999</th>\n","      <td>The Punter is a restaurant providing Indian fo...</td>\n","      <td>[near[Express by Holiday Inn], food[Indian], c...</td>\n","      <td>2999</td>\n","      <td>The Punter is a restaurant providing Indian fo...</td>\n","      <td>name[The Punter],eatType[restaurant],food[Indi...</td>\n","      <td>[0.98941034, 0.99086845, 0.82358456, 0.985973,...</td>\n","      <td>[-0.10767646133899689, 0.2529870569705963, 0.2...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>3000 rows × 7 columns</p>\n","</div>"],"text/plain":["                                                           text  ...    
en_embed_sentence_small_bert_L12_768_embeddings\n","origin_index                                                     ...                                                   \n","0             A coffee shop in the city centre area called B...  ...  [-0.1427491158246994, 0.5036071538925171, 0.07...\n","1                   Blue Spice is a coffee shop in city centre.  ...  [-0.20697341859340668, 0.5286431312561035, 0.2...\n","2             There is a coffee shop Blue Spice in the river...  ...  [0.005826675333082676, 0.49930453300476074, -0...\n","3             At the riverside, there is a coffee shop calle...  ...  [0.12191159278154373, 0.37966835498809814, 0.0...\n","4             The coffee shop Blue Spice is based near Crown...  ...  [-0.37350592017173767, 0.1885937601327896, 0.1...\n","...                                                         ...  ...                                                ...\n","2995          Near Express by Holiday Inn, in the riverside ...  ...  [0.0485222227871418, 0.2381688505411148, 0.227...\n","2996          In the riverside area, near Express by Holiday...  ...  [0.06879807263612747, 0.23580998182296753, 0.1...\n","2997          The Punter is a restaurant with Indian food in...  ...  [-0.12667560577392578, 0.22056235373020172, 0....\n","2998          The Punter is a low rated restaurant that serv...  ...  [-0.13057495653629303, 0.21937601268291473, 0....\n","2999          The Punter is a restaurant providing Indian fo...  ...  
[-0.10767646133899689, 0.2529870569705963, 0.2...\n","\n","[3000 rows x 7 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"code","metadata":{"id":"E7ah2LM6tIhG","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609531978935,"user_tz":-60,"elapsed":2137934,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"2636e995-5ef1-4457-895e-adcdf34f40c1"},"source":["from sklearn.preprocessing import MultiLabelBinarizer\n","from sklearn.metrics import classification_report\n","from sklearn.metrics import f1_score\n","from sklearn.metrics import roc_auc_score\n","mlb = MultiLabelBinarizer()\n","mlb = mlb.fit(preds.y.str.split(','))\n","y_true = mlb.transform(preds['y'].str.split(','))\n","y_pred = mlb.transform(preds.multi_classifier_classes.str.join(',').str.split(','))\n","print(\"Classification report: \\n\", (classification_report(y_true, y_pred)))\n","print(\"F1 micro averaging:\",(f1_score(y_true, y_pred, average='micro')))\n","print(\"ROC: \",(roc_auc_score(y_true, y_pred, average=\"micro\")))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Classification report: \n","               precision    recall  f1-score   support\n","\n","           0       0.97      0.98      0.97       846\n","           1       0.99      0.98      0.98      1642\n","           2       0.93      0.70      0.80       300\n","           3       0.90      0.56      0.69       209\n","           4       0.91      0.72      0.81       246\n","           5       0.91      0.79      0.85       333\n","           6       0.95      0.84      0.90       288\n","           7       0.91      0.82      0.86       260\n","           8       0.99      0.99      0.99       267\n","           9       1.00      0.99      0.99      1275\n","          10       0.99      0.99      0.99      
1458\n","          11       0.96      0.90      0.93       976\n","          12       0.95      0.97      0.96      1844\n","          13       1.00      0.99      0.99       492\n","          14       0.99      0.98      0.99       613\n","          15       0.97      0.98      0.98       632\n","          16       0.99      0.97      0.98       365\n","          17       1.00      0.97      0.99       145\n","          18       1.00      0.93      0.96        83\n","          19       1.00      0.98      0.99       136\n","          20       1.00      0.99      0.99       228\n","          21       1.00      0.69      0.82        36\n","          22       1.00      0.95      0.97        38\n","          23       1.00      0.50      0.67         4\n","          24       1.00      1.00      1.00       222\n","          25       0.99      1.00      0.99       240\n","          26       1.00      0.67      0.80         6\n","          27       1.00      0.94      0.97        32\n","          28       0.99      1.00      0.99       703\n","          29       1.00      1.00      1.00       524\n","          30       1.00      1.00      1.00       612\n","          31       1.00      0.94      0.97        88\n","          32       1.00      0.97      0.98       267\n","          33       1.00      1.00      1.00       297\n","          34       1.00      0.98      0.99        82\n","          35       1.00      0.89      0.94        18\n","          36       1.00      0.97      0.98       251\n","          37       1.00      1.00      1.00       348\n","          38       1.00      1.00      1.00       393\n","          39       1.00      0.99      1.00       390\n","          40       1.00      0.98      0.99       333\n","          41       1.00      1.00      1.00       794\n","          42       1.00      0.98      0.99        52\n","          43       1.00      0.50      0.67         8\n","          44       1.00      0.88      0.93         8\n","          45       
0.00      0.00      0.00         4\n","          46       0.90      0.78      0.83       303\n","          47       0.89      0.70      0.78       425\n","          48       0.89      0.78      0.83       349\n","          49       0.93      0.80      0.86       373\n","          50       0.82      0.42      0.56       170\n","          51       0.95      0.67      0.79       220\n","\n","   micro avg       0.98      0.94      0.95     20228\n","   macro avg       0.96      0.86      0.90     20228\n","weighted avg       0.97      0.94      0.95     20228\n"," samples avg       0.98      0.94      0.96     20228\n","\n","F1 micro averaging: 0.9549113112810033\n","ROC:  0.9659676982287029\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Let's save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609535641300,"user_tz":-60,"elapsed":243837,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"458863e7-50f4-4cfe-dfdd-1b3edde4e8d8"},"source":["stored_model_path = './models/multi_classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/multi_classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Let's load the model from HDD.\n","This makes Offline NLU usage possible!   
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":103},"executionInfo":{"status":"ok","timestamp":1609535674624,"user_tz":-60,"elapsed":274401,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"589912b1-32b5-4333-fe84-46cf40658451"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Tesla plans to invest 10M into the ML sector')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>multi_classifier_classes</th>\n","      <th>document</th>\n","      <th>multi_classifier_confidences</th>\n","      <th>en_embed_sentence_small_bert_L12_768_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>[customer rating[high], customer rating[low], ...</td>\n","      <td>Tesla plans to invest 10M into the ML sector</td>\n","      <td>[0.9597453, 0.6497742, 0.986845, 0.5315694, 0....</td>\n","      <td>[0.15737222135066986, 0.2598555386066437, 0.85...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                       multi_classifier_classes  ...    
en_embed_sentence_small_bert_L12_768_embeddings\n","origin_index                                                     ...                                                   \n","0             [customer rating[high], customer rating[low], ...  ...  [0.15737222135066986, 0.2598555386066437, 0.85...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609535674627,"user_tz":-60,"elapsed":273679,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"926c0a81-339a-49b8-e9ea-7f3ce049ca01"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              
                                                                                                                                                                                                                           | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')                                                                                                                                                                                                                                                                                             
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
                                                                                               | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              | Info: whether detect lists during sentence detection | Currently set to : 
True\n","pipe['sentence_detector'].setExplodeSentences(False)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. 
| Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    
                                                                                                                                                                                                                                                                                                                                                                                                                                                               | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)                                                                                                                                                              
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        
                                                                                                                                                                                                                                                      | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768)                                                                                                                                                                                                                                                                                                                                                               
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       
              | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768')                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['multi_classifier'] has settable params:\n","pipe['multi_classifier'].setThreshold(0.5)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               | Info: The minimum threshold for each label to be accepted. Default is 0.5 | Currently set to : 0.5\n","pipe['multi_classifier'].setClasses(['name[Clowns]', 'name[Cotto]', 'near[Burger King]', 'near[Crowne Plaza Hotel]', 'customer rating[high]', 'near[Avalon]', 'near[The Bakers]', 'near[Ranch]', 'eatType[restaurant]', 'near[All Bar One]', 'customer rating[low]', 'near[Café Sicilia]', 'food[Indian]', 'eatType[pub]', 'name[Green Man]', 'name[Strada]', 'eatType[coffee shop]', 'name[Loch Fyne]', 'customer rating[5 out of 5]', 'near[Express by Holiday Inn]', 'food[French]', 'name[The Mill]', 'food[Japanese]', 'name[The Plough]', 'name[Cocum]', 'name[The Phoenix]', 'priceRange[cheap]', 'near[Rainbow Vegetarian Café]', 'near[The Rice Boat]', 'customer rating[3 out of 5]', 'customer rating[1 out of 5]', 'name[The Cricketers]', 'area[riverside]', 'name[Blue Spice]', 'priceRange[£20-25]', 'priceRange[less than £20]', 'priceRange[moderate]', 'priceRange[high]', 'name[Giraffe]', 'customer rating[average]', 'food[Fast food]', 'near[Café Rouge]', 'area[city centre]', 'familyFriendly[no]', 'food[Chinese]', 'food[Italian]', 'near[Raja Indian Cuisine]', 'priceRange[more than £30]', 'name[The Punter]', 'food[English]', 'near[The Sorrento]', 'familyFriendly[yes]'])  | Info: get the tags used to trained this NerDLModel | Currently set to : ['name[Clowns]', 'name[Cotto]', 'near[Burger King]', 'near[Crowne Plaza Hotel]', 'customer rating[high]', 'near[Avalon]', 'near[The Bakers]', 
'near[Ranch]', 'eatType[restaurant]', 'near[All Bar One]', 'customer rating[low]', 'near[Café Sicilia]', 'food[Indian]', 'eatType[pub]', 'name[Green Man]', 'name[Strada]', 'eatType[coffee shop]', 'name[Loch Fyne]', 'customer rating[5 out of 5]', 'near[Express by Holiday Inn]', 'food[French]', 'name[The Mill]', 'food[Japanese]', 'name[The Plough]', 'name[Cocum]', 'name[The Phoenix]', 'priceRange[cheap]', 'near[Rainbow Vegetarian Café]', 'near[The Rice Boat]', 'customer rating[3 out of 5]', 'customer rating[1 out of 5]', 'name[The Cricketers]', 'area[riverside]', 'name[Blue Spice]', 'priceRange[£20-25]', 'priceRange[less than £20]', 'priceRange[moderate]', 'priceRange[high]', 'name[Giraffe]', 'customer rating[average]', 'food[Fast food]', 'near[Café Rouge]', 'area[city centre]', 'familyFriendly[no]', 'food[Chinese]', 'food[Italian]', 'near[Raja Indian Cuisine]', 'priceRange[more than £30]', 'name[The Punter]', 'food[English]', 'near[The Sorrento]', 'familyFriendly[yes]']\n","pipe['multi_classifier'].setStorageRef('sent_small_bert_L12_768')                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   
                                                                                                                                                        | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"M1LjAwJVJxun"},"source":[" "],"execution_count":null,"outputs":[]}]}
\ No newline at end of file
diff --git a/examples/colab/Training/multi_label_text_classification/NLU_training_multi_token_label_text_classifier_stackoverflow_tags.ipynb b/examples/colab/Training/multi_label_text_classification/NLU_training_multi_token_label_text_classifier_stackoverflow_tags.ipynb
index cd31b65d..0505a4cf 100644
--- a/examples/colab/Training/multi_label_text_classification/NLU_training_multi_token_label_text_classifier_stackoverflow_tags.ipynb
+++ b/examples/colab/Training/multi_label_text_classification/NLU_training_multi_token_label_text_classifier_stackoverflow_tags.ipynb
@@ -1 +1 @@
-{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_token_label_text_classifier_stackoverflow_tags.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/collab/Training/NLU_training/multi_label_text_classification/NLU_training_multi_token_label_text_classifier_stackoverflow_tags.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier for sentences with multiple classes at the same time \n","MultiClassifierDL is a Multi-label Text Classification. MultiClassifierDL uses a Bidirectional GRU with Convolution model that we have built inside TensorFlow and supports up to 100 classes. The input to MultiClassifierDL is Sentence Embeddings such as state-of-the-art UniversalSentenceEncoder, BertSentenceEmbeddings, or SentenceEmbeddings\n","\n","\n","\n","### Multi ClassifierDL (Multi-class Text Classification with multiple classes per sentence)\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#multiclassifierdl-multi-label-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! 
apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu > /dev/null pyspark==2.4.7\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2 Download sample dataset 60k Stack Overflow Questions with Quality Rating\n","\n","\n","https://www.kaggle.com/imoore/60k-stack-overflow-questions-with-quality-rate"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"y4xSRWIhwT28","outputId":"f7ac934c-b18f-4ffd-d773-842c81b2a80a"},"source":["import pandas as pd\n","! wget -N https://ckl-it.de/wp-content/uploads/2020/11/60kstackoverflow.csv -P /tmp\n","test_path = '/tmp/60kstackoverflow.csv'\n","train_df = pd.read_csv(test_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-02 11:20:29--  https://ckl-it.de/wp-content/uploads/2020/11/60kstackoverflow.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:443... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 50356825 (48M) [text/csv]\n","Saving to: ‘/tmp/60kstackoverflow.csv’\n","\n","60kstackoverflow.cs 100%[===================>]  48.02M  2.57MB/s    in 21s     \n","\n","2021-01-02 11:20:51 (2.32 MB/s) - ‘/tmp/60kstackoverflow.csv’ saved [50356825/50356825]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"gBxgVIB787wd"},"source":["# Split labels and clean them.\n","import pandas as pd\n","\n","train_df = pd.read_csv(test_path)\n","\n","f = lambda x : x.replace('<','').replace('>','')\n","g = lambda l : list(map(f,l))\n","train_df['y'] = train_df.Tags.str.split('><').map(g).str.join(',')\n","train_df['text'] = train_df['Title']\n","\n"," \n","# train_df = train_df.iloc[:50]"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":430},"id":"OfMCrNk-L_pq","outputId":"6ce7798d-ff2f-4b02-a066-67497ba0bdfa"},"source":["counts = train_df.explode('y').y.value_counts()\n","counts.iloc[0:100].plot.bar(figsize=(40,8), title='Distribution of Label Tags in Dataset')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["<matplotlib.axes._subplots.AxesSubplot at 
0x7f977030b278>"]},"metadata":{"tags":[]},"execution_count":4},{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAACOAAAAJhCAYAAADinV3wAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdf9SnZV0n8PcnJiAVQWEiGdCxZPuxdSQbSbfaSsrCsWDPUbPcJKKlHx77YZ6cytR+7C62FavbZkuyhZo/kPJA4boaarW7qQ1qWmk14iAgP0YEFNQS/ewf32vyYXyGeR6uZ3geptfrnO957vu6rvu6P/f3e/8zc97nuqq7AwAAAAAAAAAA3DNfsN4FAAAAAAAAAADAfZkADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAABgzVTVb1fVL6zRXA+tqtur6rBx/taq+qG1mHvM97+q6qy1mm8V9/2VqvpIVd2whnNuraquqk335rUH076/PwAAAMBGJoADAAAArEhV7a6qT1bVx6vq1qr6f1X1I1X1z/+/0N0/0t2/vMK5vu3uxnT3h7r7Ad39mTWo/QVV9Yp95j+9uy+anXuVdTw0yU8n+aru/pJl+r+lqq69N2u6OyMAs/fz2fH77z1/2sG898zvP77Hzy6p9dqquriqHr2KOT7vnTkY7q37AAAAAAeXAA4AAACwGt/V3UcleViS85I8J8mFa32TjbYayxp6aJKbu/um9S5kJUYA5gHd/YAkH8ri99/b9vvrXd8BfHjUfVSSxyR5f5I/r6rT1rcsAAAA4FAkgAMAAACsWnff1t2XJfmeJGdV1VcnSVX9XlX9yjg+rqr+eKyW89Gq+vOq+oKqenkWQZQ/GquT/MySbZDOqaoPJXnzfrZG+rKqekdVfayqLq2qB497fd7KMXtX2amq70zyc0m+Z9zvr0b/P29pNep6blVdXVU3VdXLquro0be3jrOq6kNj+6if3993U1VHj+v3jPmeO+b/tiRvSnLCqOP3VvOdV9X2qnrXePZrquoFywz7war6cFVdX1XPXnLtF1TVjqr6QFXdPFaDefBq7r9PLadW1V+M3/b6qvrNqjp8Sf/jq+rvquq2qvqtqvrTJd/1I8b5beO7fM1+7nGX33/8Xr9cVf93rML0xqo67kC19sK13f28JC9N8sIl93jR+C4/VlVXVtU3jfb9vTNnV9X7xv2vqqofXjLXsu/76Duhqv5gvBMfrKofv7v7AAAAAPc9AjgAAADAPdbd70hybZJvWqb7p0ff5iTHZxE06O7+/tx1NZVfXXLNNyf5yiTfsZ9bPj3JDyZ5SJI7k7x4BTW+Icl/SvKacb9HLjPsB8bnW5N8aZIHJPnNfcZ8Y5IvT3JakudV1Vfu55b/LcnRY55vHjWf3d1/kuT0jJVZuvsHDlT7Pu4Ycx2TZHuSH62qM/cZ861JTk7y+CTPqc9t8/XMJGeOek5IckuS/77K+y/1mSQ/leS4JI/N4jv5sWQRRElySZKfTXJskr9L8m+WXPvLSd6Y5EFJTszi+1qp70tydpIvTnJ4kmff/fDP84dJHlVV9x/nf5nklCQPTvLKJK+tqiPv5p25KckTkzxw1HF+VT1q9C37vo8Qzh8l+askW7L4rn6yqr5jhe8mAAAAcB8ggAMAAADM+nAWAYZ9fTqLoMzDuvvT3f3n3d0HmOsF3X1Hd39yP/0v7+6/7u47kvxCkqdU1WH3vPR/9rQkv9HdV3X37VmER566z+o7v9jdn+zuv8oiTPF5YYlRy1OT/Gx3f7y7dyf59STfP1tgd7+1u9/b3Z/t7vckeVUWgZ
qlfnF8f+9N8rtJvne0/0iSnx8rwfxjkhckeVLdw62+uvvK7n5bd985nvF/LKnlCUn+prv/sLv3hqRuWHL5p7PYwuyE7v5Ud/+fVdz6d7v778f7cXEW4ZnV+HCSyiLElO5+RXffPJ7j15MckUXIalndfXl3f2CsqvOnWQSJ9obP9ve+PzrJ5u7+pe7+p+6+KsnvZPGeAAAAAIcIARwAAABg1pYkH12m/b8k2ZXkjWO7nh0rmOuaVfRfneQLs1iFZdYJY76lc2/KYiWTvZaGSD6RxSo5+zpu1LTvXFtmC6yqr6+qt4xtjG7LIlSz77Pv+/2cMI4fluR1Y3ukW5O8L4tVbI7PPVBV/2pst3RDVX0si1Vc9tZywtI6Rghl6fZgP5NFCOYdVfU3VfWDq7j1Sn6Du7MlSSe5dTzHs8eWUreN7+Xo3M37VFWnV9XbxhZTt2YRNto7fn/v+8Oy2Hbs1iXf/8/lHn73AAAAwMYkgAMAAADcY1X16CxCDZ+3islYAeanu/tLk3x3kmdV1Wl7u/cz5YFWyDlpyfFDs1h15CNZbM90vyV1HZbFVkArnffDWQQlls59Z5IbD3Ddvj6Sz63wsnSu61Y5z3JemeSyJCd199FJfjuLIMtS+34/Hx7H1yQ5vbuPWfI5srvvaV0vSfL+JCd39wOzCJTsreX6LLaWSpJUVS097+4buvs/dPcJSX44yW9V1SPuYR2r9e+SvLO776iqb8oiDPSUJA/q7mOS3LbkOe7yzlTVEUn+IMmvJTl+jH/93vF3875fk+SD+3z3R3X3E5a7DwAAAHDfJIADAAAArFpVPbCqnpjk1UleMbY82nfME6vqESOAcVsWK658dnTfmORL78Gt/31VfVVV3S/JLyW5pLs/k+TvkxxZVdur6guTPDeL7YT2ujHJ1qra3/+FvCrJT1XVw6vqAVms6PKasYXSio1aLk7yH6vqqKp6WJJnJXnFauapqiP3+VSSo5J8tLs/VVWnJvm+ZS79haq6X1X96yRnJ3nNaP/tUdPDxvybq+qM1dS0j6OSfCzJ7VX1FUl+dEnf5Um+pqrOHFtcPSPJlyx5tidX1d5Azi1ZBFA+m4OkFrZU1fOT/FAWYaG9z3Bnkj1JNlXV85I8cMml+74zh2fxTu1JcmdVnZ7k8Uvus7/3/R1JPl5Vz6mqL6qqw6rqq0d4bbn7AAAAAPdB/mEPAAAArMYfVdXHs1jV4+eT/EYWQY/lnJzkT5LcnuQvkvxWd79l9P3nJM8dW/I8exX3f3mS38tiK6Ijk/x4knT3bUl+LMlLs1ht5o7cdduj146/N1fVO5eZ93+Ouf8syQeTfCrJM1dR11LPHPe/KouVgV455l+pLUk+uc/ny7J4vl8a3//zsgj67OtPs9gG6Yokv9bdbxztL8pi9Zw3juvfluTrV/dYd/HsLAJAH0/yO/lc0Cfd/ZEkT07yq0luTvJVSXYm+ccx5NFJ3l5Vt4+afqK7r5qoZX9OGPe4PclfJvmaJN+y5Dv530nekEV46+osfvOlW3jd5Z3p7o9n8b5dnEVw6PtG/Xst+76PUNYTk5ySxbv1kSze06OXu89aPDgAAABw76vFNtwAAAAAsPbGyi7XJnnakgAWAAAAwCHFCjgAAAAArKmq+o6qOqaqjshiy6fKYtUdAAAAgEOSAA4AAAAAa+2xST6QxXZL35XkzO7+5PqWBAAAAHDw2IIKAAAAAAAAAAAmWAEHAAAAAAAAAAAmbFrvApLkuOOO661bt653GQAAAAAAAAAAsKwrr7zyI929ebm+DRHA2bp1a3bu3LneZQAAAAAAAAAAwLKq6ur99dmCCgAAAAAAAAAAJgjgAAAAAAAAAADABAEcAAAAAAAAAACYIIADAAAAAAAAAAATBHAAAAAAAAAAAGCCAA4AAAAAAAAAAEwQwAEAAAAAAAAAgAkCOAAAAAAAAAAAMEEABwAAAAAAAAAAJgjgAAAAAAAAAADABAEcAAAAAAAAAACYIIADAA
AAAAAAAAATBHAAAAAAAAAAAGCCAA4AAAAAAAAAAEwQwAEAAAAAAAAAgAkrCuBU1U9V1d9U1V9X1auq6siqenhVvb2qdlXVa6rq8DH2iHG+a/RvPZgPAAAAAAAAAAAA6+mAAZyq2pLkx5Ns6+6vTnJYkqcmeWGS87v7EUluSXLOuOScJLeM9vPHOAAAAAAAAAAAOCStdAuqTUm+qKo2JblfkuuTPC7JJaP/oiRnjuMzxnlG/2lVVWtTLgAAAAAAAAAAbCwHDOB093VJfi3Jh7II3tyW5Mokt3b3nWPYtUm2jOMtSa4Z1945xh+777xVdW5V7ayqnXv27Jl9DgAAAAAAAAAAWBebDjSgqh6Uxao2D09ya5LXJvnO2Rt39wVJLkiSbdu29XJjtu64fMXz7T5v+2xJAAAAAAAAAACwaivZgurbknywu/d096eT/GGSb0hyzNiSKklOTHLdOL4uyUlJMvqPTnLzmlYNAAAAAAAAAAAbxEoCOB9K8piqul9VVZLTkvxtkrckedIYc1aSS8fxZeM8o//N3b3sCjcAAAAAAAAAAHBfd8AATne/PcklSd6Z5L3jmguSPCfJs6pqV5Jjk1w4LrkwybGj/VlJdhyEugEAAAAAAAAAYEPYdOAhSXc/P8nz92m+Ksmpy4z9VJInz5cGAAAAAAAAAAAb30q2oAIAAAAAAAAAAPZDAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgwqb1LmA9bN1x+YrH7j5v+0GsBAAAAAAAAACA+zor4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgwgEDOFX15VX17iWfj1XVT1bVg6vqTVX1D+Pvg8b4qqoXV9WuqnpPVT3q4D8GAAAAAAAAAACsjwMGcLr777r7lO4+JcnXJflEktcl2ZHkiu4+OckV4zxJTk9y8vicm+QlB6NwAAAAAAAAAADYCFa7BdVpST7Q3VcnOSPJRaP9oiRnjuMzkrysF96W5JiqesiaVAsAAAAAAAAAABvMagM4T03yqnF8fHdfP45vSHL8ON6S5Jol11w72u6iqs6tqp1VtXPPnj2rLAMAAAAAAAAAADaGFQdwqurwJN+d5LX79nV3J+nV3Li7L+jubd29bfPmzau5FAAAAAAAAAAANozVrIBzepJ3dveN4/zGvVtLjb83jfbrkpy05LoTRxsAAAAAAAAAABxyVhPA+d58bvupJLksyVnj+Kwkly5pf3otPCbJbUu2qgIAAAAAAAAAgEPKppUMqqr7J/n2JD+8pPm8JBdX1TlJrk7ylNH++iRPSLIrySeSnL1m1QIAAAAAAAAAwAazogBOd9+R5Nh92m5OctoyYzvJM9akOgAAAAAAAAAA2OBWswUVAAAAAAAAAACwDwEcAAAAAAAAAACYIIADAAAAAAAAAAATBHAAAAAAAAAAAGCCAA4AAA
AAAAAAAEwQwAEAAAAAAAAAgAkCOAAAAAAAAAAAMEEABwAAAAAAAAAAJgjgAAAAAAAAAADABAEcAAAAAAAAAACYIIADAAAAAAAAAAATBHAAAAAAAAAAAGCCAA4AAAAAAAAAAEwQwAEAAAAAAAAAgAkCOAAAAAAAAAAAMGHTehdwKNm64/IVj9193vaDWAkAAAAAAAAAAPcWK+AAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABNWFMCpqmOq6pKqen9Vva+qHltVD66qN1XVP4y/Dxpjq6peXFW7quo9VfWog/sIAAAAAAAAAACwfla6As6Lkryhu78iySOTvC/JjiRXdPfJSa4Y50lyepKTx+fcJC9Z04oBAAAAAAAAAGADOWAAp6qOTvJvk1yYJN39T919a5Izklw0hl2U5MxxfEaSl/XC25IcU1UPWfPKAQAAAAAAAABgA1jJCjgPT7Inye9W1buq6qVVdf8kx3f39WPMDUmOH8dbklyz5PprR9tdVNW5VbWzqnbu2bPnnj8BAAAAAAAAAACso5UEcDYleVSSl3T31ya5I5/bbipJ0t2dpFdz4+6+oLu3dfe2zZs3r+ZSAAAAAAAAAADYMFYSwLk2ybXd/fZxfkkWgZwb924tNf7eNPqvS3LSkutPHG0AAAAAAAAAAHDIOWAAp7tvSHJNVX35aDotyd8muSzJWaPtrCSXjuPLkjy9Fh6T5LYlW1UBAAAAAAAAAMAhZdMKxz0zye9X1eFJrkpydhbhnYur6pwkVyd5yhj7+iRPSLIrySfGWAAAAAAAAAAAOCStKIDT3e9Osm2ZrtOWGdtJnjFZFwAAAAAAAAAA3CcccAsqAAAAAAAAAABg/wRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACZsWu8CWJmtOy5f8djd520/iJUAAAAAAAAAALCUFXAAAAAAAAAAAGCCAA4AAAAAAAAAAEwQwAEAAAAAAAAAgAkCOAAAAAAAAAAAMGHTehfA+tq64/IVj9193vaDWAkAAAAAAAAAwH2TFXAAAAAAAAAAAGCCAA4AAAAAAAAAAEwQwAEAAAAAAAAAgAkCOAAAAAAAAAAAMEEABwAAAAAAAAAAJgjgAAAAAAAAAADABAEcAAAAAAAAAACYsKIATlXtrqr3VtW7q2rnaHtwVb2pqv5h/H3QaK+qenFV7aqq91TVow7mAwAAAA
AAAAAAwHpazQo439rdp3T3tnG+I8kV3X1ykivGeZKcnuTk8Tk3yUvWqlgAAAAAAAAAANhoZragOiPJReP4oiRnLml/WS+8LckxVfWQifsAAAAAAAAAAMCGtdIATid5Y1VdWVXnjrbju/v6cXxDkuPH8ZYk1yy59trRdhdVdW5V7ayqnXv27LkHpQMAAAAAAAAAwPrbtMJx39jd11XVFyd5U1W9f2lnd3dV9Wpu3N0XJLkgSbZt27aqawEAAAAAAAAAYKNY0Qo43X3d+HtTktclOTXJjXu3lhp/bxrDr0ty0pLLTxxtAAAAAAAAAABwyDlgAKeq7l9VR+09TvL4JH+d5LIkZ41hZyW5dBxfluTptfCYJLct2aoKAAAAAAAAAAAOKSvZgur4JK+rqr3jX9ndb6iqv0xycVWdk+TqJE8Z41+f5AlJdiX5RJKz17xqAAAAAAAAAADYIA4YwOnuq5I8cpn2m5Octkx7J3nGmlQHAAAAAAAAAAAb3AG3oAIAAAAAAAAAAPZPAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwIRN610Ah6atOy5f8djd520/iJUAAAAAAAAAABxcVsABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEzatdwGwWlt3XL7isbvP234QKwEAAAAAAAAAsAIOAAAAAAAAAABMEcABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAE1YcwKmqw6rqXVX1x+P84VX19qraVVWvqarDR/sR43zX6N96cEoHAAAAAAAAAID1t5oVcH4iyfuWnL8wyfnd/YgktyQ5Z7Sfk+SW0X7+GAcAAAAAAAAAAIekFQVwqurEJNuTvHScV5LHJblkDLkoyZnj+IxxntF/2hgPAAAAAAAAAACHnJWugPNfk/xMks+O82OT3Nrdd47za5NsGcdbklyTJKP/tjEeAAAAAAAAAAAOOQcM4FTVE5Pc1N1XruWNq+rcqtpZVTv37NmzllMDAAAAAAAAAMC9ZiUr4HxDku+uqt1JXp3F1lMvSnJMVW0aY05Mct04vi7JSUky+o9OcvO+k3b3Bd29rbu3bd68eeohAAAAAAAAAABgvRwwgNPdP9vdJ3b31iRPTfLm7n5akrckedIYdlaSS8fxZeM8o//N3d1rWjUAAAAAAAAAAGwQK1kBZ3+ek+RZVbUrybFJLhztFyY5drQ/K8mOuRIBAAAAAAAAAGDj2nTgIZ/T3W9N8tZxfFWSU5cZ86kkT16D2uBetXXH5Sseu/u87QexEgAAAAAAAADgvmRmBRwAAAAAAAAAAPgXTwAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATB
DAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMCETQcaUFVHJvmzJEeM8Zd09/Or6uFJXp3k2CRXJvn+7v6nqjoiycuSfF2Sm5N8T3fvPkj1w4a3dcflKx67+7ztB7ESAAAAAAAAAOBgWMkKOP+Y5HHd/cgkpyT5zqp6TJIXJjm/ux+R5JYk54zx5yS5ZbSfP8YBAAAAAAAAAMAh6YABnF64fZx+4fh0kscluWS0X5TkzHF8xjjP6D+tqmrNKgYAAAAAAAAAgA1kJSvgpKoOq6p3J7kpyZuSfCDJrd195xhybZIt43hLkmuSZPTflsU2VfvOeW5V7ayqnXv27Jl7CgAAAAAAAAAAWCcrCuB092e6+5QkJyY5NclXzN64uy/o7m3dvW3z5s2z0wEAAAAAAAAAwLpYUQBnr+6+Nclbkjw2yTFVtWl0nZjkunF8XZKTkmT0H53k5jWpFgAAAAAAAAAANpgDBnCqanNVHTOOvyjJtyd5XxZBnCeNYWcluXQcXzbOM/rf3N29lkUDAAAAAAAAAMBGsenAQ/KQJBdV1WFZBHYu7u4/rqq/TfLqqvqVJO9KcuEYf2GSl1fVriQfTfLUg1A3AAAAAAAAAABsCAcM4HT3e5J87TLtVyU5dZn2TyV58ppUBwAAAAAAAAAAG9wBt6ACAAAAAAAAAAD2TwAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACZvWuwDgntu64/IVj9193vaDWAkAAAAAAAAA/MslgAN8HsEeAP3xZUAAACAASURBVAAAAAAAAFg5W1ABAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYMKm9S4A+Jdj647LVzx293nbD2IlAAAAAAAAALB2rIADAAAAAAAAAAATBHAAAAAAAAAAAGCCAA4AAAAAAAAAAEwQwAEAAAAAAAAAgAkCOAAAAAAAAAAAMEEABwAAAAAAAAAAJgjgAAAAAAAAAADABAEcAAAAAAAAAACYIIADAPD/2bv3eNuref/j73fl0pWiEyqVFDqIlBNC5FYhpDpJOp3cb4UfyjW3UzouR7lVonShKEcqQnQTqp3aKVLouCsOFQcpn98fnzH3mnvtudbea44x9t6z/Xo+Huux95xrrc/87rm/8/sdl8/4DAAAAAAAAAAAAKACCTgAAAAAAAAAAAAAAABABRJwAAAAAAAAAAAAAAAAgAok4AAAAAAAAAAAAAAAAAAVSMABAAAAAAAAAAAAAAAAKpCAAwAAAAAAAAAAAAAAAFQgAQcAAAAAAAAAAAAAAACoQAIOAAAAAAAAAAAAAAAAUIEEHAAAAAAAAAAAAAAAAKACCTgAAAAAAAAAAAAAAABAhVWW9QEAQK2NDzxzTj9//aE7dzoSAAAAAAAAAAAAAMCKiAo4AAAAAAAAAAAAAAAAQAUScAAAAAAAAAAAAAAAAIAKJOAAAAAAAAAAAAAAAAAAFUjAAQAAAAAAAAAAAAAAACqQgAMAAAAAAAAAAAAAAABUIAEHAAAAAA
AAAAAAAAAAqEACDgAAAAAAAAAAAAAAAFCBBBwAAAAAAAAAAAAAAACgAgk4AAAAAAAAAAAAAAAAQIXFJuDY3tD2N21fbfsq2/uX59ex/TXb15Y/1y7P2/bhtq+zPd/2Vr3/EQAAAAAAAAAAAAAAAMCysiQVcG6T9LqI2ELStpJeYXsLSQdKOiciNpN0TnksSTtK2qx8vVjSx5ofNQAAAAAAAAAAAAAAALCcWGwCTkT8OiIuK3+/RdIPJK0vaRdJx5UfO07Ss8rfd5H06UjfkXR32/dufuQAAAAAAAAAAAAAAADAcmBJKuAsYHtjSQ+X9F1J60XEr8u3fiNpvfL39SX9fOjXflGemx7rxbYvtX3pjTfeOMfDBgAAAAAAAAAAAAAAAJYPqyzpD9peQ9Kpkg6IiJttL/heRITtmMsLR8RRko6SpK233npOvwsAS8vGB565xD97/aE7dzwSAAAAAAAAAAAAAMDyaokq4Ni+kzL55sSIOK08/dvB1lLlzxvK87+UtOHQr29QngMAAAAAAAAAAAAAAADucBabgOMsdXOMpB9ExAeGvnW6pH3K3/eR9MWh51/gtK2km4a2qgIAAAAAAAAAAAAAAADuUJZkC6rHSNpb0pW2Ly/PvUnSoZJOsb2fpP+RtHv53lmSdpJ0naT/k7Rv0yMGAAAAAAAAAAAAAAAAliOLTcCJiAsleYZv7zDi50PSKyqPCwDu0DY+8Mwl/tnrD92545EAAAAAAAAAAAAAAGotdgsqAAAAAAAAAAAAAAAAADMjAQcAAAAAAAAAAAAAAACoQAIOAAAAAAAAAAAAAAAAUIEEHAAAAAAAAAAAAAAAAKACCTgAAAAAAAAAAAAAAABAhVWW9QEAANrZ+MAz5/Tz1x+6c6cjAQAAAAAAAAAAAIAVBxVwAAAAAAAAAAAAAAAAgAok4AAAAAAAAAAAAAAAAAAV2IIKALBE5rK9FVtbAQAAAAAAAAAAAFiRUAEHAAAAAAAAAAAAAAAAqEACDgAAAAAAAAAAAAAAAFCBBBwAAAAAAAAAAAAAAACgAgk4AAAAAAAAAAAAAAAAQAUScAAAAAAAAAAAAAAAAIAKJOAAAAAAAAAAAAAAAAAAFUjAAQAAAAAAAAAAAAAAACqQgAMAAAAAAAAAAAAAAABUIAEHAAAAAAAAAAAAAAAAqEACDgAAAAAAAAAAAAAAAFCBBBwAAAAAAAAAAAAAAACgAgk4AAAAAAAAAAAAAAAAQAUScAAAAAAAAAAAAAAAAIAKJOAAAAAAAAAAAAAAAAAAFUjAAQAAAAAAAAAAAAAAACqQgAMAAAAAAAAAAAAAAABUIAEHAAAAAAAAAAAAAAAAqEACDgAAAAAAAAAAAAAAAFCBBBwAAAAAAAAAAAAAAACgAgk4AAAAAAAAAAAAAAAAQAUScAAAAAAAAAAAAAAAAIAKJOAAAAAAAAAAAAAAAAAAFUjAAQAAAAAAAAAAAAAAACqQgAMAAAAAAAAAAAAAAABUWGVZHwAAYMW28YFnLvHPXn/ozh2PBAAAAAAAAAAAAADGQwUcAAAAAAAAAAAAAAAAoAIJOAAAAAAAAAAAAAAAAEAFEnAAAAAAAAAAAAAAAACACiTgAAAAAAAAAAAAAAAAABVWWdYHAABADxsfeOacfv76Q3fudCQAAAAAAAAAAAAA7uiogAMAAAAAAAAAAAAAAABUIAEHAAAAAAAAAAAAAAAAqEACDgAAAAAAAAAAAAAAAFBhlWV9AAAATJqNDzxziX/2+kN37ngkAAAAAAAAAAAAAJYHi62AY/uTtm+w/f2h59ax/TXb15Y/1y7P2/bhtq+zPd/2Vj0PHgAAAAAAAAAAAAAAAFjWlmQLqmMlPW3acwdKOiciNpN0TnksSTtK2qx8vVjSx9ocJgAAAAAAAAAAAAAAALB8WuwWVBFxvu2Npz29i6Tty9+Pk3SupDeW5z8dESHpO7bvbvveEfHrVgcMAMAdFVtbAQAAAAAAAAAAAJNpSSrgjL
LeUFLNbyStV/6+vqSfD/3cL8pzi7D9YtuX2r70xhtvHPMwAAAAAAAAAAAAAAAAgGVr3AScBUq1mxjj946KiK0jYut111239jAAAAAAAAAAAAAAAACAZWKxW1DN4LeDraVs31vSDeX5X0racOjnNijPAQCAZWQuW1tJbG8FAAAAAAAAAAAAzNW4FXBOl7RP+fs+kr449PwLnLaVdNPQVlUAAAAAAAAAAAAAAADAHc5iK+DY/oyk7SXd0/YvJL1d0qGSTrG9n6T/kbR7+fGzJO0k6TpJ/ydp3w7HDAAAlhNzqa4zl8o6veICAAAAAAAAAAAAPSw2ASci9pzhWzuM+NmQ9IragwIAAOiBxB4AAAAAAAAAAAD0sNgEHAAAACweyT0AAAAAAAAAAAArLhJwAAAAlmOTuM0XyUgAAAAAAAAAAGBFQwIOAAAAJgLJSP3jAgAAAAAAAACA8ay0rA8AAAAAAAAAAAAAAAAAmGRUwAEAAAAgico6AAAAAAAAAACMiwQcAAAAAN2xHRcAAAAAAAAA4I6MBBwAAAAAmIbEHgAAAAAAAADAXKy0rA8AAAAAAAAAAAAAAAAAmGRUwAEAAACApWQulXUkqusAAAAAAAAAwKQgAQcAAAAA7gB6bZu1PMSda2wAAAAAAAAAWNpIwAEAAAAA3KH0ShoCAAAAAAAAgJmQgAMAAAAAwBIgsQcAAAAAAADATEjAAQAAAABgGeq5HdfysIUYyUgAAAAAAABYEZCAAwAAAAAAlgs9k5EAAAAAAACAnkjAAQAAAAAAd3hU7QEAAAAAAEBPJOAAAAAAAACMiW2+AAAAAAAAIJGAAwAAAAAAsEIhaQgAAAAAAKC9lZb1AQAAAAAAAAAAAAAAAACTjAo4AAAAAAAAWG71rKxD1R4AAAAAANAKCTgAAAAAAABAQyT2AAAAAACw4iEBBwAAAAAAAJgAk1gNiGQkAAAAAMCKggQcAAAAAAAAABNlEpORAAAAAAB3bCTgAAAAAAAAAEBnJPYAAAAAwB0bCTgAAAAAAAAAMKGWh+3DesYmGQkAAADApCABBwAAAAAAAAAw8UjsAQAAALAskYADAAAAAAAAAMAMJrEa0PIQt2fs5SEuAAAAMB0JOAAAAAAAAAAAAHNAMlL/uL1jAwAAtEYCDgAAAAAAAAAAAFYYJCMBAIAeSMABAAAAAAAAAAAAVkAkIwEA0A4JOAAAAAAAAAAAAABWaCQjrRhxe8aetLg9Y5P8hhUVCTgAAAAAAAAAAAAAAGCpIxlp+YrbM/byELc3EnAAAAAAAAAAAAAAAABwh9Y7sWelOf8GAAAAAAAAAAAAAAAAgAVIwAEAAAAAAAAAAAAAAAAqkIADAAAAAAAAAAAAAAAAVCABBwAAAAAAAAAAAAAAAKhAAg4AAAAAAAAAAAAAAABQgQQcAAAAAAAAAAAAAAAAoAIJOAAAAAAAAAAAAAAAAEAFEnAAAAAAAAAAAAAAAACACiTgAAAAAAAAAAAAAAAAABVIwAEAAAAAAAAAAAAAAAAqkIADAAAAAAAAAAAAAAAAVOiSgGP7abavsX2d7QN7vAYAAAAAAAAAAAAAAACwPGiegGN7ZUkfkbSjpC0k7Wl7i9avAwAAAAAAAAAAAAAAACwPelTAeaSk6yLiJxFxq6TPStqlw+sAAAAAAAAAAAAAAAAAy5wjom1A+7mSnhYRLyyP95b0LxHxymk/92JJLy4PHyDpmiV8iXtK+l2jw11asSctbs/YxO0fe9Li9ow9aXF7xp60uD1jT1rcnrEnLW7P2JMWt2fsSYvbMzZx+8eetLg9Y09a3J6xJy1uz9iTFrdn7EmL2zP2pMXtGXvS4vaMTdz+sSctbs/Ykxa3Z+xJi9sz9qTF7Rl70uL2jD1pcXvGnrS4PWMTt3/sSYvbM/akxe0Ze9Li9ow9aXF7xp60uD1jzyXuRhGx7qhvrNLueOYmIo6SdNRcf8/2pRGxdYdD6hZ70uL2jE3c/rEnLW7P2JMWt2
fsSYvbM/akxe0Ze9Li9ow9aXF7xp60uD1jE7d/7EmL2zP2pMXtGXvS4vaMPWlxe8aetLg9Y09a3J6xJy1uz9jE7R970uL2jD1pcXvGnrS4PWNPWtyesSctbs/Ykxa3Z+xJi9szNnH7x560uD1jT1rcnrEnLW7P2JMWt2fsSYvbM3aruD22oPqlpA2HHm9QngMAAAAAAAAAAAAAAADucHok4FwiaTPbm9i+s6R/lXR6h9cBAAAAAAAAAAAAAAAAlrnmW1BFxG22XynpbEkrS/pkRFzV8CXmvG3VchB70uL2jE3c/rEnLW7P2JMWt2fsSYvbM/akxe0Ze9Li9ow9aXF7xp60uD1jE7d/7EmL2zP2pMXtGXvS4vaMPWlxe8aetLg9Y09a3J6xJy1uz9jE7R970uL2jD1pcXvGnrS4PWNPWtyesSctbs/Ykxa3Z+xJi9szNnH7x560uD1jT1rcnrEnLW7P2JMWt2fsSYvbM3aTuI6IFnEAAAAAAAAAAAAAAACAFVKPLagAAAAAAAAAAAAAAACAFQYJOAAAAAAAAAAAAAAAAEAFEnAA3GE4bbisjwMAAAAAAAAAAAAAsGJZ4RNwbP+T7fsOvhrFfO+SPAe0YPsZtifis2x7q9m+auNHREg6q8GhTjzbK9nefVLiYumwvfrgemF7c9vPtH2nZX1cd1S217b90GV9HHdk5Zq01rI+DqBWy3PZ9rtsrzL0eC3bn2oRG1iaJqmfA+COyfY6I75W2P6T7XuVPuQzbN9rWR8P7rhs72Z7zfL3t9g+rcW4IXBHYft423cberyR7XMaxb7LkjyHFYvtlZf1MWD5UObS71r+btv72j7C9suGx6KAHmyvYXuNZX0cS2KFHcwqHcZrJf1U0nmSrpf05UbhnzziuR1bBLZ9zxZxZon/z43jHWf77kOP17b9yYbxm3X+ZxhYWfDV6Hh7TH7vIela24fZfmD9UU4Z3EinPVdzDr6/fH1E0nclHSXp6PL3j1TEHXaZ7W1aBLJ9pe35I76utD2/Qfxu51xE/EPSG2qPcWnEtf0l26fP9NXytVrrmXBpe/8yaWrbx9i+zPZTKsOeL+mutteX9FVJe0s6tvI4u147be+2JM+NGfshLeJMi3lu+X9bR9Jlko62/YEGcVe2fWL9EY6M+5rWcUvs45fkuTHinlTe49UlfV/S1bZfXxu3xD6sxL6T7XNs32j7+S1iTxrbd7P9QduXlq/3Dw/wLY9s7zft8cq2376sjmdxOp7Lq0j6ru2H2n6ypEskzWsQV1L7BJ/y//TNNkc3Mv6mLgPGtre3/erh/snyyPZ65d7/5fJ4i+nn95hx91+S55aXuOrYz2mt9OsW6Ts1fo1e58UikzWjnhsj7l1sP8/2m2y/bfBVG7fE/qcRzz2gQdxu7c6heMt98nDn9uFjluS55chlkm6U9CNJ15a/X1/6Zo+oCexOiyNa36eH4rxQ0sWSniPpuZK+Y/vfa+OW2E3v1T37qLZfO9tXTexZXnPfit/tPa71Y9sn2n6p244pvzUibrG9naQnSTpG0scaxu/C9ibD7QHbq9reuDJm8z6qOy+QHHqdLW2/snxt2TDuyrbv44aLqjteOx9oewdPmyy0/bTK0Bcq+3w72X6RpK9J+q/KmAPfXsLnxlbe3zVbxuyhx7265+fP9rql7X2U7U8OvmpiDrnW9n/a3qJRvG5jndNeYyPbTyp/X7XFeee+8wGr2X6r7aPL481sP71F7IbO0lRuwaGSdlbO722jnO9bIfVoA/TSs63ci+2H2P6epKuU46fzbD+4YfxX2V67VTxJchaMWL45O+MHS9pIOaBsZbGL+1XEvELSEyV9PSIebvsJkp4fEWMPYNl+maSXS7qfpB8PfWtNSd+KiLEbx7ZXioh/2L4sIrYqz+0fER8aN+YMr7MgfqN434uIhy/uuTFjv1DS2yR9Q3lOPF7SOyNirEaF7Z9KihLrvpL+UP5+d0k/i4hNGhzzPEmPlbS2pG
8pJ0RujYi9KuOuJWlPSfsq/w2fkvSZiLilMu6Vkl4UEd8pj3eVdEhEbF4Z9zRJb4+IK8vjB0s6OCKeWxO3xPqhpPtL+h9Jf9bU9WLOVShsbzTb9yPif8Y6yKn4w+fciPDjX+NK/EMl/U7Sycr3YhD4f5enuLYfP9v3I+K8ceIOxV9X0hslbSFpQSMoIp5YE7fEXuSaaXv+OOfbiNhXRMSWtp8q6SWS3irp+Jpr9OB4bb9K0qoRcZjtyyPiYRUxe5/Ho97jJvcq2xdIuosyCenEiLipQczvlXbFCyVtGBFvb3hOXCjpiRFxa22saXEvjohHtoxZ4i70/+RcLXNlRFR11gfnrO29JG0l6UBJ8xq9x4PYz5b0dEmvlXR+RIw9WGj7S8rPyEgR8cwx4x6xmLivHifuUPxTlUkhx5Wn9pa0ZUQ8pyLmlRp9zGPfq6fFP0nZbttP0jrKz/Z5EfH/xozX5f9uKH7Pc3kHSWco27OPi4jramMOxT5E0lOU7c71JH1Y0hER8eGKmOdIek6L6/CI2JdL2lrSxsqBoi9K+ueI2KkiZu9z+cvK9vybS1tgFUnfi4iqxNEZ7qnVfbNecUucXv2cwyS9W9JfJH1F0kMlvSYiThgz3l+U7eIvS/qMpLMj4vaaYxzxGk3PizI4uJqkb0raXlNtubUkfSUiqpKebH9F0k3KBMAF70VEvL8mbol9jXJy9pTy+HWS9mvQxujS7iz3p5cq34dLlO/xhyLiPyvjbiDpCEnbKT8fF0jaPyJ+URO3xF4q7cOZnhsj7ubKyfn1IuLBziqUz4yId1fGPVrS5yPi7PL4KZJ2VX4WPxQR/1IRu9f4UPP7dIl7jaRHR8Tvy+N7SLooIlokvzW9V/cc3/Nikrsj4h3jxp7lNX8WEWMlGCyFca27SPoX5bn8GEkPkDQ/Ip5dGXfQrz5E2Yc8qWHb4vDZvl/Tj7J9qfJzcmt5fGflnMDYCwU79VFnS36PRuNl+0t6kaTTylPPlnRURBxRGfdVkt4u6beS/lGebtH+7tHHebWkV0j6gaSHKe/RXyzfa3Hv207ZjvudpIdHxG8q491L0vqSTpD0PC3cNvx4bduwvMY2kj6pnC+zpD9K+veIGGvBiO1bNHu/rCrpuce9uufnz/ZFyvbg9Pb3qePGHIq9pqR/VX5GVlL+P342Im6ujNtlrLPEfpGkF0taJyI2tb2Z8lzeoTJuz/mAk5X/fy8obdrVlO2tscbubV8YEduN+KyM/RmxffWg31U+I9tELtpeMJ8xzrFOe42m/fVpsddV3p82VuYbSJIioiqxvEcboMRpfry95nN6jqOW69ubI+Kb5fH2kv4jIh49bsxp8d+tvMZdpry+nR1Rl0AzKeWgjpH0Gk27cVT6e0T83rn6aKWI+Kbt2izhk5QDbocoB9AHbonKCW9J59n+s6R7OTOkr5S0j6SmCTga/YGrsZLttSPiD5LkzJ5rdd69Xtm4XKjzr/xwzNmgA14GWL4QEWeVxztKelaTI5YcEf/nXKn40cHkd23QiLjZ9uclrSrpAGWn5vW2D6/s2DxP0idtnyvpPpLuoUxcq/WAKMk3khQR37f9oAZxJempjeIsMhBRJgCaXTdrBn2W0B7lz1cMv6wySXC5iRuVCTZL4ERlstDOyoHvfZQrF8fmqYTLTb3wqrE1lR2yFgbX452UiTdX2a69Rtv2oyTtpZyglqSqEqK9zuNy7d1J0vrTBsjWknRbi9eIiMeWzte/S5pn+2JJn4qIr1WEXcX2vSXtLunNLY5zyE8kfctZGWo4+a22ws63bH9YiybVXTZOMNsHSXqTpFVt36ypc/lWtVkJcSfniqNnSfpwRPzddquM8sE1fmdJn4uIm+o/dnpfbYAZXNop7sCmEbHr0ON3NGizdF21ExHPs72Hsp38Z0nPi4iaa3Kv/7uBLuey7cdJOlzSOyU9RNIRtveLiF/VxpakiDjI9teVK5xaJfj8SdKVtr+mha9DVYlkxT8i4r
YyaXFERBzhXDVTo/cKtHtGxCnleqpy/GP3g23vqWzXb+KFqwuuJWnsfuoscdesiTusYz/nKRHxhnJeXK+s6HC+csJhHD9U9pOeK+l1kj5l+wvKZKFWbd2m54UywfsAZT9v+J5/s3LSqdYGEVG7unsm20s6ylmdZj3lxNbYySJLod25RTmX91KOGR2oHN+qSsBRJoGcJGlQpef55blRVZnnqnX78FGSHi1pXS9cJWQtVfZHiqOVY0RHSlJEzHcmPlUl4EjaNiJeNHgQEV+1/b6IeInrt+PoNT7U4z4tSb+XNJz8eEt5roWm9+qe43sR8Q7nwoJXR8QHa2IN88zVaKy8zo2l97iWcpz+7+XPf0i6oXzV+qXtI5XXs/eWz1urSv53VS7QOrk83k3S1WpT4WOV4UnkiLi1TMBVxSx/NuujRsQTKo9pSewn6V8i4s+S5KwO8W1l4miN/ZXjyq2uP5K6XTtfJOkREfEnZxWEz9veOHJBddV/ou29lQv1XqCcmD7L9r4RcUVF2KdK+jdJG0gaHme6WTnG08Ixkl4eERdIC5KIPqX8N8xZRPSuotP8Xt3587daRLyxR+DIBRBHKyt9P17ZBv1g6au9q+Lz0musU8q5i0cqP9eKiGs9opLmkhqaD7hfx/mATSNij9LXVjn/xr5eRMR25c+Wn5Wf235iRHxD2ZfeUNL/lPnZVlr314d9UZmo9nW1yzeQ+rQBpA7H23Fesuc46uqD5BtJiohznRXFm4iIt9h+q6aScT9s+xRJx0TEj2f/7dEmJQHnpohotT3UwB+dpf/Ol3Si7Rs0dIEfR+SKzZsk7eksFzdYhfQtVQ5AlknCuysHabaR9EJJm9v+rHJl79hlOJ2rNwbZbut5qCx0RLyz5riV2w192/bnyuPdJL2nMuZAr87/9AGWLzszLltoPvltexdl4/j+kj4t6ZERcYMzO/ZqVXRsIuJK2++RdLzy/X1cNFhNJ2m+7U9o6oa5l6Tq0rdS/eqdUWy/RNI7JP1VUxmcLRJZBvFnXfEw7iBnrxtpx4SLp0t6lxatdlZbov0eEXGMs2rYecqExksqY86X9AxlmcXhjk2LhMuBeba/KmkTSQc5Vxr8YzG/szgHSDpIOQh5le37KVfLVOtwHv+vMsHgmVp465RblEm5TZTO11vKax0u6eGlY/OmiDht9t8e6Z2SzpZ0YURcUt7jaxsd7o/L10rKzl0rg1UUw/f80JgJlxFxiKRDbB8SEQfVHtwIRyo7X1dIOt+5qrNqxc2QM5yV1P4i6WXOVQZ/rQnYK8kwIo5b/E9V+Yvt7SLiQklyVqT8S03A3hMAJaFuf0mnSnqQpL2dK2X/b5x4w/93tleVdN+IuKbJwaZe5/L7JO0WEVdLku3nKCtGNtnCp1OCz2maWh3b2t/L4NU+ynu3JFWVDV8Kk1l/LoNXUeJvq+xnjusiSb+WdE9l/2zgFtW1wXvFldS3n6P2k1kRuehkMCh9L2Uy7qG2N4iIDWuCF03PizL58yHbr6pMZprJRbYfMrzwopWI+LWzws5ByvbxgRHxp4qQv1Lfdmev5OF1I2J4e4xjbR/QIK7UuH0o6c6S1lB+9obbsTcrE9dqrRYRF0/7HLdInvq17TdK+mx5vIek35YEjNq+WfPxoRK0VyLudcrtTr6oPBd2UY7tvFaqnihrfq8uuozvRcTt5XibJeAok2yeqpz4H2bl/bZKx3Gtm5XJ7x+QdHTDpIjdJT1N0vsi4o/ORS5Nth1WTvRvFxG3SZLtj0u6ICJe2iD2jbafGRGnl9i7KKuT1GjeRx0obarXKvs5Ly79qQdExBktwmvhicLb1Wbh789V1y4eqdO1c6VB+yQirneu1v986ffVvhe7Ks/jGyR9xpn4fZym7t9zVsYYjrO9azSomDKD2wfJN+U1L7Rdfb/2YrZMqRir7TGXM2tV4THHIgfOsL3TIPG0pdL22Vk5Mb2xsv93orJC0FmSxt0xoddYpyT9rSRBSJKcFU
Rr2uA9CzAM3FrGnwZ9vk0l/W3cYJ0+Gy+U9GnbByuvx5eXxLS7K+8pLfRYIDnQK1GtRxtA6nC8Hecjey60/0lJkDm+PH6+MoGvmYgI27+R9BtlX3Jt5X37axHxhrnGm5QtqA5V3thO09DFZtyToMRcXdlwXUl5A72bcvuJ6o5CXW5GxgAAIABJREFUOQl219Qg8rOUF4mxV944V4NepFxh+MiI+INzNcguyqSIsTP/bO8z9PCdym2dJLWZ3HHuCzkYpPnGYDKgQdxPKxvEC3X+y9fYnX/bZyszCoeTQx4XEdWVVZzZwa9Tlh57b5mYPSDqypseK+mTEXH+iO/tEBHnVMQ+RtKmyobV5sqKS0dExEfGjVni3lXSyyQ9rjx1vqSPRUSTDmRrtq+V9KiIaHHDHBX/O8qtJ+YrO2APVQ4A/1UVpSd7dqSd24ZN39Lp05Uxr1NmM18ZDW9Otr8TEduWz/bhygH2z0fEphUx50XEI9x4275pr7GSsuP8kzLgdA9J60dEk2S1En+NqCwVOhSv6Xnsqe2yToqI57U4xhGv8VDl9W1n5X7Zx0TEZbbvI+nbETFrue5lxfZq4yYULE0lkenZGtoWISL+u9NrrTIYQG0Qax1l8vftpb24ZlSWci5xu2xZ435bXG2pnPC+W3nqD5L2aXENmmkCIOq3rPuhpFdGxNfL+fdaZTnrf66M+wxlUsudI2IT2w9TbntauwXVyjG0RU055pVrz+Xpcctz92g1IeKsFvZv0xJ8/iPqt6vpkeQ06Iu8VHld/4ztTSTtHhHVe7R3PJcfoWyzPFi5Fdy6kp5b+/kb9IEjtzbeXJmU9eWI+HtN3BJ7I0mblc/fqsrVX7XbRB2rfv2cQ5X99L8oV0XeXdIZMeZ2Mp5lWwzbG01P2hrzNbZSJh01OS9cVi3ONAlQOfgv21crk6d+qhzHabJFW4n9dWWb/tXKFZfHKLfjGGvLwaG4d1IO9ja9Fjm3oHijMuFyZ+WWOCdExGMr456jsi1beWpPSftGZVn9ngafB2fiYtReJ4biflnSK5Vjb1vZfq5yW7IdK+PeU7nVyXblqW8pr/s3Kc+Tsasj9BgfKnF73ae7bb3U617deXzvg8okoVZVoo5RVmO9cMT3qvvEvca1yuTSdsp76a3Ksevzx71H214rsmLYqAnDkHTz9HbuGK9xjfK9+N/yeG1J34k226ltqpyQvo/yvvdzSXvHmCunh+IO91FXk7RWoz5q061OpsV+rTKp7gvlqWdJOjYixtqBwFPV0/5ZudXZmVp4nqiqWkaPa6ftb0h6bURcPvTcKsrq/XtFRIukywXjQ7bvHA228XEmkr9H0n0iYsdyjX5URBzTIPZ/KStbfkb5md5D2Zc6Qaq6hl6vbBMutOVg+fbY/TNnYtb/U9u5nE/N8u2Ium1lbpG0uvJ6POjjRdQvbpXtnygXcR4TERdN+97hDdovzcc6nUm3f1RWinqVsnrN1RFRVbHc9shtISPiZ6Oen2PsJ0t6i3Le5avK7R3/LSLOHTNel62GSuwHKecMV5H0C0mXRNmKqlbr/vq02O9W3uuaJqrZvr/yWnaf8tQv1KYN0Px43Xkryh5j4KW99g5N9csukHRwlN13ajm3znyBMmnqE5L+O3LhzEqSrh1nLnFSEnBGnQxVJ0FptJ0cEb8c/8hmjH2NpC2jJBOUAcjLaxrypQH8KOUH+FLlyoj7K6tFXBARTbYh6DmZ3Fqvzn/p1LxdCyeHvCPaZbEOXqd68tuZefz16FS60Ll67kNRLhS27ybpAxGx3+y/ecfiXF35nNaNwKH4p0l6e5TVoc7kloMjomolYK+OdPnsba9sCJ4laUdltY/a4/2mpB1aNdSG4j5deUPeUDlpsZbyM336rL84e8zvKBNNdtFU2eIFajsdQ6+ztqTNtHCi0yKTUHOId5JyYPN25b7Fayk/47Xl75ufx7a/L+k/lPe5RVa61U4Kldc4Tzlh87mI+Mu07+0dEceP/s
1ZY/bco/ZR5XjXiIj7lgSJl0TEyyvjrqd8r5sOrtj+qLKtMpgU2kPSjyPiFTP/1qzxnh8RJ3jhbQsGQlk16fSahrdz7+JjlFuGNGnAD8UerLYdnFeDPcM/Jo1fwc32hyTdS1MTC3tK+q2k/y5xx1p9MPQ+r1H+/JNysmne8ADimLF7TQCsNb1dZXvziPhRZdx5ymTyc6NMsNu+MiIeUhn3J5I+r5xs+UFNrGlxN1eeV+uV+/9DJT0zKhYDTIvfPMHHnZKcRrzO2pI2jHbJrN2StMvg/AOUAxTXRJskmXnKFYprKyeRL5F0a0TsNesvLj7uiyS9WNI6EbGpM+n741GRBNC7n1Neo1nCpe3txx0UnePrNDsvbL8jIt5eJgEGg7IL/qwZ/C/xRyYyj3u/mxb7WTGU1Fvel4Mi4l2VcZfKtai8VnXycHmPj1COFYVy4vvVjQb/e7UPt1YmDQ1WON+kTJadN/NvLVHc+ym3On20cvLtp5KeHxHX18SdRD3u00uDOyTi9hzf6zFW3dNSGNd6oHJs6ABJ/xQRq44Z54yIePqICcPB39dQVtoZezsc2/tKOlg5kWzl+XFwNKww6qy6r6iozubZE2UH/d8Lp3/e5/gal0bE1h5KJLZ9RURsOW7MafEHOwRIOX8x9tZyveYBhuL36ONsIOm2Ue1L24+Jiu2Se40PldhfVt6r3xwRW5Z21vdq+78ldpdJX8+w5WBEvGSceENxu1RznES216i5ps0St+e5vJKyctFTlNf7syV9YjDXVRF3kFxg5Zj9Jsq+WdXir6H495C0bYn/nR7jDa3Zfnq0qZ42HLNX8mmXRLXBfaRFG2Ba3MHx/k15vK12jeim1xh4T86KTp8adWy2HzTOuO1EJOD0UBptuysbqycrJ99+2yj2NyU9OyL+WB7fXdJpLTph0xrEVyozvh4fEa+qjT09PtrrMfntXPX2nMgt0JZrtk+JiN09QwZkNFgN2YPthys7Ht/VwqsrWiVZXDW9gTbquTHidulIl/+/LZWdry3L4OwJEfHkyrjbKJMtzlPDVSw9OFdCPknSezVUNWygxcCN7Rcqt1LZQNLlyob3t2vuJbYvj4iH2d5LWa3mQOVkeouVyE3PY+c+0Hsp79XTk6WqJ4WGXufOygoAoewsVa0WGnqPny3p6coKHOe3GMCy/V1lif7Thz7T34+IB1fG7TK44qxG8qBBx7Z0fK+KiAeNGe8lEXHkLANv91BWCdx2vCOWnKsV9lUmC12qfF++Wts5L7EXaWO5QeLz4Fq/uOfGiHuSpK2Vnz8rz+f5ylLDn4uIscv395oAGJosXD8intZwsnBQSW34fjq/9trp3FrwX5Xn3ErKlZCfjcrKZM7kwtdLOrLltWIofvMEH49OcmpyzLbPVW4rs4oyMfkG5QrG6tLIHc/l+crtTk6OytVS0+IOqsu9StKqEXHY4L5VGfdy5aq070bbJLVu/Rx3TLhsbYaJtwWivlLN67ToJOfYCZeevXpBzRYA3c1wLRr7XPZSSB7upWP7cL6kV0TZfqK0+T/aaizAmUy3UrSrrLO5cuX7xhraarDR+N7Wkt40InZt+6JLIm7n92KpJb9NohaTnr3GtWyfqhwf+rFy4dMFyvZAlwrXzgTd74/bpxyKcy9Jg1X0320xoTfiNc6IiKdX/P5wouwo91C258Yei7N9kaQdlG3jrZwVfD4TEY8cN+ZQ7FHtgFuiQVL50Gs0q+zc69pZYh8fEXsv7rk5xuwyPlTiXBIR20zr/1b3GXoa1V5r1B+5QNJdJB2r3D2jWb/E/ZKdn6mpxNNzo1FShDPZ+UPKxO9/SPq2csFh1RYwPc/laa+zjqQNotFinGmxt5L08oh4YWWMGUXFbjBLKX6Twg5eCsmnvdj+mXIx7snKnWCW++QLd9pBo+UYuO3/iogDPEMF+Bb9hdK+vCoqK4ZO13Kf+m6cVTeGVyycp+yI1ex3/g5J7yiNqT0knWf7FxHxpOoDzsGqq5
zbRoWkJ0u62Pbh5bVrOje7Dv39woj4vHLlbCvLbani6Vp3/pfGB1nSFmUwci/lfpEHKicCaqpP/EnSleV8Gy572yQ5ZDrbB0fEwWP++v7lz7E7oMvIkZK+odzXuml1lmK+7U9o4bLILRqDTfcMHTLYuuA2Z+nwG5TVZWq9R3k+31XSnRvEk9SngxCZef5Z2z+IiCvaHOki9pe0jTLT/QnOFWX/URnzTs7S+s+S9OHIMnqtGoNNz+PI0tsXlkSC6jK3o9jeSfn5/rFy0mmTkuTx5YqwPfeoVUT8fFq8Fh2Oe0bEKbYPKq9xm+0Wca9TbrEwyBzfsDw3log4svz16zFtpZjL6jHb7xw3fnmN6yS92bmd6NOVCRG3l0HPD1VOGtpDq9xsP1qZdFFrddv3G1zTyjVv9QZxN5C01WCw35n4dKayPT5P0tgJOJIOknRRGWhpmdh6rMpkYXn8I2XHt/YacpXt50lauXRGX62sMlClTA4eLelo5zYUJ0n6oO3PS3pXjL+txWoRcfG0a0WTLdqKo1USfCQpIuY7E7ZqBqf/PuJ62arNdbfS/n6hpE+XiYxWg269zuVnKPuop9j+h/I8PiXqq1rYucJwL+VKQCm3eq71t4i4dfD/VybqW7QvevZz9lAmv11iu2nC5XS2j4qIF1eEeMYs3wtNbX09rkdodMLlS22Pk3B5UokxT4uWOw9JVVu0zaTB+yyNvhbVnBOD+/GaM3x/E+XWzHNKHrb9hpJAd8So42v0GenVPrx9kHxT4l5oe+x7lEcnN2nwfxj1izk+J+njyjLkrQf6T1TeT1uPM/S4T0t934uDlYmc50pSRFxe2rRjKckVb1e+r29TbjvxHEk/lLR/RPy68ni7TZzO4Gplv6pGr3GtQ5TJeUtlIqy8TlXyTbGypBuV/ffNnVUzx642PIP1a365tFlXUm4Xesqon3FuXVbjYOVk4Ya2T1TZ6qQy5sBlWnRLoN/Y/q2kF8WYlc88YnGr7RaVnXtdO6XcNmuB0lZ+RG3QTuNDkvRnZwWOwXjytsr5riZs76x8T4arfVeN5Uj6le23aOExyV9VxlREPLbMQe0raZ5zq7JjI+KrtbHVYfzCuW3PNso2hiTtX8ahDqo4zoGTJH1EudW8lIuJPqOpZMax9TqXPWIxju2LIuI1LeIPRMRltmvfh/fP9hLKRQI1PqpcjDtfeU1+qHLh4WA77dr4rQa/H69sr8zUB76Hcouu2oXgPRLVHqjsB79C0jG2z1Ausltke9ElPMYHRsQPZ0qeqk2aKj6l/Gw8ujz+pbLNX/t+tBwDH1TReV/lMc0osnLRNbbv22DcbYGJSMBRTn58X7kKXpL2Vp4Ys64EW0I3SPqNpN9L+qcG8aTc2/QLQ4/PbRRXkt5ue/+I+GNEvMxZRv390agSQOXE0tLWuvPf/YOsPpPfp6l+0HUuxi4NPRjkiNzzfT1lg1CSLo6IG1ocXCd3igarpGexr3LgdZCgdL5KObZKB6tPR/pSZ2Wvo5Xnw5+USS217tM6u71o3kEYDHpLeuGoz3CjQe+/RsRfbcv2XUqDq3ZP8iMlXS/pCknnO8vWV68UKpqexy4Z75L+4BFZ79FgCypJH5D0hMEkd0lSO1OZIDmuM5yVX/4i6WW211V2Zlr4eWmwRrmX7C+pxbY1vQZX1pT0gzJAEcpB9Uttny5VJbYeoew0LvJcRCxSkWqunMnZ+0raSdKpyoGL7ZQdwJoVX/tJ+qQzsdzKgcgW7bcDJJ3r3M5IysTk2glIKdvFw0mbf1euBvyL7dpkzl4TAL0mC1+lHBT7m/KecrYaDMQ6V1jsrDzfNlYOuJyo3CLoLOVe2uP4XbmeDT7Tz5VUPdE0pEeCT5ckp2IV2/dW9iWr9nofocu5HFny9jBJh5X3463Kqnu1yTIHKJOGvhARV5XJzdnKwC+p82y/SdKqzj3rXy7pSw3iduvndE64nO7Ixf/IzCJi31YHMoOmCZdRVvtHxCaNj3Nxqt
7noum1qGPy8KD912Qr8hn0ah+eZ/tIZX8slMlw5w4GlscYSB4kNz1AOb4wqJz5DEkX1x+ubouIFn3zUW6Mim2RZ9ErEbfne9E6EfdY5XVsdeV97kRlm+tZynHEXSpiD79Gs4nTmZLJpAXbLtXqNa51haRX2B5eNPvxaFjlZMDtVte/V3ntuUpT51koxzBaGnu7pYGy+O0NkkYm4ETEfqOen0P8rzqrvw22Otk/2m118jVJn4+IsyXJ9lOUC4s/pZwIHndMrsfiVqnDtbP0Swdt5Js1NUF9q3LbxBq9xoekrIZwuqRNbX9L0rrKCiXVbH9c0mqSnqCc03mu2tyv91QmXg7m5M4rz1WLiB+V5J5LJR0u6eHOE+VNlWOTPcYvdpL0sIj4hyTZPk55LWqRgLNaRBw/9PgE269vELfnudxlMc60e/ZKyjHKqoSv6Lj1cvErZfLjlZJk+8HK7RfH/myX+YrB+OBLRjw3Z0sj+bRXolpkZeRTlAup1lYuCD9P44/jvE7SizQ6OatF0pQkbRoRe9jeU8p/g91kJXGzMfBBwm5EnNfguGaztnI84GItvABs7MIcE7EFlUeUuBv13Bxjvlw5CLuuMpHjlIi4uu5I+/Po0k3Nto2yfWpE7Lr4n1z2bM+LiOps8Rli31lTkx7XtOo42n61pDcqO6g7K1ewnBARj62M23yv7J5s767sGJ2rvAA/VtLrIys6LXds/4cyaeFLWnh1c/OENTcuh+jOe4ba3li5/2aLxuthysHpFqsIhuMusk2IK7fisv2MiPiS7X1GfT/abEH1BeWk7AHKBtUflINmO9XGHnoNS1o5IlpWRmhyHnv2csvRIvHUpazu0GMrEwK3meXXliTu8B61q0taM9rsUXtPZeP9ScrP9FeVA2Rj70le4m6lTGB5sDLheV1Ju0VldSdnRY8ZzbXh7Kza8GjlZ+KDQ99aS7n1Z4ttvuZJ+qNy4PzU4U6j7dMiojr5u3Q+FI1KF9veTZkQsolyZc+jldtF1JaPfasycfGL5alnKAfh3i/pqIjYa6bfXYLYXbY8da5u2lXS1yJLqG8r6b0RMeu5uKyUpKlvSjomIi6a9r3Dx03mLEkVRynPhT9I+qmk50fE9XVHvCD+lyW9Ulnla6uS4LNfROxYEXM15QTWU8pTZyurAFVX7ivH9zZl9dCXl/fnP1v0eXqdyyX2RsrJoT2Uiw1OjojZVsUtM2WAbD/l/5+V/3+fiAaDDT37OdMSLs/WVMLl3jVjDb2UPslhMbXV9dqSXhcRb6mM+0NJDxn0eW3fRdIVEfHAcc5xdy5v3tO0a9HgXH5XVG6lMmrSuNVEci8ztA+fW9vvc27ZPpOI8Ssany9p5yhbTzm3eTwzIh43+28uNu7ByoV7X1DjsQDbOygnB8+ZFrt2W7mm92lPbSPzavV7L45Rvg8HKttyr1b2fV86ZrzhbVN+FhH3Hfpek21U3HiLFtt/VY6TjeqbvyYi7l5xuN3GtZxVcO8kaTAGsrey0tXY23D0ZvsaSQ9t0c6cIX7TtkuZKPydMsFreEKoxWfvS8oFBqdHxJ8X9/NzjD1qS6D5EfHQys/KVcqFMScpF7eeVzvGV+I27+MMxT6kdmJ3RMwu40ND8VdRJrdabedGBufA4M81lBPtVXMj015jZUmrR5utyQb9hZ2VSWXHRFY7uY+kb0fERhWxz1Xj8YuSXLL94PpQ7uHnTh8XHzP2e5XjC5/VVBL12irJb+Nek3qey7avVLbrj1OOk10yap5gjLhvH3p4m/L+empNn8Gzb73Uon14VURMr8a1yHNzjNmtj+OsiL91bZwZYs/XwolqKyur+bX4nDxe+dl4mjJp7+SIOLU2bi/uuBVliV89Bl4+xzOOLbX4fyuvM/LaO9f5i4ViNhgT6872t5WT8xeWx4+R9L6IeFRFzEOUJ/+c9zSfJeYpEbH7TCdEow/wFcqb6B/K43UknTe9QVsRv9sgcivTOv83KldFtuw8bq+8KV
+vvOlvKGmfaF+KtMnktzvslV0GHl+pPI+PUFYMGZTrfWfU7z19haQnR6l646wQ8fUWk6c92P7piKcjIpqUT/eIcoiSqsshtu5I9x5Mt32LcoXa35SVFpxhY60x4w2uFW/UiA5Ci86v7U0j4se1cZbgdR4v6W6SvhIRt1bE6TJxU2Kdqw7ncQ9DnZknS9pImZ0eknaT9LOIeHlF7Fco94Uefo/3jIiP1h11Xisj4sbaOCPi3kU5ubtgcEXSSr0GJMdVPgfbK8tNf3zoW7dI+lJEXNvgNRZs5dSa7f2Vq/0G2w5tJenAqEw6HBq42k7Su5RtgrdFRHUZYNtbK6unSdkZa7LSvuMEQK/Jwq8pk9KGP9efjYinVsZdo7ZNtZj4qys/y7c0jts8wcf2bhHxucU9N2bs4yQdMNR/alZBtOO5/F3lZNbnlH3WquuSl85Wu8316OcMxW6acFkG7V6orCbzlRiqdmL7LRHRompWs33Up8VomnDZK8GixB61rcyuylWyTbaVacmdkodn+iwPNPqMbCLp51q4ffiwiLikNnYP0yfUS/t2fkRUVRDtORZg+wRlqfqFqnDU3p9a36fLezB9O7mBVu9F00Tc4cl42+8e7u+2mHgrcc5Vw4nTMgHyqhixLY/tn0dE1bbfvc7lUYkPLZIhSpyNJG0WEV93JrWs0qJdWxItduvRDu80RtvzOjSYKNxZuZ3TZyWdUTOJPBT7q8rEus+Wp/ZQjsE8TdIl47Zf3G9xa+9FDD22O+nGWZFkYw3toBERn24Q9+KIeKTt7yjnGn4v6aqIuH9l3EW2JlNWtayqjGT7PGWlns9HxF+mfW/vWLgizFxjNx+/cFaxOFS50MfKc+7AiDh53JhDsUddiwbGvib1GusssXdTVpP9VuRuIs0W45T4a0hSi/uJ7YMj4mB3Wohq+zPKJM7hbdrWiIg5V4oqfbL1S6znaaqNuJayCt4Da461vEbP5NMuiWq2r1dWnDpFDebjPEMy1kA02B3AWcH4LZK2UCa/PUbSv0XEuZVxm42Bl/aglFt7SVM72Txf+dk4sOZYe5qUBJwtJX1aOfloSf+rPAmqVmaX2P+khfebHHt/L9v3johfD50QC4ksJV7F9guUpQsHg9G7SXpP5c1+sBLEyhKtO5a/V70fvYzo/C90EjfoPM6T9LwoKxWc+3x+JhpU2+kx+V2O94nKm8Rg1c33o2IrH9unKAfbVlUOuP1AebN7pqR7RcTe48Yu8RdaBeFcNXtFNEoka832Xad3Pkc9VxH/exHxcGc5xA2jlENscNNv2pHuOZjew1IaKDxPOdFyiaQLJJ0fpZRjRcx1Zvt+TUOz18TNcOwO5/GPJX1H+f5eEBFXNTjWUZ2ZBaJiqwePrtrXJLnV9o+UE70nKycL/1gbs8RtumLB9oURsZ0zqW74Hl2VVDcUf6MWbapZ4vfYj3zBQLTtpyoHht4i6fgGE6eDz94hkq6MiJNanXO9dB5Ebr5Sb4ZrZ/V7bPuuyqoh08+32oGVUdsL3CRpXrRdfNAswaf1dWhanG4VRDtOZj0gGlZ8sf2IiJjnDit6SvzHKLc+3Ug5SD+43rfolzXt5wzFbppw6awAsJqyhP7eykUyry3fa3Uuz5e0TUwlGKwq6dKoWLU4FLt5wmWPPpTtr2hqW5nnKasWnaTcVuZJEVG1rUzp+/8/LTrhNG5Vli7Jw0Of5edIupemBtL3lPTbaJAAXz5/z4yIX5bHj5P0kXH77LafHxEnzHCPUkR8YPyjlWy/WVnl+gvKa9AuygTGQ2ri9mT7mqhMEFpM/C6JuD24cSKuc2u3w6ZPiNm+v6RDo2K7haFYg4nTf1YmUVVNnDq3m/59jKhabHu9iPht5fF2GdeyfZkymeXH5fH9lBPVtX2cFym31V0nIjZ1bgv48YjYoSZuiX2qpC21aPWp6q3EZ2i7LFIJZnnjTCR+onKri6fV9tlLzHsqk2a3K099S9I7lP2S+0bZCrwF26tEo8
rOPa6dpa/+SE1td7KnMgnpTRUxDx/x9E3KtuEXR3xvLrGPl7SppMuVCS1Stu1bfEbeqrx27iDpI8oxo6OjcjvxwVicc2uyrVS2Jqsdk+yt0/jFvZXb60hZ5bu6IndPvcY6e3Ju33S8pMEY/u+UC/i/XxFz/4j4kO3tohSiaKmMP71MU4mA50v62DjtAOeOAP8maWstvCXuLZKOjTaJIT3HDbskqtleKxpU3hqKN9v8RdSOHZbXWEf5HizYQUNZyX+2hLslidt8DLznvFaJta3y/vQgSXdWbh3255o20SqL/5FlLzLRZkvba5XHLcrHPUPSByTdR7lKfyNlksHYg1eRyTcrKy8yXfbsi4hP275UU/u7PSfqt846TlOT1BuVx1a7feSairKXfBlwfLmyIR/KydmPz/KrS+pOwwPekft83qlBXEnacbhxHRF/sL2T8gI0rtZ7ZUvS5pHVnCzp18rBzLB9oXKFQa2v2D5bue+7lEkiZzWI28tFysb74p4b1yqlcby7pvYPr1YmU86b1pH+pDIbeZx4XfcitX3O9MGUUc8tqcG1oqeIeLxzy7ptlIPrZzorGsyaRLMY8zR1Tb6vcuWNJd1d0s8k1fy7VvbQXqzlOnqXinjDupzHygzsf1FuVfefZWByfkQ8e9yAUZFgswRWtu2IzHAun787twgcEZvbfqSyKtmbbV+trMJxwmJ+dSRPrVhY1fbDpYVWLKxWcZzblT/XHDfGYnyiDMg3rUZSYvXaj1yaen93Uu49fZXdZF/dX9o+Urma8L3OFd8rNYjb04NGTQDUBvWiq0M2t32TMjHphorQ/7B93yiJ6c5k+xarGI5XVhd8qqR3KlcgtdjrfOvy9aXy+OmS5kt6qe3PRcRhNcGnT56W03isBB/bOyo/E+tPG0ReS6O3YBjHSrbXjoUriLbqBzc9lwcT1JJ2diYDLmTcCerov2f2MZJeo2zD3L6Yn52LHv0cSVJE/MRtEy4fORjgt/1hSR+1fZpygqXFtV7KSZtzhgbi9tXUlh9VSsJNkypnQ3r0odaLiCMkyfbLI+K95fkjbO9XEXfgc8oxhU+owbk81B87NhomDw8+y7bfHwuXZv9SGS8wY8J4AAAgAElEQVRq4aWS/ruMnW0l6RDl9Xpcq5c/u7QPI+I9zqoWj1Xeo/eNiO+NG8+dtwEoLrK9RYMxvYW0vE9Pi3NXjRiDq03gKA7S1ELD2Z5bIoOJXNsrR8TtQ89fp2zft3C1MuHr/5QTTv8t6UfjBotZEm9rk2+KXuNar5f0TefWqoNx5Rb97VcoExa+K0kRca1zEW0Lp5evHka1XVpsyflg5djIcJuluhpJib2qsvrdHsrzoVXb4nfKSnWjVCXfjGrDKftTNTG7XDuLnbXwdifHKaskjJ2Ao/y3P1BT18ldlVV7trT9hIg4oCL21pK2GIxrteJcgHtOGcs51fYZku4abbbnvlOZv3mWcmuyv7cYbinJf4do0c9fk6r4yuvcxsr+6Va2qz7bzoURl0fE6bafL+kNtj/Uoh1axjd31qKJ6lVJ1K3HOofZ3kA5oT5YaHCBsmrmLypDHyXptRHxzfI622uqgta49lVuxXW42s05LVDaax+U9MEyJrLBuG24iDhO0nG2d41O2yv1nNeJiM84KxkOEtXe2ChR7R7O7Ws31sKfkbGq4A3mL2xvMj0hxlmttIUvKeeszyxxH6S8r9QueuoxBm7bj4lSbdhZqa3l+PeHldehzynvgy+QtHlNwIlIwCkTCbuqnLiD/6eKwTFJercyq+vrkauGn6AsWVQlIm63/Q/bd2vUeBj1GlcrO3qt4i2YVC9ZZMtd0s0MjpN0s/KmJOUKuOOUk7815jlXMA6XY2s1iNVj8vsq288rsTdTbs11UWVMSZlGafusQYO7PK5qfJcL7eHKG9xgFcRREfGFuqNtr9fk9AjvVJZYvjByL9L7SareSkXq05F2VuJaxLgdhDKQt5qke5aJ9OH3ef2xDnLh+P
OVyV7VWziMiL2dcpD3scoEmTOUjfmxDSUZHi3pCxFxVnm8o7IjWaPbxI36nce3K7cku1056XZD+apW3odRW3HUZJB/RdLJJSFCkl5SnmsiIi6WdLGzotoHlP9/43ZKn6pcsbCBcpuJwWfvZtUNBA065ldFg7KjI9wzhlbElGTWVoOxj46p/cjfYfv9kr7cKPY8ZxnuTSQdZHtNtZlI3l1Zxvt9EfHHkgj3+gZxe+o1AbCfpEcpV7FImRg5T9Imtt8Z41eMfLOkC51Vz6y85r+k8lgl6f4RsZvtXSLiOGfZ7Kp7SLGBpK2irPp27k9+pnJVzzxJVQk4apvg8ytlO/uZ5dgGblEmdLTwfknftr1QBdFGsVufy7NNUI/dBnf/PbNviohW18ph3fo5bp9wuSDZNnIF9ottv03SNyStURF3gYh4b2nXDpLT3xURZ7eI3VLnPtTwANv0vkeLwbfbIuJjDeJM1yt5eHUPVXMqA7GrL+Z3lkhpz79aWYr8r/r/7J13mGRV9bXfNQTJQUSSgoIEkShZFEExK4KkH0lEREBEEMGEKIKKCHwKiIDkZAARFCQjOWeQZA6IiCLKIFnX98c+d+pWdXX3TJ1zerpl1vPM09Strn0v1feesPfaa0VTzsD2ALaPTevDJ2x/c9QPDIZmv2Dy11hvJp7f9/V5z4QVei7WBu5UdPg+C1PUw3LH5FpE3FOJ+fnI9Hprgky8+aAXqvpE3F8p1E5OKk10Ir6PJ4CvpdfZ30cbkj7nAgpOtfNati9Pc3Sj5vSgy1gZP2v7uaYGoFCJKEIGSIXDWii+dklr+fUJAsAFhHL9tQydBweJfSZBALiIKDxd1ZBECsQuqirXiluraaZqEwORM2wUrefNjAWwErBuQzKUdDSxl3wjkKXMTdghLUw05RaD7f9KOgpYNb1+lpYKVSaOJVRU7gKuVjTNlKjNnUQoOX2TuOd2oFDBV8MoDZH3bB9NkLBWBvYiGiVOJdY1uTiPWBPeQ6GmiAaFc51tnESoZTZz87bp2Nsy487ZkG8AbF+pUM/Kwf2SfgUsmvZ8DYqsDxPhZCNiPL4NeFTS9c5Tzrwyrd8acva1hO3iYznX2qAW+bQiUe1c4pk7j7LPyNkMzTH9CMh2bCHWsecpRCKWI8aLabKgHgY1cuA7AidKapySHgeyVYDasP1rdQj8J0m6g2gKGAgTgoBDeJH/ixgYSk3Kz9t+TNIkSZNsXyHpW4ViPwncI+lSuv3psmX6ZqALK9hevvX6isSQzcUuRKdF8/e6BvhOgbhQp/i9O1EYepYgGVwMHJgZ81aFiseT7UK0pKWIhMvAaJF6VqRM0qom2sXpNpt7MpnF6R783C1p5ZRAzfYirbiRXqP137MRRYDbGXyDsDOwJ6FIdhvdJIBvDxizjYaAdJak/xKSlme6jMXelcQ1HwRcYPu5AjEbrG17p+aF7QslZW34U+HmLmDDdKhk4abKfUzcB/cQz+BxpRbxCW3P7dmATYhicA4+Q0hl75peX0okhbKhUALchGBjL0V0XK45aLxWx8Kne5NJuUz6REh+UC3VkIKopUYCkUwAeErSooQf+SKFYu8IrAL81vZTkhagQHeo7adozae2/0LhRFkpjAGxdWZCkeSv6XwLEXPTWoS87kAEHNsXKWwG1k6H9nQfe4AB0MhL/zMlFh4BSpDJXk73nul5QjXiaUkl9lLFCD4OpdO7FNYKXWtihWf04bkX6woKoj33cjsRkqsg1pA3L2u6elrnXLfPR6YW78347NTgCkmHEGNR28rh9sy4NfY5DUoTLm+V9E7bU0i3tg+Q9DCRCC+CRHSqQXYqiZp7qJ+09qlTVGQVtjIDK0+0cJ6kjxFrrPa9PLAFbEIt8vAnicR3W3kiiyAq6Ty611VzEPm4ExRd2QN1cMKU9eFWRBGrKNKcsRORoBZwuqTvOikmTSscdrqTgAttn1nwUoEpjUk7AzVsVWsRcWvk4GoTcVcm9k7Hp7/niQT5rY
Q9QK2cZIPNiTxDLqqMyRpGHQp4TRorcvN9V0n6PLHWehuhvnTeKJ8ZEZLOdKh99yUmFyC/Qffa5XvE2uUrmTE3I+7lO2zvkPY4JQrTEIXCrdxSiiqIoqpyLdRqmqnZxHAQcIekLruTvMtlfoLk3ZBM5iQs2/5TYM/3MuA+STfTvR4aeA3QwuWSNgV+bBdV2DnP9hQip6Q/UqYwO3siGioV5/dXWM1lWWYl1FAaeiHVXt5PWIeeoDIqkRCKKcUtvUrnOnuwoO22jc/JknLUoRr8VmGn1uSYtgWymn5tb5XyDBcT66LSmNf2E5I+QiiSfKmH6DMIfkDk2pr8/zZE3WXDYT8xlahJPqUeUe2Z9jiUC0nLEWpv8/asu+ahWwFuYNj+mUI97FKiEWwT2yX21cVz4A5l55UTAQeXF0B5SuF0cWeqwf2FTMKlyo7vdaBCPu89MS8jlAS+DixAdNOvYTtHJqyJvX2/45UZ9kUg6UO2T57e1zE1kHQ6ISl4Y3q9FrCb7b7qHFMZs2bHfnOOd9KZhC4tWPweE6QFZ64KzinE3+6WQpdVFaoop5fi/4pgu59EJPhKyNNOIjZzB1faSLfPNR+RxHpnZpzdB02QTsM5lgb2A7axPVOBePMRMpbrEcSk/wI32N6vQOyLCQJgW41rPRew2amBGvdxivt+gkm/JvAc0T12te3LS8TvOdckQsEney1QA4ru2HMJAtkNBeMO8UuVdJvtLCa9pKuJzqab6SYkZ20k0zz6XaCtRvLREvOpKvmRD3OuRYB/uEyH6ISAKvtES7qvXQhJxa17bS+vPl7B0xD3QGB/d7oL5wEOd6adXUp8nA2sCJxMJFD3a5EwBo27H5HA+kk69D5CZv8wQnUwq5tF0gPAik7+9Aq10rtsLzfo9zzMODTw36w2eu7l9nq2uZezlB2H+T6KeVuXRiom9MIex8qqkm62vaakG4EPEITLe22/Zjpf2rBQBU/ymqi9h6qBtNbqhZ1pM5AKNZv0kIfPKfFMpzG4yV88kLuukDRi4tmZVnaSvgnMQiTn2+vDLMJeKiCsY/vf6fWcxL4st1v4VnfbfBWDpHscjUml4xafp1Oc4jm4VuxZCCL14h7BiinzHG8mCBHzEV3DBzosqQaNV+37SPGKzvulx2R1Ggv7wc5TlW325jsCbyf2fBcDx+fkGSQtYvsvaQweApexaHl97njWJ2azZrmNUOCYDNxfKnetegoD2TmFYeLeZHut0mu4WmNnK/4idBoab3am3UkiVXyBaAxsSD1fI0jr+9seWBV3uLVA7hogxZ5MkIVeIBqgGnWPrPVsxbzW9URO8keEMt6fga/bXnbED05d7LOATziaqIpAodp7EVHoXo+oed5VYr0h6WDCQuyS3Fg9cavkOlPsy4k89ffToa0Im9K3Dv+pqYo7P/Blum05v+xke10KJecUBfn07YQYwL4Otcu7c9bK/er2pda36Xob8unKSuRT27nqRVPGC4Vi7Z8TUS173aVQwFuaUBDNbkxKNZGNCUJW2zpzMlGLG1hhT9KRdJOR3wr8hlASKy4mUiIHnv5eQ+A8p6R2/CWAvxK5lk8SanXfydkvTBQFnOslrWg7Vz6vjY2ISX4PgqE4DzFolsCPCLZbk6SfiXyboapIhIg9GvJNmkQOy90wVcZqxL3RdNYvDjyYBmcPMnm4bsd+c46LKGhHkhLe/To3iia8Fd1jHy1UVF8L2EbSH4ikWymp5Vo4P02gr6JbNrXI4E54CW5IMPOPUCjXnOwMtqlD1nNz218b/bez8W8g23fS9pEK78ZX0f09l9j4L0Go4GxJdN58OjcmgMPu5bfAK4lumTcQCeUS2IqQOW2KeFenY8WgIKM+T3RFnD/a74+C4vcxgO2fEN3OyxFs9z2Jv9/smdfbD0tTRn1iCtL8+hTxHf8iI85MRIfQpwpeW20mfTYRrR9cSY1Edf3I++E0YClJZ9veu9I5xhVc3yf6yvR3a9S4NkvH5gT+Of
zHRsXMhBzyDsBChDpbFmE03W9PpATN1UApD3lsHyjpQjo+57vYbghPJaRkzwBuktQm+Hwvfc/T1PmtUELYGni1pHZCYW46Eu3jDq17eVtiHf4qOmuXFenM3dMESesQa4kFJe3VemsegmxRFKXWAW5ZGpdE5X3OeQoi9SGEkqOB4wrEnYJm/1QwZHFP8sq4XNL/I5L/EMTZA0rPq5LOt11E5cnJCrYC+lkZZt8bGmoNvLJCeWLg/VNTXFOoIf7F9jPp9ezEHJiLVdLP9l7adFTKBoXoVlj4TzqWi8sk7c1QwlCJOep2SWu4fGNSsXm6B8VzcC28EziUSHi/WtIqxHiRS9yfCXgPUYh8FUFGPoN4Bi8gbwwt/n2kIqSJe3cRddStsomAlM9r3Wn7cElvtH1t5rUNgUO9+TgKzs1NkbsE0WYEHKZQMPgRYYM+cA6ghVvTmuU4QonlSaBIkVp1FQZqqcqdn76Pb9BRziqhOlx87FS3UibAQ+nnopIWzSmsp4LxBXSUQj5vu1F0zrKkLkG0GSF2P6vdgTEGea09CDXATxBKnBsAfZvvpxbqqA3OTXmloS2J/fWOth+RtDix3ymBG4FzUi7jeQqQp2rkOnvwYSJ306gvXkcBJWpCDWgs3E6OJ9+qvcEBBJn12kS+WRL4VWbMSyT9H9CoRW6WzlECT6f61guKJrhHifpLCUyW9DmCF7BeuqdL1HNWBLYj9jWNC8XA+5xWTWSd0uQ0upsioVuJsgZK5MD/3frv2Qil5/uzryyhtT58hkJckXGtgKOOJOTMRFHst2T6Iku61vYbE9u2+Z9vNuX/JRK9h9ge2HJIwcDe0B3JwrmASzxOO+qhf4dpv2PjCRqmY6HBoBsqVerYH+Zc2UlvSW0m92yE5NsLtosQDFrnKdZ9M9zfrvImeGBIuoiODd6UpJ7twyqcawNC8WROwrf2s4NOsJIOJTblRWU91S1LPonYqJ9pO0s+VcN43+YuaCXdRCyizkzXmSUJ2RP7t8ADBNP9GqKLpaQNVVUoLHYWIeyujioYt+R9fDbBeP8NUaRuvudnRvzg6HGbJP2TrcOPAJ8rSQyQtAaRjF3T9mcyY91ge50yV1aXSZ/i9y3c2P59TtwUa35ibdju0ru6QNwxXfuk+3B52/eO1TnHAxTdhJtSmNiavs8PEF1IEMmVs0vMgZLeStjWPU6okQ3cAdGKWbOr/o3A0rZPkrQgMJftfsoOg8ZfnQ7B57oWwWda4yxBkHgPoluGfTJwt+0Xsi60MhRqdY8TBI7sNaKi23R9whL3mNZbkwlZ9dwEWe/5qqwDUuzsbr1a+5yUYFu7mefSmFSccFly/5Ti3Wp7dbU6Fcfznj2t4X5Bx3J5O2Bl28PZlgx6nqrfgaSFndmlnuK8jA55+MZC5OE2GXSKNbDtzQrEvpWw+XguvZ6VGO/XGPmT0weJtLg9HQLkxkQzQJbVvCqpIqXYDwCvIWyoijYmlZqne2JWycGl2LcRxYkrm+dZBTqo0379CuCE3r2NpCNycg01v48Uv+jYVjqvJelO26tUmOv62kM1yHk+emoBXW9RQIGjdZ6FgS2IQvg8BBEn14aqif0qYB7bubYhTbyaCgO1VOVmJyy/30RHeeLo3BxRil107FR/lcgGdgapPO19twGWdNieLg4sbPvmQWOOcr4ixHJJl7tHfaTfsWmIV1MhYiZCWb5ow5Qqqw3WQnqm3w/cU7jOUDTXORaQdA0huHAS8L3S+8jWeYqtBSQtYPuxErFaMRtFq4b4PolOPTWXnPUdwirz/4BPEfn7O52pRJ1iL0wQ1W6xfU0aP9d3ZhO4pF8TOd6itSFJryCIZM38dA0hpvHQ8J8a6DzzA68stcboE79oDjzlcS62vX6heOsC+xO2zu1c9cDrlvFOwKm6mRnmnAsA1ztDRq7ZhIx2bDxB0l3EIPN4ev1S4Krcze5ExHCLoBqLn4rF75ttl/LKbGJe5HyLoZeO9L7LdJEVhyrY4P
XEX4Bg3G5HyJydQGwaVgHO8oCdmD2LoKcplFToeUZeAP5QYsKXdD/lvW+RtKzryVhPcnRm1Yi9DLA3QwvU49LOoXUff5AgspS6jz8NHOPwqd2PIEgeaPuOAtdc9dkuDUlHA4sRne9tgmiubU8NJn21wo3CtmcPQnXqTqKYdUOJZ6MGcXGizn01UZPYmvYPS9u+TNIcwEy2J2fGXI/wiD6d6GiZn+goe3jED44e9+vA3yncVa/oZF0dWNb2Mmm9eZbtdUf56LScoyrBZ6Kg1jwiaYkae92xhKTjbO9UIW6Rfc5YEFdK7J964l1NqA0eT6y1/gJ8yPbKpc5REmOVF5F0oisq90r6me33FIhThTzcc44i1sApVr+/312595ukrwHfcCgONt/Lp2x/ISduivV6OiTcawrtF2brLer2OzZg7Jo2OBNqnpZ0o+2122OzMm0RUowh6iyS1rV9XU7csUAFAk7RNYuk7xPrzcWANjE9i0jWei52Sz9PSz+3TXGzGr/GEpJWJNR7t7Q96wCfH5HY5AK2JKpsb1UDCrXlyXTs2rcG5rW9RYHYE2bsTPmh/wJvsf3aNJ9ekptvGeF8q9keWCFB0myEkswVRMNB0xA/D3BR7j1XMa91o+21R//NgWLPSUflYxnCUvRCJxu0aYzVT3SggSkjOnA1UT8smgevletMsZcEDifyhibyfZ90gebc9DfbAdicaOQ/yfaluXF7zrGx7XMLxfoVkUM9ibjPxi8xoAelyae1IOlc4KO2Hy0c91LCRrW9JtrGZciyVxIExpmJHO2jRO5+r5E+N0K8McuBp3nvFheyEVc0R3ySobnqgYlr49qCqtl0SjrN9nbt9xRKCdv1/WDeOR+TtH5mmH+r1fGn6Nx7Ovvi6uIw4AaF9yTExPHV6Xg90w01iDYjnOth4GEyJL56BrVJhAzuvJmXNgSFkse30ZHTXZzoGBbhwf1HyLcxqoQaNnht3EBMoBv3EFlulXTMMJ8ZFS4s69mKW+sZ+QWwMFFMKIZe8o2iO+IR2zcVCP91SV8hxviLgJWIhfzpI39sqnAW0f1+PN1y6tMMDd9FVtL+rbmPN7L959bxrPsY2Nb2N1Ii5C2ENPkxhJVdLm5TIdl3dStDDYHLqKjNRvibt4kmBnI3pR+VNKRIWqCYNbNbrH/bzyUSTi72ILzTb7S9gUJyuJTd3s7AXsALkkr5kbfnvl6YgtZDEwivKFmUbpDu448CLyUU1RYjxossf29i3Nnc9n3pPB8g/N9zE9Nbpp+7tY6VuCc2IciKt0OsNyUVWxO0CT5E8mYWIgE+MMGnJ1E4a4r571zS8Big1hrxKUmHEHLq7WJ9FtFQQzt6StlaDEEJ8k3lfc7lkjalsFJkGxXGue2I7+HjRGLolYQq0HjF0+3id7r/iudFapJvUvwS5Ju+5GHybZd6UcQaOOFvkjay/VOYsofKVu0B3mX7880L249LejeQRcCRtDZwbysPN4+ktQrs+a5nqPR/v2PTDNt/kLQyoeIAQRq6KzdujXl6mPM0ku9H2f52Zrh7FfZIM0lamrD6yFLjTDiCoX+rI/scG48oTRIqumaxvZWig/xionBTBK1awNt6CEifkXQ73YqJ4w6SXkus8Tcl9u4/JDr3B8FIzQklrPugor0VgKQVCNXs9no2195qBdvLt15fISnHXg+oO3Ym8snHCJJoo9pzTCaZcy3br5d0B0yZT0vkW6ZAYfti25NzyDcJOxOW8osS91qTH3mCsFnNQi/5RmF/9hihiJujqnqHwiq5ODmEUPl+U0OeAm4hxo9pto22/cb0s+++X0l0ABiYgEO4k1ypsLtuW2b9v4yYUC/XCUFaOIrIkUCoqXyfAjll27+U9AXCyucIYFVJIuzgchslFyP27P9QNISVIO4vQzRzfBg4IpEZT7b9ywGubznbDwxHFM0hiI5EPlWmyu4YENXmAx6QdAvlbOUAXm77pNbrkyXtmRmzwbyOpuePAKfa/pKkHK
JTtRx4T31rJmBBwhqwFP5l+8KC8cY3AaeF17VfKOTfVhvmd7Ph5AmbgT2BsyQ9TNxoC9NJsI9L2D5V0aneTHQfaIoMLxYMM/BOQU4BoHLxuz2ovQD8DtgxI94UJCbvPgyV3RrUt/DVKe5xwDm2L0iv30XIRY4rqNsGbweFfHGWDd4wWHa4xL/tgwcNmhZ92wCvtn2gpFcCizhTjnSYZ+VfxILzU9PKIldd79t+WAtYUdLMtt+VGevttj8taRPg94T1ydV0OnFy8ILtowvEgfDErI1tCGnITSS1x4uVcu5jOuSj9wDH2f5ZIj2VwFrANpJKyL4fWuiahoULyGwOg7YF4mzExjRL2SOhVuHmGdvPSELSS9Kmb2DlwgYKS5J3unBHrAdUf/ofRy3Swm7AmsBNALZ/JenlBeKuY7vd/fBjSVlk1HS/fdb2D7Ovbiies21JTueas3D84gSfdqIwrV/eT8eqZdxhDNaIZxAFm/cSdlTbA3/LjAmhTjeko6cEEjGtKSpca/ucUT4yNai2z6EO4bL4/qkVdybga7a3oaAneWXsDpwgqSFNPQ4UsfkcSzJZIVQhD6u/NfBZw39imrALcIakRqn3T5Rpgpsprd+eBVBYibykQNyj6SZVPNnn2FQjEQsWA2aXtCrdnfpzZFxn+xx7ADvRKTCdrrD4OHKEj00NqhJxGzgUFxagzHy9O7AvMZd+nyB1DJxQl7QO8AZgQYU9WYN5iIT9uIftj5eIU3nN8jfgF66j2ie11IokvYEY52qcqCSZ7ERiDfcOZ6pl2t4g81pGRFpzH+RQJDtGoVJa0t7qS4TSyfLABcC7gGuBXALO7ZLWtn1jOs9aRD4yFzXHzlMJ1Z5mfN+aaGDbPCPm82l92Oz5FiQUcbKhsFI/kcjTStI/gQ/nkHBsHw4cLmn3AvPc1EDE3mQb8kiCNckhsv2UpB2B7ziaD7OJuP3gMqIDv0v/Zk3/iqBirhNgDtuntV6fLmmf3KCSViLUb94DXAq8z/btCvXhG8i4PyQdTNSS76OzZzdRbxgYqfZ0KXCppA2I2sXH0j332V4S2yjYi2h+60cUzSWIViOfjgFR7UsDfm40/F3StsQaGWArYlwqgZklLUJYZ+6bG6xyDrxd33oB+GsmwbIXVyga4X5Md21yYNLXuCbgSPocUcybXdITzWHgOeC70+3CRoHtW1JCpSkGPegBpOPGGolw86Ii3bTRDLySDiQUOE4j7rdtCJuoHFQrflce1BoFjuMom6Rf262uWNsXSvpGwfilMBakBYDVJO3L0ARybvHmOyQ5UiJ59STB+s6VI/0W8BDBIhfBHl+K2KSeSGywpwXViQttuNVxWQCzpJ/vIew9/hU5jCI4L3VsnEP3pD/NUn29CbHUxVJ6DXA6YZn1Cwpt+hP+LOlY4G3AwQp/z1KJt3cUijMm6mmpqHc0sJDtFdKGbyNn+snb7iqGKeTErx3m16cFTeHm28RY8SfCoiwXDym69M4lNo6PA9lJX4fs77eJpFtxjNTFkc6fLSE+3jEGpIVnHUpLzflmZgRy9TRgKYUsctezBwz87KX7bR8iQV8aZ6Zxcz6FKtCHibVcKVQl+KTE0LkpaT9eu5xrrxEXsH2CpD3S/HKVoosqF8U7egAU/uyvoZMQ2lnShrZ3G+Fjo6LWPqcW4TKhyv7J9n8kLSFpVhf2lK+I7xLy2M0c/R6iWakEkboamayNgkXZKuRhuvdRxayBAWz/Blhb0lzp9ZMl4hIEw8slNV2cOwCnFIirdlNLmmdz9jvvAD5EqBYdRoeAM5nIU5bAjoSCwb9hSsHlBjoF2kFRZZ6WdLDtz/Qc/nSfY9MM208RSf/sxH/CrMBcxJqzXWR5Atis0DmqQtLrbN9bIFTNfOR/JC1eaW7aETgxkThFkDirKJ6VJJPZXqfAJXVBUt89tDOVZNI4cQFhsYvt3+fE64PNgJWBO2zvIGkhMprVWnvJWYiGjj+m10sADxS43pp7nBqqPU
cQ+cKXS/oq8X1n2zkmnAB8zPY1QGPNdRKh+p2LRyTNbXuyQg4jWOsAACAASURBVD3k9cBXcvIhaW2/me0zm2O2jxrhI1MbdybgMdt758Ya/hRah6g9NU0GRYmGJQmGtqs0ANTKdSZcKOmzwA+I8WJL4AIlpdVBcuwJRxKK9Z+3PUXhMxH3cp/DjYlm7WdH/c1pQJrntiUI9X8lyM8/BVYh9rBTvfe2/dH03H2h9J66Nvl0lHNnEdXatQFJ77V9/ki/Pw34MHHPfZO4j68n9iklcABBfL828RqWBH6VG7RSDvwr7uOU1HssA40y1uqtY1mkL3kCWL1JOsj256b3dUwtJG1OeFcWW0jMwNhBfXzN+x3LPEdX8Ttjsm+6TYeFMyTvJN1mu7jalKSLCbnNZuO1DbCe7WLF8IkESQ8SnbL30CIu5HYRSbrdSY7UHR/17Ht5mGfkTturlH5WclDz2Wid4+vEwvhpQnVhPuB829lSlpL6+UzbGZ29knYmOqafoVOUzorZin1twyQvCUlzAO8E7nGoWSwCrGj7ktLnKgFJ7yUIb72EumwbFYXixj7Asa1n+he2V8iN3XOeZYGfuZyHaunCTTv2mwk7kotKJH0lHUrqVmkXcUpA0o3EmvBu4r5YiejUe4a4R0rbUIw7SFpipPcLzHvfAP5JEL12J2S+77OdVcyp9eylOeTvBAmnLWedsy4UUSxcDng7ca9d7EJe5Cn+foQqwNuAg4hkwPec0cnYM2dPIja8b65RxJgIkHSj7bXTmvkIQpXsR7aXyoz7daLzv1hHT4r7APDaZtxMybh7bb82M27Nfc6U9XFJ1No/pdinAq8lEqXtMSNX9r0KUvLuLKLLez0i2fs+2/8qEPumEuvtqTzXAkQDyc8yYpxDEE32JBJ4jwOz2H535rUNIUQMQ5IYJPa8RBfneunQVcABhf5+7yQk8AEutX1xgZg/Bq4kCjgQa4ANbGcp7Ura1D1k9VJIxeQ1nCxIFBYlt9heMTPu3sDSFJynU9zbbb++59jdLqAMnIpvewOvoqx62BK568vphX7f93hE7bkpjUWUGHtaMV9mu4Qyazvmmba30FAF9OxGA0ntZ3c2wl73dtvZZDJJpwDfdgFb7j6xb7a9pqTbgA0IAuP9tgey8R2DvWSVsTPFPp34ntuqPbvZHqhBKa211ybsUt5K3GeX275/xA9Offwh6+RSY1IzbyRSz1eAQ4Av5q7rJN1qe/XRf3Oa495Qa0+qsBbaG7jO9sFp7byn7U8UPk/2WjbFuYI+DU4F5upquc5hcusNiuTDS0Nh8bV56RyqpF8SggMnuYewL+kzHkC9vtaeOsWuQj4d5lwllfCamMXWcZLe5Z5GKkm72D6mRPwaqJED7/1OFc0Wd7ub4DquMK4VcFpYVuEJfZHtkl31tbCf7bPSQuKtRFfS0RTwFpyBMcG/JW1Dhxm7Fa1NZA6GK36T4X1HMKTfAPw8vd6AYEH+jXxJxGIKHD3YikjmNdL0V6djEwIVJuW/OVm0FEYtOdKnJG0B/Ci93oy4p6HPQnxQSLoMeJ74ngdhDL9vhPeKyIXa/mwq+P7L0f31FGGbkQ3X6frem+i8KZpsSviSpOOBy+keL7K+Z0c35I9br/9CqJSNV3yLsCK7pzSBg5BOvVndKkvZUovq2Mop/XwEKFG42YPokpoMHJfY758tQZ5Ka6ylbZ+UxrbFCCncXFSxJEl4GNjJyXZJ0grA/iUSpxMFY1AA+QzwEYLQujMhdX58gbhVnj06FrVtlZCsdaEdnaypeFeEdNMn/ubEc/IEofj5xQIEn/ac/QJh61hkPp2g+EoqOH2K6HSah1D7yEXxjp6EXwOL01E6eWU6loua+5zLJW1KecJlrf0TwG/Sv0l0KzqMS9j+raStCMW6PxJ2HE+P8rGpRXF5aOhflLX9GJBVsLC9SfrP/VPxYl7gopyYCW9j6JrtXX2ODYITCWXLLdLr7Yh13YjEuNGgUBS4xPZFif
S9rKRZnK8avQtBWPwCMT5cTkjj5+IVigaqyYSyVbH1LPF93pQIWhCNHSfkBExE2R8SRNwi87SkXQlC05KS2tY0cwOlup4b9bDjKaBqJelbtvcEvq2kZtGGy9tc10Axad2+wcvltarMTb0kwFSgzSIBSpqU6gqXkOzpFGqDhxe45D3Sz+KKQ7Z3b79WqMH+oFD4krbcvbg1XetxhGLdk0Szy0CouZesMXb2YDU6qj0Q6+YHG8LWtH7fDpW3o1Lhu4T6Ty+uUiiqfp+OasiVKaeTu95q28x/1+Vs5i9LJKpizS0Jd0r6KTFPteNm5TpTvn6j9nxk+7dAFvlmGCJ2EbU6IqfcYDZgU8rkRWrlW2oqqi5NEPWWJ76L5nw5TbNHEs/bU8R915tfzyVmLTvcvncQ8k1CrT01dDs4TCGfkm9jOAQua6vaoOQ6bj9Jz9r+OYBCTfstxNo5C6kBYEfgdXTfy7mqg8Vy4Bojp6QaDSgTRQFnQ6JbaG1isjvJ9oPT96qGR8P8k3QQUYD7Xk024AyUhaRXAYcD6xKT3nUE+/j3BWL/ClinZPFb0iXA9qkojUId4mQXUJMZhiU8LtnBY41S7PEU660EAakocSERybYkEgunkORIbZ+VGXdJ4hlZh3hGbiSKQn8GVrNdwroGhW/qIsT3nC0dOhaQtDrwsDO9vlvxVmDoYn7ghabC0/sDidRSFKmjZzngXjpELxdYsE0opKLKW2sQhlMXxMcJu7PXS9oM2NH2u0qfqwSUFLEkvYMoinwBOC23A0BhS7M6sXlcJo0VZ9leN/+q60HSvbZfN9qxFyNKFABSAuteD9hROUrsifbsVetkHYv4MzBxIOk8Yi04L5Eguzm9tSZws+31M+PX3OdMBuYkkrvFCJdjsX9SRWW5EtDQ7v+XA/8i7XNKFPXSeqsX9oAduE1Rtt1VV7Ao25yjlzw8l+2ByMMtQsRSdJPd5ia6qLctcL132l5ltGMDxL0NeBMwP2F5eith+7FNTtxaqLGelfTq5m+fipmNiug1tu8ocM33OFNFpyfevMTf6yC6rSEnFyIXosLqYZJWs32bQi1zCDwG9sGDIO1zmsaInWkVVmwfUOF8JfNaRecmSWcTJMDGom47YGXbA5MAJV1DFNBXItRN7gHOy92f9pyjmjJZK94swC9sZ1sZahhVmdJkl5Rrn8f23aP86nRD6bGzJ3Zx9R7VVe/tt85qMPB6K8U+n8gfv43IVz9N7Bty1dqrrMHVsczsjZud61RSPc2N0xOzmlrdMOe72faamTGq5VtSnug9DFXYy1Jpk3QtUaj/JtFMtAMwyfYXM2JuP9L7trMsW1PNYl+GKrbnKLRV2VMPc675gB/YfmeheAsTeQsTY9BfC8R8iZN1mKQ1E7FsyrGMuC8DzieUot5J1F+2chkl+LMIIufWhB3VNoRa3R4jfnD0uMVz4KrslFRl7TkRCDgN0qZvK2Kg+BPBoD7d+Z0yRVFrITEDEx81it+S7ndL3l2FJN9rQpXkhWuh9ga6JnFB0nJUkCOtAUnvI2xvihIXFD7TXwMWtf0uScsTRLis7sJhznUKkcz5pe0tR/v9UWJ9CVifIOBcQHSzXusMtQxJq5I6LSnLokfSgyUSQBMdktYgLKiuovs7zpbgTuS37xJqAI8Tii/bDkoQVR0/1nb8Rlr4cOBK2+eUICRLuhNYlZDdbuRpiyUUJM1PSE+3iW9XF4j7fSLh27ZfnMv2hFGAq4kSBQBJPwF2t/3HUX952uIWffZ6YhclWqaYDwCvIdRISney9osPDFZUl3TESO+XmJ8mIlJxfieGrpWz1oYq3NHTKmyuTqindUlZ5xY4J+I+pybSeHEa8NJ06O/AB23fO/2uaihqFJlqo3ZRtjR5eIwIETcA+zg1V0haFzjUmTYM6tgk7w7MbvsbJYg9/c5RKFbx9WxDNpF0ue23lrjOnvg1LWUawpAJslfWfqEVd3/gUeqoh00Y9BTfDgCmFPJyC28pfo
1iU5W5qSIJcD5CjeVkYGWikHU+cJXto0f46NTGL178VofwDKE0tDxwpu3PDv+paT7Hy+nejwy8n5K0nO0Hhsk3GPjHOF0L1Bw7X9rn8OScmtZYFr5LQhPMZr4mJB1NqDlnq+uopVZHqJI1KEnObt/HkwhlpyNyc8GV8y0XEM/HPbRcAWx/OTNus5abQtwrRSZWqEU+Y/s/6fVMwEty64mSHiQIHL3fxbgbj/uhMPn0I8Qa6+fE2PlmIi9yYmbcfmuAUvZ9LwcuI9YwHy5FvFRHTKTZ88xCNAVkkQNr5MDTfvRO2/+WtC3BvTi81D1cY+05USyomqT8dsC2wB3AGcSmb3uiQDmesAWxkDjU9j/TQmKf6XxNMzCVqJXwTvgcITlZsvh9uaSLCVlICMWTyzLiTUEacHelk6S/kvDjzCW9FZUXHgPUlPaG8H0vTlyQtBTwO9tHSVofeJukv9j+Z2bcWs/IlsC3Etv0RNulZFRPJkgn+6bXvyQkSYsTcGxvDyCphPTyZkRC6A7bOyQi0emjfGY0HEssLrsW24VwvaTlbd9XOO5Ew1cJaeXZgFlLBnbI0W6YNmOTbE/ODHnYSKcj35LkNoV6wauBz6XnosR995xtK8nJp++jCNImbA/gFcCdhPriDeR/FxAdMbvSkSe/mrAofVFimAJAbvft/MC9km6mO4GVZTNQ4dkDhidaki+pm60OMobxP0DMz/MTybYZCPwEuIZY05dcKxe1lGkINmmd+XngH8Qa66wSRT2G7nP+j0L7HKhDuKy4f4JITO9l+4p0rvWJpqQ3FIhdDGORxC1NJrP9plZRdg3CznAZST+gTFF2ExJ5OJ3v4Zz9Qvr//JekLwCP2H423Q8rSTo1d7+XsAtwavquIcboETtzpxKStA6RhN0xHZupQNyucxSMVWM9O0nS54l7bK/eNwsQ96tYykjaj5g/msLgSZLOsl3COqS5t9q501y79iZRvz9Du73Hpapzm2SjUOHKJt204vUWm46UlF1sot7c9LSkN/aQALNsDCVdSlhZ/pcgWjwu6Q7g03Tmk0FjT1Em01CrtutzYgOHtv77BeAPth8a7penBZI2IvICixIkuCWA+wkrikHxKSJnOFy+YQGFuth2GeeogZp2XLcTFq2Pp7jzAY9I+ith0XHbtAa0Xc2OVD124pS1Xzy2/be3/RdJ3yCs4QZGIvbsBSxu+6MKi6BlbZ+fGbeWPQsp3mN055sGtdn9HnAhFcnZxDq5UWl7gSDK7DjiJ6YCtfItCa8o9Az34llFc8ivJH2cEGOYq1Dsy4ENidwywOzE85E7r/7N9k8zY3RB0ibAz5s9WNpPrW/73AKx+5JPc+Mm7AOs6rAabrgH1xP5kmlGym8uRlgkrUpnLzIPMMegF5mIlm2izazE2ngzSaUIl02e4p+JVP0IoWCbixo58KOBlSWtTKw1jifyp30VLwdA+bVnIaJUVSj8kJclGPUn2X6k9d6ttlcf9sPTESXZ4zMwdpB0PZHwvo1Wwtv22QVi30wUVnqZprkSch8gZJwBrrZ9zki/Pw1xjwdmoVt26z+2P5IZt6i8cC2MBXs8neck4JDSxAWFSsTqBFHmZ8BPgdfZfndm3JrPyDyE0tkOxALjJOD7OYtvSbfYXkOtTsVc9mordjXmrZKMp0KmfQNi43u/M+xVVNEOUWEhsxSxAXuWwooLEwWSfmF7hUqxiyZBJG1h+0xJS6YNb1GkzegqwG8TIXkBYDFnyk4rfL2XJsiRBxGd6t+zfWSBa76HKLzdaHsVhYrY15whNznMeV5KJALGrQR3TVTsNhkTmwFJ7ycKnjdlxrmHDtFy5YZoafttJa5zIkDSfURy6UKCjNRVNC2YLJxQKLVOmZq4Jc8laSWCUL0p8JDtDQvE3ITufU52Mi/F7Uu4dKYiZ639U4p9l3tUdfsdezFAheWhW0XZrYE1W0XZ9wPr2c4iwbfW9Y36y5zE/ZZLiGjv9y4gyHtZ+70eQoiI7n
qIYqRzySFprv4UsZ8+WNH1vGdmU1LvOb5i+wuFYhVfz0paFtgY2JOWxVAD53dlV7GUUXROr2z7mfR6dmIvnNv5PgnY3PYPc+IME/sBwi67N3fxWOlzlUbpvXv6+72ht9hU4O9XZW5KxZVTCZtLESTfD9m+KyPmHISV+umE/d1ChKrjgUS3960Zsasrk9WApLuI4v9ljg74DQjlieyi+ijnvcT222ueY1pRa+xMsY8DfmT74vT67cR6+SQih7hWgXMsQqgLZVmdpFhV7MRT7C41CIW6xz22l8+M+0NirP+g7RXS83597j5HlexZakKV1OpqoSbhS9LBhCNAUYUlhfr5/QSZ7kBirvqG7RsLxK6lAPdWouZyOd3CAIMQvoa9rlLrl578Xmny6fUEUei59HpWQulyIJKTQsHwQ8S+rL2WeAI4Jec7ro2UFzkbWJFoYp8L2M/2sQXPUSQH3tpLfxH4s+0TeueUzPirEPmFYmvPiaKA812CZbousLrCY+9o28+MR/KNhrLHFycm6hz2+AyMHeZwQW/eHsxie0iHUy7SIF5jIF+jZ9P887Q5y8V5kj7G+JcXHgv2OETC/06FX21J4sJ/bb+QCFrftn1kSiTnotozYvsJST8i2N17Et2i+0g6IqO4/u+UXGrUMtYGBuqO7YOazNtbFczx44hN5JOEEkcOLpT0UeA8yj97RTxY/wdwgaS3l97cJXzY9uEpCdIoA57G4J1CnyW6B35EbHBLw0SHwnuJRMWctIjJg0CSCHWF5YiNzLLAF21fmnepU/CM7WckofDpfSAVSbIh6UpgI2L9fRvwqKTrbX+yRPwJhqLdJg1KE21GwFrAipJmdp4v+dO2/yvphURAfZTojCyKRJAEOMr2t0vHz8QxRAJoSeK5aCAKdL5PYJwv6d22Lygct3hHTw8eJTqmHiOja0rStbbfqE7HV0PM2knSf4lEyCG2v5NxrXvQIVxu0BAuM+I1qLV/AvitQoHitPR6W6A4gXaCYCnbm7ZefzmRUQbF+4mi7EcJ1ZeFCBWATYnGg4GR1i7nSzoWmE/STgR5+LicuAnt/d6RhfZ7TUf9ssQz8hPiGdwWuDkzdjNXX5UKY03XcxHyTUqeLwP8QNIsLqA85bBHbheuZgUezIz5IHCwQur9wpxYw8SvpUL1MLGWfya9fgnR8Z2FtBbah1jjl8a/anzHY4TS9mSPEcXNBpPTsVxUmZtSsWPltEbG9hMFYj5FqOs9Yvt9MIUQ/ydChWlgAo47ymSHEySIySn+PJLWyiHua2gHPERO61bgU85rpnne9mOSJkmaZPsKSd/KiNc0iQ4L2z8eb+QbqK7gt7btnVrnukTSobZ3lvSSQuc4jVBgOtv23pmxmrX3u4FTbd+b1jODB5Q+Ryhmzi7pidY5niNqf7lYyvaWkraCeN5zrznhNbY3l/R+26dI+h6Za8MGkpYh8soLJdLQSsBGzlCWU121OiS9gaGK+LkKvqVznW3cCJyTyL7PQxmrNnes6p4kmohL4t+SXt8QpyStRpk9+w5ELnUWOsIAgyouNZjU51gRzkHl/N6vgZsUNvYm9oJ3KzUieBobDhziCqdI2tQFGtRHQ0nCJfGsbUo8101zy0K5QSvlwCenuWRbYL30XM+Se60NbN9J4bXnRCHg7EAUWY5Ir7cmbozNp9sVjYwDiYJ6F3t8Ol/TDEw9aiW8oWDxu09iuhePkZ+Y/o+kpWz/Jp1zScrI4FeRFy6NZgMNbNXLHicS/6VQi7jwfNp4fBB4XzpWYlKq8owoFAU+RHQgnUp0nz6akrP3AYMScPYi1H+WknQdsCBh71QCL9h2uvZvJ+ZtkU4h2x9L/3mMpIuAeXKZwgTTHcIOb8qpKPDsVU5WTCTsCuwt6VkKbu4SSidBHlOS1Jc0RIbUmbY9wHeITd1bCALOZIJVv8agAdPzdoHDY7kU6aaNhxLx7VzgUkmPA6Xu7XkTyfAjxN/vS+qWJ38xoVYBoAuSLiOew6
OcKT3dhu3PFwpVg2g5BLZfm0hOWR7ONWD7COAISUfb3nV6X884wh7A59Nc8kI6VmIuaVvKTOnoyYxJItZvQayxziKk9AdWdrT9xvSzr7R+i7SXs8+pRbistX+CIG18mUiSmkj+l5C/n4goSiarXJS1pM2JPUlp8nDx/Z6T+oqkq4HXt4rI+5NvFYnCfuoEortycUUjw86tvc+gcdcnksa/J8a3V0ra3pm2cn1QssD5c0lbM7SQdUBm3C4UJOL+i7D6vJQYg94G3CzpCMi2Vr9MoXL5Q7ptRHPzLldIOoQYN9s5uHGrBiDp1cBfmv93hdLQQrZ/nxm6aLGphfbcBGExkD03JVJCUxCaudn2Fno+2gTOa23/iGhKKYGj6W5uebLPsWnFt4CHiCZBEZacSxHkwBMJFclB8U9JcxF/tzMkPUrrGRwQzXz0csIy5efp9QbE+m3cKgG0UXDsBPiLpM8AP0ivtwT+qlB/KWIPb3vDlB/KUpJJKG6/aPsg4CBJB9n+3KgfmHY8l8bLpvlyKVrjfgZq2bNA5AH2AY4FsH13IvjkkGW2pVut7uuE2mc2AUfSacTYcyed/Y3Jt9AuTvhq4f8RJPt77HwrGHXbIg1BgTwqREPyWZIeJr6bhYlxPxdrOFP1rg9ulfT/gKPS64/T3Vg1MCqTT39Dt9vFT9LPXGu/6ySdACxq+12SlgfWsX1CZtxelNyP/IT4Xm+jzJjZoEYOfEuCG7Kj7UckLQ4cknuhkra1fbp6rIFba8+BFWAnigXVfe6Roet3bLxAyRZL0em2aurmeFFKQ09EpMF9TmLAKVo8VSic9MKu4D+tAnKyCmm6k4juFRFdgDs4eTu/WNCHPb4xUIw9Xgtpkt+FkDf/fkrkbGH74My4VZ4RSScDJ/ZLkkp6q+3LM2LPTCS7BTzoAt2QKe5VwEVEgulNROf3XYkcUCL+B+gQv651pr2cpNmaTdhIx2ZgfEJhV7cYkQRZGZiJkMgcyNJP0SX8emLhPsQaI7fbQB1pyLb9Wwk58lMIwtsto/5y3nneTMhOXuQkS5oZ7x7g7URxaF/btyg6n19UNm0Akk4l5E27CgDpX9bmpuc8iwKLEB2HR432+6PEWoFIZrbtZXMTTe34r6IM0RJJB7tHqa7fsRl48UIFO3pSvIOAH6aOoTGBpEVs/yXj8+cQjT57EkTRxwm10lyr1hn7pzGAKliTpLhTbDlLEgNrrV1q7fdS7AeBlZw6K1Mx/O7cxL2km4hmiJ+21ofZFq4K296tHeoyTVf59wddJ49yLgHL2743M85FdBLebXukw/KusO+5FiDWQwOTqBTy+sPCGdbqtXJlkvqNvXam3WBNSLqVsIpq2yJcZ3vgJoYU50sjve9M67N0jpmAOUusL2o+H2lM3sP2P9Pr+YHDbJcgDvWz48ja8/XbQzfnyd1fKywRnyHm0m2IefUMF7BpSwSO7Zv1mqJj/2Tb78iNPVYoMXamOC8DvkTk9yAaO79M3OOL2/51RuyX071H/WPGpTYxq9iJt+JvBKyXXl7pAs0yCluvfYk9+yWEk8aHbF+ZGbeaPYukW2yv0ZMzy7IaSvPeJq3xbT7gxyXmvURKW74EkaUnbtFcZ0/sqwmroSJENw1je94gN4+azvESgvDWrLkfBCY5U+0kfc+HOKNRpk/MOYH9CFtxiEbJr9jOJXIi6UCGJ5/uanv93HOUhqQLiVzAvg4bv5kJy/kidaKec5Xaj2Tvw4aJWzUHLum9JeaOFGtn28cOs1a2M8jfE4WAczqRrLgxvV4L2M32B6fvlfWHott2Y8K25mVEQXYND+ghNwNjD4Uv3dJ0L2BLTKBjWvzOTUynGC+hNeHnTvatuFULWSWhSl7nI5yvuFWEWtKFheIVfUZSouYy2xsUuLze2LsRCYR2cmUr56lDNbEXJpi3t9i+JjFv1y9xL0v6DqEG9P10aEvgN7Z3y4g5xBez37EZGBySLrf91tGODRi7ShJE0oK2/5Z7fX
3i3kR0vN2SiDgLApc40wtY0gPEs/EHojuvlG1fVSi63/cjyHQfU6giHOJuC40XBcaiAFAS6XrXJ9YtFwDvIv6OpdTUmvMUkZEdZqx/UZK9/hcgaX/b+xeK1dVN3hzPSSj8L6AC4bLK/qnPeb5GFG6OL1Eom4ioQCarUpTts3YBoOS4XGG/ty/RhNI0AGxMEO0Oyox7k+21VJ6gPWSeK5zkrVHgrJXwfpntv5eOOwNjg2EIHOO2qVOh1rALQZK5BZgHONx2VkdyrecjxZ4y/ox0bMDYPwauJFRvAD4GbGB744yYNwDfpKPSsxmwl+21c4v1NSHpftuvbb2eBNzbPjbekHJ8axJNIjfb/ut0vqRhkUgshwGLErWnJYD7bb+uUPz5GZr3zVaVUxD31wTOSIe2InJG2Sqz6qi+irCZHddzYSrWf5xo8n29pM0IZYeBba4lnUuoTnep1RFEhiy1OklnAZ/IrTX1iVuN8KVo9l0SuJBuJbwiTV/pHKXX4FVy96netBTwO+K7KJpLLUnCTfFqkk+XAfZmaF4ki6hWmlSX6m/DwvlKkUj6LmFlfE9urJ64VXPgNepZkta1fd1ox6YFE8WCajXgeknNBndx4MHEohqPBZf3E/LHn6TDHn9RJzUnEhKzeQ/gFYSk3tqERGYJ/+XrGSo72u/YwKhA3liNzmS0iqRsosxwhSzyJQtroYrX+XBwHauI4yl0n9V4Rmz/R9J/Jc3rsP4qiZ3cUj6w/bikncizLWhiPSLpDGANSe8lNuil7uO3AK91YsqmgsBArOaURFiM8FpeFabIe84DzFHgWl/0kDQb8V2+LCUq2t/xYiXO4ejYaDyAm4JsdsGtl3xTsKB3BFG0WUjSV4lE4Rcy4jUYs465wnPqz22f1bxwdNi/6Mg3UI9go7Ag2Z9IPs5MJ6GQqzS4GdGJdYftHSQtBJyeGbMfsmRkJe1KJPmXVLe069xEl+UMTExsRNzXJVBLXqXGmwAAIABJREFUXnhCo0SjRQ+K75+Gwc1EEvWbhAXRiwa9ZDKVsyZZqSHfpHiPp7VzLsZi7VJsvwdg+6upMPSmdGgH23cUCP0nSW8ALGkWYl95/yifmRrcKul4OvPzNmRYhzUYrsAJlChwXi9pxVIJb0mT0n7hEtK9IGkP24dnxj3T9hZNDrb3/RI5WYXt9F6EEsRHJS0NLOvMzlZJX+x3fJwTT/8maSPbPwVQ2F1nF5FrFZuI7usnJG1DFDg/S6wzci0Bij4fPZgkaX7bj8OUAlepGskuxD74C8Tzcjnw0cyY2wCHEzksAzcC2yqaAz8+SED1t/YAilpoXy7pYrqbyi4rELcKUq7zi4RlloAjJR1g+8QCsfvZ1jRWKsd6sMbcA4l87GW2V5W0AWE/lI1h8r43EHnKXLwHWCXNV02u8w4gi4CTvuPvEQp72eobI5zn/cAjtm8qEG434LvAcpL+TBAjtsmMeQ4d8jQEIbAUXgbcJ+lmusksWbZLtXKdCb9L/2ZN/2qgyBp8DHL37ywQowv9SLiSskm4CU9J2oJu8mkzVuaqipwFHEP87UrZRQP8O9X0mlrO2sRYPyhuS7FEcCIeT/89H/BH4NWDBm6t62cGdpD0W8oSs2rnwEvZ1LVxJEOf5X7HphoThYBTfHCojJ2JzqA/ExJLMzCxsAfBFL7R9gaSlgO+lhOwZwJtP7DFi98lyRuq5+05VoWsUqjpdQ4M22WRJXHae4qCsYo/IwlPAvek77ndGZr7/c4kSS0iy0wUWnSnheAhxIam2aDv4/AQz8WvicXVH9LrV6Zjg+AdwIeIzXOb5T+ZzE3uDEzBzoSNxaLEArl55p4AiihZ9aBkQbYXRQp6ts9Q2AE05LyNbWcXWGz/YfTfKoPChMgbJd1JyJFe2IxJL0ZULACcQBDgu6TqC+Bph6XsCwq1hUeJMbkobG+oqCIParP7PaLwcRBR/GgwuURnzAxMN5Rcw73C9kTbW48JShEuK+6fhsD2uaVjTi
DUIpNVKcqO0dqleBIydfMW6+hN2IUoJC9GNLpcTBSgcrFritPsHa+hQMMFFQuchBXJhxT2SyUS3ldJ+jewsKR3AvcA2xPfdw72SD/fmxlnJJxEPM+NcvifieJIrrR8uwg7G/H/UILwVRO7AGdIauaihyhDsqxVbJolkek2JtTrn5dUYp9T+vlo4zDgBoWaA8DmwFcLxMX2o4RNRjGkotX7hnn72gFjzj34FU31OT4uaRM6VkPfdaatemXsA6zaNCGlPMD1QDYBh7AmXZBuMtJkYBngOGC7AWI+b/sxSZMSAfMKSd8qcK1QL+/bYD7CPhSicb0EDiW+169LugX4AXD+gOSmkbAWsKKkmZ2hVANTnu0NFTY+k2xPzr04Z9hBTgX2rxi7QdFcZ9P8JWmu9PrJUrFbKLUGr5q7r7QfqUXChQrk0xZesH306L82zdgL+CnRWHcdMe4PrJxt+9UAko4DzrF9QXr9LmLdlYOa63qonwPfuVQgSesQe5AFJe3VemsewhJvYEwIAs5YFloKYW7gEkn/AH5IyMiNW8nCGRiCZ2w/IwlJL7H9gKRcq6H2BHpo6/hk4HM5gSUdbPszPYc/3efYIFidCt6ejFEhqyBqsserdlm0UFJtoMYzAvDj9K80LgJ+KKnx5905HSuBfQmLwUcBFBY7l9FhZ+dgbuD+1FlggqB1q6SfwrR1GKQN2CmSNrV9doFrm4EepM7SwyXtbvvIMThlsSKLeuQUbZ+bjh1WIPwcxGLVwOwF4g1BqaJpilVzTl2G8EX+MHCEpDOBk23/skDsiYZaBYB/2b6wYLwGtyq8048jkglPEh2ARaChthYDqZ05FOT+BWyVCN9vJJ696+gkOGdgAqBnXF6tz7FBUbObfEKjIOGy+P5J0pGM0OVXohlgAqIWmaxaUXYMMK7sG4eDww4it7u7X9xniWJFMUuBhJoFzqzCXS9svymtV24jCqcfAZaR9APgqkELDbb/kppYTnYFy+iEpWxvKWmrdM6nEik5C737GUmHEqSvcQvbvwHWrlAsrFVsOhb4PXAXcLWkJYgmlFwUfT7asH2qpFvpqHl8wPZ9JWKnRoOjgYVsryBpJWAj21/JiLkgsBNDmxey7BHHAolwM55JN208RuTpG0ymnArHG2yv0Xp9njpWJQPt/YB/pnHiaoK09yjdpMMc1Mr7QjSL3CHpCiKvtR7dzSMDwaFoeVWar95CPDMnEgXUYnABq6wGkn5DkAquSf8GvRfGRK3O5VVD+6EooVzSCoTS8EvT678DHxw05zIMiqzBxzp3XyiXWouEW4V8qo6l03mSdiPqUG01p6y8me3bFfbWyxL38oO2n8+JmbC27Z1a57lQ0jdyAo4B56JYDlzSB4Y5/goA27n1xFmBuYg1Vpug/AQZBCoAvYibb6sjLbK3JKSVHrK94XS+pBmYCkg6B9iBUDJ4CyHtNYvtdxeIvS2xCHoVnU2TnSF/q/6+kEW8zlXP2/M7BHP3/4BPEYWsO23vUPI8EwWSHiQ2Y11dFrYH2tyoW2VpCJzpS1r5GZmdkJ1+MDdWK+YkQvK3GYMvJax1sou+ku6xvWLPue5qH8uI/eaR3h9k46Meuf5WrPEswT3hoJDVfxXd33HRznd1JOZLxKrlL/xFonB1NrHx2JggJQ+cfBzhXAsQG5Is5bCac2pPzA0I5bc5iWT1Z20XI3SMd0i6zfZqFeJ+nSB89W6iS/pxvwqYx2X8yPvaWtjOsrWQtB+wBR1Sa7VnbwbqoPS4rG554aWJLtzivu8TDeqvQJkbs/j+SdL2I71fudN1XEKVvOpT7OXpFGV/XqooWxK193s1IWlJopt1beLZuwH4ZEqyDxKvasFJ0mXEPHoQYb3wKNGA8YYRPzhyzHkc3cIv7ff+oMl/hZLs9cDWwJoOC7U7gPcD69nOUh2WdDlBVChtGY2kxtL6Otuvl7QU8H3baxY+z/zALbZfUzJuTSjT7qR1n32CuH/PoWCxaZhzzmz7hd
JxJwIkXUUoqRxre9V07Be2V8iIeT1RnO9S+axRoC3Z3NIn9mXA8yl2rrpVcUg6FViRUNkzMXbenf5he2CCZ/pe32H7j+n14sDFiQB+R3OvTGPMOYGngUkEsXVe4Azn2Yg3savlfVP8RQiiKMQa/JFCcWcnCvZbEpYh59vevVDs1xUmbTQ52rUIu891iaL93bY3GSDWIokwu0S/92sU22s80yVznSne9cC+tq9Ir9cHvjboOm6s1uCS3kPYnbYbtYrn7nNzqZI+AXyGyGu+h1DyP932m0b84NTFLk4+VajqNZZO0LN3cL59fZWagMLO8Rq6rXbXsz0WNsfZyM2BSzop/efLCaWan6fXGxA11CJqPpKWKD1WziDgVERK6G1OkAzmfrEmNicyUgF8XuAi288ViHcxsWi9ne5N0zSrDEjaFfgYsCTwm9ZbcxNJi4FlkdXxpp0bWIWwIynm7dlzrldRqJBVGmPBHk/nuR5Yv7nHJM0KXJmxGLxihLftfIuP9rmKPSOS3kcoRM1q+9WSVgEOyL3fJC3fmziXtL7tK3PipjiHACvRLSN7t8uoZRSHpIvoyPVnjUEz0B8axnrCGd3pqtT5ro7E4p6E3VSDeYBNbK88SNxW/AeBlZ0kf1My5M5ByYWtuENUavodm8aY1ebU1jkWICwLPgg8Qtgl/ZSYZ89ykhb9X0ZPAeBvFO42GWb+KzLvpY6LRlHmWheQT5d0F5HQ7LK1sL1jZtwqz94M1EetcXm4RGyDGgnZ8Q4NVaB8M7HuHEiBciz3Ty9mzCCTBVrz3WyE6tJdxHewEnCr7XWm17WNBkk3AkfR2T/9H7C77bUGjFe14FSjwCnpfNvv7VMESJc8WPJf0hzAOkSC+1ZgIeA1hI3WNbZvHfSaU/yfAKsSDS0lLaOR9DbgC4QN5yVEIfJDuXv2nlzOTIQVwAGliQU1IelrBClgILuTYe6zBgPfb634Y9LkU5MYUhLqqJpMIVVIutP2Khkxsz4/wPmKNLf0ibsosEiKfVTJ2CUg6Usjve9kZTNg7HcTCrC/IZ7FVxP5hyuBnWxPs7Kawibjh7b/POh1TeV5itZGUsyN6FiTXWX7vAIxzySI9RcRjhRXFSZyZDep9Yk5M0FEejORa1iAyCkPZK2iUP+5zPXU6nrPl/VM18p19pzjrt49dL9j0xCv+hpc0jGEmvgGhGr0ZgRRLStPlGIXb0Dpc44iJNya5NOUI/sYnRzfNcAxtp/OjFu8JpDivhT4Ep1x82rgyzVI1KXQyoFvB/yVAjlwSZcA2zs1OyUy58mliEipmWFz2/9Mr+cHfpATf0JYUE00SPoY0XW6ICGxv1Nv8XcGJgZcXlpvsYLMxO8R3ooH0S3VOLnA4Hvo6L9SBrZ/L2kRhaTls6N/YkwxFl7nAL8GbkoJrSldFmkzNc1dFmO10E7nKvmM7E8sAq9Mse9UdEjm4szUyXIIsUD+BrFIzl4U295H0qZEghAqeloX6iyoJdc/Ax3UsO7LSpaPgGoSiwkPE89c47n9EqBEcuhtRIdFG+/qc2xaUHNObXADIX27UU+S7Na0wX4x4Da6CwCf6nk/a8yvNf8plPteQ6dYuLOkDW3vlhm6lq1FrWdvBuqjyrjcFKAlnWZ7u/Z7KUm0Xd8P/m9jH2BV9yhQElL1g6D6/qlF8umLFwnJp/a+bEKgme8k/Rh4vZMSkEJmf//peGlTgzlsn9Z6fbqkfQYN1iRgKxIJd6ZT4CyiNuXUpVmafG37KeBySY/Yfh9MIaD8Cdie/D1FLctobF8q6XZCGUnAHg67sly0x4wXgL+WKAqNJZxpdzIGJP+f0GnyqZbTczmryNr4u0LByQCSNgNyVfHOl/Ru2xdkX10fDFOQLUq+AbD9MLFPua107BLIIdhMRewLJC0NLJcOPdg0SwCD7v/mBi6R9A+CcHJWjWJ66dqIQrV2DeCMdOgTktbJGesUauR3Alu5gNr5cKepEPMJ4B7CPvO4HHIvgO3/SP
qvpHldQa2uz/lyn+lauc42fqtQCG7WntsSBP6BMEZr8DfYXkmhxv1lSYcR+cos9GlAOVLSwA0orbhD1HqAEiTcOVyvwfkU4vk7Ir3eOh3bIjNujZpA06y4x6i/OL7Q5MA3tv1Q63hODvyV7lYa/iuhulQKL2vINwAONdGX5wScoYBTAZIOIjbod07va5mB8QVVkspWyN81jM3rXE7urrjKwDDnuYxgh55te++SsXMxFuzxWl0WGsYfsRW3SvJsUEi60fbaPZ1C2dYvqWvxYGA1YoN6BnBwyU6IsUCJbqFaY9AMdKBK1n21kMa4M21vWiH2uURi5VJifnoboQjwEEx7N8tYqNSk89SaU9cg7BeXoLs79EXRrd9GxW6TeenuCrmK6HLOSj5JegB4bbOJTgm+e22/NjNucVuLFLfoszcDYw9VkL5Ncbs6N9MccI/t5Uufa7xDhRUoW3Gr7Z8kHQ4sTEd6eisi6XQuVGkeGbcYjkzWe+x/HZLudY9tYb9j4wmSDiZUgX9AzFFbAvMTzRLTrIYnaTIjE9PmGfhimbJX3wIoXuBMBMirCXWaB0rETHGXdLL0knS07V0LxZ0TeKYpbqY55CWJ+FMifnG1wYkMFbY7kTQb/dffz4z4wdHjZtkrjRB3TPKRpZGayL5LKBo+DvyOULj8fUbMyYRtw7NEU1aj+pY1vqXYRRUBe2KvSxSkm/1vc90lGu2KQ9IywN4MVXMqpiLeOtfqwMOJwJAbayViLt0UeMj2hrkx+5yjmAKVpLuBVZqcbJpL7iiQ+x3IymuUmF+i00C0M6FiBJRR+VJYDL6RIMA9RzQDXG378oyYNdXqJtQzDVMULL5M99z3ZduPZ8attgaXdJPttRSqkR8AHiPyT1n2mQql5Df0NqA4QylZddV6vpKurzj5VNJ9vTmQfscGiFu0JiDpW7b3HK4Rx+O4AUeSShORJH2bUMFtO1H82uWsBm8jlKcbu8glgHOcoX42g4BTEYkd1fbp++N0vJwZmI5QRansxOLdgk4n0sZEUugrWRfN0CR9OpZNiBjmXCIYokX9VEtAFb3Oa0LSz+jji0jYftgZnpk1IOkE4HJCfWJTwqJkFtu7ZMadFfgqUYCcC/iC7R9kxrzW9hv7JHyLJUJKouYYNAMBjYH1xHAL7txzSLrBFawKJG0/0vu2p6mLOJEr5qeiSk3lOfVBIqH3C2AKAbBGkX28QyEP/QSdrretgXltZ3WbSDqb+H6be2s7woppRELqVMQ9H9jNHRWRJYBvO3WXZ8QtbmuR4hZ99mZg7KHC0reSPkcQAGcHmkKpiETvd21/Lv+qJxYU6ogrEp37UxQo079pVqBsxa22f5J0q+3VRzv2YsAMMllA0veJ4kpDytoGmMv2VtPvqkaGwg5nOAxcxJF0IKEycRoxvm0DLGL7i4PE6xO/eIFTYT35pvRvKeAOovB2eGbcUwgFmfYcclju/j8Vgja0/WR6PRdwSS5xMcXqVRvcEviN89UGe88zIWyMoP98khnvTGAynfFia2A+25tnxq3VaDhm+cgaSOv8SbYnT+9rGQk1CrKt2A8An2SofUjWXqcWFPbAxzD0erMUe1K++xW2/9Q6dgphWfNL21tmxl8Y2JywdJy71jOiQtZkiYCzfpPDUVirXFmAgHMoobjw41JF35599QEEWQ0ou6eWtByh6rwn8HLbs2fE6psLKHG9tZ7pWrnOmqi5Bk95ySMJu/KmAfd42/tlxi3egNLMy62fcwEX2n5TzrWm2DXJp6cTOb0b0+u1iJzfBzPjXkHBmoCk1WzfprACHAKP4wacRDTdl6GEvdyx/gPE3gli31SMsC/pnQSJ+iriet8EfNT2xQPHnEHAKQ9J7yOk4xYlOlmXAO4vwYCcgYkJDeNH3iCn+JY2Sys7da0ousrvzGSvNioDSxH2SA1KqwxMCJJaTfZ4il+ly0KVfRFLQ+FZvy/wdmKSuxg40PkdWXcRxZUDCGvAY4DnchNNtVGys6
DmGDQDgeEWww1KLIpVqfNd0tHAYoRtZnuMy1LJSuuhn7mC2pTqqdQUn1Nbsa+1/cbcOP8LqNhtcqftVUY7Ng3xmkTQvISizM3prTWJrp71My4XhdVkY2sxAzMwBerTwdnv2ABxD3oxkm36QYUVKMdi/5QKx+9xR9liSWKezVLjmkiYQSbrhkLRYlc6ym9XA0fn7p8mIiTdZXvl0Y5lxK9S4EzksTWIZpldgKdtLzfyp0aNWWsOKbrO6olTRW1wmHMVKSLXRom/WU+8Wuvv+wjy1O8o0OSjMVI9LQ1J29o+Pa3ve2FCReunHlB1IRHplqY7h3r1QBfbHbeKImCKdZPttXLjjBUk3WZ7tUqx77G9Yp/jcw9K0pL0MaJ5aEEil3Om7fvyrnRK7Je5jA1gv9hbAV8HriDGi/WAz9r+YWbcplj/H6LJpWiDZGlSZIp5NrAyMdZdk/7dlLOOU0W1ulrPdK1cZ4pdtLGlFbfaGjzlIXcliv+Nak92bFVoQFEltZ5W/JcydO4rkV+/H1gWaGqRiwMPEnalOWuYakSZND8vk14+aPv53Jg1kfLr+xA2exOmCVXSy+jYnd6YOxfOPPqvzMAA+ArxR7rM9qqKrpZxuUGYgbFB5YHlYWISaibhlwC5hZzvEd6SVVQGJG0EHEYPSY3wixyPqOZ1nnAWQQo5nhaDvABq+yIWRdoM7Jv+lcROxKLq87YPkLQ7kMVoHiOcQJ/OgkEw3hc3/wsYI9b5uu7ucj9P0fn+ycy4sxGbpDbpz+SPe1sC30pJhRNdSFpfQ1VqTpJURKWGOnNqgy9JOp5Q+mp3QowrO8Axwu2S1u7pNinh//20pDfavjbFXZdIvg2KQ9PP1YkuiIdG+N1BMDdwiaQithaSzrS9hTqqZ10oVSycgTHB/2fvvcMsq6r0/8/byABKg4og+JOkoygKSEMTRlAQGDEBBkCCOhjG9EVQxzSmUVFUxATKDAYyCq0EBUVschxyEIFRcRwRAUVAQFHC+/tj79t1q6iqbu7Z+557qtbneeqpOud2rbu6qs45O6z1vg9LWsPjpW9LdM6sI+mlwGk1iiO7xKMtsFkCqs6fMvsCZ0u6KR+vBfxrodidwPb+wP5RTJbIC/Ffyh+dQEna+1vAsb3NkELcJ2kPxqytdqOvsHxQJtngfEvBDc4zSJuFF5E2V+bbvr1A6DmSntDb6M+bFyXWf++TNK9X+C5pI5qNs/r5JWmtojdvXZ3xxYwDMdkmslOn/kgW32i83cmTJfWrLTS1O6k1/n5JgRj9DON5WoPH5c9zp3h9bdKG6mZTvD4lSjZR+wBPBa7KMS5i/Px9UH4J/HduPFy0IdsrJBpkQ7aPsyQdQJq3989/izTPlCLfIyGtr7yTR+Zb4u/uCknzbV/af3LQ4pvM6sC+tq9qltoYkubkOcLpwLx8bh83VGbrx/Z3JJ1NKj418AHbtxaIO9W1VwpViLk/yX6r5F7AGcC2wL35eDnS77NxUR31rulaa50AT+ofb9q+MzeEN6LyGPwIkmLdV/Px7sCRpPFoE37F+MLWk/PnJtfOKZIeT7KSvYJ0TX+zQbxFTPHsuxDYpkD47QvEeAS19gYkbUX6u/hf0r1odUlvKFGIW5E/2P5BiUCq7EQh6Vm2b8jNvpD2BgDWyOtyA9/jQgGnAvkBsbGS6sKGth8u2XkTBP1IOok0aP0p6Qa0HalD+2ZoptIi6YskibsiC0x9ca8mTRTHFam5gD9kDWpWj+d4Vbos9EhfxNcCv3AhX8TSKMn0TbZh2FQJ6BBSpe2LbD87V7ufbnt+k7i1GUa3kDokwd1VJC0kSWV+zfYpDWN1rvNd0gqkDZC9SNf3YcB3miw0qa5KTc1n6tHAs4DrGKv+t0fMDnAYVOw22YC0MLEiaSL2J+BfbF/dMN+PkxY7ihTKTBK/iK2FpNVs/15TqJ5FMWZ3UAXp2xx3W9L9eDPSRvJhtm9smG4nUT0Fyirzpxx7Z5JC5NrADqSF9A
+P2kbWMJB0Amlxd1YWk3W54FLSP5LuQ7uSNv8PI83NGi1OSloL+ArwfLJCImlj8n8bxt2fpFZXbIOzL/aXgI1Im1gXkLqnL7LdqKhF0utJSlEL8qmdgU/bPqph3PmkAqdbSM+mVYFd3cCeRZOrDRrYlAZqg71N5H7FgtKbyDVQRbuTWuPvHHsL4Bm2D5O0MsmGYzq7uSWNW0X1tC0kfdID2OLle/18Uif285Tsaj7jhja7OXZRRcAJsc+aPGSzsVZplKwRe4VvMOG56gGtESe8xw0kpajfkIpDi9nBq6C6vKTzcn7rA28kKRf80OWVX17F2LV9vgtYiEjq2U+ubftTklYnWVFesphvXdL4TyxdBChpacarqJwD/KcbKFuorlpdlWt6krXOtYEflVjrzIXfr5zQ2HLioH/TwxiDq5JiXW0kLQMsa/vuQvGqPfuGiaRDbTdqmsl/x7v31m7yWsZ3auwnlkLSNqT9gJFvQpX0DdtvqXGPiwKcCuTNtp1IcnorkRQ+5ruAfGMQTERTeHv2aDJJz5Wme5EWpXubpo0fol0rUlMlr/O+Lot3AX+gQpeFpFfSJ4dYYlJTi9xB12NZ0kbkg7bf3zDuFbbnqU/CudTkoyaSPgssReVuIXVEgrurSHoKsBrpZ/y1xf37xcR6MfANYFznu+3TG8ZdFngTSYWsf+GmSGFI/ht7Halz/3rSwtNXbR80YLyzSBPonoTs40k+340X8io/U28sUSQ0E5iqQKRH00KRXPiF7T83iTNJ3CKFMpPELWZrkYuEF9reukRuQXuosPTthNgrkhZDPgz8lvRsObrJYm/XyPOQ/2SC0mCTjeQct8r8Kce+xvb6eaPzUySVro/VLtgeRWZ7MdlMKLhUshh6OXAI6Ro8DPhK6c2tUpTc4Jwk9lzgX0hFgavaXqZAzHUZU8c4s1RRYN4s7I1nG8vfq5KN77A2kWuiwnYntcbfuYBjY2Ad28/M898Ftp8/SLy+uBNVT3fKcUuonlYjN8l8hfR8Mkmp5t29jeUBY15qe76kq4BNbf9N0nW2R1VFvJPkxp53MFYYch6pGKKx0leN57WS5fcXmaAu3/TvIq+vXA4cTrJH2h44BTjH9iFNYuf4XyetCfWaRXcFfmX7nQ3jdq75UkkheWmSsgWkdbOHbL+5QcwLgL09Xq3uYNubN823Fn3NJzeRitPWpMBa54TYRRpbhjEGV2reO9jjFeveabuRmn+NBpS8/vSySWI2UU/rxZ4Rzz5JGxVYY7hm4jrhZOdGCVVqQlVqvNwyH55r+5om8WoTFlR12IFkXbAPyXpqBaC0xHUQAM27YBYT+5vANyWtQ1rgvCYP5L5he7KKwCXlrlzEci5wjKTbKSAPXZFle8U3ALbvlfTYAnEvZ3yXxXsnvN6oy0JJuecHtk/Mv8N1JC09qpsrkwxGLpBUolPhgTwg7PnJr8wkVeojSG8zpV+G0zSQGZb0OdsfmHD6/ZOcCwYkL4Kc6tyNbfsWUpdoo8F2ZgXguYzvfC+xIXsUcAPwYlKn5R6kQplGSNqRtJnwjyRVkk1s357vnz8HBirAAe4GrlPycl6kUiPpq9BMpabmMxW4UNK6pTZAukytjcHccfNq8uQ/NcEVkezvcTtwK8myrbFssSrYWth+SNLDklYstekftMZDpL+5ZYF1JeECEsN9RZF7AlcCx5A2Gt4AbNU0fod4sMQmwkQqzp9grFDoZTneqZJGeiOyFrYXAgv7iskWSpo1xWTONsO2fyPpyaTuUEiKISUsjKqSC1r3Al4KfJ+x+9CZwEBNEpIOY/JO5KaLvJNucFLAPlvS/yMtIG9EkpT/NmnDtzF5PFFayXhnkurUzyR9BJgnab8mDSKDFtgsQdwt+zaR5wNvBp4p6bsU2kQeAkXtTioW5r0S2JBkPYHtW3JRWVMA5V0yAAAgAElEQVT2ZLzq6WdJFhSj/tw7Fvga6ecCqcD+O4yt7wzCzfnv+STgp5LuZMyurRE1Nm
T7Yq8IfJzx6h6fHOE5yhHAnxlv+3IEzW1fal1/+5EKvcapyzcJmNdYLiRtlh7sZNdzJfB+xn6PTXkR8GzbvTXaI0gbtE3ZtNd8CYushv6hQFwAJH3f9qtLxcvMn9CMfGZuEmjCvsACSePU6hrGBOpd07ZPk/QM0mY9wA22/zbd9zzK2PMYa2zZt0ljS80xuMZUdZYmrR/+Xz5ek7Rm25QFpAaUb9LXgNKQH5L2wa9lrMiiFNWefbXJTYG2fU/T4pvM5blg7+h8vAdlbERrMr90E6qkfYC3MFacfYySwtCg+wu9uNOqKrmBak8U4BRE2YsMuI2xiX9vwrSfpD8BB9j+eisJBjMKDUl2OhcuPCt//BG4GniPpLfafu2AYXckeYW/m/TAWJG06TuqVPE6t712jjdpl0XT+KQCpy1z1f9ppAfzrqSf+cihMUUggDmkhcgVC4T+KnAisIqkTwOvAT5SIG5VKqkXbAdMLLZ5ySTngsHZFfiypO8D37ZdYpLU46O2F+QFzReROt8PodliHsA/2t5Z0o62j5B0LGUW/3cHvtS/YdwrApPUxHLwxPzR4+wGsYChPVM3A65Skrj+G5STnQ4WcTKpQOty+pTDmlKjUCazOmkBqLStxb3AtXkBdVGBc5PitGC4aHK/84toUISb455IUi44Cni57VvzS8dJGvUFnCL0jTd/KOmd1FGgrDF/AvidpP8ijec+l4sO5zTNt6tEMRlI2gU4gDQWEnCQpPfZ/l6riU2DkoT6XcC3gA/2bbD8t6Qmihn9Vq/Lkja/b2kQr0fxDc4+liUV91xu+8FCMWvSm4tsAWxDubkIku5hbAz+D6TNp/tsrzBgvGFsItdmm5rBVc6O+u+2Lam3mf645tkB6fpdlrSxB7AM8LtCsWvyWI+3ezta0vuaBLTdK+b5DyU12BVJa3wlqLEh2+PbwM8YK2B5HUntbFTtQ57r8RYvZ0ka5eaZB2zfIWmOku3eWZK+3DDmjsDmwL8CR+YCgzVJTS5FCkSBX5Is8Hob6avnc02ZrPmyZEFAYyuySXhI0tNt/woWKWg1ug5tX6pk1VNMra6PKtd0X4Hv1bnA9+NNC3z7yQU3p0j6D9unLPYbloBKY/CXl8htGmo0oDy11ppm5WdfFZTsWr8NzE2Hugt4Y4EinLcB7yS5aEC6H496jUGNJtQ3kYot74O0x0BaJ2tUgAO8In9ehdTsfGY+3po0nxi4ACcsqIZIXiC6sHTlVzA70XAk775EugGdAXzLfb6pamClIek9JB/1Lkyeew/Pol7nE+IfT+qyOCaf2h1Y0XajLguNWS/tDSxn+/MaYesljfddfhD4NamS/vwCsZ9FWsAScIbtxuoetSnZWSDp7aQir6cBv+p7aS7JS73UInLAokr33UidvWbMguKehnGvzAv/+wPX2j5WfdZqDeJeYnsTSeeS/k5uJXVvNFXheoRsukZUInNIz9TO2kR0BUk/s/3cCnH3J41bShfK9OIXtbXQFFZqrqvwFBRElfzOJb2EpNrwfNKC9PnAIb3u8tnAhPEmTCi6LPDsqzJ/yt//WJINwLW2fyFpNWA9F5Bn7xoTiskO6ysmQ9nuuLXkhkjukt6u13GbN5sWekStnSFtLnmCFYuktW3/uvD7zAHOd3Pb6E7ZZ9ek1lxkkvcRaRN4M9sfHDDGY0mbyEeTGpGeTFLl/BRwnu1ZUXS6OFTAjlrSvwHPIBWH7k+y/Dq2QDfySaSx0DjVU+BmGL3C8r4C3w8Ad5LWEE1q0HmC7Q+1ldt0SLrc9kaL/5cDxX7E+uOIr0lWsX2phaSFJGu2zwIrkVTa5jd97uXYi+7teV7yCeCFtvduEPOHpGtiRdK13Rsjb0Jaf9qqYc57kK63eSTlotcAH7G9oEHMNXpfAqeSGhgFZawoJW1DWi/st5nfyw1UM/uKWe7JxSzzgCLFLLWuaQ3JZneyNcoGsTozBu97Pr0L+AMFG1ByAcQZs3E+OhmSriE9N87Lx1
sAX2+yDp4LC6+z/azF/uMRIhd6P520v1ekCbW3TuYxdcRlgUttr1cgZSSdDrzBWekqr7ccbvvFg8YMBZwhkquSt2o7j2BmkDcKlyLdBGooZQBcQxqsTmYPtUmDuHOB05VUoY4j+Tjf1iBeVSpXj0O9LgtJ2pykeNNTm1iqQNwqOCsCVYp9A2XkGodJyc6CY4EfkxbE+hcx72ky0A4mx/afJX0PWI4k//pK4H2SvtpwIbJW5/uhSkpZHwF+ACwPfHTQYP0FX3ny0WMucEGDuNVUaobxTI1Cm6FwoaT1bF9bMmitBXPVs7X4HnC/7Yfy+yxF6hoOusP9tu+XhKRlbN+gZGnUlL14pLT+UcDOBWJ3AtdXoKw1f8L2X+jrvsoLQ79vErPDHMpYMdnGkhYVk82W4pvMHI+Xu7+D0VdF+h5pM2jiudIbwM+ggF0kY/bZ51HZPlvlFElqMRQVLqdO0ZMkfZzxc9dHE+MvwBmSbrX9Cli0aP9bkkrWyBfgqLDdierZUa9Muob/TFoz+xiwbcOYUEH1tDITLebf2veagWLziRL3Cg1BERD4q6Qtek11SipnjdXEK7IRY7YvkFRabuytP4xgI9EOJIWofUjKbCuQCmVK0H/vOd9J1aOput4XGn7/tNg+Rkllr9d8uVOB5ssjGLuu18zHyuca27SR1sf+i5TzXcBPSGoOTaimVke9a7qKzW4u6H2q7d/2TjWN2UeXxuATn0/vnfB6kwaUi4ETc+H7A4wVWQykYDgDeKhXfANg+3xJjVQunWzmb5S0RonCvyGyfYWYh5FUU3vjw51IqqqlWL1XfJO5jTQWGJhQwAmCjiPpDOBVruChmwcqr2RsYfp82ydO/12PKv76pOr0VwM32y4xSS9OzerxHL9Kl4WkF5C8nC+w/TklKct9R61TqIcq+i12kYqdBfMYu6YvKPV3HCQk7Qj8C6nD8kjgCNu35y7Mn9teq0Hsop3vSmpkjzidP9v2FweMuyLwBAoXfGk4KjXVnqlBPfqKsh5D2nC7iQ7YfOWuqRcxwdbCdhOLNiRdDGxr+958vDxweoluyGA45AWFvUhFnC8idVIvbfulDeP+fELR96TnZgOqp0BZdf4UJKb4/T3e9qwpJgOQdACwPvCdfGpX4JoCG+rFyQ0tzwE+D/TbsawAvM92o+JTjbcwgqTo+CHb328Y97GkDU4xtsF5TK0mhhKKJLUoPReZELt/LWAOsDFJbWHzhnEXKS5JOsT225vEGyYqrC40Wee/CqiT1oobTE/Te4UqKwLm99iAtCayYn6fPwH/YvvqprFrMNU6Q49RaaiRdL7tLSY893q/x4dJP+cDbA9sTyLpCGAf23fl4ycAB9p+Y4PUJ3ufl7ucJdDTSXsLf8sN8OsDR/b+DwXi11B8Kz6eVUW1ulrXtKRTSBaD25H2XP5KUkVqrCYj6VpndQwlq7YitmRdGoP3mKoBxfbARVT5WbIj6W9t1hcaKNkALkf6u+ip4N1PUmRk0D0YJcX6DUnKYf028zs0TLkz5CKvzUg/zy3y6fNsX1nwPQ4mrSn3X9e/dBP1t7gugqDbSDqZdAP+KeNvwI2LLCR9nbSJ3H/T+ZXtdzaNneOvSuq4fS0wd1Qn6KoshZi7V9YBxnVZkGyYRnbjsDSSTmVyn8U/kH4ORSd6o46ki0iL0f2dBV9osggp6aMkRZ1eMdNOJAWqxp0FQULS4cC3bZ87yWvb2D5j+FlNTu4qhXT/mU9Sv4FknXGJR9CaLCt5LHQllZqaz9SgHl1ZLJ2IKtla1CrgDNpB0gvJfue2/94wVqek9WtSqxip9vwpSEQx2VhXL2kM178IOZIFX7lIfSdSx/4P+l66B/iu7QtbSWwKhrTB+STbf2yY6tBRYevMHPMU0rwf0jrI/wL/Z/uohnGHsolcClWwO1ElO+qKcaupng4DSUsDb2fMSvxs4L/cQEVbk6gXTXZuwNjFN2QneY8VICkFl4oZTE0uzrrQzWxPH1GwUa
kApaQl0FWk4s21SPfPHwDPadrA0Be/xv+/+Hi2ZjFL33sUvaYrF/geQZr/Xto0Vl/MTo3Be9RoQMmFIVuVKmzqOpKms4+z7YGUs/Ka0GQBzxkkXhuojHpf8fvwJO/xSsbGcOc2va6jACcIOo6kN0x23vYRBWLfADy7V8GaKw2vs/3shnHfQSoCWBlYABxvu4TlUhVqVo/n+EPZOJT0GeBu4Ju27ygRsySq4LPYZWp0Fki6EdjAY16ZywFXNZmYB2PULg6pRZ4wvcz2Pfl4LnCq7RdM/53toLrKb9WeqUF9JB1l+3WLOzcqSFpI2oz8LLASyYZqvhsq1Ui6ANi7110jaSPSolOjLvJguOTC72fYPkzJU355279uGDOKvjO1ipFqzZ+C8UQxWaK/q7crSNrcdlN7hcninmF7m8WdK/h+A29w9jqw+zceJe1j+yvFEy2IpB2AAxmzzlwDuMEN1Yty7CtIawHX5uPdSOq9jZqehrWJXIq8cdNTJdkYuDR/3WTTppY6aWdVT2si6ZvA0iSrGkhW4g/ZfnODmNVUhmpsyPbFXoakeL4WSakUANufbBo7mJ7eddTg+68mbarfmY+fCJxTesxReG39CtvzJL0f+KvtgwrH/xfbh5eI1Rez+Hi2cjFL567pPDf7R+A3pCa7IirJHR2D1yj4OpxUiPtjxtsYDqSoHsxs1Fy97wskm74Teusto85jFv9PgiAYcb4H3G/7IVi0AbxModi/JC2q9Ca4q+dzTVmdtJhyVYFYw6Cq1/kQFxAuAZ4OfAkYxcXp4j6LXSYX2mxQuLPgFlK34v35eBlSZ0RQACdf1oclrVijOKQiTwb6VRX+ns+NKvcC10qqoVJT85ka1GfcBlD+/W3UUi5Lwg6k+/E+jNlafKJA3H2BBZJuIS0wrUpS4Qg6gpJC2cakYpnDSBs5RwPPbxi6hg93V9kIuFDSuGKkXsd9g0XZWvOnYDy1fn9d4wpJ80t29dZmYvFNbs65A/i+7QcfbTxJywKPBZ6U1U16KjUrAP9fw3SnxPYdShYXg3COpPuAVSVtD1wLvAEY6QIckhrwZkywziwU+zWkscvupK7T1wP/XCDuHElPmLCJPLJr4f2NHHnjeKCimwkx7yY1Yu2mCXbUpCafxnGb5jgh7u/zGP7wrjW2ZOZPUJo4MxczPGrUpzIk6Zq+l+aSfn8leO6EzdezJJVqkDyZ9DdyOX0bs0F9mhTfZA4ELpK0IB/vDHy6YczJmtbe2jRmHw/k4s3Xk1SdIc2hGqExJbXD83FJJbXi41nbfwFOkLSKxlTVbiiQKwzpmi6hlNFHrabezo3BSTlvNqHg67KGMX+dP/4hf8xqJO1DWr+5B/gGSYXqg4MWwOmRFrvjsL3CIHGHhZL7ySak/8Mltm8jKZQNyluB9wAPSupZBLvWzyE3TT5AuhcNZJc4spOOIAiWmDOAbUkbkpB8Bk8nWfk0ZS5wvaRLSDfKTYDLJP0ABvcZtP0hoIp0cSV2IW1YfMH2Xbl6/H2L+Z6Rw/ZJbeewGM6Q9BPGS/YvbDGfVpnYWSClteSGnQV3A9flwgWTisoukfTVHDtsdppTszikFkeS/g56soo7AYe3l85iOYExG7XS1HymBpWQ9CHg34HlJPWKFUUqJju0tcSmQNnWglRoOtHWYj9JjWwtbF8q6Vmk4g2AG91A9j5ohVeS7PCuALB9S1Yna8Sod40PmVrFSFXmT8EjiGKyxKbAHpKKdvUOGZEKAvYgFaY+Wt5KKjx9CmlDqPc8/TPQdNNmWgbd4LS9paTHk/KdD7wZeKak75IUBg4pmGZJHsiFR3Oyis9Zkr5cIrDtm/LG6UkklbZ/dhkLnCqbyF1Ej7SjPkzSSNpRd7ixBeAhSU+3/SsASU8DHhow1rEkVYGiKkMTqLEh2+OptuN53UFsHynpMqBXBPgqF1Cun3ht276kacw+9gLeBnza9q8lrQ00sjHMrO9sYwhg+05JpVTUil8fU6nVMaFZaU
CGck3bfnZPKaNArN9oEmXZxkl2cwxeo+CrRAPZTOKNtr8i6cUklevXke5DAxXg2J4LIOlTwO9zLJHmTasVybgSkt4MfAw4k5TzQZI+afvbg8bs/TyGyOtJP+eB70VhQRUEHUfSVbaft7hzA8ae1F+whwf0GZT0CuCLjA0G1wSudwHp4pp0pWBI0kFMXx07soUAkl4FbJkPG/ssdhlJpzHWWbBowcb2gQ1iTmqv0xc7bHYaMtXPeNR/trkbsv/au7LNfKZD0uOYRKUmd/o0jV3tmRrUR9L+vSLfLqMGthb5+3cGTrN9j6SPkLpu9nO2pApGH0mX2N5EY1LqjwMuGvEFvYB686cgmAx1zKJFyZLtNbaPrxB7b9sHlY5bg1yofyHJ6mWTvJl3JbAj8ALbR7ea4BRozDpzf+BJFLDO7G369J1ahTQH/htAieeepHUZ20Q+s8Qm8jBQYbsTdcyOWtLJpGLkLjW2IGkbUvf7TaQNpzWBvWyf1TDuOPWiUuN6VbQnlXQocJCztVwQwHCubUnzCl4jQ7HjKkXO90VMUKuz/aYCsatc05KeZPuPJWP2xV6kLGv7mZKeAiyw3UhZtmtjcJg65x6D5J7HtDv3itSyQtR3bddSHhpplO0hJX0FONv2iSpghSfp6gnqepOeGyXyuPOfbN+Rj5vY986b7vVRXusMBZwg6D739Q8sJW0ElOgUqrlAvB/1pIuLU7l6vAalulWGju2ayhZdo3hnwagXgcwEbB+RFzPXsH1j2/ksKfkZMrID1gnUVKmp9kwNhsI6kl5KKj55uO1kBsXNbC0APmp7Qe702gb4AnAIqUsrGHGUJO9OUbI/fbyktwBvJEkYByNOFNgEwyBv/kCSN+8Mth+W9H6geAEOcKukuR0pPt0R2Bz4V+BISU8mbdK/GjivzcQWw46kcfG7SZ23KwJN1FkBXt40qcWRC246UXQDVe1OumZH3cm1IdtnSHoG45UoG1m1VFYvqqHA0Susewywl6SbSEV1XVCICOozjGv7m6RxQAm6pqRWXK2u1jWd83uYtJ43L5/bx3ZJS86iyrJdHYNDteKglSdRiFqlwvt0hcslnQ6sDXwo/62VWJu8T9IewHdJ1+Ju9BUwjih3MP46uSefG4ReM/qypIK6q0n3n/VJ+6CbDxh3HJKeD/wHaV72GMbucU8bNGYU4ARB99mX5JV9C+mmsCrJuqc4KuB7l6kmXVyJml7nxelakYWyFYem9rW8gwZWHB3mQknrlegskHS87V0m6TAEynQWBoms8PUFkvft2pKeB3zSYTlRkmVt94pvsH2vpMcWij20Z2pQha+TpKcPygtkh3WpEK4fD2hrkemppr0M+IbtUyWNnL1AMDm2nVWM3kOyUFkH+Jjtn7abWTAIBedPQdDP5aQxvUjNIXfmrx9PUjFYu73UFstCSf8GHMf4zvemdir9xafbAgcwosWnWbXxDEm32n4FLNrc+i3wBka3oeatwHG2fwcUWXMY5U7xFqlld9I1O+rvMYnqabspLRm54OYaAEkbS7rF9i0NQu7JePWizwJXkZobm+Za4xqsXlgXdJchNa1p8f9kyXAlO66K3CVpeeBc4BhJt9N8o77WNX2OpPuAVSVtD1xLGgeVLMD5e55fGxYpajehy2PwGjwkaQ1nl4issjObLXfeBDwPuMn2X7Lqy14F4u5Oui6+QlbCy+dGmV8C/51Vz0wq5L9G0nsAbH9xSQPZ3hpA0gnAvN5emaTnkgpmSvEtUqPBODeKJkQBThB0HNuXSnoW47srHqj0do197zK9weB5lBsM1qRrBUMASPoh01tRjURBgO0t8udJK9B7EnWkjdUZT6XOgn3y51gIqc9/AJsAZwPYvkrJ9z0oR03lt2E+U4PC2F5I2thbkdQRslDSb0nKIUfPot/l77J6ynbA5yQtA8xpOafg0XEFcJft97WdSNCYUvOnIFiE7bUBJH0DONH2j/LxS0gWQaNMr7D5nX3nDDQdL/cXnx
7akeLTV/d9fb7t75GKDkaVucDpkv5EKqBaYPu2lnOaicyR9IQJdicl1u9PzB89zi4QsyY1VU+Hyd7A+pL+x/agjR2dUi/qFfVIOsr26/pfk3QU8LpJvzGYFQypae0TBWN1TUmtuFpdrWva9paSHk/a7J4PvBl4pqTvkmy+DmmSd+b4SZRlvzlosI6PwWvwYeB8SeeQ9i22JCk8zkqy2uevSX/HyxaM+7+ka7tL/Cp/9Dg5fx5YgYpkJbeoUd32zyQ9u0G8idxt+8cF4yF7NhekBUH3yR2yp3VEahmArFRwP+nBvCewAnBMga63KqiC1/kwUPKbXBXoecjvBtwGnATdksiXtFpDNYDOoAqerDnuUiQVp60HSixYIiRdbHsz9Xm8KnvAtp3bTEHSfJLs5jiVGtuXF4jduWdqMJ5ctPk60vjiFuAYYAtgPdtbtZja0MjjrO2Ba23/QtJqpP//6S2nFiwhkm4A/hH4DeMVIuJZEgTBIiRda3u9xZ2bDUg6hbQhvR1p/PZX4BLbG7Sa2DRozGrornxcymqoKpLWJxVSvRq42fa2Lac0o5D0euDfgXF2J7aPai+r4SPpKtvPW9y5rqBskTfg955E2pwep14E3AwjqV4EgKQrbM/rO16KND9Zt8W0gpaRdDlJTebsvjWzn9l+7oDxprWamm1rOVldoqdWVzp20Ws6K7JdSFLy2CQrvl1JKjR4ge2jpw2wZO8hUjHnP5PWD38CnOvm1oAxBs9IehJjzSYX2/5jm/m0iaQ3k5qgn0pSqtsMuMj2i6b9xsXHXRl4C7AWfUXZoz5nKI2k75DWx3r3hj2A5W3vVij+Z4GlSDaJi+4RTZ4joYATBN2nX2p5G1IVeRGpZRX2vVO2GiIVgfSq/3qykPvlTqpRtBqq4XU+DJ5ve+O+4x9Kusz2u1vL6FEg6fr85ddsH9xqMkOkYmfBQ5IelrSi7bsLpBpMznWSdgeWUvJ+fxdpQhkUorJKTbVnalAfSSeS/i6OAl5u+9b80nFZNnpWkK0tTpC0iqQ18ukb2swpeNS8uO0EgsEoPX8KgsVwSy4Y7l+EbGJzMhSyXPi6JEUHIFk8NAy7C6n49Au278rFp6OuIlbLaqg2twO3kqyiV2k5lxlHabsTddeOuprqaU3yOOAq2/dJ2pNUEPiVhlZPnVIvkvQhUhHZcpL+3DsN/B04tLXEglHhAdt3p7qIRTzcIN6B07xmxu6ls4XianUVr+kdgc1JiilHSnoyaQ71apJrQgm+lYsUfgqQHRl+RFrva0Inx+ClyQVO2wNPs/1JSWtI2sT2JW3n1hL7kApmL7a9dV67/kyBuCeTromFFLJGqo2kZwL/xiOLhprck/cC3s6Y08O5pDX7UvTW/vv3Uxs9R0IBJwg6Tk9lQdL+pKrjY/uVFxrGvoFJfO9s39E09hTvtxJwoe11FvuPh0jN6vGa5AKWl9m+KR8/DTjVdklptqrkv4nNbJ/adi7Dpka3kJLv5oakiUd/R/1Idkx1kaw88WHGd1d8ytmvPWhOTZWams/UoD5Z9vc5wPNJi3jnA4fMtutP0g6khcinkDbJ1gBusP2cVhMLglnAsOdPwexGyZrm48AL8qlzgU+MqrIsgKSPA1uRCnB+BLyEZL/0moLv8a+2R36TV9LVwFYTrIbOGdXuaUnvIBU6rUxSZzm+SWFIMBx6asJTKe02LAypRk3V05pIugbYAFgfOJxkdbKL7Re2mVcbSNrf9ofaziMYLSR9i2Qx90FSocW7gKVtv63VxGYYNdTqal3TExTEryVZiL3Q9t4FYn8KWMn2O7LS4KnAN2wf1jBu58bgNZB0CGnt7UW2n51/xqfbnt9yaq0g6VLb8yVdBWxq+2+Srmu6FtdFBcA8z/lPHrkuMtLjuNKEAk4QdJ/fKXlZbgd8TtIywJxCsYv73k2H7TskbTWs93sUdNXrfF/gbEk35eO1GGEfTkmfs/2BCaffP8m5GU3lbqET8kdQiaw88eH8Ed
ShpkpNzWdqUJ+9gD8DX83Hu5PUcHZuLaN2+BRJ6nZhLijbmmTJFQRBfYY6fwpmN3mRf5/F/sPR4jWkzekrbe+Vu50bWwxM4G10Q2XhQOAiSeOshlrMZ3GsDuxr+6q2EwmWnFx8sxRwuDtkR11Z9bQmD9q2pB2Bg21/S9KbBgnUYfWiHutIeimpeaaJwkkws9ibtF72N+BYctNa06CSXjXd67Zn21poDbW6Wtf0q/u+Pt/294DvlQhs+6OSPi/pP4GNgM/a/n6BuF0cg9dgU9vzlKzDemqO/9B2Ui1ys6THAycBP5V0J8lWvCmnSHqp7R8ViDUsHrRdUp1mMrVhAEqpDUtakfGFdecAn2ziJBEKOEHQcbLawvakTv1fZKnl9WyfXiB2cd+7LlOjerwmWSXiJ8DawA7APwEfHtXf30TFl3zumg4sKlShRmeBpMcB99t+KB8vBSyTi0aCAkg6i8kXx2ab7G01Kiu/VXumBvWR9POJKmGTnZvpKNlNbpw7Tja0/bCkq21v0HZuQTDTiflTMEwqSXtXRdIltjeRdDmwNXAPcL3tZxV8j86oF0palzFZ8zO7oCgjaRXG24f9X4vpBEuIpDNIdladsKOuqXpaE0nnAKcBbwS2JG2CXz2IslVX1Yt6SNqW1CCxGUk16zDbN7abVdA2kna2vWBx5waIeypp3fvMfGprkh38H0h2sG9sEr8r1FSrq3VNSzoC2MfZljOrqBzY5Hc2oSBLwEeBS0j358YFWV0cg9dA0n+TrrtLcyHOyiQFnE6Mw2si6YXAiqSxzN8bxroHeBxpfeEBxmyuV2icaGGyOhQkdbM/8Mh1kYFVomqrDUv6PvAz4Ih86nXABranLYz/mFYAACAASURBVPCcjlDACYKOkzfOT5C0iqQ18ukbCoUv7nvXcbrmdd5TiZhL+p2VVIkohqS3A+8AnpblenvMBS5oJ6uRoEZnwRnAtsC9+Xg54HTSYDkow7/1fb0sqWDvwZZymalUU6mp/EwN6nOFpM1sXwwgaVPgspZzaoO7lLzNzwWOkXQ7fbaDQRBUJeZPwTBZQJL2/iZ9i5AjzmW5M/QbpMXTe4GLCr/HKwrHq0beFBv5ohsASa8AvsiYxeWawPUk+89g9LkXuFZSV+yoa6qe1mRXkgrnG23fmueUBwwSqKvqRT1sLwQW5o7y3fLXvyXd/4/uiKJRUJ4PkcYvizv3aFkaWNf27yEVsJGunb0axu0a1dTqKl7T6/eKb/L73CmpaQHHxLHglaS/kVeQ5mZNFZG6OAavwVeBE4FVJH2apHT5kXZTGg1sn1Mw1txc2PIM+orgR5TLSdeY8vF7J7zeRK2mttrw0233K3J9ItuJDUwo4ARBx5G0A0m6uLcIsgZwQ1NvwWCMmtXjNampElGSPHB/ArA/yQO4xz2zzTu1nxqdBZN5hnbRR7Rr9Dp9285jplBZ+S2eqR1G0vUkmfpeJ/YawI2kIjjPFkW1rHb2V1Jh2h6krptjSnWFBEEQBKOBpMttb9R2HoMiaS1gBdvXLOafThfjPdO9bvuLg8YOxpOV9V7EBItL2wPZ6wTDRdIbJjtv+4jJzrdNV9azJiNb683Ph5fYvr1hvE6pF/UjaSVSB/mewC3AMcAWpPn7Vi2mFgwZSS8BXkpaXz+u76UVSIUzjdbMJF1v+9l9x3OA6/rPzSZqqdXVuKbz+GIr23fm4ycC5wyiHDYsuj4GL4mSXeQ2pIKLM2xf33JKI0NeowT4mu2DG8R5M8ny7KnAVaS9ogttb9M8yzpIWo7UcL8FqSDnPOA/bf+1QcyqasOSLgLeZ/v8fPx84Au2Nx80ZijgBEH3+RTppjtuEaRE4Bq+dx2lq17n1VQiSpL/nu4GdpM0j7EH8wXArC3AqdRZcJ+keb2BiaSNSJu0QSH6pBYhXW8bkTa/g0JUVqmp9kwNhsL2bScwIrwVOM727xiTTg2CYAjE/CkYMj/MzSInUkjauxZ5njflaw0WTucO+H3Bo+
cB23dImiNpju2zJH257aSCJeZ7TGJH3W5K09KJ9ayJSNqFpHhzNmkz8iBJ77P9vQZhu6ZeBICkE0nNEUcBL7d9a37pOEmzUaV0tnMLSZ12B5JCQo97SJYiTTlD0k+A7+Tj1wILC8TtFDXV6ipe0wcCF0nqqSDtDHy6QbxF1LC3ynRmDF4b2zcQyuGTYvvZuWhts4ah9iEV9l5se+tc9PSZxgnW5QjgzySVJEjqgEeQijAHpbba8NuAI/Oajkj7kv/SJGAo4ARBx5F0me2Nc7XwhrYflnS17Q0KxC7ue9dlalWP16KmSkQNJH2U9BDuyUDuBCywvV97WbVL6c4CSfOB7+ZYAlYFdrV9+bTfGCwxkn7NmNTig8CvSRtv57ea2AyipkpNzWdqEAwLSR8nPU//ROouXGD7tnazCoLZQcyfgmGSx50Tse0m0t5VkHTWNC/bdti0jTiSFpLm6J8FViKNw+fbDjvjDiDpYmBb2/fm4+WB00f199e19aweeR65XU/1RtLKpOaOgeeTXVMv6pEVT54DPB94GDgfOMT2/a0mFrSKpPfb/vyEc/vY/kqB2K9krAj+XNsnNo3ZNWqq1dW8piWty9gm+pmlnAcmU04roabWpTH4MCml+NJVJD3J9h8rxL3U9vxsh7Sp7b9Jum6U1dol/dz2uos7N4pIWgHA9p8bx4oCnCDoNn2LIPsDT6LgIkjY1SSmqh4f5YdcF5F0I2mD4v58vBxwle112s2sHSZ0FhzW11mwqEhgwLhL57gANw6opBMErVF5QaHaMzUIho2k9YFdgVcDN9vetuWUgmDGE/OnIGgHScsCbyJtDPU3zTTtcA4yuSDiflKjwZ4k25BjZmO3dxfp6vOpg41w1/bbpmQbnKubWKko2cs+Qr0oK8OOLJKOJ3W/H5NP7Q483vbO7WUVtI2kK2zPm3CuREHEoutE0jqkdc8fz7Y1z8qN2p27ptVBe6uu01N8sX1q27kMi6wM+XD//a1UYWGOdSKwF7AvaT38TmBp2y8tEb8Gko4GDrZ9cT7eFHin7dc3jPsyHjnf+2STmH2xlyGtn65Fn3tUk/hhQRUE3WdHkoXMu4E9SFYnRW46wF8lbTHB92422tXsR1iSDINbSA/PXuX8MsDv2kundQ5lrLNgY0mLOgsaFN/sDJxm+2eSPgLMk7RfA8n3YAKSpu1wt33CdK8HS0RN+fuaz9QgGDa3A7cCdwCrtJxLEMwWYv4UDBVJzwXWZfwi5JHtZTQ9kiZddC2Q81Ek+fsXk8Zue5AsF4KGSDrf9hbAbSSlT0hFOAD7SfoTcIDtr7eSYLCkdMqOeirVUwrYqFTmtAk2OLsCP2oY8wxgW5IVFcBywOnAqDeJPHdCp/tZkoqoWgTdQ9JupIKNtSX9oO+luSTl1qacC2yZLYZOI9ld7UoaD8wm7soKZ+cBx0i6nT7ruoZ08Zrut7cS8BrK2Vt1agxeA0mfs/2BCaffP8m5mc45ku4DVpW0PXAt8AagSAGO7VfmL/8jq4quSLrPjTIbARdK6hVOrwHcKOlaklrU+o82oKT/BB4LbA18k3Q9X1IoX4CTgbtJNol/W8y/XSJCAScIOo6k9wDH2S5eqCBpA+BI0k19ke+d7atLv9coE5Ykw0HSSSQ/y5+SFva2Iz1Eb4bR97cuTY3OAknX2F5f0hbAp4AvAB+zvelivjVYQiSdSloIOzOf2hq4EPgDaYAZXbgNqaz8Vu2ZGgTDQsmLfBdgZWABcHwpCecgCKYn5k/BMFGyHNyKtPj/I+AlwPm2X9NmXtMh6aC+w2WBbYArmubc657vm+8sDZxne7MmcYPFkzudL5ytyrVdQR2zo66pelobSa8mNVJBug81ssHpsHpRle73oJtIWhNYm7SO88G+l+4BrrH9YMP4V9ieJ2lvYDnbn+/CdVKammp1Xb2mNWZvZeCsEmsjXRyD12AKRatrBimu6DqSHk8q3Dgc2IBko3kKSXHpkBZTa4V8z58S278ZIG
Zvntf7vDxJ6WzLgRMdH/9ntp9bIlaPUMAJgu4zFzg9dx0dByywfVuJwHmheAMV9L3rKDWrx4MxTswfPc5uKY9RoUZnwUP588uAb9g+VdJ+DWMG41kaWNf27wGUvOoPt71Xu2nNKGqq1FR7pgbBEFkd2Nf2VW0nEgSzjZg/BUPmNaQF3itt7yXpycDRLec0Lbb37j/Oi9XfLRC6ZzFxV+5IvpVQfxsKWZlyq7bzCKbH9qWSnkV37Khrqp5Wxfb3ge8XDNkp9aI+ine/B90lb7b+Bti80ltI0uakNaJeod5Sld5r5BiSWl1Xr+mlGftZLF0oZufG4CWR9HbgHcDTJF3T99Jc4IJ2smoPST8lNd8+TCpSu1PSlcD7gRe0mlxLDFJgswT0xj5/kfQUktr3agXjXyhpPdvXlgoYBThB0HFsfwL4hKT1SdKK50i62fa2TWNP9L2T1HvP2WbHsQOpenwfxqrHP9FqRjMQ20e0ncOIcYWkzSZ0FlzWMObvJP0XSV3oc/kan9MwZjCe1XvFN5nbSJPSoBxvZUylpuh9o+YzNQiGhe0PAUhahfFyyP835TcFQVCEmD8FQ+avWZ31wVz0dTupCLNL3EfqiG/Kodl24qPAD4DlgY8ViBssARPmP8EIou7ZUfca4c6lA41wvc1vSfcwtvkNadPXtldoEH5fYIGkcepFDeINi+3bTiAYHSpfI5DW7D8EnGj7OklPA85qGLMz5OIbbM+d7PWeWh3QpACnc9e0pH2At5CKIgUcLelQ2wdN/52LZSaMwZtwLPBjJlG0KqG21EF2JBUX/itwZC7IWpO0LnBem4nNME7JzRsHAFeQniXfaBq0V0RIqpfZS9JNJAuq3vNp4OLCsKAKghmCpFWBnYHXAnNLVB1LOo0x37uecga2D2wauwtMMTnoVUw/TJKUD6/zhkg63vYufQ+7cYxwBX1VJF1P6k4b11kAPMjgXpmPJU2YrrX9i6zOsp7t0wulPeuRdDDwDMZ7vv9yYrdvMDhZ6nUX0j24ikpNjWdqEAwLSa8Avgg8hbQQtCZwve3ntJpYEMwCZvv8KRgukr4O/DtpvPJe4F7gqlFWXpT0Q8bmfHNI0v3H2/7g1N8VBEFT1DE7akmPI3U6z2FM9fQY23e0mlhLZFu9rqgXBUEwgkhabbYVzGZ1ls1t35ePHwdc1HSNr4tj8FpImgdsQRrfXzDChb3VUbbEzV9fS2rgf2HsCZQnNz4ta/vuArGK22Utih0FOEHQbSS9g7QRuTKwgLR41djLMscu7ns3k1B4nRehNwGY6mFXSbJu5Kn68A9VhKpIehXQ8x89t6nnezA5fSo1rwZKKb9Ve6YGwbCQdDXJ43yh7Q0lbQ3saftNi/nWIAgaEvOnoC0krQWsYPuaxfzTVpH0wr7DB4Hf2L65QNxJ1W5CfSoIEr1NIUn7kxpyju3fKBo1JL2HMdXTWU2fetE9PfUiYJTVi4JgSiR9CjiHVARRTdVK0mdIRfHfnK2Fe8GiIoj5tu/Px8sCl9per+B7rEUHxuA1kPRR0hrqCfnUTqQmyf3ay6o9JD3N9k3560Nsv73tnGYykjYGbrF9S6F4R9l+3eLOPRrCgioIus/qwL62r6oQu7jv3UwivM7LkItvlgIOt7112/mMCjUKjyTtABzImCrCGsANQKgiFMT2CYxNPoJ63A7cSvJ8XaVQzJrP1CAYFg/kMcocSXNsnyXpy20nFQSzhJg/Ba1g+38lrSZpGdt/azufqbB9TqXQ/Zt4ywIvB66v9F5B0EW6Zkc9FzhdUjXV0w7xUdsLsnrRNiT1okOAkVQvCoLFcBOwO3BQVpw/j9S4dnLh97kEeDrwJeD1hWMH3eEw4L8l9RojdwK+VfINujIGr8SewAZ9BU6fBa4CZmUBDvBxSfvYvsv227M97oG239h2YjOUvYH1Jf2P7RLWnOP2x/J+5UZNAoYCThDMEEoqWkzwvXsGaXBcxPcuCKZC0hnAq0pIxwWTE6oI9Z
jGz7rHHYRlXRGGoVITKlFBl5G0kLSw9FlgJVKx2nzb/9RqYkEwg4n5UzAK5Pv/04Hv2/63tvOZjCnGyncDlwHv7XWNFnifZYCf2N6qRLwg6DpdtaOuoXraNbqmXhQES0K2/d4F+DfgCbbntpxSMEPps0gCOM/2lRXeY+TH4DWQdBbwStt35ePHAyfYflG7mbXDZM/meF7XR9Jc2/c0+P4PkSzllgP+0jsN/B041PaHBo4dBThB0G0kvQL4ImOKFmsC19seWNGipvVNEEyFpJOBDYGf0tfBaPtdrSU1w5B0me2NcyHOhrYflnS17Q3azm2mE5Z15cgLj8fVUKmp8UwNgmGTN1juJ00Y9wRWAI6x/adWEwuCGUzMn4JRQZKAdW1f13Yuk5GtJ24GjiU9p15L2rC4Anh7qYKZ3HF6qe1/LBEvCGYKXWs0yJv0O5PuFXNnY0GrpFOA35HUi+YBfwUuiXWcoItI+iawLnAbSf3mfOAK2w8OGO8gJm+CA2JNebYi6YnTvV5jbWTUx+A1kHQSMJ+0l2PSc+oS0lh/1l1/eb9lK9t35uMnAueUtDybjeQiuikpZckpaf8mxTaTERZUQdB99gM2Y4KiRZOAvQXiqXzvgIF974JgGsK2pz53SVoeOBc4RtLtjJdrDyoRlnXl6A2GKy0eF3+mBsGw6ClxkRYze4uQyp/3yxL+ocQVBBWI+VMwTJZgU2GUF/53mLBpfKikq2x/QNK/Dxq0T4UKYCmSUuInG+QZBDOKrtlRT6J6+pbSqqcdYheSetEXbN+V1Yve13JOQTAoK5Ge03cBfwL+OGjxTeayIlkFM43LSeNCkZ53d+avHw/8H7D2IEE7PgavwYn5o8fZLeUxKhwIXCRpQT7eGfh0i/nMFA7Mn5cFNgauJl3P65OeAZsXep91JL0UOM32wyUChgJOEHScmooWkq6wPa/veCmS3Om6TWMHwUQkPQ643/ZD+XgpYBnbf5n+O4MlJf+M/0ryet8DWJGkinBHq4nNYCRdn7/8mu2DW01mhlBTpSZUooKZTChxBUF9Yv4UDANJv2aaTQXbA20qDANJFwFfAr6XT70GeI/tzXIhzvMGjNuvQvUgcFvDDb0gmFF0zY66puppV+maelEQTIekZwMvBt4NLGX7qS2nFMxAJH0DONH2j/LxS4CdbL91wHidHYMHw0HSuqTxFsCZs7h4uDiSTgA+bvvafPxc4D9sv6ZQ/G2BvUiNuQuAw2zf2CRmKOAEQffpKVqcRyFFi37fO0l/7p0m+941iR0E03AGsC1wbz5eDjgd+KfWMpp5vJW0iPU74Ii2k5kN2H523vTerO1cZhA1VWqKP1ODYFQIJa4gqEfMn4Jh0lvcn2pToc3cloA9gK8AXydtYFwM7ClpOeD/NYj7GOBm23/Lz7pXSzrS9l1NEw6CGcIDeSw4R9Ic22dJ+nLbSU1FZdXTTtE19aIgmA5JLwe2BF5AKlo4k7T+0jTuD5neimqHpu8RdJLNbL+ld2D7x5I+P2iwjo/BiyHpeNu7TFCgXMRstIvskQtuouimDuv0im8AbP8sF3MWwfZCYKGkFYHd8te/Bb4BHG37gUcbMxRwgqDjSHoscD9pgXdPYAWSokVjL8savndBMBWTdTw26YIMHomkj5Pki/8EHAcssH1bu1nNLCR9zvYHFncuGJzKym/VnqlBEATBzCfmT8EwkXSt7fUWd242IOkqkiT5WsCPgJOB59h+aZt5BcGoIGkhaXNwf+BJpEKO+bZHsuGppupp1+iaelEQTIekg0kFN+fZvqVg3K8AqwJH51O7kayZTwKwfU6p9wq6g6SfkP7een8XewAvsP3ihnFn9Rhc0mq2fz9BgXIRPXvmICiJpO+QmmT7r+flbe9W8D1WItmH7wncAhwDbAGsZ3urRxtvTqnEgiAYLpLOz1/eRvJNvRM4GPgM8GtJv86eyU1YR9JLJcW9IhgG90nql+zfiGSXFBTC9ifygtU7gdWAc/JCXF
CO7SY595KhZzGzmahS8xWaK78N45kaBEEQzHxi/hQMk1skfUTSWvnjw6SFwpFF0sqS/l3SoZK+3fsoEPrhbDn1KuAg2+8jzXeCIEjsCPyFZPdyGvAr4BWtZjQ9PdXT/8mKA9uQFLNmIw9k2/BF6kWkgsMg6CL32T6uv/hG0ucKxH2+7V1t/zB/7A5safucKL6Z1ewGrAycCJyQvy6xWd+5MXhJcvHNUsDhtn8z8aPt/IIZy17AdcA++ePn+VwRJJ1I2mtYDni57R3y82pvYPmBYoYCThDMTHK13oW212kQo7jvXRBMhaT5wHdJA1aROhd2tX15q4nNQCStCuwMvBaYO5ulIUsh6e3AO4CnkRYze8wFLrBdyiJp1tOGSk2JZ2oQBEEw84n5UzBMJD0R+DjJxgHgXOATo6zcJ+lC0sLm5cBDvfO2v98w7n8DXwY+DLzC9q8l/cz2c5vEDYKZgqT3MGZHPfLUVD3tGl1TLwqC6ZB0he15E85d03RdUtL1wMts35SPnwacaruYPUkQ9OjiGLwGks4AXmX77rZzCYKmZCu55wDPBx4GzgcOsX3/wDGjACcIZi49ObgCcXq+dx8GGvneBcF0SFoa6G1w3xh/Y2XJCh67kCr+FwDHZ2/SoCH5PvkE0qLYB/teume2TcBqIel821tIuocxj2Hlzw+TrNUOsP31Su9f5JkaBEEQzHxi/hQEk1PLYljSusDbgItsf0fS2sAutkt01QdB5+maHXVf0clngZWYxUUnkh5HUoeeQ7JbWJHUgHJHq4kFwaOgdtOapBeTxts35VNrAf9q+/QmcYOZhaTPAHcD34x7aHMknQxsCPyUPmVy2+9qLalgxiHpeNu7SLqWsf2ARZRqLJd0PPBnku0UwO7A423vPHDMKMAJgmA6SvveBcFUSNoZOM32PZI+AswD9rN9RcupzRgk7U/qeruq7VxmMtlKbQvSoPCC+BseDqFSEwRBEIwCMX8KaiPpy7b3lfRDJl+E3KGFtJYISfuRxms/ajuXIJiNSFof2BV4NXCz7W1bTmlS2lA9HVW6pl4UBJNRu2ktryn/BFgb2AH4J+DDsR4X9CNpJ+DpwAa2Xz/A93d2DF4DSW+Y7LztI4adSzBz6TXESlpzstdL2Z5J+rntdRd37lHFjAKcIAimIvverQMcRZJPv7Xvtctsh+dwUIye5KikLYBPAV8APmZ705ZTm3FIWgVYtnds+/9aTGdGIemjpM7CE/KpnUjdhfu1l9XsIVRqgiAIgjaJ+VMwDCRtZPtySS+c7HXb5ww7pyUlKxk+Dvgb8ABpc922Vxgw3lQdkb24YbUbBH2Muh1126qno0jX1IuCoA1iTTmYiKSlgHfZ/lLBmJ0dg9cgK7Tdb/uhfLwUsIztv7SbWTBTkfRkYH4+vMT27QVjHw0cbPvifLwp8M5BivUWxYwCnCAIpqKG710QTIWkK21vmFVarrV9bO9c27nNFCS9Avgi8BSShPOawPW2n9NqYjMISTeSOinuz8fLAVeFKksQBEEQzHxi/hQMG0n/ADwzH3bCwlfSE4FnML4hYKANi76OyPcCFwM3979eqiMyCLrOTLGjns2qp11RLwqCR4Ok6/OXX7N9cIM4saYcPAJJl9jepFLszo3BSyPpYmBb2/fm4+WB02ejXWRQH0m7AAcAZ5OKs7cE3mf7e4XiX09qpuo1qq8B3Ag8yICNHY8pkVgQBDOWvUi+d1/Nx7uTujkH9r0Lgmn4naT/ArYDPidpGZLHdVCO/YDNgIV5Yro1Sc45KMctpM2E3kbbMkBIRQdBEATB7CDmT8HQkLQVcATwv6RFyNUlvcH2uW3mNR2S3gzsAzwVuIo0N7kQ2GaQeH3Kh8sDhxIKEUEwFasD+3bdjtr2HfneNxu5HbgVuANYpeVcgqAItp+dC+s2axgq1pSDybhA0sGkseF9vZNNrcm6OAavxLK94hsA2/dmC8kgqMGHgfk91RtJKwMLgSIFOMD2heIsIhRwgi
CYkhq+d0EwFXmAtj2pU+EXklYD1rN9esupzRh61geSrgY2tP2wpKttb9B2bjMFSSeRpBB/SpLM3g64hNyNa/td7WUXBEEQBEFNYv4UDBNJlwO7274xHz8T+I7tjdrNbGqyVdR84GLbz5P0LOAztl9VKH4oRATBNIQddfeYKepFQQCLLGv+mtcjnwk8C/hxU/WQWFMOJkPSWZOctu0XNYzbuTF4DSRdAOzdK2iStBHJwmfzdjMLZiKSrrW9Xt/xHODq/nOjRijgBEEwHVdI2myC791lLecUzFCyP+gJklaRtEY+fUObOc1A7spykOcBx0i6nb4OgKAIJ+aPHme3lEcQBEEQBMMn5k/BMFm6t/APYPt/JC3dZkJLwP2275eEpGVs3yCppJVMKEQEwSRMZUdNsk0MRpsZoV4UBJlzgS0lPQE4HbiUVDi7R5OgvTXlvuPfA7+f+juC2YDtrSuF7uIYvAb7Agsk3UJSAlqVdD0HQQ1Ok/QT4Dv5eFfgRy3ms1hCAScIgimp4XsXBFMhaQfgQMYWhNYAbrAdC0KFyB0h95MGxXsCKwDH2P5Tq4kFQRAEQRDMAGL+FAwTSYcBDwFH51N7AEvZfmN7WU2PpBNJVm37Ai8C7iRtYry0YdxQiAiCacgquC9igh217Te1nFqwhIR6UTATkHSF7XmS9gaWs/15SVfZfl7buQUzD0lPBj4DPMX2SyStC2xu+1sN43ZuDF6LXHjUK6a/samaVRBMhiSRLIznA1vk0+fZPnHq72qfKMAJgmBKJK053eu2fzOsXIKZTywI1UPS+ba3kHQPyRYJUhEOwMPAn4ADbH+9lQRnAJKOt71LltV/xOAqNtyCIAiCYOYT86dgmEhaBngnfYuQwNdt/629rJYcSS8EVgROs/33hrH2B44LhYggmJywo+4uU6kXRbNa0EUkXQm8A/gS8Cbb1020FQmCUkj6MXAY8GHbG0h6DHBl07+3ro/BSyFpZ9I4/h5JHwHmAfv1LKmCoCRdfFZEAU4QBEEwEsSCUHtIWgm40HZJ+fdZhaTVbP9+qo232HALgiAIgiAISiFpKeA6289qO5cgCEYfSQuBnYDPAiuRCjnm2/6nVhMLFks0qwUziVx8+17gAtufk/Q0ksXau1pOLZiBSLrU9nxJV9reMJ9rpLgUY/AxJF1je31JWwCfAr4AfMz2pi2nFsxAJB0BHGz70rZzWVIe03YCQRAEQZC5S9LyJD/gYyTdDtzXck6zAtt3SNqq7Ty6TC6+WQo4vKLHcBAEQRAEQRBg+yFJN0paI2xIgiBYAnYg2VHvw5gd9SdazShYUh7IazZzJM2xfZakL7edVBAMgu1zgHMAJM0B/hjFN0FF7stNpwaQtBlwd5OAMQYfx0P588uAb9g+VdJ+bSYUzGg2BfaQ9BvSnqEYcZvvKMAJgiAIRoUdgb8C7yZ5p64IfLLVjGYRtn/fdg5dJ0/CHpa0ou1GE7ogCIIgCIIgWAxPAK6TdAl9jQu2d2gvpSAIRomeHTVwG4+0o95PUthRjz69ZrXziGa1oONIOhZ4G2nj/lJgBUlfsX1Au5kFM5T3AD8Ani7pAmBl4DUF4sYYPPE7Sf8FbAd8LltzzWk5p2Dm8uK2E3i0hAVVEARBMBJIeg9wnO3ftZ1LEAyKpJOBDYGfMn4SFh09QRAEQRAEQTGyjcMjyN3lQRAEiyXsqEcfSY8lqReJMfWiY2z/qdXEgmAAevY/kvYA5gEfBC4fZQWDoLtIWhv4LbAO6R56I/C8phY2MQZP5OfT9sC1tn8haTVgPdunt5xaMIOQAX1QOQAADeFJREFU9MTpXh/l8VAU4ARBEAQjgaSPA7sAfwKOAxbYvq3drILg0SHpDZOdt33EsHMJgiAIgiAIgiAIgumQtFoo4o4ePfUiSffwSPWih///9u4+xtKyPuP49xppRV4WW8IKtAjYWFJRcCmkVJGK0LQWocY3NKyx1jYa/lBjTUxbW6IQTbUamxSssYmtuo
2wMdTXIiUlvEitRQVRtJoURSiyugsNriygXP3jnCm76zLLMrN7z5z9fpKTc+77nHnmymQmc5/n/J7fzeTcmd2LtKIk+TrwTOCfgL9te3WSm9oePziaZlCSLwFnz1/sm+RU4KK2zxibbLYkWQ3sOz92ay4tpSS3MlkHBXgycPf08ROB29oePTDegizAkSQtK0mOA84BXgzc3vaMwZGkRy3J/sCWtj+djh8HPL7tj8cmkyRJ0izY7sPYn9F21R6MI0kawO5FWomSvB54C3ATcCaTD1M/2vY5Q4NpJiU5CbgYOItJx6V3Ai9o+73HeDzX4FtJcjbwHuBwYAOTv+dvtj12aDDNpCQfBC5r+9np+PnAC9u+dmyyR2YBjiRpWUlyKPBS4OXAgbYh1UqS5AvAGW1/NB0fAFzR9lljk0mSJGmWJLkAuBP4CJOrAM8FDmv7l0ODSZL2CLsXaaVLEuBxbX8yOotmU5LfBD7AZCu/M9v+YAmO6RocSHIT8DzgyrZrkpwGrG37msHRNIOS3Lx996odzS0nFuBIkpaFJOcx2YLqEGA9cGnbW8amknbN/H7WO5uTJEmSFmNHWza4jYMkSVqukrwDeFfbe6bjXwD+pO1bxybTLEnyKbbtVPM0JgUzdwO0PXuRx3cNDiS5oe2J00KcNW0f2ht/DtozknwOuBb46HTqXODUtr8zLtXC9hkdQJKkqSOAN7a9cXQQaRE2Jzmh7ZcBkvw6cN/gTJIkSZo9m5OcC3yMyYcMrwA2j40kSZL0iJ7f9s/mB23vTvJ7gAU4Wkp/vZuP7xp84p5p5/drgHVJNrB3/hy0Z7wCOB+4bDq+Zjq3bNkBR5K0rCRZDew7P25728A40i6Z7i/8MeB/mLQhPRQ4p+2XhgaTJEnSTElyFPA3wLOZnPz/PJMLGr4zLpUkSdKOJfkqcFLb+6fjJwA3tD12bDLNoiRHA3e23TIdPwF40mLXyq7BJ5Lsz+Si0zkm3UgOAta13Tg0mLRMWIAjSVoWkpwFvBc4HNgAHAl8wzdhWmmS/BxwzHT4X20fHJlHkiRJkiRJGinJW4CzgA9Np14NfLLtu8al0qxKcgPwrLYPTMc/D3y+7Uljk82GJG8CLml7x+gsmn1JfhV4M3AUW+3u1PZ5ozLtjFtQSZKWiwuBk4Er265JchqwdnAmaZckeSlweduvJXkrcEKSC+e3pJIkSZKWQpJDgD/mZ09C/uGoTJIkSY+k7V8luQk4Yzp1QdvPjcykmbbPfPENQNsHpkU4i+Ia/P8dCFyRZBNwCbC+7V2DM2l2rQf+Dvh74KeDszwqFuBIkpaLB9tuTDKXZK7tVUneNzqUtIv+ou36JKcApzPZd/j9wG+MjSVJkqQZ8wngWuBKVshJSEmStHdrezlw+egc2iv8IMnZbT8JkOT3gR8uwXFdgwNt3wa8LclxwDnA1Ulub3vGTr5Ueix+0vb9o0PsCgtwJEnLxT1JDmCygF2XZAOweXAmaVfNv/E6E/hg288kuXBkIEmSJM2k/dq+ZXQISZKkxyLJlcCDwEVtPz06j2bO65h8xnDRdPw94JVLcFzX4NvaAHwf2AisHpxFs+tTSc4DLgPun59su2lcpIWl7egMkiSRZD9gCxAmW0+tAtYt53+i0vaSfBq4A/ht4ATgPuCLbY8fGkySJEkzZVrkfX3bz47OIkmStKuSHA4cBpzc9qKdvV56LKYX/NL2R0t0PNfgwLQY4mXAIUy2B7q07S1jU2lWJbl1B9Nt+5Q9HuZRsgBHkjRUkuvanpLkXmD+n1Km9w8Bm4B3t714SEBpF0wLyX4XuLntt5McBjyj7RWDo0mSJGmGTN8/7c/kCsAHmbyHattVQ4NJkiRJgyU5CDgfOHU6dTXw9rb/u8jjugYHkrwTuKTtjaOzSMuRBTiSpGUtycFMqsqPGZ1FerSSrAb2nR+3vW1gHEmSJM2gJL8IPJVt151Xj0skSZK0rSQ38/BFl9s8xaRw4bg9HEl7gSQfB7
4G/ON06pXA8W1ftATHdg0+5Tlw7SlJng48jW1/3z48LtHCLMCRJC17SQ5re+foHNLOJDkbeA9wOJM9cJ8MfLPtsUODSZIkaaYk+SPgDcAvAzcCJzO5cOH0ocEkSZK2kuTIhZ5v+909lUV7jyQ3tn3mzuYew3FdgwNJzgLey8PnwI8EvuE5cO0OSc4HnsukAOezwPOB69q+ZGSuhcyNDiBJ0s5YfKMV5AImb7y+1fZo4AzgC2MjSZIkaQa9ATgJ+G7b04A1wKJa6kuSJC21tt/d+gbcDdy71U3aHe5Lcsr8IMmzgfuW4LiuwScuZNtz4KfjOXDtPi9h8jv2/bavBo4HDhobaWH7jA4gSZI0Qx5suzHJXJK5tlcled/oUJIkSZo5W9puSUKSx7f9ZhK37ZUkSctSktcCbwO28PCWVAWeMiyUZtnrgA8nmf+Q/m7gVUtwXNfgE54D1550X9uHkvwkySomXZeOGB1qIRbgSJIkLZ17khwAXAOsS7IB2Dw4kyRJkmbP7UmeCPwz8K9J7gbcwkGSJC1Xbwae3vaHo4NodiV501bDDwP7Tx9vZtKp/KuL/BauwSfmz4Ffi+fAtfvdMP27+yDwJeBHwL+PjbSwtN35qyRJkrRTSfZn0s50DjiXSSvEdW03Dg0mSZKkmZXkt5isOy9v+8DoPJIkSdtLcjnworY/Hp1FsyvJ+dOHxzDZKuoTQICzgC+2XbuE32uvXYMn2Y9JN6sAa4FVTM6BbxoaTDMvyVHAqraLLabbrSzAkSRJWiLTqywuaXvH6CySJEmSJEnScpBkDfAh4D+A++fn275+WCjNrCTXAGe2vXc6PhD4TNtTxyZb2ZJc1/aUJPfy8FZymd4/BGwC3t324iEBtVdIchiwqe39O33xIG5BJUmStHQOBK5Isgm4BFjf9q7BmSRJkiRJkqSRPgD8G3Azkw/qpd3pScDWXWkemM5pEdqeMr0/cEfPJzkYuB6wAEe700eAX0ny8bZvHh1mR+yAI0mStMSSHAecA7wYuL3tGYMjSZIkSZIkSUMk+UrbNaNzaO+Q5M+BlwGXTadeyKRr+TvHpdo7JDms7Z2jc2i2JQnwtLZfH51lRyzAkSRJWmJJDgVeCrwcOLDtcYMjSZIkSZIkSUMkeQfwHeBTbLsF1aZRmTTbkpwAPGc6vKbtV0bmkbQ4SVYD+86P2942MM6CLMCRJElaIknOY3J1xSHAeuDStreMTSVJkiRJkiSNk+TWHUy37VP2eBhJ0oqR5GzgPcDhwAbgSOAbbY8dGmwB+4wOIEmSNEOOAN7Y9sbRQSRJkiRJkqRl4tfabtl6Ism+j/RiSZKmLgBOBq5suybJacDawZkWNDc6gCRJ0qxo+6dtb0yyOsmT52+jc0mSJEmSJEkDXf8o5yRJ2tqDbTcCc0nm2l4FnDg61ELsgCNJkrREkpwFvJft2iECy7YdoiRJkiRJkrQ7JDkU+CXgCUlO2OqpVcB+Y1JJklaQe5IcAFwDrEuyAdg8ONOC0nZ0BkmSpJmQ5CbgeWzXDrHtawZHkyRJkiRJkvaoJK8C/oBJt4L/3Oqpe4F/aHvZiFySpJUhyf7AfUx2djoXOAhYN+2KsyxZgCNJkrREktzQ9sRpIc6atg8luant8aOzSZIkSZIkSSMkWQsUOIqHd+do27cPCyVJWvaSvAm4pO0do7M8Wm5BJUmStHTm2yFeywpphyhJkiRJkiTtZq8E7ga+DGwZnEWStHIcCFyRZBNwCbC+7V2DMy3IDjiSJElLJMl+TE4iBFjLZD/rdW03DQ0mSZIkSZIkDZLka22fPjqHJGllSnIccA7wYuD2tmcMjvSI5kYHkCRJWumSXDd9eBdwD5Mrev4WeAdwa5Jbk5w3Kp8kSZIkSZI00PVJnjE6hCRpxdoAfB/YCKwenGVBdsCRJEnazZIcDFzf9pjRWSRJkiRJkqQ9IcnNQIF9gKcC/w3cz6R7dNseNzCeJGmZm17Y/DLgEGA9cGnbW8amWt
g+owNIkiTNurYbkzx3dA5JkiRJkiRpD3rB6ACSpBXtCOCNbW8cHeTRsgOOJEmSJEmSJEmSJEmSlp0kq4F958dtbxsYZ0FzowNIkiRJkiRJkiRJkiRJ85KcleTbwK3A1cB3gH8ZGmonLMCRJEmSJEmSJEmSJEnScnIhcDLwrbZHA6cDXxgbaWEW4EiSJEmSJEmSJEmSJGk5ebDtRmAuyVzbq4ATR4dayD6jA0iSJEmSJEmSJEmSJElbuSfJAcC1wLokG4DNgzMtKG1HZ5AkSZIkSZIkSZIkSZIASLIfsAUIsBZYBaxru2losAVYgCNJkiRJkiRJkiRJkqThklzX9pQk9wLzBS2Z3j8EbALe3fbiIQEXYAGOJEmSJEmSJEmSJEmSlr0kBwPXtz1mdJbtWYAjSZIkSZIkSZIkSZKkFSHJYW3vHJ1jexbgSJIkSZIkSZIkSZIkSYswNzqAJEmSJEmSJEmSJEmStJJZgCNJkiRJkiRJkiRJkiQtggU4kiRJkiRJkiRJkiRJ0iJYgCNJkiRJkiRJkiRJkiQtwv8B8fLlZPf8gywAAAAASUVORK5CYII=\n","text/plain":["<Figure size 2880x576 with 1 Axes>"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":573},"id":"73UChGrePhr1","outputId":"af8b97e5-cec0-469e-c55d-433364ee31a5"},"source":["exp = train_df.y.str.split(',').explode().value_counts()\n","top_100_tags = list(exp[0:25].index)\n","# z = lambda r : True if r.split(',') in top_100_tags else False\n","z = lambda r : True if all(x in top_100_tags for x in r.split(',') ) else False\n","top_100_idx = train_df.y.map(z)\n","train_df = train_df[top_100_idx]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Id</th>\n","      <th>Title</th>\n","      <th>Body</th>\n","      <th>Tags</th>\n","      <th>CreationDate</th>\n","      <th>Y</th>\n","      <th>y</th>\n","      <th>text</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>13</th>\n","      
<td>34556906</td>\n","      <td>output FILE ,is this a fault?</td>\n","      <td>\\r\\nmy code here\\r\\n\\r\\n        #include &lt;stdi...</td>\n","      <td>&lt;c++&gt;</td>\n","      <td>2016-01-01 14:20:01</td>\n","      <td>LQ_EDIT</td>\n","      <td>c++</td>\n","      <td>output FILE ,is this a fault?</td>\n","    </tr>\n","    <tr>\n","      <th>24</th>\n","      <td>34560768</td>\n","      <td>Can I throw from class init() in Swift with co...</td>\n","      <td>&lt;p&gt;I'd like my class &lt;em&gt;init()&lt;/em&gt; in Swift ...</td>\n","      <td>&lt;swift&gt;</td>\n","      <td>2016-01-01 22:42:24</td>\n","      <td>HQ</td>\n","      <td>swift</td>\n","      <td>Can I throw from class init() in Swift with co...</td>\n","    </tr>\n","    <tr>\n","      <th>25</th>\n","      <td>34560942</td>\n","      <td>C# - Count a specific word in richTextBox1 and...</td>\n","      <td>&lt;p&gt;I'm not sure, if this question is unique, b...</td>\n","      <td>&lt;c#&gt;</td>\n","      <td>2016-01-01 23:06:53</td>\n","      <td>LQ_CLOSE</td>\n","      <td>c#</td>\n","      <td>C# - Count a specific word in richTextBox1 and...</td>\n","    </tr>\n","    <tr>\n","      <th>30</th>\n","      <td>34562551</td>\n","      <td>c++ vector type function implemetation</td>\n","      <td>class City\\r\\n    {\\r\\n       private:\\r\\n...</td>\n","      <td>&lt;c++&gt;</td>\n","      <td>2016-01-02 04:17:27</td>\n","      <td>LQ_EDIT</td>\n","      <td>c++</td>\n","      <td>c++ vector type function implemetation</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>34566364</td>\n","      <td>japanese and portuguese language cannot support</td>\n","      <td>My site Japanese supported. 
But Portuguese  la...</td>\n","      <td>&lt;php&gt;</td>\n","      <td>2016-01-02 13:20:49</td>\n","      <td>LQ_EDIT</td>\n","      <td>php</td>\n","      <td>japanese and portuguese language cannot support</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>44992</th>\n","      <td>60458575</td>\n","      <td>MySQL how to query five tables in one SELECT</td>\n","      <td>&lt;p&gt;I have 5 tables as follows:&lt;/p&gt;\\n\\n&lt;ul&gt;\\n&lt;l...</td>\n","      <td>&lt;mysql&gt;</td>\n","      <td>2020-02-28 20:07:09</td>\n","      <td>LQ_CLOSE</td>\n","      <td>mysql</td>\n","      <td>MySQL how to query five tables in one SELECT</td>\n","    </tr>\n","    <tr>\n","      <th>44993</th>\n","      <td>60460748</td>\n","      <td>Copy value of list not reference</td>\n","      <td>&lt;p&gt;I have a list that i want to compare to aft...</td>\n","      <td>&lt;python&gt;</td>\n","      <td>2020-02-28 23:54:33</td>\n","      <td>LQ_CLOSE</td>\n","      <td>python</td>\n","      <td>Copy value of list not reference</td>\n","    </tr>\n","    <tr>\n","      <th>44994</th>\n","      <td>60461193</td>\n","      <td>Weird question, but how do I make a python scr...</td>\n","      <td>&lt;p&gt;Before you get confused, I am going to comp...</td>\n","      <td>&lt;python&gt;&lt;python-3.x&gt;</td>\n","      <td>2020-02-29 01:25:40</td>\n","      <td>LQ_CLOSE</td>\n","      <td>python,python-3.x</td>\n","      <td>Weird question, but how do I make a python scr...</td>\n","    </tr>\n","    <tr>\n","      <th>44996</th>\n","      <td>60461754</td>\n","      <td>Does Python execute code from the top or botto...</td>\n","      <td>&lt;p&gt;I am working on learning Python and was won...</td>\n","      <td>&lt;python&gt;</td>\n","      <td>2020-02-29 
03:33:59</td>\n","      <td>LQ_CLOSE</td>\n","      <td>python</td>\n","      <td>Does Python execute code from the top or botto...</td>\n","    </tr>\n","    <tr>\n","      <th>44998</th>\n","      <td>60465318</td>\n","      <td>how to implement fill in the blank in Swift</td>\n","      <td>&lt;p&gt;\"I _____ any questions.\"&lt;/p&gt;\\n\\n&lt;p&gt;I want t...</td>\n","      <td>&lt;ios&gt;&lt;swift&gt;</td>\n","      <td>2020-02-29 12:50:43</td>\n","      <td>LQ_CLOSE</td>\n","      <td>ios,swift</td>\n","      <td>how to implement fill in the blank in Swift</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>9968 rows × 8 columns</p>\n","</div>"],"text/plain":["             Id  ...                                               text\n","13     34556906  ...                      output FILE ,is this a fault?\n","24     34560768  ...  Can I throw from class init() in Swift with co...\n","25     34560942  ...  C# - Count a specific word in richTextBox1 and...\n","30     34562551  ...             c++ vector type function implemetation\n","48     34566364  ...    japanese and portuguese language cannot support\n","...         ...  ...                                                ...\n","44992  60458575  ...       MySQL how to query five tables in one SELECT\n","44993  60460748  ...                   Copy value of list not reference\n","44994  60461193  ...  Weird question, but how do I make a python scr...\n","44996  60461754  ...  Does Python execute code from the top or botto...\n","44998  60465318  ...        
how to implement fill in the blank in Swift\n","\n","[9968 rows x 8 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":653},"id":"e_z1IU-XT0a0","outputId":"dc80c79e-11a0-4e63-bd40-8d933dbbb6aa"},"source":[" import nlu\n","# load a trainable pipeline by specifying the train  prefix \n","\n","unfitted_pipe = nlu.load('train.multi_classifier')\n","#configure epochs\n","unfitted_pipe['multi_classifier'].setMaxEpochs(100)\n","unfitted_pipe['multi_classifier'].setLr(0.005)      \n","#  fit it on a dataset with label='y' and text columns. Labels separated by ','\n","fitted_pipe = unfitted_pipe.fit(train_df[['y','text']], label_seperator=',')\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df[['y','text']])\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>y</th>\n","      <th>text</th>\n","      <th>multi_classifier_confidences</th>\n","      <th>sentence</th>\n","      <th>default_name_embeddings</th>\n","      <th>multi_classifier_classes</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      
<th>13</th>\n","      <td>c++</td>\n","      <td>output FILE ,is this a fault?</td>\n","      <td>[]</td>\n","      <td>output FILE ,is this a fault?</td>\n","      <td>[0.04620636999607086, -0.04046135023236275, -0...</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>24</th>\n","      <td>swift</td>\n","      <td>Can I throw from class init() in Swift with co...</td>\n","      <td>[0.86285734, 0.98327714]</td>\n","      <td>Can I throw from class init() in Swift with co...</td>\n","      <td>[0.053270746022462845, -0.00784565694630146, -...</td>\n","      <td>[swift, c]</td>\n","    </tr>\n","    <tr>\n","      <th>25</th>\n","      <td>c#</td>\n","      <td>C# - Count a specific word in richTextBox1 and...</td>\n","      <td>[0.64955217]</td>\n","      <td>C# - Count a specific word in richTextBox1 and...</td>\n","      <td>[-0.005682709161192179, -0.023547030985355377,...</td>\n","      <td>[regex]</td>\n","    </tr>\n","    <tr>\n","      <th>30</th>\n","      <td>c++</td>\n","      <td>c++ vector type function implemetation</td>\n","      <td>[0.9755105, 0.77180904, 0.9789763]</td>\n","      <td>c++ vector type function implemetation</td>\n","      <td>[0.024628309532999992, -0.015623562969267368, ...</td>\n","      <td>[c++, python-3.x, python]</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>php</td>\n","      <td>japanese and portuguese language cannot support</td>\n","      <td>[0.55255216]</td>\n","      <td>japanese and portuguese language cannot support</td>\n","      <td>[0.038325726985931396, -0.005848723463714123, ...</td>\n","      <td>[php]</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>44992</th>\n","      <td>mysql</td>\n","      <td>MySQL how to query five tables in one SELECT</td>\n","      <td>[0.6404308, 
0.99544823]</td>\n","      <td>MySQL how to query five tables in one SELECT</td>\n","      <td>[0.006962132174521685, -0.03580842167139053, -...</td>\n","      <td>[sql, mysql]</td>\n","    </tr>\n","    <tr>\n","      <th>44993</th>\n","      <td>python</td>\n","      <td>Copy value of list not reference</td>\n","      <td>[0.591653]</td>\n","      <td>Copy value of list not reference</td>\n","      <td>[0.025995030999183655, 0.001833591377362609, -...</td>\n","      <td>[javascript]</td>\n","    </tr>\n","    <tr>\n","      <th>44994</th>\n","      <td>python,python-3.x</td>\n","      <td>Weird question, but how do I make a python scr...</td>\n","      <td>[0.7427199, 0.99999976, 0.70473063, 0.72811186...</td>\n","      <td>Weird question, but how do I make a python scr...</td>\n","      <td>[0.018493961542844772, -0.04660267382860184, -...</td>\n","      <td>[html, python, javascript, node.js, php]</td>\n","    </tr>\n","    <tr>\n","      <th>44996</th>\n","      <td>python</td>\n","      <td>Does Python execute code from the top or botto...</td>\n","      <td>[0.9977689, 0.794142]</td>\n","      <td>Does Python execute code from the top or botto...</td>\n","      <td>[0.01413149293512106, -0.02844131551682949, -0...</td>\n","      <td>[python, php]</td>\n","    </tr>\n","    <tr>\n","      <th>44998</th>\n","      <td>ios,swift</td>\n","      <td>how to implement fill in the blank in Swift</td>\n","      <td>[0.9999993]</td>\n","      <td>how to implement fill in the blank in Swift</td>\n","      <td>[0.019475314766168594, -0.022571099922060966, ...</td>\n","      <td>[swift]</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>10944 rows × 6 columns</p>\n","</div>"],"text/plain":["                              y  ...                  multi_classifier_classes\n","origin_index                     ...                                          \n","13                          c++  ...                                        
[]\n","24                        swift  ...                                [swift, c]\n","25                           c#  ...                                   [regex]\n","30                          c++  ...                 [c++, python-3.x, python]\n","48                          php  ...                                     [php]\n","...                         ...  ...                                       ...\n","44992                     mysql  ...                              [sql, mysql]\n","44993                    python  ...                              [javascript]\n","44994         python,python-3.x  ...  [html, python, javascript, node.js, php]\n","44996                    python  ...                             [python, php]\n","44998                 ios,swift  ...                                   [swift]\n","\n","[10944 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"markdown","metadata":{"id":"DL_5aY9b3jSd"},"source":["# 4. Evaluate the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"0YDA2KunCeqQ","outputId":"8f72b51d-8e4c-49e8-884e-af5b0fdfa1ac"},"source":["from sklearn.preprocessing import MultiLabelBinarizer\n","from sklearn.metrics import classification_report\n","from sklearn.metrics import f1_score\n","from sklearn.metrics import roc_auc_score\n","mlb = MultiLabelBinarizer()\n","mlb = mlb.fit(preds.y.str.split(','))\n","y_true = mlb.transform(preds['y'].str.split(','))\n","y_pred = mlb.transform(preds.multi_classifier_classes.str.join(',').str.split(','))\n","print(\"Classification report: \\n\", (classification_report(y_true, y_pred)))\n","print(\"F1 micro averaging:\",(f1_score(y_true, y_pred, average='micro')))\n","print(\"ROC: \",(roc_auc_score(y_true, y_pred, average=\"micro\")))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Classification report: \n","               precision    recall  f1-score   support\n","\n","           0       
0.67      0.80      0.73       840\n","           1       0.22      0.62      0.32       237\n","           2       0.37      0.47      0.41       467\n","           3       0.38      0.67      0.49       561\n","           4       0.48      0.54      0.51       831\n","           5       0.54      0.58      0.56       697\n","           6       0.49      0.73      0.59       792\n","           7       0.58      0.39      0.47      1352\n","           8       0.20      0.18      0.19       158\n","           9       0.49      0.77      0.60      1431\n","          10       0.57      0.75      0.65      2343\n","          11       0.36      0.56      0.43       833\n","          12       0.34      0.24      0.28       300\n","          13       0.51      0.74      0.60       539\n","          14       0.19      0.28      0.23       106\n","          15       0.63      0.67      0.65      1283\n","          16       0.61      0.74      0.67      1402\n","          17       0.21      0.25      0.23       411\n","          18       0.38      0.47      0.42       261\n","          19       0.90      0.10      0.19       183\n","          20       0.56      0.75      0.64       451\n","          21       0.56      0.73      0.63       485\n","          22       0.45      0.60      0.51       340\n","          23       0.34      0.13      0.19       220\n","          24       0.53      0.73      0.61       268\n","\n","   micro avg       0.50      0.63      0.56     16791\n","   macro avg       0.46      0.54      0.47     16791\n","weighted avg       0.51      0.63      0.55     16791\n"," samples avg       0.54      0.65      0.55     16791\n","\n","F1 micro averaging: 0.5556585043017869\n","ROC:  0.7920968190895907\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"mhFKVN93o1ZO"},"source":["# 5. 
Let's try different Sentence Embeddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"CzJd8omao0gt","outputId":"c3903ffc-ee61-47c1-87cf-bb1876436e25"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possible sentence embedding we could use. Let's use BERT!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language <en> NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model 
sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP 
model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language <fi> NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language <xx> NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model 
labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"0ofYHpu7sloS","outputId":"ea715585-daa2-433d-d281-02b9e61222a4"},"source":["pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.multi_classifier')\n","pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['en_embed_sentence_small_bert_L12_768'] has settable params:\n","pipe['en_embed_sentence_small_bert_L12_768'].setBatchSize(32)  | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['en_embed_sentence_small_bert_L12_768'].setIsLong(False)  | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['en_embed_sentence_small_bert_L12_768'].setMaxSentenceLength(128)  | Info: Max sentence length to process | Currently set to : 128\n","pipe['en_embed_sentence_small_bert_L12_768'].setDimension(768)  | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['en_embed_sentence_small_bert_L12_768'].setCaseSensitive(False)  | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['en_embed_sentence_small_bert_L12_768'].setStorageRef('sent_small_bert_L12_768')  | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+')   | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"])  | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0)           | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999)       | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True)  | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True)      | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([])       | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False)  | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0)           | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999)       | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')  | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['multi_classifier'] has settable params:\n","pipe['multi_classifier'].setMaxEpochs(2)            | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['multi_classifier'].setLr(0.001)               | Info: Learning Rate | Currently set to : 0.001\n","pipe['multi_classifier'].setBatchSize(64)           | Info: Batch size | Currently set to : 64\n","pipe['multi_classifier'].setValidationSplit(0.0)    | Info: Choose the proportion of training dataset to be validated against the model on each Epoch. The value should be between 0.0 and 1.0 and by default it is 0.0 and off. | Currently set to : 0.0\n","pipe['multi_classifier'].setThreshold(0.5)          | Info: The minimum threshold for each label to be accepted. Default is 0.5 | Currently set to : 0.5\n","pipe['multi_classifier'].setRandomSeed(44)          | Info: Random seed | Currently set to : 44\n","pipe['multi_classifier'].setShufflePerEpoch(False)  | Info: whether to shuffle the training data on each Epoch | Currently set to : False\n","pipe['multi_classifier'].setEnableOutputLogs(True)  | Info: Whether to use stdout in addition to Spark logs. 
| Currently set to : True\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":570},"id":"ABHLgirmG1n9","outputId":"60e9995e-080c-4213-cf03-c7baba89bd6a"},"source":["# Load pipe with bert embeds\n","# using large embeddings can take a few hours..\n","pipe['multi_classifier'].setMaxEpochs(120)            \n","pipe['multi_classifier'].setLr(0.0005)  \n","fitted_pipe = pipe.fit(train_df[['y','text']],label_seperator=',')\n","preds = fitted_pipe.predict(train_df[['y','text']])\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>y</th>\n","      <th>text</th>\n","      <th>multi_classifier_confidences</th>\n","      <th>en_embed_sentence_small_bert_L12_768_embeddings</th>\n","      <th>document</th>\n","      <th>multi_classifier_classes</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>13</th>\n","      <td>c++</td>\n","      <td>output FILE ,is this a fault?</td>\n","      <td>[]</td>\n","      <td>[-0.0598912313580513, 0.429191917181015, -0.25...</td>\n","      <td>output FILE ,is this a fault?</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>24</th>\n","      <td>swift</td>\n","      <td>Can I throw from class init() in Swift with co...</td>\n","      <td>[0.61310124]</td>\n","      <td>[-0.45358699560165405, 0.1986018270254135, 
-0....</td>\n","      <td>Can I throw from class init() in Swift with co...</td>\n","      <td>[java]</td>\n","    </tr>\n","    <tr>\n","      <th>25</th>\n","      <td>c#</td>\n","      <td>C# - Count a specific word in richTextBox1 and...</td>\n","      <td>[0.8172003]</td>\n","      <td>[-0.592096209526062, 0.0025841565802693367, -0...</td>\n","      <td>C# - Count a specific word in richTextBox1 and...</td>\n","      <td>[c#]</td>\n","    </tr>\n","    <tr>\n","      <th>30</th>\n","      <td>c++</td>\n","      <td>c++ vector type function implemetation</td>\n","      <td>[0.98100495]</td>\n","      <td>[-0.6645137071609497, 0.34700289368629456, 0.1...</td>\n","      <td>c++ vector type function implemetation</td>\n","      <td>[c++]</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>php</td>\n","      <td>japanese and portuguese language cannot support</td>\n","      <td>[]</td>\n","      <td>[-0.30820634961128235, 0.5732622742652893, 0.5...</td>\n","      <td>japanese and portuguese language cannot support</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>44992</th>\n","      <td>mysql</td>\n","      <td>MySQL how to query five tables in one SELECT</td>\n","      <td>[0.94582915]</td>\n","      <td>[-0.6759300231933594, 0.1323285549879074, 0.56...</td>\n","      <td>MySQL how to query five tables in one SELECT</td>\n","      <td>[mysql]</td>\n","    </tr>\n","    <tr>\n","      <th>44993</th>\n","      <td>python</td>\n","      <td>Copy value of list not reference</td>\n","      <td>[0.71518165]</td>\n","      <td>[-0.7307966947555542, 0.3146328032016754, -0.5...</td>\n","      <td>Copy value of list not reference</td>\n","      <td>[python]</td>\n","    </tr>\n","    <tr>\n","      <th>44994</th>\n","      <td>python,python-3.x</td>\n","  
    <td>Weird question, but how do I make a python scr...</td>\n","      <td>[0.9938545]</td>\n","      <td>[-0.478365957736969, -0.015336859039962292, 0....</td>\n","      <td>Weird question, but how do I make a python scr...</td>\n","      <td>[python]</td>\n","    </tr>\n","    <tr>\n","      <th>44996</th>\n","      <td>python</td>\n","      <td>Does Python execute code from the top or botto...</td>\n","      <td>[0.998447]</td>\n","      <td>[-0.7976136803627014, -0.17537403106689453, 0....</td>\n","      <td>Does Python execute code from the top or botto...</td>\n","      <td>[python]</td>\n","    </tr>\n","    <tr>\n","      <th>44998</th>\n","      <td>ios,swift</td>\n","      <td>how to implement fill in the blank in Swift</td>\n","      <td>[0.6266076, 0.9772264]</td>\n","      <td>[-0.4111633598804474, 0.04349775239825249, 0.2...</td>\n","      <td>how to implement fill in the blank in Swift</td>\n","      <td>[ios, swift]</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>9968 rows × 6 columns</p>\n","</div>"],"text/plain":["                              y  ... multi_classifier_classes\n","origin_index                     ...                         \n","13                          c++  ...                       []\n","24                        swift  ...                   [java]\n","25                           c#  ...                     [c#]\n","30                          c++  ...                    [c++]\n","48                          php  ...                       []\n","...                         ...  ...                      ...\n","44992                     mysql  ...                  [mysql]\n","44993                    python  ...                 [python]\n","44994         python,python-3.x  ...                 [python]\n","44996                    python  ...                 [python]\n","44998                 ios,swift  ...             
[ios, swift]\n","\n","[9968 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":10}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"E7ah2LM6tIhG","outputId":"edaa6235-c8d2-474a-9cc1-331e0967086c"},"source":["from sklearn.preprocessing import MultiLabelBinarizer\n","from sklearn.metrics import classification_report\n","from sklearn.metrics import f1_score\n","from sklearn.metrics import roc_auc_score\n","mlb = MultiLabelBinarizer()\n","mlb = mlb.fit(preds.y.str.split(','))\n","y_true = mlb.transform(preds['y'].str.split(','))\n","y_pred = mlb.transform(preds.multi_classifier_classes.str.join(',').str.split(','))\n","print(\"Classification report: \\n\", (classification_report(y_true, y_pred)))\n","print(\"F1 micro averaging:\",(f1_score(y_true, y_pred, average='micro')))\n","print(\"ROC: \",(roc_auc_score(y_true, y_pred, average=\"micro\")))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Classification report: \n","               precision    recall  f1-score   support\n","\n","           0       0.96      0.67      0.79       738\n","           1       0.95      0.71      0.82       228\n","           2       0.70      0.53      0.60       440\n","           3       0.91      0.63      0.75       508\n","           4       0.95      0.57      0.71       733\n","           5       0.91      0.58      0.71       621\n","           6       0.88      0.70      0.78       736\n","           7       0.81      0.65      0.72      1254\n","           8       0.86      0.58      0.69       145\n","           9       0.89      0.58      0.70      1288\n","          10       0.87      0.73      0.80      2164\n","          11       0.89      0.58      0.70       754\n","          12       0.84      0.67      0.74       277\n","          13       0.89      0.59      0.71       511\n","          14       0.96      0.27      0.42        96\n","          15       0.94      0.70      0.80      1193\n","         
 16       0.93      0.70      0.80      1265\n","          17       0.74      0.22      0.34       365\n","          18       0.97      0.70      0.82       246\n","          19       1.00      0.55      0.71       172\n","          20       0.92      0.71      0.81       427\n","          21       0.82      0.67      0.74       458\n","          22       0.81      0.66      0.73       319\n","          23       0.83      0.23      0.36       211\n","          24       0.97      0.64      0.77       242\n","\n","   micro avg       0.89      0.64      0.74     15391\n","   macro avg       0.89      0.59      0.70     15391\n","weighted avg       0.89      0.64      0.73     15391\n"," samples avg       0.70      0.64      0.65     15391\n","\n","F1 micro averaging: 0.7401884721644023\n","ROC:  0.8150061228796474\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","outputId":"bbf99f56-d4b1-4440-ecb7-fe9d61935c62"},"source":["stored_model_path = './models/multi_classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/multi_classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible!   
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Tesla plans to invest 10M into the ML sector')\n","preds"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"M1LjAwJVJxun"},"source":[""],"execution_count":null,"outputs":[]}]}
\ No newline at end of file
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_token_label_text_classifier_stackoverflow_tags.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_label_text_classification/NLU_training_multi_token_label_text_classifier_stackoverflow_tags.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier for sentences with multiple classes at the same time \n","MultiClassifierDL is a Multi-label Text Classification. MultiClassifierDL uses a Bidirectional GRU with Convolution model that we have built inside TensorFlow and supports up to 100 classes. The input to MultiClassifierDL is Sentence Embeddings such as state-of-the-art UniversalSentenceEncoder, BertSentenceEmbeddings, or SentenceEmbeddings\n","\n","\n","\n","### Multi ClassifierDL (Multi-class Text Classification with multiple classes per sentence)\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#multiclassifierdl-multi-label-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! 
apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu > /dev/null pyspark==2.4.7\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2 Download sample dataset 60k Stack Overflow Questions with Quality Rating\n","\n","\n","https://www.kaggle.com/imoore/60k-stack-overflow-questions-with-quality-rate"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"y4xSRWIhwT28","outputId":"f7ac934c-b18f-4ffd-d773-842c81b2a80a"},"source":["import pandas as pd\n","! wget -N https://ckl-it.de/wp-content/uploads/2020/11/60kstackoverflow.csv -P /tmp\n","test_path = '/tmp/60kstackoverflow.csv'\n","train_df = pd.read_csv(test_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-02 11:20:29--  https://ckl-it.de/wp-content/uploads/2020/11/60kstackoverflow.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:443... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 50356825 (48M) [text/csv]\n","Saving to: ‘/tmp/60kstackoverflow.csv’\n","\n","60kstackoverflow.cs 100%[===================>]  48.02M  2.57MB/s    in 21s     \n","\n","2021-01-02 11:20:51 (2.32 MB/s) - ‘/tmp/60kstackoverflow.csv’ saved [50356825/50356825]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"gBxgVIB787wd"},"source":["# Split labels and clean them.\n","import pandas as pd\n","\n","train_df = pd.read_csv(test_path)\n","\n","f = lambda x : x.replace('<','').replace('>','')\n","g = lambda l : list(map(f,l))\n","train_df['y'] = train_df.Tags.str.split('><').map(g).str.join(',')\n","train_df['text'] = train_df['Title']\n","\n"," \n","# train_df = train_df.iloc[:50]"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":430},"id":"OfMCrNk-L_pq","outputId":"6ce7798d-ff2f-4b02-a066-67497ba0bdfa"},"source":["counts = train_df.explode('y').y.value_counts()\n","counts.iloc[0:100].plot.bar(figsize=(40,8), title='Distribution of Label Tags in Dataset')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["<matplotlib.axes._subplots.AxesSubplot at 
0x7f977030b278>"]},"metadata":{"tags":[]},"execution_count":4},{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAACOAAAAJhCAYAAADinV3wAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdf9SnZV0n8PcnJiAVQWEiGdCxZPuxdSQbSbfaSsrCsWDPUbPcJKKlHx77YZ6cytR+7C62FavbZkuyhZo/kPJA4boaarW7qQ1qWmk14iAgP0YEFNQS/ewf32vyYXyGeR6uZ3geptfrnO957vu6rvu6P/f3e/8zc97nuqq7AwAAAAAAAAAA3DNfsN4FAAAAAAAAAADAfZkADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAABgzVTVb1fVL6zRXA+tqtur6rBx/taq+qG1mHvM97+q6qy1mm8V9/2VqvpIVd2whnNuraquqk335rUH076/PwAAAMBGJoADAAAArEhV7a6qT1bVx6vq1qr6f1X1I1X1z/+/0N0/0t2/vMK5vu3uxnT3h7r7Ad39mTWo/QVV9Yp95j+9uy+anXuVdTw0yU8n+aru/pJl+r+lqq69N2u6OyMAs/fz2fH77z1/2sG898zvP77Hzy6p9dqquriqHr2KOT7vnTkY7q37AAAAAAeXAA4AAACwGt/V3UcleViS85I8J8mFa32TjbYayxp6aJKbu/um9S5kJUYA5gHd/YAkH8ri99/b9vvrXd8BfHjUfVSSxyR5f5I/r6rT1rcsAAAA4FAkgAMAAACsWnff1t2XJfmeJGdV1VcnSVX9XlX9yjg+rqr+eKyW89Gq+vOq+oKqenkWQZQ/GquT/MySbZDOqaoPJXnzfrZG+rKqekdVfayqLq2qB497fd7KMXtX2amq70zyc0m+Z9zvr0b/P29pNep6blVdXVU3VdXLquro0be3jrOq6kNj+6if3993U1VHj+v3jPmeO+b/tiRvSnLCqOP3VvOdV9X2qnrXePZrquoFywz7war6cFVdX1XPXnLtF1TVjqr6QFXdPFaDefBq7r9PLadW1V+M3/b6qvrNqjp8Sf/jq+rvquq2qvqtqvrTJd/1I8b5beO7fM1+7nGX33/8Xr9cVf93rML0xqo67kC19sK13f28JC9N8sIl93jR+C4/VlVXVtU3jfb9vTNnV9X7xv2vqqofXjLXsu/76Duhqv5gvBMfrKofv7v7AAAAAPc9AjgAAADAPdbd70hybZJvWqb7p0ff5iTHZxE06O7+/tx1NZVfXXLNNyf5yiTfsZ9bPj3JDyZ5SJI7k7x4BTW+Icl/SvKacb9HLjPsB8bnW5N8aZIHJPnNfcZ8Y5IvT3JakudV1Vfu55b/LcnRY55vHjWf3d1/kuT0jJVZuvsHDlT7Pu4Ycx2TZHuSH62qM/cZ861JTk7y+CTPqc9t8/XMJGeOek5IckuS/77K+y/1mSQ/leS4JI/N4jv5sWQRRElySZKfTXJskr9L8m+WXPvLSd6Y5EFJTszi+1qp70tydpIvTnJ4kmff/fDP84dJHlVV9x/nf5nklCQPTvLKJK+tqiPv5p25KckTkzxw1HF+VT1q9C37vo8Qzh8l+askW7L4rn6yqr5jhe8mAAAAcB8ggAMAAADM+nAWAYZ9fTqLoMzDuvvT3f3n3d0HmOsF3X1Hd39yP/0v7+6/7u47kvxCkqdU1WH3vPR/9rQkv9HdV3X37VmER566z+o7v9jdn+zuv8oiTPF5YYlRy1OT/Gx3f7y7dyf59STfP1tgd7+1u9/b3Z/t7vckeVUWgZ
qlfnF8f+9N8rtJvne0/0iSnx8rwfxjkhckeVLdw62+uvvK7n5bd985nvF/LKnlCUn+prv/sLv3hqRuWHL5p7PYwuyE7v5Ud/+fVdz6d7v778f7cXEW4ZnV+HCSyiLElO5+RXffPJ7j15MckUXIalndfXl3f2CsqvOnWQSJ9obP9ve+PzrJ5u7+pe7+p+6+KsnvZPGeAAAAAIcIARwAAABg1pYkH12m/b8k2ZXkjWO7nh0rmOuaVfRfneQLs1iFZdYJY76lc2/KYiWTvZaGSD6RxSo5+zpu1LTvXFtmC6yqr6+qt4xtjG7LIlSz77Pv+/2cMI4fluR1Y3ukW5O8L4tVbI7PPVBV/2pst3RDVX0si1Vc9tZywtI6Rghl6fZgP5NFCOYdVfU3VfWDq7j1Sn6Du7MlSSe5dTzHs8eWUreN7+Xo3M37VFWnV9XbxhZTt2YRNto7fn/v+8Oy2Hbs1iXf/8/lHn73AAAAwMYkgAMAAADcY1X16CxCDZ+3islYAeanu/tLk3x3kmdV1Wl7u/cz5YFWyDlpyfFDs1h15CNZbM90vyV1HZbFVkArnffDWQQlls59Z5IbD3Ddvj6Sz63wsnSu61Y5z3JemeSyJCd199FJfjuLIMtS+34/Hx7H1yQ5vbuPWfI5srvvaV0vSfL+JCd39wOzCJTsreX6LLaWSpJUVS097+4buvs/dPcJSX44yW9V1SPuYR2r9e+SvLO776iqb8oiDPSUJA/q7mOS3LbkOe7yzlTVEUn+IMmvJTl+jH/93vF3875fk+SD+3z3R3X3E5a7DwAAAHDfJIADAAAArFpVPbCqnpjk1UleMbY82nfME6vqESOAcVsWK658dnTfmORL78Gt/31VfVVV3S/JLyW5pLs/k+TvkxxZVdur6guTPDeL7YT2ujHJ1qra3/+FvCrJT1XVw6vqAVms6PKasYXSio1aLk7yH6vqqKp6WJJnJXnFauapqiP3+VSSo5J8tLs/VVWnJvm+ZS79haq6X1X96yRnJ3nNaP/tUdPDxvybq+qM1dS0j6OSfCzJ7VX1FUl+dEnf5Um+pqrOHFtcPSPJlyx5tidX1d5Azi1ZBFA+m4OkFrZU1fOT/FAWYaG9z3Bnkj1JNlXV85I8cMml+74zh2fxTu1JcmdVnZ7k8Uvus7/3/R1JPl5Vz6mqL6qqw6rqq0d4bbn7AAAAAPdB/mEPAAAArMYfVdXHs1jV4+eT/EYWQY/lnJzkT5LcnuQvkvxWd79l9P3nJM8dW/I8exX3f3mS38tiK6Ijk/x4knT3bUl+LMlLs1ht5o7cdduj146/N1fVO5eZ93+Ouf8syQeTfCrJM1dR11LPHPe/KouVgV455l+pLUk+uc/ny7J4vl8a3//zsgj67OtPs9gG6Yokv9bdbxztL8pi9Zw3juvfluTrV/dYd/HsLAJAH0/yO/lc0Cfd/ZEkT07yq0luTvJVSXYm+ccx5NFJ3l5Vt4+afqK7r5qoZX9OGPe4PclfJvmaJN+y5Dv530nekEV46+osfvOlW3jd5Z3p7o9n8b5dnEVw6PtG/Xst+76PUNYTk5ySxbv1kSze06OXu89aPDgAAABw76vFNtwAAAAAsPbGyi7XJnnakgAWAAAAwCHFCjgAAAAArKmq+o6qOqaqjshiy6fKYtUdAAAAgEOSAA4AAAAAa+2xST6QxXZL35XkzO7+5PqWBAAAAHDw2IIKAAAAAAAAAAAmWAEHAAAAAAAAAAAmbFrvApLkuOOO661bt653GQAAAAAAAAAAsKwrr7zyI929ebm+DRHA2bp1a3bu3LneZQAAAAAAAAAAwLKq6ur99dmCCgAAAAAAAAAAJgjgAAAAAAAAAADABAEcAAAAAAAAAACYIIADAAAAAAAAAAATBHAAAAAAAAAAAGCCAA4AAAAAAAAAAEwQwAEAAAAAAAAAgAkCOAAAAAAAAAAAMEEABwAAAAAAAAAAJgjgAAAAAAAAAADABAEcAAAAAAAAAACYIIADAA
AAAAAAAAATBHAAAAAAAAAAAGCCAA4AAAAAAAAAAEwQwAEAAAAAAAAAgAkrCuBU1U9V1d9U1V9X1auq6siqenhVvb2qdlXVa6rq8DH2iHG+a/RvPZgPAAAAAAAAAAAA6+mAAZyq2pLkx5Ns6+6vTnJYkqcmeWGS87v7EUluSXLOuOScJLeM9vPHOAAAAAAAAAAAOCStdAuqTUm+qKo2JblfkuuTPC7JJaP/oiRnjuMzxnlG/2lVVWtTLgAAAAAAAAAAbCwHDOB093VJfi3Jh7II3tyW5Mokt3b3nWPYtUm2jOMtSa4Z1945xh+777xVdW5V7ayqnXv27Jl9DgAAAAAAAAAAWBebDjSgqh6Uxao2D09ya5LXJvnO2Rt39wVJLkiSbdu29XJjtu64fMXz7T5v+2xJAAAAAAAAAACwaivZgurbknywu/d096eT/GGSb0hyzNiSKklOTHLdOL4uyUlJMvqPTnLzmlYNAAAAAAAAAAAbxEoCOB9K8piqul9VVZLTkvxtkrckedIYc1aSS8fxZeM8o//N3b3sCjcAAAAAAAAAAHBfd8AATne/PcklSd6Z5L3jmguSPCfJs6pqV5Jjk1w4LrkwybGj/VlJdhyEugEAAAAAAAAAYEPYdOAhSXc/P8nz92m+Ksmpy4z9VJInz5cGAAAAAAAAAAAb30q2oAIAAAAAAAAAAPZDAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgwqb1LmA9bN1x+YrH7j5v+0GsBAAAAAAAAACA+zor4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgwgEDOFX15VX17iWfj1XVT1bVg6vqTVX1D+Pvg8b4qqoXV9WuqnpPVT3q4D8GAAAAAAAAAACsjwMGcLr777r7lO4+JcnXJflEktcl2ZHkiu4+OckV4zxJTk9y8vicm+QlB6NwAAAAAAAAAADYCFa7BdVpST7Q3VcnOSPJRaP9oiRnjuMzkrysF96W5JiqesiaVAsAAAAAAAAAABvMagM4T03yqnF8fHdfP45vSHL8ON6S5Jol11w72u6iqs6tqp1VtXPPnj2rLAMAAAAAAAAAADaGFQdwqurwJN+d5LX79nV3J+nV3Li7L+jubd29bfPmzau5FAAAAAAAAAAANozVrIBzepJ3dveN4/zGvVtLjb83jfbrkpy05LoTRxsAAAAAAAAAABxyVhPA+d58bvupJLksyVnj+Kwkly5pf3otPCbJbUu2qgIAAAAAAAAAgEPKppUMqqr7J/n2JD+8pPm8JBdX1TlJrk7ylNH++iRPSLIrySeSnL1m1QIAAAAAAAAAwAazogBOd9+R5Nh92m5OctoyYzvJM9akOgAAAAAAAAAA2OBWswUVAAAAAAAAAACwDwEcAAAAAAAAAACYIIADAAAAAAAAAAATBHAAAAAAAAAAAGCCAA4AAA
AAAAAAAEwQwAEAAAAAAAAAgAkCOAAAAAAAAAAAMEEABwAAAAAAAAAAJgjgAAAAAAAAAADABAEcAAAAAAAAAACYIIADAAAAAAAAAAATBHAAAAAAAAAAAGCCAA4AAAAAAAAAAEwQwAEAAAAAAAAAgAkCOAAAAAAAAAAAMGHTehdwKNm64/IVj9193vaDWAkAAAAAAAAAAPcWK+AAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABNWFMCpqmOq6pKqen9Vva+qHltVD66qN1XVP4y/Dxpjq6peXFW7quo9VfWog/sIAAAAAAAAAACwfla6As6Lkryhu78iySOTvC/JjiRXdPfJSa4Y50lyepKTx+fcJC9Z04oBAAAAAAAAAGADOWAAp6qOTvJvk1yYJN39T919a5Izklw0hl2U5MxxfEaSl/XC25IcU1UPWfPKAQAAAAAAAABgA1jJCjgPT7Inye9W1buq6qVVdf8kx3f39WPMDUmOH8dbklyz5PprR9tdVNW5VbWzqnbu2bPnnj8BAAAAAAAAAACso5UEcDYleVSSl3T31ya5I5/bbipJ0t2dpFdz4+6+oLu3dfe2zZs3r+ZSAAAAAAAAAADYMFYSwLk2ybXd/fZxfkkWgZwb924tNf7eNPqvS3LSkutPHG0AAAAAAAAAAHDIOWAAp7tvSHJNVX35aDotyd8muSzJWaPtrCSXjuPLkjy9Fh6T5LYlW1UBAAAAAAAAAMAhZdMKxz0zye9X1eFJrkpydhbhnYur6pwkVyd5yhj7+iRPSLIrySfGWAAAAAAAAAAAOCStKIDT3e9Osm2ZrtOWGdtJnjFZFwAAAAAAAAAA3CcccAsqAAAAAAAAAABg/wRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACZsWu8CWJmtOy5f8djd520/iJUAAAAAAAAAALCUFXAAAAAAAAAAAGCCAA4AAAAAAAAAAEwQwAEAAAAAAAAAgAkCOAAAAAAAAAAAMGHTehfA+tq64/IVj9193vaDWAkAAAAAAAAAwH2TFXAAAAAAAAAAAGCCAA4AAAAAAAAAAEwQwAEAAAAAAAAAgAkCOAAAAAAAAAAAMEEABwAAAAAAAAAAJgjgAAAAAAAAAADABAEcAAAAAAAAAACYsKIATlXtrqr3VtW7q2rnaHtwVb2pqv5h/H3QaK+qenFV7aqq91TVow7mAwAAAA
AAAAAAwHpazQo439rdp3T3tnG+I8kV3X1ykivGeZKcnuTk8Tk3yUvWqlgAAAAAAAAAANhoZragOiPJReP4oiRnLml/WS+8LckxVfWQifsAAAAAAAAAAMCGtdIATid5Y1VdWVXnjrbju/v6cXxDkuPH8ZYk1yy59trRdhdVdW5V7ayqnXv27LkHpQMAAAAAAAAAwPrbtMJx39jd11XVFyd5U1W9f2lnd3dV9Wpu3N0XJLkgSbZt27aqawEAAAAAAAAAYKNY0Qo43X3d+HtTktclOTXJjXu3lhp/bxrDr0ty0pLLTxxtAAAAAAAAAABwyDlgAKeq7l9VR+09TvL4JH+d5LIkZ41hZyW5dBxfluTptfCYJLct2aoKAAAAAAAAAAAOKSvZgur4JK+rqr3jX9ndb6iqv0xycVWdk+TqJE8Z41+f5AlJdiX5RJKz17xqAAAAAAAAAADYIA4YwOnuq5I8cpn2m5Octkx7J3nGmlQHAAAAAAAAAAAb3AG3oAIAAAAAAAAAAPZPAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwIRN610Ah6atOy5f8djd520/iJUAAAAAAAAAABxcVsABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEzatdwGwWlt3XL7isbvP234QKwEAAAAAAAAAsAIOAAAAAAAAAABMEcABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAE1YcwKmqw6rqXVX1x+P84VX19qraVVWvqarDR/sR43zX6N96cEoHAAAAAAAAAID1t5oVcH4iyfuWnL8wyfnd/YgktyQ5Z7Sfk+SW0X7+GAcAAAAAAAAAAIekFQVwqurEJNuTvHScV5LHJblkDLkoyZnj+IxxntF/2hgPAAAAAAAAAACHnJWugPNfk/xMks+O82OT3Nrdd47za5NsGcdbklyTJKP/tjEeAAAAAAAAAAAOOQcM4FTVE5Pc1N1XruWNq+rcqtpZVTv37NmzllMDAAAAAAAAAMC9ZiUr4HxDku+uqt1JXp3F1lMvSnJMVW0aY05Mct04vi7JSUky+o9OcvO+k3b3Bd29rbu3bd68eeohAAAAAAAAAABgvRwwgNPdP9vdJ3b31iRPTfLm7n5akrckedIYdlaSS8fxZeM8o//N3d1rWjUAAAAAAAAAAGwQK1kBZ3+ek+RZVbUrybFJLhztFyY5drQ/K8mOuRIBAAAAAAAAAGDj2nTgIZ/T3W9N8tZxfFWSU5cZ86kkT16D2uBetXXH5Sseu/u87QexEgAAAAAAAADgvmRmBRwAAAAAAAAAAPgXTwAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATB
DAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMCETQcaUFVHJvmzJEeM8Zd09/Or6uFJXp3k2CRXJvn+7v6nqjoiycuSfF2Sm5N8T3fvPkj1w4a3dcflKx67+7ztB7ESAAAAAAAAAOBgWMkKOP+Y5HHd/cgkpyT5zqp6TJIXJjm/ux+R5JYk54zx5yS5ZbSfP8YBAAAAAAAAAMAh6YABnF64fZx+4fh0kscluWS0X5TkzHF8xjjP6D+tqmrNKgYAAAAAAAAAgA1kJSvgpKoOq6p3J7kpyZuSfCDJrd195xhybZIt43hLkmuSZPTflsU2VfvOeW5V7ayqnXv27Jl7CgAAAAAAAAAAWCcrCuB092e6+5QkJyY5NclXzN64uy/o7m3dvW3z5s2z0wEAAAAAAAAAwLpYUQBnr+6+Nclbkjw2yTFVtWl0nZjkunF8XZKTkmT0H53k5jWpFgAAAAAAAAAANpgDBnCqanNVHTOOvyjJtyd5XxZBnCeNYWcluXQcXzbOM/rf3N29lkUDAAAAAAAAAMBGsenAQ/KQJBdV1WFZBHYu7u4/rqq/TfLqqvqVJO9KcuEYf2GSl1fVriQfTfLUg1A3AAAAAAAAAABsCAcM4HT3e5J87TLtVyU5dZn2TyV58ppUBwAAAAAAAAAAG9wBt6ACAAAAAAAAAAD2TwAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACZvWuwDgntu64/IVj9193vaDWAkAAAAAAAAA/MslgAN8HsEeAP3xZUAAACAASURBVAAAAAAAAFg5W1ABAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYMKm9S4A+Jdj647LVzx293nbD2IlAAAAAAAAALB2rIADAAAAAAAAAAATBHAAAAAAAAAAAGCCAA4AAAAAAAAAAEwQwAEAAAAAAAAAgAkCOAAAAAAAAAAAMEEABwAAAAAAAAAAJgjgAAAAAAAAAADABAEcAAAAAAAAAACYIIADAPD/2bv3eNuref/j73fl0pWiEyqVFDqIlBNC5FYhpDpJOp3cb4UfyjW3UzouR7lVonShKEcqQnQTqp3aKVLouCsOFQcpn98fnzH3mnvtudbea44x9t6z/Xo+Huux95xrrc/87rm/8/sdl8/4DAAAAAAAAAAAAKACCTgAAAAAAAAAAAAAAABABRJwAAAAAAAAAAAAAAAAgAok4AAAAAAAAAAAAAAAAAAVSMABAAAAAAAAAAAAAAAAKpCAAwAAAAAAAAAAAAAAAFQgAQcAAAAAAAAAAAAAAACoQAIOAAAAAAAAAAAAAAAAUIEEHAAAAAAAAAAAAAAAAKACCTgAAAAAAAAAAAAAAABAhVWW9QEAQK2NDzxzTj9//aE7dzoSAAAAAAAAAAAAAMCKiAo4AAAAAAAAAAAAAAAAQAUScAAAAAAAAAAAAAAAAIAKJOAAAAAAAAAAAAAAAAAAFUjAAQAAAAAAAAAAAAAAACqQgAMAAAAAAAAAAAAAAABUIAEHAAAAAA
AAAAAAAAAAqEACDgAAAAAAAAAAAAAAAFCBBBwAAAAAAAAAAAAAAACgAgk4AAAAAAAAAAAAAAAAQIXFJuDY3tD2N21fbfsq2/uX59ex/TXb15Y/1y7P2/bhtq+zPd/2Vr3/EQAAAAAAAAAAAAAAAMCysiQVcG6T9LqI2ELStpJeYXsLSQdKOiciNpN0TnksSTtK2qx8vVjSx5ofNQAAAAAAAAAAAAAAALCcWGwCTkT8OiIuK3+/RdIPJK0vaRdJx5UfO07Ss8rfd5H06UjfkXR32/dufuQAAAAAAAAAAAAAAADAcmBJKuAsYHtjSQ+X9F1J60XEr8u3fiNpvfL39SX9fOjXflGemx7rxbYvtX3pjTfeOMfDBgAAAAAAAAAAAAAAAJYPqyzpD9peQ9Kpkg6IiJttL/heRITtmMsLR8RRko6SpK233npOvwsAS8vGB565xD97/aE7dzwSAAAAAAAAAAAAAMDyaokq4Ni+kzL55sSIOK08/dvB1lLlzxvK87+UtOHQr29QngMAAAAAAAAAAAAAAADucBabgOMsdXOMpB9ExAeGvnW6pH3K3/eR9MWh51/gtK2km4a2qgIAAAAAAAAAAAAAAADuUJZkC6rHSNpb0pW2Ly/PvUnSoZJOsb2fpP+RtHv53lmSdpJ0naT/k7Rv0yMGAAAAAAAAAAAAAAAAliOLTcCJiAsleYZv7zDi50PSKyqPCwDu0DY+8Mwl/tnrD92545EAAAAAAAAAAAAAAGotdgsqAAAAAAAAAAAAAAAAADMjAQcAAAAAAAAAAAAAAACoQAIOAAAAAAAAAAAAAAAAUIEEHAAAAAAAAAAAAAAAAKACCTgAAAAAAAAAAAAAAABAhVWW9QEAANrZ+MAz5/Tz1x+6c6cjAQAAAAAAAAAAAIAVBxVwAAAAAAAAAAAAAAAAgAok4AAAAAAAAAAAAAAAAAAV2IIKALBE5rK9FVtbAQAAAAAAAAAAAFiRUAEHAAAAAAAAAAAAAAAAqEACDgAAAAAAAAAAAAAAAFCBBBwAAAAAAAAAAAAAAACgAgk4AAAAAAAAAAAAAAAAQAUScAAAAAAAAAAAAAAAAIAKJOAAAAAAAAAAAAAAAAAAFUjAAQAAAAAAAAAAAAAAACqQgAMAAAAAAAAAAAAAAABUIAEHAAAAAAAAAAAAAAAAqEACDgAAAAAAAAAAAAAAAFCBBBwAAAAAAAAAAAAAAACgAgk4AAAAAAAAAAAAAAAAQAUScAAAAAAAAAAAAAAAAIAKJOAAAAAAAAAAAAAAAAAAFUjAAQAAAAAAAAAAAAAAACqQgAMAAAAAAAAAAAAAAABUIAEHAAAAAAAAAAAAAAAAqEACDgAAAAAAAAAAAAAAAFCBBBwAAAAAAAAAAAAAAACgAgk4AAAAAAAAAAAAAAAAQAUScAAAAAAAAAAAAAAAAIAKJOAAAAAAAAAAAAAAAAAAFUjAAQAAAAAAAAAAAAAAACqQgAMAAAAAAAAAAAAAAABUWGVZHwAAYMW28YFnLvHPXn/ozh2PBAAAAAAAAAAAAADGQwUcAAAAAAAAAAAAAAAAoAIJOAAAAAAAAAAAAAAAAEAFEnAAAAAAAAAAAAAAAACACiTgAAAAAAAAAAAAAAAAABVWWdYHAABADxsfeOacfv76Q3fudCQAAAAAAAAAAAAA7uiogAMAAAAAAAAAAAAAAABUIAEHAAAAAAAAAAAAAAAAqEACDgAAAAAAAAAAAAAAAFBhlWV9AAAATJqNDzxziX/2+kN37ngkAAAAAAAAAAAAAJYHi62AY/uTtm+w/f2h59ax/TXb15Y/1y7P2/bhtq+zPd/2Vj0PHgAAAAAAAAAAAAAAAFjWlmQLqmMlPW3acwdKOiciNpN0TnksSTtK2qx8vVjSx9ocJgAAAAAAAAAAAAAAALB8WuwWVBFxvu2Npz29i6Tty9+Pk3SupDeW5z8dESHpO7bvbvveEfHrVgcMAMAdFVtbAQAAAAAAAAAAAJNpSSrgjL
LeUFLNbyStV/6+vqSfD/3cL8pzi7D9YtuX2r70xhtvHPMwAAAAAAAAAAAAAAAAgGVr3AScBUq1mxjj946KiK0jYut111239jAAAAAAAAAAAAAAAACAZWKxW1DN4LeDraVs31vSDeX5X0racOjnNijPAQCAZWQuW1tJbG8FAAAAAAAAAAAAzNW4FXBOl7RP+fs+kr449PwLnLaVdNPQVlUAAAAAAAAAAAAAAADAHc5iK+DY/oyk7SXd0/YvJL1d0qGSTrG9n6T/kbR7+fGzJO0k6TpJ/ydp3w7HDAAAlhNzqa4zl8o6veICAAAAAAAAAAAAPSw2ASci9pzhWzuM+NmQ9IragwIAAOiBxB4AAAAAAAAAAAD0sNgEHAAAACweyT0AAAAAAAAAAAArLhJwAAAAlmOTuM0XyUgAAAAAAAAAAGBFQwIOAAAAJgLJSP3jAgAAAAAAAACA8ay0rA8AAAAAAAAAAAAAAAAAmGRUwAEAAAAgico6AAAAAAAAAACMiwQcAAAAAN2xHRcAAAAAAAAA4I6MBBwAAAAAmIbEHgAAAAAAAADAXKy0rA8AAAAAAAAAAAAAAAAAmGRUwAEAAACApWQulXUkqusAAAAAAAAAwKQgAQcAAAAA7gB6bZu1PMSda2wAAAAAAAAAWNpIwAEAAAAA3KH0ShoCAAAAAAAAgJmQgAMAAAAAwBIgsQcAAAAAAADATEjAAQAAAABgGeq5HdfysIUYyUgAAAAAAABYEZCAAwAAAAAAlgs9k5EAAAAAAACAnkjAAQAAAAAAd3hU7QEAAAAAAEBPJOAAAAAAAACMiW2+AAAAAAAAIJGAAwAAAAAAsEIhaQgAAAAAAKC9lZb1AQAAAAAAAAAAAAAAAACTjAo4AAAAAAAAWG71rKxD1R4AAAAAANAKCTgAAAAAAABAQyT2AAAAAACw4iEBBwAAAAAAAJgAk1gNiGQkAAAAAMCKggQcAAAAAAAAABNlEpORAAAAAAB3bCTgAAAAAAAAAEBnJPYAAAAAwB0bCTgAAAAAAAAAMKGWh+3DesYmGQkAAADApCABBwAAAAAAAAAw8UjsAQAAALAskYADAAAAAAAAAMAMJrEa0PIQt2fs5SEuAAAAMB0JOAAAAAAAAAAAAHNAMlL/uL1jAwAAtEYCDgAAAAAAAAAAAFYYJCMBAIAeSMABAAAAAAAAAAAAVkAkIwEA0A4JOAAAAAAAAAAAAABWaCQjrRhxe8aetLg9Y5P8hhUVCTgAAAAAAAAAAAAAAGCpIxlp+YrbM/byELc3EnAAAAAAAAAAAAAAAABwh9Y7sWelOf8GAAAAAAAAAAAAAAAAgAVIwAEAAAAAAAAAAAAAAAAqkIADAAAAAAAAAAAAAAAAVCABBwAAAAAAAAAAAAAAAKhAAg4AAAAAAAAAAAAAAABQgQQcAAAAAAAAAAAAAAAAoAIJOAAAAAAAAAAAAAAAAEAFEnAAAAAAAAAAAAAAAACACiTgAAAAAAAAAAAAAAAAABVIwAEAAAAAAAAAAAAAAAAqkIADAAAAAAAAAAAAAAAAVOiSgGP7abavsX2d7QN7vAYAAAAAAAAAAAAAAACwPGiegGN7ZUkfkbSjpC0k7Wl7i9avAwAAAAAAAAAAAAAAACwPelTAeaSk6yLiJxFxq6TPStqlw+sAAAAAAAAAAAAAAAAAy5wjom1A+7mSnhYRLyyP95b0LxHxymk/92JJLy4PHyDpmiV8iXtK+l2jw11asSctbs/YxO0fe9Li9ow9aXF7xp60uD1jT1rcnrEnLW7P2JMWt2fsSYvbMzZx+8eetLg9Y09a3J6xJy1uz9iTFrdn7EmL2zP2pMXtGXvS4vaMTdz+sSctbs/Ykxa3Z+xJi9sz9qTF7Rl70uL2jD1pcXvGnrS4PWMTt3/sSYvbM/akxe0Ze9Li9ow9aXF7xp60uD1jzyXuRhGx7qhvrNLueOYmIo6SdNRcf8/2pRGxdYdD6hZ70uL2jE3c/rEnLW7P2JMWt2
fsSYvbM/akxe0Ze9Li9ow9aXF7xp60uD1jE7d/7EmL2zP2pMXtGXvS4vaMPWlxe8aetLg9Y09a3J6xJy1uz9jE7R970uL2jD1pcXvGnrS4PWNPWtyesSctbs/Ykxa3Z+xJi9szNnH7x560uD1jT1rcnrEnLW7P2JMWt2fsSYvbM3aruD22oPqlpA2HHm9QngMAAAAAAAAAAAAAAADucHok4FwiaTPbm9i+s6R/lXR6h9cBAAAAAAAAAAAAAAAAlrnmW1BFxG22XynpbEkrS/pkRFzV8CXmvG3VchB70uL2jE3c/rEnLW7P2JMWt2fsSYvbM/akxe0Ze9Li9ow9aXF7xp60uD1jE7d/7EmL2zP2pMXtGXvS4vaMPWlxe8aetLg9Y09a3J6xJy1uz9jE7R970uL2jD1pcXvGnrS4PWNPWtyesSctbs/Ykxa3Z+xJi9szNnH7x560uD1jT1rcnrEnLW7P2JMWt2fsSYvbM3aTuI6IFnEAAAAAAAAAAAAAAACAFVKPLagAAAAAAAAAAAAAAACAFQYJOAAAAAAAAAAAAAAAAEAFEnAA3GE4bbisjwMAAAAAAAAAAAAAsGJZ4RNwbP+T7fsOvhrFfO+SPAe0YPsZtifis2x7q9m+auNHREg6q8GhTjzbK9nefVLiYumwvfrgemF7c9vPtH2nZX1cd1S217b90GV9HHdk5Zq01rI+DqBWy3PZ9rtsrzL0eC3bn2oRG1iaJqmfA+COyfY6I75W2P6T7XuVPuQzbN9rWR8P7rhs72Z7zfL3t9g+rcW4IXBHYft423cberyR7XMaxb7LkjyHFYvtlZf1MWD5UObS71r+btv72j7C9suGx6KAHmyvYXuNZX0cS2KFHcwqHcZrJf1U0nmSrpf05UbhnzziuR1bBLZ9zxZxZon/z43jHWf77kOP17b9yYbxm3X+ZxhYWfDV6Hh7TH7vIela24fZfmD9UU4Z3EinPVdzDr6/fH1E0nclHSXp6PL3j1TEHXaZ7W1aBLJ9pe35I76utD2/Qfxu51xE/EPSG2qPcWnEtf0l26fP9NXytVrrmXBpe/8yaWrbx9i+zPZTKsOeL+mutteX9FVJe0s6tvI4u147be+2JM+NGfshLeJMi3lu+X9bR9Jlko62/YEGcVe2fWL9EY6M+5rWcUvs45fkuTHinlTe49UlfV/S1bZfXxu3xD6sxL6T7XNs32j7+S1iTxrbd7P9QduXlq/3Dw/wLY9s7zft8cq2376sjmdxOp7Lq0j6ru2H2n6ypEskzWsQV1L7BJ/y//TNNkc3Mv6mLgPGtre3/erh/snyyPZ65d7/5fJ4i+nn95hx91+S55aXuOrYz2mt9OsW6Ts1fo1e58UikzWjnhsj7l1sP8/2m2y/bfBVG7fE/qcRzz2gQdxu7c6heMt98nDn9uFjluS55chlkm6U9CNJ15a/X1/6Zo+oCexOiyNa36eH4rxQ0sWSniPpuZK+Y/vfa+OW2E3v1T37qLZfO9tXTexZXnPfit/tPa71Y9sn2n6p244pvzUibrG9naQnSTpG0scaxu/C9ibD7QHbq9reuDJm8z6qOy+QHHqdLW2/snxt2TDuyrbv44aLqjteOx9oewdPmyy0/bTK0Bcq+3w72X6RpK9J+q/KmAPfXsLnxlbe3zVbxuyhx7265+fP9rql7X2U7U8OvmpiDrnW9n/a3qJRvG5jndNeYyPbTyp/X7XFeee+8wGr2X6r7aPL481sP71F7IbO0lRuwaGSdlbO722jnO9bIfVoA/TSs63ci+2H2P6epKuU46fzbD+4YfxX2V67VTxJchaMWL45O+MHS9pIOaBsZbGL+1XEvELSEyV9PSIebvsJkp4fEWMPYNl+maSXS7qfpB8PfWtNSd+KiLEbx7ZXioh/2L4sIrYqz+0fER8aN+YMr7MgfqN434uIhy/uuTFjv1DS2yR9Q3lOPF7SOyNirEaF7Z9KihLrvpL+UP5+d0k/i4hNGhzzPEmPlbS2pG
8pJ0RujYi9KuOuJWlPSfsq/w2fkvSZiLilMu6Vkl4UEd8pj3eVdEhEbF4Z9zRJb4+IK8vjB0s6OCKeWxO3xPqhpPtL+h9Jf9bU9WLOVShsbzTb9yPif8Y6yKn4w+fciPDjX+NK/EMl/U7Sycr3YhD4f5enuLYfP9v3I+K8ceIOxV9X0hslbSFpQSMoIp5YE7fEXuSaaXv+OOfbiNhXRMSWtp8q6SWS3irp+Jpr9OB4bb9K0qoRcZjtyyPiYRUxe5/Ho97jJvcq2xdIuosyCenEiLipQczvlXbFCyVtGBFvb3hOXCjpiRFxa22saXEvjohHtoxZ4i70/+RcLXNlRFR11gfnrO29JG0l6UBJ8xq9x4PYz5b0dEmvlXR+RIw9WGj7S8rPyEgR8cwx4x6xmLivHifuUPxTlUkhx5Wn9pa0ZUQ8pyLmlRp9zGPfq6fFP0nZbttP0jrKz/Z5EfH/xozX5f9uKH7Pc3kHSWco27OPi4jramMOxT5E0lOU7c71JH1Y0hER8eGKmOdIek6L6/CI2JdL2lrSxsqBoi9K+ueI2KkiZu9z+cvK9vybS1tgFUnfi4iqxNEZ7qnVfbNecUucXv2cwyS9W9JfJH1F0kMlvSYiThgz3l+U7eIvS/qMpLMj4vaaYxzxGk3PizI4uJqkb0raXlNtubUkfSUiqpKebH9F0k3KBMAF70VEvL8mbol9jXJy9pTy+HWS9mvQxujS7iz3p5cq34dLlO/xhyLiPyvjbiDpCEnbKT8fF0jaPyJ+URO3xF4q7cOZnhsj7ubKyfn1IuLBziqUz4yId1fGPVrS5yPi7PL4KZJ2VX4WPxQR/1IRu9f4UPP7dIl7jaRHR8Tvy+N7SLooIlokvzW9V/cc3/Nikrsj4h3jxp7lNX8WEWMlGCyFca27SPoX5bn8GEkPkDQ/Ip5dGXfQrz5E2Yc8qWHb4vDZvl/Tj7J9qfJzcmt5fGflnMDYCwU79VFnS36PRuNl+0t6kaTTylPPlnRURBxRGfdVkt4u6beS/lGebtH+7tHHebWkV0j6gaSHKe/RXyzfa3Hv207ZjvudpIdHxG8q491L0vqSTpD0PC3cNvx4bduwvMY2kj6pnC+zpD9K+veIGGvBiO1bNHu/rCrpuce9uufnz/ZFyvbg9Pb3qePGHIq9pqR/VX5GVlL+P342Im6ujNtlrLPEfpGkF0taJyI2tb2Z8lzeoTJuz/mAk5X/fy8obdrVlO2tscbubV8YEduN+KyM/RmxffWg31U+I9tELtpeMJ8xzrFOe42m/fVpsddV3p82VuYbSJIioiqxvEcboMRpfry95nN6jqOW69ubI+Kb5fH2kv4jIh49bsxp8d+tvMZdpry+nR1Rl0AzKeWgjpH0Gk27cVT6e0T83rn6aKWI+Kbt2izhk5QDbocoB9AHbonKCW9J59n+s6R7OTOkr5S0j6SmCTga/YGrsZLttSPiD5LkzJ5rdd69Xtm4XKjzr/xwzNmgA14GWL4QEWeVxztKelaTI5YcEf/nXKn40cHkd23QiLjZ9uclrSrpAGWn5vW2D6/s2DxP0idtnyvpPpLuoUxcq/WAKMk3khQR37f9oAZxJempjeIsMhBRJgCaXTdrBn2W0B7lz1cMv6wySXC5iRuVCTZL4ERlstDOyoHvfZQrF8fmqYTLTb3wqrE1lR2yFgbX452UiTdX2a69Rtv2oyTtpZyglqSqEqK9zuNy7d1J0vrTBsjWknRbi9eIiMeWzte/S5pn+2JJn4qIr1WEXcX2vSXtLunNLY5zyE8kfctZGWo4+a22ws63bH9YiybVXTZOMNsHSXqTpFVt36ypc/lWtVkJcSfniqNnSfpwRPzddquM8sE1fmdJn4uIm+o/dnpfbYAZXNop7sCmEbHr0ON3NGizdF21ExHPs72Hsp38Z0nPi4iaa3Kv/7uBLuey7cdJOlzSOyU9RNIRtveLiF/VxpakiDjI9teVK5xaJfj8SdKVtr+mha9DVYlkxT8i4r
YyaXFERBzhXDVTo/cKtHtGxCnleqpy/GP3g23vqWzXb+KFqwuuJWnsfuoscdesiTusYz/nKRHxhnJeXK+s6HC+csJhHD9U9pOeK+l1kj5l+wvKZKFWbd2m54UywfsAZT9v+J5/s3LSqdYGEVG7unsm20s6ylmdZj3lxNbYySJLod25RTmX91KOGR2oHN+qSsBRJoGcJGlQpef55blRVZnnqnX78FGSHi1pXS9cJWQtVfZHiqOVY0RHSlJEzHcmPlUl4EjaNiJeNHgQEV+1/b6IeInrt+PoNT7U4z4tSb+XNJz8eEt5roWm9+qe43sR8Q7nwoJXR8QHa2IN88zVaKy8zo2l97iWcpz+7+XPf0i6oXzV+qXtI5XXs/eWz1urSv53VS7QOrk83k3S1WpT4WOV4UnkiLi1TMBVxSx/NuujRsQTKo9pSewn6V8i4s+S5KwO8W1l4miN/ZXjyq2uP5K6XTtfJOkREfEnZxWEz9veOHJBddV/ou29lQv1XqCcmD7L9r4RcUVF2KdK+jdJG0gaHme6WTnG08Ixkl4eERdIC5KIPqX8N8xZRPSuotP8Xt3587daRLyxR+DIBRBHKyt9P17ZBv1g6au9q+Lz0musU8q5i0cqP9eKiGs9opLmkhqaD7hfx/mATSNij9LXVjn/xr5eRMR25c+Wn5Wf235iRHxD2ZfeUNL/lPnZVlr314d9UZmo9nW1yzeQ+rQBpA7H23Fesuc46uqD5BtJiohznRXFm4iIt9h+q6aScT9s+xRJx0TEj2f/7dEmJQHnpohotT3UwB+dpf/Ol3Si7Rs0dIEfR+SKzZsk7eksFzdYhfQtVQ5AlknCuysHabaR9EJJm9v+rHJl79hlOJ2rNwbZbut5qCx0RLyz5riV2w192/bnyuPdJL2nMuZAr87/9AGWLzszLltoPvltexdl4/j+kj4t6ZERcYMzO/ZqVXRsIuJK2++RdLzy/X1cNFhNJ2m+7U9o6oa5l6Tq0rdS/eqdUWy/RNI7JP1VUxmcLRJZBvFnXfEw7iBnrxtpx4SLp0t6lxatdlZbov0eEXGMs2rYecqExksqY86X9AxlmcXhjk2LhMuBeba/KmkTSQc5Vxr8YzG/szgHSDpIOQh5le37KVfLVOtwHv+vMsHgmVp465RblEm5TZTO11vKax0u6eGlY/OmiDht9t8e6Z2SzpZ0YURcUt7jaxsd7o/L10rKzl0rg1UUw/f80JgJlxFxiKRDbB8SEQfVHtwIRyo7X1dIOt+5qrNqxc2QM5yV1P4i6WXOVQZ/rQnYK8kwIo5b/E9V+Yvt7SLiQklyVqT8S03A3hMAJaFuf0mnSnqQpL2dK2X/b5x4w/93tleVdN+IuKbJwaZe5/L7JO0WEVdLku3nKCtGNtnCp1OCz2maWh3b2t/L4NU+ynu3JFWVDV8Kk1l/LoNXUeJvq+xnjusiSb+WdE9l/2zgFtW1wXvFldS3n6P2k1kRuehkMCh9L2Uy7qG2N4iIDWuCF03PizL58yHbr6pMZprJRbYfMrzwopWI+LWzws5ByvbxgRHxp4qQv1Lfdmev5OF1I2J4e4xjbR/QIK7UuH0o6c6S1lB+9obbsTcrE9dqrRYRF0/7HLdInvq17TdK+mx5vIek35YEjNq+WfPxoRK0VyLudcrtTr6oPBd2UY7tvFaqnihrfq8uuozvRcTt5XibJeAok2yeqpz4H2bl/bZKx3Gtm5XJ7x+QdHTDpIjdJT1N0vsi4o/ORS5Nth1WTvRvFxG3SZLtj0u6ICJe2iD2jbafGRGnl9i7KKuT1GjeRx0obarXKvs5Ly79qQdExBktwmvhicLb1Wbh789V1y4eqdO1c6VB+yQirneu1v986ffVvhe7Ks/jGyR9xpn4fZym7t9zVsYYjrO9azSomDKD2wfJN+U1L7Rdfb/2YrZMqRir7TGXM2tV4THHIgfOsL3TIPG0pdL22Vk5Mb2xsv93orJC0FmSxt0xoddYpyT9rSRBSJKcFU
Rr2uA9CzAM3FrGnwZ9vk0l/W3cYJ0+Gy+U9GnbByuvx5eXxLS7K+8pLfRYIDnQK1GtRxtA6nC8Hecjey60/0lJkDm+PH6+MoGvmYgI27+R9BtlX3Jt5X37axHxhrnGm5QtqA5V3thO09DFZtyToMRcXdlwXUl5A72bcvuJ6o5CXW5GxgAAIABJREFUOQl219Qg8rOUF4mxV944V4NepFxh+MiI+INzNcguyqSIsTP/bO8z9PCdym2dJLWZ3HHuCzkYpPnGYDKgQdxPKxvEC3X+y9fYnX/bZyszCoeTQx4XEdWVVZzZwa9Tlh57b5mYPSDqypseK+mTEXH+iO/tEBHnVMQ+RtKmyobV5sqKS0dExEfGjVni3lXSyyQ9rjx1vqSPRUSTDmRrtq+V9KiIaHHDHBX/O8qtJ+YrO2APVQ4A/1UVpSd7dqSd24ZN39Lp05Uxr1NmM18ZDW9Otr8TEduWz/bhygH2z0fEphUx50XEI9x4275pr7GSsuP8kzLgdA9J60dEk2S1En+NqCwVOhSv6Xnsqe2yToqI57U4xhGv8VDl9W1n5X7Zx0TEZbbvI+nbETFrue5lxfZq4yYULE0lkenZGtoWISL+u9NrrTIYQG0Qax1l8vftpb24ZlSWci5xu2xZ435bXG2pnPC+W3nqD5L2aXENmmkCIOq3rPuhpFdGxNfL+fdaZTnrf66M+wxlUsudI2IT2w9TbntauwXVyjG0RU055pVrz+Xpcctz92g1IeKsFvZv0xJ8/iPqt6vpkeQ06Iu8VHld/4ztTSTtHhHVe7R3PJcfoWyzPFi5Fdy6kp5b+/kb9IEjtzbeXJmU9eWI+HtN3BJ7I0mblc/fqsrVX7XbRB2rfv2cQ5X99L8oV0XeXdIZMeZ2Mp5lWwzbG01P2hrzNbZSJh01OS9cVi3ONAlQOfgv21crk6d+qhzHabJFW4n9dWWb/tXKFZfHKLfjGGvLwaG4d1IO9ja9Fjm3oHijMuFyZ+WWOCdExGMr456jsi1beWpPSftGZVn9ngafB2fiYtReJ4biflnSK5Vjb1vZfq5yW7IdK+PeU7nVyXblqW8pr/s3Kc+Tsasj9BgfKnF73ae7bb3U617deXzvg8okoVZVoo5RVmO9cMT3qvvEvca1yuTSdsp76a3Ksevzx71H214rsmLYqAnDkHTz9HbuGK9xjfK9+N/yeG1J34k226ltqpyQvo/yvvdzSXvHmCunh+IO91FXk7RWoz5q061OpsV+rTKp7gvlqWdJOjYixtqBwFPV0/5ZudXZmVp4nqiqWkaPa6ftb0h6bURcPvTcKsrq/XtFRIukywXjQ7bvHA228XEmkr9H0n0iYsdyjX5URBzTIPZ/KStbfkb5md5D2Zc6Qaq6hl6vbBMutOVg+fbY/TNnYtb/U9u5nE/N8u2Ium1lbpG0uvJ6POjjRdQvbpXtnygXcR4TERdN+97hDdovzcc6nUm3f1RWinqVsnrN1RFRVbHc9shtISPiZ6Oen2PsJ0t6i3Le5avK7R3/LSLOHTNel62GSuwHKecMV5H0C0mXRNmKqlbr/vq02O9W3uuaJqrZvr/yWnaf8tQv1KYN0Px43Xkryh5j4KW99g5N9csukHRwlN13ajm3znyBMmnqE5L+O3LhzEqSrh1nLnFSEnBGnQxVJ0FptJ0cEb8c/8hmjH2NpC2jJBOUAcjLaxrypQH8KOUH+FLlyoj7K6tFXBARTbYh6DmZ3Fqvzn/p1LxdCyeHvCPaZbEOXqd68tuZefz16FS60Ll67kNRLhS27ybpAxGx3+y/ecfiXF35nNaNwKH4p0l6e5TVoc7kloMjomolYK+OdPnsba9sCJ4laUdltY/a4/2mpB1aNdSG4j5deUPeUDlpsZbyM336rL84e8zvKBNNdtFU2eIFajsdQ6+ztqTNtHCi0yKTUHOId5JyYPN25b7Fayk/47Xl75ufx7a/L+k/lPe5RVa61U4Kldc4Tzlh87mI+Mu07+0dEceP/s
1ZY/bco/ZR5XjXiIj7lgSJl0TEyyvjrqd8r5sOrtj+qLKtMpgU2kPSjyPiFTP/1qzxnh8RJ3jhbQsGQlk16fSahrdz7+JjlFuGNGnAD8UerLYdnFeDPcM/Jo1fwc32hyTdS1MTC3tK+q2k/y5xx1p9MPQ+r1H+/JNysmne8ADimLF7TQCsNb1dZXvziPhRZdx5ymTyc6NMsNu+MiIeUhn3J5I+r5xs+UFNrGlxN1eeV+uV+/9DJT0zKhYDTIvfPMHHnZKcRrzO2pI2jHbJrN2StMvg/AOUAxTXRJskmXnKFYprKyeRL5F0a0TsNesvLj7uiyS9WNI6EbGpM+n741GRBNC7n1Neo1nCpe3txx0UnePrNDsvbL8jIt5eJgEGg7IL/qwZ/C/xRyYyj3u/mxb7WTGU1Fvel4Mi4l2VcZfKtai8VnXycHmPj1COFYVy4vvVjQb/e7UPt1YmDQ1WON+kTJadN/NvLVHc+ym3On20cvLtp5KeHxHX18SdRD3u00uDOyTi9hzf6zFW3dNSGNd6oHJs6ABJ/xQRq44Z54yIePqICcPB39dQVtoZezsc2/tKOlg5kWzl+XFwNKww6qy6r6iozubZE2UH/d8Lp3/e5/gal0bE1h5KJLZ9RURsOW7MafEHOwRIOX8x9tZyveYBhuL36ONsIOm2Ue1L24+Jiu2Se40PldhfVt6r3xwRW5Z21vdq+78ldpdJX8+w5WBEvGSceENxu1RznES216i5ps0St+e5vJKyctFTlNf7syV9YjDXVRF3kFxg5Zj9Jsq+WdXir6H495C0bYn/nR7jDa3Zfnq0qZ42HLNX8mmXRLXBfaRFG2Ba3MHx/k15vK12jeim1xh4T86KTp8adWy2HzTOuO1EJOD0UBptuysbqycrJ99+2yj2NyU9OyL+WB7fXdJpLTph0xrEVyozvh4fEa+qjT09PtrrMfntXPX2nMgt0JZrtk+JiN09QwZkNFgN2YPthys7Ht/VwqsrWiVZXDW9gTbquTHidulIl/+/LZWdry3L4OwJEfHkyrjbKJMtzlPDVSw9OFdCPknSezVUNWygxcCN7Rcqt1LZQNLlyob3t2vuJbYvj4iH2d5LWa3mQOVkeouVyE3PY+c+0Hsp79XTk6WqJ4WGXufOygoAoewsVa0WGnqPny3p6coKHOe3GMCy/V1lif7Thz7T34+IB1fG7TK44qxG8qBBx7Z0fK+KiAeNGe8lEXHkLANv91BWCdx2vCOWnKsV9lUmC12qfF++Wts5L7EXaWO5QeLz4Fq/uOfGiHuSpK2Vnz8rz+f5ylLDn4uIscv395oAGJosXD8intZwsnBQSW34fjq/9trp3FrwX5Xn3ErKlZCfjcrKZM7kwtdLOrLltWIofvMEH49OcmpyzLbPVW4rs4oyMfkG5QrG6tLIHc/l+crtTk6OytVS0+IOqsu9StKqEXHY4L5VGfdy5aq070bbJLVu/Rx3TLhsbYaJtwWivlLN67ToJOfYCZeevXpBzRYA3c1wLRr7XPZSSB7upWP7cL6kV0TZfqK0+T/aaizAmUy3UrSrrLO5cuX7xhraarDR+N7Wkt40InZt+6JLIm7n92KpJb9NohaTnr3GtWyfqhwf+rFy4dMFyvZAlwrXzgTd74/bpxyKcy9Jg1X0320xoTfiNc6IiKdX/P5wouwo91C258Yei7N9kaQdlG3jrZwVfD4TEY8cN+ZQ7FHtgFuiQVL50Gs0q+zc69pZYh8fEXsv7rk5xuwyPlTiXBIR20zr/1b3GXoa1V5r1B+5QNJdJB2r3D2jWb/E/ZKdn6mpxNNzo1FShDPZ+UPKxO9/SPq2csFh1RYwPc/laa+zjqQNotFinGmxt5L08oh4YWWMGUXFbjBLKX6Twg5eCsmnvdj+mXIx7snKnWCW++QLd9pBo+UYuO3/iogDPEMF+Bb9hdK+vCoqK4ZO13Kf+m6cVTeGVyycp+yI1ex3/g5J7yiNqT0knWf7FxHxpOoDzsGqq5
zbRoWkJ0u62Pbh5bVrOje7Dv39woj4vHLlbCvLbani6Vp3/pfGB1nSFmUwci/lfpEHKicCaqpP/EnSleV8Gy572yQ5ZDrbB0fEwWP++v7lz7E7oMvIkZK+odzXuml1lmK+7U9o4bLILRqDTfcMHTLYuuA2Z+nwG5TVZWq9R3k+31XSnRvEk9SngxCZef5Z2z+IiCvaHOki9pe0jTLT/QnOFWX/URnzTs7S+s+S9OHIMnqtGoNNz+PI0tsXlkSC6jK3o9jeSfn5/rFy0mmTkuTx5YqwPfeoVUT8fFq8Fh2Oe0bEKbYPKq9xm+0Wca9TbrEwyBzfsDw3log4svz16zFtpZjL6jHb7xw3fnmN6yS92bmd6NOVCRG3l0HPD1VOGtpDq9xsP1qZdFFrddv3G1zTyjVv9QZxN5C01WCw35n4dKayPT5P0tgJOJIOknRRGWhpmdh6rMpkYXn8I2XHt/YacpXt50lauXRGX62sMlClTA4eLelo5zYUJ0n6oO3PS3pXjL+txWoRcfG0a0WTLdqKo1USfCQpIuY7E7ZqBqf/PuJ62arNdbfS/n6hpE+XiYxWg269zuVnKPuop9j+h/I8PiXqq1rYucJwL+VKQCm3eq71t4i4dfD/VybqW7QvevZz9lAmv11iu2nC5XS2j4qIF1eEeMYs3wtNbX09rkdodMLlS22Pk3B5UokxT4uWOw9JVVu0zaTB+yyNvhbVnBOD+/GaM3x/E+XWzHNKHrb9hpJAd8So42v0GenVPrx9kHxT4l5oe+x7lEcnN2nwfxj1izk+J+njyjLkrQf6T1TeT1uPM/S4T0t934uDlYmc50pSRFxe2rRjKckVb1e+r29TbjvxHEk/lLR/RPy68ni7TZzO4Gplv6pGr3GtQ5TJeUtlIqy8TlXyTbGypBuV/ffNnVUzx642PIP1a365tFlXUm4Xesqon3FuXVbjYOVk4Ya2T1TZ6qQy5sBlWnRLoN/Y/q2kF8WYlc88YnGr7RaVnXtdO6XcNmuB0lZ+RG3QTuNDkvRnZwWOwXjytsr5riZs76x8T4arfVeN5Uj6le23aOExyV9VxlREPLbMQe0raZ5zq7JjI+KrtbHVYfzCuW3PNso2hiTtX8ahDqo4zoGTJH1EudW8lIuJPqOpZMax9TqXPWIxju2LIuI1LeIPRMRltmvfh/fP9hLKRQI1PqpcjDtfeU1+qHLh4WA77dr4rQa/H69sr8zUB76Hcouu2oXgPRLVHqjsB79C0jG2z1Ausltke9ElPMYHRsQPZ0qeqk2aKj6l/Gw8ujz+pbLNX/t+tBwDH1TReV/lMc0osnLRNbbv22DcbYGJSMBRTn58X7kKXpL2Vp4Ys64EW0I3SPqNpN9L+qcG8aTc2/QLQ4/PbRRXkt5ue/+I+GNEvMxZRv390agSQOXE0tLWuvPf/YOsPpPfp6l+0HUuxi4NPRjkiNzzfT1lg1CSLo6IG1ocXCd3igarpGexr3LgdZCgdL5KObZKB6tPR/pSZ2Wvo5Xnw5+USS217tM6u71o3kEYDHpLeuGoz3CjQe+/RsRfbcv2XUqDq3ZP8iMlXS/pCknnO8vWV68UKpqexy4Z75L+4BFZ79FgCypJH5D0hMEkd0lSO1OZIDmuM5yVX/4i6WW211V2Zlr4eWmwRrmX7C+pxbY1vQZX1pT0gzJAEcpB9Uttny5VJbYeoew0LvJcRCxSkWqunMnZ+0raSdKpyoGL7ZQdwJoVX/tJ+qQzsdzKgcgW7bcDJJ3r3M5IysTk2glIKdvFw0mbf1euBvyL7dpkzl4TAL0mC1+lHBT7m/KecrYaDMQ6V1jsrDzfNlYOuJyo3CLoLOVe2uP4XbmeDT7Tz5VUPdE0pEeCT5ckp2IV2/dW9iWr9nofocu5HFny9jBJh5X3463Kqnu1yTIHKJOGvhARV5XJzdnKwC+p82y/SdKqzj3rXy7pSw3iduvndE64nO7Ixf/IzCJi31YHMoOmCZdRVvtHxCaNj3Nxqt
7noum1qGPy8KD912Qr8hn0ah+eZ/tIZX8slMlw5w4GlscYSB4kNz1AOb4wqJz5DEkX1x+ubouIFn3zUW6Mim2RZ9ErEbfne9E6EfdY5XVsdeV97kRlm+tZynHEXSpiD79Gs4nTmZLJpAXbLtXqNa51haRX2B5eNPvxaFjlZMDtVte/V3ntuUpT51koxzBaGnu7pYGy+O0NkkYm4ETEfqOen0P8rzqrvw22Otk/2m118jVJn4+IsyXJ9lOUC4s/pZwIHndMrsfiVqnDtbP0Swdt5Js1NUF9q3LbxBq9xoekrIZwuqRNbX9L0rrKCiXVbH9c0mqSnqCc03mu2tyv91QmXg7m5M4rz1WLiB+V5J5LJR0u6eHOE+VNlWOTPcYvdpL0sIj4hyTZPk55LWqRgLNaRBw/9PgE269vELfnudxlMc60e/ZKyjHKqoSv6Lj1cvErZfLjlZJk+8HK7RfH/myX+YrB+OBLRjw3Z0sj+bRXolpkZeRTlAup1lYuCD9P44/jvE7SizQ6OatF0pQkbRoRe9jeU8p/g91kJXGzMfBBwm5EnNfguGaztnI84GItvABs7MIcE7EFlUeUuBv13Bxjvlw5CLuuMpHjlIi4uu5I+/Po0k3Nto2yfWpE7Lr4n1z2bM+LiOps8Rli31lTkx7XtOo42n61pDcqO6g7K1ewnBARj62M23yv7J5s767sGJ2rvAA/VtLrIys6LXds/4cyaeFLWnh1c/OENTcuh+jOe4ba3li5/2aLxuthysHpFqsIhuMusk2IK7fisv2MiPiS7X1GfT/abEH1BeWk7AHKBtUflINmO9XGHnoNS1o5IlpWRmhyHnv2csvRIvHUpazu0GMrEwK3meXXliTu8B61q0taM9rsUXtPZeP9ScrP9FeVA2Rj70le4m6lTGB5sDLheV1Ju0VldSdnRY8ZzbXh7Kza8GjlZ+KDQ99aS7n1Z4ttvuZJ+qNy4PzU4U6j7dMiojr5u3Q+FI1KF9veTZkQsolyZc+jldtF1JaPfasycfGL5alnKAfh3i/pqIjYa6bfXYLYXbY8da5u2lXS1yJLqG8r6b0RMeu5uKyUpKlvSjomIi6a9r3Dx03mLEkVRynPhT9I+qmk50fE9XVHvCD+lyW9Ulnla6uS4LNfROxYEXM15QTWU8pTZyurAFVX7ivH9zZl9dCXl/fnP1v0eXqdyyX2RsrJoT2Uiw1OjojZVsUtM2WAbD/l/5+V/3+fiAaDDT37OdMSLs/WVMLl3jVjDb2UPslhMbXV9dqSXhcRb6mM+0NJDxn0eW3fRdIVEfHAcc5xdy5v3tO0a9HgXH5XVG6lMmrSuNVEci8ztA+fW9vvc27ZPpOI8Ssany9p5yhbTzm3eTwzIh43+28uNu7ByoV7X1DjsQDbOygnB8+ZFrt2W7mm92lPbSPzavV7L45Rvg8HKttyr1b2fV86ZrzhbVN+FhH3Hfpek21U3HiLFtt/VY6TjeqbvyYi7l5xuN3GtZxVcO8kaTAGsrey0tXY23D0ZvsaSQ9t0c6cIX7TtkuZKPydMsFreEKoxWfvS8oFBqdHxJ8X9/NzjD1qS6D5EfHQys/KVcqFMScpF7eeVzvGV+I27+MMxT6kdmJ3RMwu40ND8VdRJrdabedGBufA4M81lBPtVXMj015jZUmrR5utyQb9hZ2VSWXHRFY7uY+kb0fERhWxz1Xj8YuSXLL94PpQ7uHnTh8XHzP2e5XjC5/VVBL12irJb+Nek3qey7avVLbrj1OOk10yap5gjLhvH3p4m/L+empNn8Gzb73Uon14VURMr8a1yHNzjNmtj+OsiL91bZwZYs/XwolqKyur+bX4nDxe+dl4mjJp7+SIOLU2bi/uuBVliV89Bl4+xzOOLbX4fyuvM/LaO9f5i4ViNhgT6872t5WT8xeWx4+R9L6IeFRFzEOUJ/+c9zSfJeYpEbH7TCdEow/wFcqb6B/K43UknTe9QVsRv9sgcivTOv83KldFtuw8bq+8KV
+vvOlvKGmfaF+KtMnktzvslV0GHl+pPI+PUFYMGZTrfWfU7z19haQnR6l646wQ8fUWk6c92P7piKcjIpqUT/eIcoiSqsshtu5I9x5Mt32LcoXa35SVFpxhY60x4w2uFW/UiA5Ci86v7U0j4se1cZbgdR4v6W6SvhIRt1bE6TJxU2Kdqw7ncQ9DnZknS9pImZ0eknaT9LOIeHlF7Fco94Uefo/3jIiP1h11Xisj4sbaOCPi3kU5ubtgcEXSSr0GJMdVPgfbK8tNf3zoW7dI+lJEXNvgNRZs5dSa7f2Vq/0G2w5tJenAqEw6HBq42k7Su5RtgrdFRHUZYNtbK6unSdkZa7LSvuMEQK/Jwq8pk9KGP9efjYinVsZdo7ZNtZj4qys/y7c0jts8wcf2bhHxucU9N2bs4yQdMNR/alZBtOO5/F3lZNbnlH3WquuSl85Wu8316OcMxW6acFkG7V6orCbzlRiqdmL7LRHRompWs33Up8VomnDZK8GixB61rcyuylWyTbaVacmdkodn+iwPNPqMbCLp51q4ffiwiLikNnYP0yfUS/t2fkRUVRDtORZg+wRlqfqFqnDU3p9a36fLezB9O7mBVu9F00Tc4cl42+8e7u+2mHgrcc5Vw4nTMgHyqhixLY/tn0dE1bbfvc7lUYkPLZIhSpyNJG0WEV93JrWs0qJdWxItduvRDu80RtvzOjSYKNxZuZ3TZyWdUTOJPBT7q8rEus+Wp/ZQjsE8TdIl47Zf3G9xa+9FDD22O+nGWZFkYw3toBERn24Q9+KIeKTt7yjnGn4v6aqIuH9l3EW2JlNWtayqjGT7PGWlns9HxF+mfW/vWLgizFxjNx+/cFaxOFS50MfKc+7AiDh53JhDsUddiwbGvib1GusssXdTVpP9VuRuIs0W45T4a0hSi/uJ7YMj4mB3Wohq+zPKJM7hbdrWiIg5V4oqfbL1S6znaaqNuJayCt4Da461vEbP5NMuiWq2r1dWnDpFDebjPEMy1kA02B3AWcH4LZK2UCa/PUbSv0XEuZVxm42Bl/aglFt7SVM72Txf+dk4sOZYe5qUBJwtJX1aOfloSf+rPAmqVmaX2P+khfebHHt/L9v3johfD50QC4ksJV7F9guUpQsHg9G7SXpP5c1+sBLEyhKtO5a/V70fvYzo/C90EjfoPM6T9LwoKxWc+3x+JhpU2+kx+V2O94nKm8Rg1c33o2IrH9unKAfbVlUOuP1AebN7pqR7RcTe48Yu8RdaBeFcNXtFNEoka832Xad3Pkc9VxH/exHxcGc5xA2jlENscNNv2pHuOZjew1IaKDxPOdFyiaQLJJ0fpZRjRcx1Zvt+TUOz18TNcOwO5/GPJX1H+f5eEBFXNTjWUZ2ZBaJiqwePrtrXJLnV9o+UE70nKycL/1gbs8RtumLB9oURsZ0zqW74Hl2VVDcUf6MWbapZ4vfYj3zBQLTtpyoHht4i6fgGE6eDz94hkq6MiJNanXO9dB5Ebr5Sb4ZrZ/V7bPuuyqoh08+32oGVUdsL3CRpXrRdfNAswaf1dWhanG4VRDtOZj0gGlZ8sf2IiJjnDit6SvzHKLc+3Ug5SD+43rfolzXt5wzFbppw6awAsJqyhP7eykUyry3fa3Uuz5e0TUwlGKwq6dKoWLU4FLt5wmWPPpTtr2hqW5nnKasWnaTcVuZJEVG1rUzp+/8/LTrhNG5Vli7Jw0Of5edIupemBtL3lPTbaJAAXz5/z4yIX5bHj5P0kXH77LafHxEnzHCPUkR8YPyjlWy/WVnl+gvKa9AuygTGQ2ri9mT7mqhMEFpM/C6JuD24cSKuc2u3w6ZPiNm+v6RDo2K7haFYg4nTf1YmUVVNnDq3m/59jKhabHu9iPht5fF2GdeyfZkymeXH5fH9lBPVtX2cFym31V0nIjZ1bgv48YjYoSZuiX2qpC21aPWp6q3EZ2i7LFIJZnnjTCR+onKri6fV9tlLzHsqk2a3K099S9I7lP2S+0bZCrwF26tEo8
rOPa6dpa/+SE1td7KnMgnpTRUxDx/x9E3KtuEXR3xvLrGPl7SppMuVCS1Stu1bfEbeqrx27iDpI8oxo6OjcjvxwVicc2uyrVS2Jqsdk+yt0/jFvZXb60hZ5bu6IndPvcY6e3Ju33S8pMEY/u+UC/i/XxFz/4j4kO3tohSiaKmMP71MU4mA50v62DjtAOeOAP8maWstvCXuLZKOjTaJIT3HDbskqtleKxpU3hqKN9v8RdSOHZbXWEf5HizYQUNZyX+2hLslidt8DLznvFaJta3y/vQgSXdWbh3255o20SqL/5FlLzLRZkvba5XHLcrHPUPSByTdR7lKfyNlksHYg1eRyTcrKy8yXfbsi4hP275UU/u7PSfqt846TlOT1BuVx1a7feSairKXfBlwfLmyIR/KydmPz/KrS+pOwwPekft83qlBXEnacbhxHRF/sL2T8gI0rtZ7ZUvS5pHVnCzp18rBzLB9oXKFQa2v2D5bue+7lEkiZzWI28tFysb74p4b1yqlcby7pvYPr1YmU86b1pH+pDIbeZx4XfcitX3O9MGUUc8tqcG1oqeIeLxzy7ptlIPrZzorGsyaRLMY8zR1Tb6vcuWNJd1d0s8k1fy7VvbQXqzlOnqXinjDupzHygzsf1FuVfefZWByfkQ8e9yAUZFgswRWtu2IzHAun787twgcEZvbfqSyKtmbbV+trMJxwmJ+dSRPrVhY1fbDpYVWLKxWcZzblT/XHDfGYnyiDMg3rUZSYvXaj1yaen93Uu49fZXdZF/dX9o+Urma8L3OFd8rNYjb04NGTQDUBvWiq0M2t32TMjHphorQ/7B93yiJ6c5k+xarGI5XVhd8qqR3KlcgtdjrfOvy9aXy+OmS5kt6qe3PRcRhNcGnT56W03isBB/bOyo/E+tPG0ReS6O3YBjHSrbXjoUriLbqBzc9lwcT1JJ2diYDLmTcCerov2f2MZJeo2zD3L6Yn52LHv0cSVJE/MRtEy4fORjgt/1hSR+1fZpygqXFtV7KSZtzhgbi9tXUlh9VSsJNkypnQ3r0odaLiCMkyfbLI+K95fkjbO9XEXfgc8oxhU+owbk81B87NhomDw8+y7bfHwuXZv9SGS8wY8J4AAAgAElEQVRq4aWS/ruMnW0l6RDl9Xpcq5c/u7QPI+I9zqoWj1Xeo/eNiO+NG8+dtwEoLrK9RYMxvYW0vE9Pi3NXjRiDq03gKA7S1ELD2Z5bIoOJXNsrR8TtQ89fp2zft3C1MuHr/5QTTv8t6UfjBotZEm9rk2+KXuNar5f0TefWqoNx5Rb97VcoExa+K0kRca1zEW0Lp5evHka1XVpsyflg5djIcJuluhpJib2qsvrdHsrzoVXb4nfKSnWjVCXfjGrDKftTNTG7XDuLnbXwdifHKaskjJ2Ao/y3P1BT18ldlVV7trT9hIg4oCL21pK2GIxrteJcgHtOGcs51fYZku4abbbnvlOZv3mWcmuyv7cYbinJf4do0c9fk6r4yuvcxsr+6Va2qz7bzoURl0fE6bafL+kNtj/Uoh1axjd31qKJ6lVJ1K3HOofZ3kA5oT5YaHCBsmrmLypDHyXptRHxzfI622uqgta49lVuxXW42s05LVDaax+U9MEyJrLBuG24iDhO0nG2d41O2yv1nNeJiM84KxkOEtXe2ChR7R7O7Ws31sKfkbGq4A3mL2xvMj0hxlmttIUvKeeszyxxH6S8r9QueuoxBm7bj4lSbdhZqa3l+PeHldehzynvgy+QtHlNwIlIwCkTCbuqnLiD/6eKwTFJercyq+vrkauGn6AsWVQlIm63/Q/bd2vUeBj1GlcrO3qt4i2YVC9ZZMtd0s0MjpN0s/KmJOUKuOOUk7815jlXMA6XY2s1iNVj8vsq288rsTdTbs11UWVMSZlGafusQYO7PK5qfJcL7eHKG9xgFcRREfGFuqNtr9fk9AjvVJZYvjByL9L7SareSkXq05F2VuJaxLgdhDKQt5qke5aJ9OH3ef2xDnLh+P
OVyV7VWziMiL2dcpD3scoEmTOUjfmxDSUZHi3pCxFxVnm8o7IjWaPbxI36nce3K7cku1056XZD+apW3odRW3HUZJB/RdLJJSFCkl5SnmsiIi6WdLGzotoHlP9/43ZKn6pcsbCBcpuJwWfvZtUNBA065ldFg7KjI9wzhlbElGTWVoOxj46p/cjfYfv9kr7cKPY8ZxnuTSQdZHtNtZlI3l1Zxvt9EfHHkgj3+gZxe+o1AbCfpEcpV7FImRg5T9Imtt8Z41eMfLOkC51Vz6y85r+k8lgl6f4RsZvtXSLiOGfZ7Kp7SLGBpK2irPp27k9+pnJVzzxJVQk4apvg8ytlO/uZ5dgGblEmdLTwfknftr1QBdFGsVufy7NNUI/dBnf/PbNviohW18ph3fo5bp9wuSDZNnIF9ottv03SNyStURF3gYh4b2nXDpLT3xURZ7eI3VLnPtTwANv0vkeLwbfbIuJjDeJM1yt5eHUPVXMqA7GrL+Z3lkhpz79aWYr8r/r/7J13mGRV9bXfNQTJQUSSgoIEkShZFEExK4KkH0lEREBEEMGEKIKKCHwKiIDkZAARFCQjOWeQZA6IiCLKIFnX98c+d+pWdXX3TJ1zerpl1vPM09Strn0v1feesPfaa0VTzsD2ALaPTevDJ2x/c9QPDIZmv2Dy11hvJp7f9/V5z4QVei7WBu5UdPg+C1PUw3LH5FpE3FOJ+fnI9Hprgky8+aAXqvpE3F8p1E5OKk10Ir6PJ4CvpdfZ30cbkj7nAgpOtfNati9Pc3Sj5vSgy1gZP2v7uaYGoFCJKEIGSIXDWii+dklr+fUJAsAFhHL9tQydBweJfSZBALiIKDxd1ZBECsQuqirXiluraaZqEwORM2wUrefNjAWwErBuQzKUdDSxl3wjkKXMTdghLUw05RaD7f9KOgpYNb1+lpYKVSaOJVRU7gKuVjTNlKjNnUQoOX2TuOd2oFDBV8MoDZH3bB9NkLBWBvYiGiVOJdY1uTiPWBPeQ6GmiAaFc51tnESoZTZz87bp2Nsy487ZkG8AbF+pUM/Kwf2SfgUsmvZ8DYqsDxPhZCNiPL4NeFTS9c5Tzrwyrd8acva1hO3iYznX2qAW+bQiUe1c4pk7j7LPyNkMzTH9CMh2bCHWsecpRCKWI8aLabKgHgY1cuA7AidKapySHgeyVYDasP1rdQj8J0m6g2gKGAgTgoBDeJH/ixgYSk3Kz9t+TNIkSZNsXyHpW4ViPwncI+lSuv3psmX6ZqALK9hevvX6isSQzcUuRKdF8/e6BvhOgbhQp/i9O1EYepYgGVwMHJgZ81aFiseT7UK0pKWIhMvAaJF6VqRM0qom2sXpNpt7MpnF6R783C1p5ZRAzfYirbiRXqP137MRRYDbGXyDsDOwJ6FIdhvdJIBvDxizjYaAdJak/xKSlme6jMXelcQ1HwRcYPu5AjEbrG17p+aF7QslZW34U+HmLmDDdKhk4abKfUzcB/cQz+BxpRbxCW3P7dmATYhicA4+Q0hl75peX0okhbKhUALchGBjL0V0XK45aLxWx8Kne5NJuUz6REh+UC3VkIKopUYCkUwAeErSooQf+SKFYu8IrAL81vZTkhagQHeo7adozae2/0LhRFkpjAGxdWZCkeSv6XwLEXPTWoS87kAEHNsXKWwG1k6H9nQfe4AB0MhL/zMlFh4BSpDJXk73nul5QjXiaUkl9lLFCD4OpdO7FNYKXWtihWf04bkX6woKoj33cjsRkqsg1pA3L2u6elrnXLfPR6YW78347NTgCkmHEGNR28rh9sy4NfY5DUoTLm+V9E7bU0i3tg+Q9DCRCC+CRHSqQXYqiZp7qJ+09qlTVGQVtjIDK0+0cJ6kjxFrrPa9PLAFbEIt8vAnicR3W3kiiyAq6Ty611VzEPm4ExRd2QN1cMKU9eFWRBGrKNKcsRORoBZwuqTvOikmTSscdrqTgAttn1nwUoEpjUk7AzVsVWsRcWvk4GoTcVcm9k7Hp7/niQT5rY
Q9QK2cZIPNiTxDLqqMyRpGHQp4TRorcvN9V0n6PLHWehuhvnTeKJ8ZEZLOdKh99yUmFyC/Qffa5XvE2uUrmTE3I+7lO2zvkPY4JQrTEIXCrdxSiiqIoqpyLdRqmqnZxHAQcIekLruTvMtlfoLk3ZBM5iQs2/5TYM/3MuA+STfTvR4aeA3QwuWSNgV+bBdV2DnP9hQip6Q/UqYwO3siGioV5/dXWM1lWWYl1FAaeiHVXt5PWIeeoDIqkRCKKcUtvUrnOnuwoO22jc/JknLUoRr8VmGn1uSYtgWymn5tb5XyDBcT66LSmNf2E5I+QiiSfKmH6DMIfkDk2pr8/zZE3WXDYT8xlahJPqUeUe2Z9jiUC0nLEWpv8/asu+ahWwFuYNj+mUI97FKiEWwT2yX21cVz4A5l55UTAQeXF0B5SuF0cWeqwf2FTMKlyo7vdaBCPu89MS8jlAS+DixAdNOvYTtHJqyJvX2/45UZ9kUg6UO2T57e1zE1kHQ6ISl4Y3q9FrCb7b7qHFMZs2bHfnOOd9KZhC4tWPweE6QFZ64KzinE3+6WQpdVFaoop5fi/4pgu59EJPhKyNNOIjZzB1faSLfPNR+RxHpnZpzdB02QTsM5lgb2A7axPVOBePMRMpbrEcSk/wI32N6vQOyLCQJgW41rPRew2amBGvdxivt+gkm/JvAc0T12te3LS8TvOdckQsEney1QA4ru2HMJAtkNBeMO8UuVdJvtLCa9pKuJzqab6SYkZ20k0zz6XaCtRvLREvOpKvmRD3OuRYB/uEyH6ISAKvtES7qvXQhJxa17bS+vPl7B0xD3QGB/d7oL5wEOd6adXUp8nA2sCJxMJFD3a5EwBo27H5HA+kk69D5CZv8wQnUwq5tF0gPAik7+9Aq10rtsLzfo9zzMODTw36w2eu7l9nq2uZezlB2H+T6KeVuXRiom9MIex8qqkm62vaakG4EPEITLe22/Zjpf2rBQBU/ymqi9h6qBtNbqhZ1pM5AKNZv0kIfPKfFMpzG4yV88kLuukDRi4tmZVnaSvgnMQiTn2+vDLMJeKiCsY/vf6fWcxL4st1v4VnfbfBWDpHscjUml4xafp1Oc4jm4VuxZCCL14h7BiinzHG8mCBHzEV3DBzosqQaNV+37SPGKzvulx2R1Ggv7wc5TlW325jsCbyf2fBcDx+fkGSQtYvsvaQweApexaHl97njWJ2azZrmNUOCYDNxfKnetegoD2TmFYeLeZHut0mu4WmNnK/4idBoab3am3UkiVXyBaAxsSD1fI0jr+9seWBV3uLVA7hogxZ5MkIVeIBqgGnWPrPVsxbzW9URO8keEMt6fga/bXnbED05d7LOATziaqIpAodp7EVHoXo+oed5VYr0h6WDCQuyS3Fg9cavkOlPsy4k89ffToa0Im9K3Dv+pqYo7P/Blum05v+xke10KJecUBfn07YQYwL4Otcu7c9bK/er2pda36Xob8unKSuRT27nqRVPGC4Vi7Z8TUS173aVQwFuaUBDNbkxKNZGNCUJW2zpzMlGLG1hhT9KRdJOR3wr8hlASKy4mUiIHnv5eQ+A8p6R2/CWAvxK5lk8SanXfydkvTBQFnOslrWg7Vz6vjY2ISX4PgqE4DzFolsCPCLZbk6SfiXyboapIhIg9GvJNmkQOy90wVcZqxL3RdNYvDjyYBmcPMnm4bsd+c46LKGhHkhLe/To3iia8Fd1jHy1UVF8L2EbSH4ikWymp5Vo4P02gr6JbNrXI4E54CW5IMPOPUCjXnOwMtqlD1nNz218b/bez8W8g23fS9pEK78ZX0f09l9j4L0Go4GxJdN58OjcmgMPu5bfAK4lumTcQCeUS2IqQOW2KeFenY8WgIKM+T3RFnD/a74+C4vcxgO2fEN3OyxFs9z2Jv9/smdfbD0tTRn1iCtL8+hTxHf8iI85MRIfQpwpeW20mfTYRrR9cSY1Edf3I++E0YClJZ9veu9I5xhVc3yf6yvR3a9S4NkvH5gT+Of
zHRsXMhBzyDsBChDpbFmE03W9PpATN1UApD3lsHyjpQjo+57vYbghPJaRkzwBuktQm+Hwvfc/T1PmtUELYGni1pHZCYW46Eu3jDq17eVtiHf4qOmuXFenM3dMESesQa4kFJe3VemsegmxRFKXWAW5ZGpdE5X3OeQoi9SGEkqOB4wrEnYJm/1QwZHFP8sq4XNL/I5L/EMTZA0rPq5LOt11E5cnJCrYC+lkZZt8bGmoNvLJCeWLg/VNTXFOoIf7F9jPp9ezEHJiLVdLP9l7adFTKBoXoVlj4TzqWi8sk7c1QwlCJOep2SWu4fGNSsXm6B8VzcC28EziUSHi/WtIqxHiRS9yfCXgPUYh8FUFGPoN4Bi8gbwwt/n2kIqSJe3cRddStsomAlM9r3Wn7cElvtH1t5rUNgUO9+TgKzs1NkbsE0WYEHKZQMPgRYYM+cA6ghVvTmuU4QonlSaBIkVp1FQZqqcqdn76Pb9BRziqhOlx87FS3UibAQ+nnopIWzSmsp4LxBXSUQj5vu1F0zrKkLkG0GSF2P6vdgTEGea09CDXATxBKnBsAfZvvpxbqqA3OTXmloS2J/fWOth+RtDix3ymBG4FzUi7jeQqQp2rkOnvwYSJ306gvXkcBJWpCDWgs3E6OJ9+qvcEBBJn12kS+WRL4VWbMSyT9H9CoRW6WzlECT6f61guKJrhHifpLCUyW9DmCF7BeuqdL1HNWBLYj9jWNC8XA+5xWTWSd0uQ0upsioVuJsgZK5MD/3frv2Qil5/uzryyhtT58hkJckXGtgKOOJOTMRFHst2T6Iku61vYbE9u2+Z9vNuX/JRK9h9ge2HJIwcDe0B3JwrmASzxOO+qhf4dpv2PjCRqmY6HBoBsqVerYH+Zc2UlvSW0m92yE5NsLtosQDFrnKdZ9M9zfrvImeGBIuoiODd6UpJ7twyqcawNC8WROwrf2s4NOsJIOJTblRWU91S1LPonYqJ9pO0s+VcN43+YuaCXdRCyizkzXmSUJ2RP7t8ADBNP9GqKLpaQNVVUoLHYWIeyujioYt+R9fDbBeP8NUaRuvudnRvzg6HGbJP2TrcOPAJ8rSQyQtAaRjF3T9mcyY91ge50yV1aXSZ/i9y3c2P59TtwUa35ibdju0ru6QNwxXfuk+3B52/eO1TnHAxTdhJtSmNiavs8PEF1IEMmVs0vMgZLeStjWPU6okQ3cAdGKWbOr/o3A0rZPkrQgMJftfsoOg8ZfnQ7B57oWwWda4yxBkHgPoluGfTJwt+0Xsi60MhRqdY8TBI7sNaKi23R9whL3mNZbkwlZ9dwEWe/5qqwDUuzsbr1a+5yUYFu7mefSmFSccFly/5Ti3Wp7dbU6Fcfznj2t4X5Bx3J5O2Bl28PZlgx6nqrfgaSFndmlnuK8jA55+MZC5OE2GXSKNbDtzQrEvpWw+XguvZ6VGO/XGPmT0weJtLg9HQLkxkQzQJbVvCqpIqXYDwCvIWyoijYmlZqne2JWycGl2LcRxYkrm+dZBTqo0379CuCE3r2NpCNycg01v48Uv+jYVjqvJelO26tUmOv62kM1yHk+emoBXW9RQIGjdZ6FgS2IQvg8BBEn14aqif0qYB7bubYhTbyaCgO1VOVmJyy/30RHeeLo3BxRil107FR/lcgGdgapPO19twGWdNieLg4sbPvmQWOOcr4ixHJJl7tHfaTfsWmIV1MhYiZCWb5ow5Qqqw3WQnqm3w/cU7jOUDTXORaQdA0huHAS8L3S+8jWeYqtBSQtYPuxErFaMRtFq4b4PolOPTWXnPUdwirz/4BPEfn7O52pRJ1iL0wQ1W6xfU0aP9d3ZhO4pF8TOd6itSFJryCIZM38dA0hpvHQ8J8a6DzzA68stcboE79oDjzlcS62vX6heOsC+xO2zu1c9cDrlvFOwKm6mRnmnAsA1ztDRq7ZhIx2bDxB0l3EIPN4ev1S4Krcze5ExHCLoBqLn4rF75ttl/LKbGJe5HyLoZeO9L7LdJEVhyrY4P
XEX4Bg3G5HyJydQGwaVgHO8oCdmD2LoKcplFToeUZeAP5QYsKXdD/lvW+RtKzryVhPcnRm1Yi9DLA3QwvU49LOoXUff5AgspS6jz8NHOPwqd2PIEgeaPuOAtdc9dkuDUlHA4sRne9tgmiubU8NJn21wo3CtmcPQnXqTqKYdUOJZ6MGcXGizn01UZPYmvYPS9u+TNIcwEy2J2fGXI/wiD6d6GiZn+goe3jED44e9+vA3yncVa/oZF0dWNb2Mmm9eZbtdUf56LScoyrBZ6Kg1jwiaYkae92xhKTjbO9UIW6Rfc5YEFdK7J964l1NqA0eT6y1/gJ8yPbKpc5REmOVF5F0oisq90r6me33FIhThTzcc44i1sApVr+/312595ukrwHfcCgONt/Lp2x/ISduivV6OiTcawrtF2brLer2OzZg7Jo2OBNqnpZ0o+2122OzMm0RUowh6iyS1rV9XU7csUAFAk7RNYuk7xPrzcWANjE9i0jWei52Sz9PSz+3TXGzGr/GEpJWJNR7t7Q96wCfH5HY5AK2JKpsb1UDCrXlyXTs2rcG5rW9RYHYE2bsTPmh/wJvsf3aNJ9ekptvGeF8q9keWCFB0myEkswVRMNB0xA/D3BR7j1XMa91o+21R//NgWLPSUflYxnCUvRCJxu0aYzVT3SggSkjOnA1UT8smgevletMsZcEDifyhibyfZ90gebc9DfbAdicaOQ/yfaluXF7zrGx7XMLxfoVkUM9ibjPxi8xoAelyae1IOlc4KO2Hy0c91LCRrW9JtrGZciyVxIExpmJHO2jRO5+r5E+N0K8McuBp3nvFheyEVc0R3ySobnqgYlr49qCqtl0SjrN9nbt9xRKCdv1/WDeOR+TtH5mmH+r1fGn6Nx7Ovvi6uIw4AaF9yTExPHV6Xg90w01iDYjnOth4GEyJL56BrVJhAzuvJmXNgSFkse30ZHTXZzoGBbhwf1HyLcxqoQaNnht3EBMoBv3EFlulXTMMJ8ZFS4s69mKW+sZ+QWwMFFMKIZe8o2iO+IR2zcVCP91SV8hxviLgJWIhfzpI39sqnAW0f1+PN1y6tMMDd9FVtL+rbmPN7L959bxrPsY2Nb2N1Ii5C2ENPkxhJVdLm5TIdl3dStDDYHLqKjNRvibt4kmBnI3pR+VNKRIWqCYNbNbrH/bzyUSTi72ILzTb7S9gUJyuJTd3s7AXsALkkr5kbfnvl6YgtZDEwivKFmUbpDu448CLyUU1RYjxossf29i3Nnc9n3pPB8g/N9zE9Nbpp+7tY6VuCc2IciKt0OsNyUVWxO0CT5E8mYWIgE+MMGnJ1E4a4r571zS8Big1hrxKUmHEHLq7WJ9FtFQQzt6StlaDEEJ8k3lfc7lkjalsFJkGxXGue2I7+HjRGLolYQq0HjF0+3id7r/iudFapJvUvwS5Ju+5GHybZd6UcQaOOFvkjay/VOYsofKVu0B3mX7880L249LejeQRcCRtDZwbysPN4+ktQrs+a5nqPR/v2PTDNt/kLQyoeIAQRq6KzdujXl6mPM0ku9H2f52Zrh7FfZIM0lamrD6yFLjTDiCoX+rI/scG48oTRIqumaxvZWig/xionBTBK1awNt6CEifkXQ73YqJ4w6SXkus8Tcl9u4/JDr3B8FIzQklrPugor0VgKQVCNXs9no2195qBdvLt15fISnHXg+oO3Ym8snHCJJoo9pzTCaZcy3br5d0B0yZT0vkW6ZAYfti25NzyDcJOxOW8osS91qTH3mCsFnNQi/5RmF/9hihiJujqnqHwiq5ODmEUPl+U0OeAm4hxo9pto22/cb0s+++X0l0ABiYgEO4k1ypsLtuW2b9v4yYUC/XCUFaOIrIkUCoqXyfAjll27+U9AXCyucIYFVJIuzgchslFyP27P9QNISVIO4vQzRzfBg4IpEZT7b9ywGubznbDwxHFM0hiI5EPlWmyu4YENXmAx6QdAvlbOUAXm77pNbrkyXtmRmzwbyOpuePAKfa/pKkHK
JTtRx4T31rJmBBwhqwFP5l+8KC8cY3AaeF17VfKOTfVhvmd7Ph5AmbgT2BsyQ9TNxoC9NJsI9L2D5V0aneTHQfaIoMLxYMM/BOQU4BoHLxuz2ovQD8DtgxI94UJCbvPgyV3RrUt/DVKe5xwDm2L0iv30XIRY4rqNsGbweFfHGWDd4wWHa4xL/tgwcNmhZ92wCvtn2gpFcCizhTjnSYZ+VfxILzU9PKIldd79t+WAtYUdLMtt+VGevttj8taRPg94T1ydV0OnFy8ILtowvEgfDErI1tCGnITSS1x4uVcu5jOuSj9wDH2f5ZIj2VwFrANpJKyL4fWuiahoULyGwOg7YF4mzExjRL2SOhVuHmGdvPSELSS9Kmb2DlwgYKS5J3unBHrAdUf/ofRy3Swm7AmsBNALZ/JenlBeKuY7vd/fBjSVlk1HS/fdb2D7Ovbiies21JTueas3D84gSfdqIwrV/eT8eqZdxhDNaIZxAFm/cSdlTbA3/LjAmhTjeko6cEEjGtKSpca/ucUT4yNai2z6EO4bL4/qkVdybga7a3oaAneWXsDpwgqSFNPQ4UsfkcSzJZIVQhD6u/NfBZw39imrALcIakRqn3T5Rpgpsprd+eBVBYibykQNyj6SZVPNnn2FQjEQsWA2aXtCrdnfpzZFxn+xx7ADvRKTCdrrD4OHKEj00NqhJxGzgUFxagzHy9O7AvMZd+nyB1DJxQl7QO8AZgQYU9WYN5iIT9uIftj5eIU3nN8jfgF66j2ie11IokvYEY52qcqCSZ7ERiDfcOZ6pl2t4g81pGRFpzH+RQJDtGoVJa0t7qS4TSyfLABcC7gGuBXALO7ZLWtn1jOs9aRD4yFzXHzlMJ1Z5mfN+aaGDbPCPm82l92Oz5FiQUcbKhsFI/kcjTStI/gQ/nkHBsHw4cLmn3AvPc1EDE3mQb8kiCNckhsv2UpB2B7ziaD7OJuP3gMqIDv0v/Zk3/iqBirhNgDtuntV6fLmmf3KCSViLUb94DXAq8z/btCvXhG8i4PyQdTNSS76OzZzdRbxgYqfZ0KXCppA2I2sXH0j332V4S2yjYi2h+60cUzSWIViOfjgFR7UsDfm40/F3StsQaGWArYlwqgZklLUJYZ+6bG6xyDrxd33oB+GsmwbIXVyga4X5Md21yYNLXuCbgSPocUcybXdITzWHgOeC70+3CRoHtW1JCpSkGPegBpOPGGolw86Ii3bTRDLySDiQUOE4j7rdtCJuoHFQrflce1BoFjuMom6Rf262uWNsXSvpGwfilMBakBYDVJO3L0ARybvHmOyQ5UiJ59STB+s6VI/0W8BDBIhfBHl+K2KSeSGywpwXViQttuNVxWQCzpJ/vIew9/hU5jCI4L3VsnEP3pD/NUn29CbHUxVJ6DXA6YZn1Cwpt+hP+LOlY4G3AwQp/z1KJt3cUijMm6mmpqHc0sJDtFdKGbyNn+snb7iqGKeTErx3m16cFTeHm28RY8SfCoiwXDym69M4lNo6PA9lJX4fs77eJpFtxjNTFkc6fLSE+3jEGpIVnHUpLzflmZgRy9TRgKYUsctezBwz87KX7bR8iQV8aZ6Zxcz6FKtCHibVcKVQl+KTE0LkpaT9eu5xrrxEXsH2CpD3S/HKVoosqF8U7egAU/uyvoZMQ2lnShrZ3G+Fjo6LWPqcW4TKhyv7J9n8kLSFpVhf2lK+I7xLy2M0c/R6iWakEkboamayNgkXZKuRhuvdRxayBAWz/Blhb0lzp9ZMl4hIEw8slNV2cOwCnFIirdlNLmmdz9jvvAD5EqBYdRoeAM5nIU5bAjoSCwb9hSsHlBjoF2kFRZZ6WdLDtz/Qc/nSfY9MM208RSf/sxH/CrMBcxJqzXWR5Atis0DmqQtLrbN9bIFTNfOR/JC1eaW7aETgxkThFkDirKJ6VJJPZXqfAJXVBUt89tDOVZNI4cQFhsYvt3+fE64PNgJWBO2zvIGkhMprVWnvJWYiGjj+m10sADxS43pp7nBqqPU
cQ+cKXS/oq8X1n2zkmnAB8zPY1QGPNdRKh+p2LRyTNbXuyQg4jWOsAACAASURBVD3k9cBXcvIhaW2/me0zm2O2jxrhI1MbdybgMdt758Ya/hRah6g9NU0GRYmGJQmGtqs0ANTKdSZcKOmzwA+I8WJL4AIlpdVBcuwJRxKK9Z+3PUXhMxH3cp/DjYlm7WdH/c1pQJrntiUI9X8lyM8/BVYh9rBTvfe2/dH03H2h9J66Nvl0lHNnEdXatQFJ77V9/ki/Pw34MHHPfZO4j68n9iklcABBfL828RqWBH6VG7RSDvwr7uOU1HssA40y1uqtY1mkL3kCWL1JOsj256b3dUwtJG1OeFcWW0jMwNhBfXzN+x3LPEdX8Ttjsm+6TYeFMyTvJN1mu7jalKSLCbnNZuO1DbCe7WLF8IkESQ8SnbL30CIu5HYRSbrdSY7UHR/17Ht5mGfkTturlH5WclDz2Wid4+vEwvhpQnVhPuB829lSlpL6+UzbGZ29knYmOqafoVOUzorZin1twyQvCUlzAO8E7nGoWSwCrGj7ktLnKgFJ7yUIb72EumwbFYXixj7Asa1n+he2V8iN3XOeZYGfuZyHaunCTTv2mwk7kotKJH0lHUrqVmkXcUpA0o3EmvBu4r5YiejUe4a4R0rbUIw7SFpipPcLzHvfAP5JEL12J2S+77OdVcyp9eylOeTvBAmnLWedsy4UUSxcDng7ca9d7EJe5Cn+foQqwNuAg4hkwPec0cnYM2dPIja8b65RxJgIkHSj7bXTmvkIQpXsR7aXyoz7daLzv1hHT4r7APDaZtxMybh7bb82M27Nfc6U9XFJ1No/pdinAq8lEqXtMSNX9r0KUvLuLKLLez0i2fs+2/8qEPumEuvtqTzXAkQDyc8yYpxDEE32JBJ4jwOz2H535rUNIUQMQ5IYJPa8RBfneunQVcABhf5+7yQk8AEutX1xgZg/Bq4kCjgQa4ANbGcp7Ura1D1k9VJIxeQ1nCxIFBYlt9heMTPu3sDSFJynU9zbbb++59jdLqAMnIpvewOvoqx62BK568vphX7f93hE7bkpjUWUGHtaMV9mu4Qyazvmmba30FAF9OxGA0ntZ3c2wl73dtvZZDJJpwDfdgFb7j6xb7a9pqTbgA0IAuP9tgey8R2DvWSVsTPFPp34ntuqPbvZHqhBKa211ybsUt5K3GeX275/xA9Offwh6+RSY1IzbyRSz1eAQ4Av5q7rJN1qe/XRf3Oa495Qa0+qsBbaG7jO9sFp7byn7U8UPk/2WjbFuYI+DU4F5upquc5hcusNiuTDS0Nh8bV56RyqpF8SggMnuYewL+kzHkC9vtaeOsWuQj4d5lwllfCamMXWcZLe5Z5GKkm72D6mRPwaqJED7/1OFc0Wd7ub4DquMK4VcFpYVuEJfZHtkl31tbCf7bPSQuKtRFfS0RTwFpyBMcG/JW1Dhxm7Fa1NZA6GK36T4X1HMKTfAPw8vd6AYEH+jXxJxGIKHD3YikjmNdL0V6djEwIVJuW/OVm0FEYtOdKnJG0B/Ci93oy4p6HPQnxQSLoMeJ74ngdhDL9vhPeKyIXa/mwq+P7L0f31FGGbkQ3X6frem+i8KZpsSviSpOOBy+keL7K+Z0c35I9br/9CqJSNV3yLsCK7pzSBg5BOvVndKkvZUovq2Mop/XwEKFG42YPokpoMHJfY758tQZ5Ka6ylbZ+UxrbFCCncXFSxJEl4GNjJyXZJ0grA/iUSpxMFY1AA+QzwEYLQujMhdX58gbhVnj06FrVtlZCsdaEdnaypeFeEdNMn/ubEc/IEofj5xQIEn/ac/QJh61hkPp2g+EoqOH2K6HSah1D7yEXxjp6EXwOL01E6eWU6loua+5zLJW1KecJlrf0TwG/Sv0l0KzqMS9j+raStCMW6PxJ2HE+P8rGpRXF5aOhflLX9GJBVsLC9SfrP/VPxYl7gopyYCW9j6JrtXX2ODYITCWXLLdLr7Yh13YjEuNGgUBS4xPZFif
S9rKRZnK8avQtBWPwCMT5cTkjj5+IVigaqyYSyVbH1LPF93pQIWhCNHSfkBExE2R8SRNwi87SkXQlC05KS2tY0cwOlup4b9bDjKaBqJelbtvcEvq2kZtGGy9tc10Axad2+wcvltarMTb0kwFSgzSIBSpqU6gqXkOzpFGqDhxe45D3Sz+KKQ7Z3b79WqMH+oFD4krbcvbg1XetxhGLdk0Szy0CouZesMXb2YDU6qj0Q6+YHG8LWtH7fDpW3o1Lhu4T6Ty+uUiiqfp+OasiVKaeTu95q28x/1+Vs5i9LJKpizS0Jd0r6KTFPteNm5TpTvn6j9nxk+7dAFvlmGCJ2EbU6IqfcYDZgU8rkRWrlW2oqqi5NEPWWJ76L5nw5TbNHEs/bU8R915tfzyVmLTvcvncQ8k1CrT01dDs4TCGfkm9jOAQua6vaoOQ6bj9Jz9r+OYBCTfstxNo5C6kBYEfgdXTfy7mqg8Vy4Bojp6QaDSgTRQFnQ6JbaG1isjvJ9oPT96qGR8P8k3QQUYD7Xk024AyUhaRXAYcD6xKT3nUE+/j3BWL/ClinZPFb0iXA9qkojUId4mQXUJMZhiU8LtnBY41S7PEU660EAakocSERybYkEgunkORIbZ+VGXdJ4hlZh3hGbiSKQn8GVrNdwroGhW/qIsT3nC0dOhaQtDrwsDO9vlvxVmDoYn7ghabC0/sDidRSFKmjZzngXjpELxdYsE0opKLKW2sQhlMXxMcJu7PXS9oM2NH2u0qfqwSUFLEkvYMoinwBOC23A0BhS7M6sXlcJo0VZ9leN/+q60HSvbZfN9qxFyNKFABSAuteD9hROUrsifbsVetkHYv4MzBxIOk8Yi04L5Eguzm9tSZws+31M+PX3OdMBuYkkrvFCJdjsX9SRWW5EtDQ7v+XA/8i7XNKFPXSeqsX9oAduE1Rtt1VV7Ao25yjlzw8l+2ByMMtQsRSdJPd5ia6qLctcL132l5ltGMDxL0NeBMwP2F5eith+7FNTtxaqLGelfTq5m+fipmNiug1tu8ocM33OFNFpyfevMTf6yC6rSEnFyIXosLqYZJWs32bQi1zCDwG9sGDIO1zmsaInWkVVmwfUOF8JfNaRecmSWcTJMDGom47YGXbA5MAJV1DFNBXItRN7gHOy92f9pyjmjJZK94swC9sZ1sZahhVmdJkl5Rrn8f23aP86nRD6bGzJ3Zx9R7VVe/tt85qMPB6K8U+n8gfv43IVz9N7Bty1dqrrMHVsczsjZud61RSPc2N0xOzmlrdMOe72faamTGq5VtSnug9DFXYy1Jpk3QtUaj/JtFMtAMwyfYXM2JuP9L7trMsW1PNYl+GKrbnKLRV2VMPc675gB/YfmeheAsTeQsTY9BfC8R8iZN1mKQ1E7FsyrGMuC8DzieUot5J1F+2chkl+LMIIufWhB3VNoRa3R4jfnD0uMVz4KrslFRl7TkRCDgN0qZvK2Kg+BPBoD7d+Z0yRVFrITEDEx81it+S7ndL3l2FJN9rQpXkhWuh9ga6JnFB0nJUkCOtAUnvI2xvihIXFD7TXwMWtf0uScsTRLis7sJhznUKkcz5pe0tR/v9UWJ9CVifIOBcQHSzXusMtQxJq5I6LSnLokfSgyUSQBMdktYgLKiuovs7zpbgTuS37xJqAI8Tii/bDkoQVR0/1nb8Rlr4cOBK2+eUICRLuhNYlZDdbuRpiyUUJM1PSE+3iW9XF4j7fSLh27ZfnMv2hFGAq4kSBQBJPwF2t/3HUX952uIWffZ6YhclWqaYDwCvIdRISney9osPDFZUl3TESO+XmJ8mIlJxfieGrpWz1oYq3NHTKmyuTqindUlZ5xY4J+I+pybSeHEa8NJ06O/AB23fO/2uaihqFJlqo3ZRtjR5eIwIETcA+zg1V0haFzjUmTYM6tgk7w7MbvsbJYg9/c5RKFbx9WxDNpF0ue23lrjOnvg1LWUawpAJslfWfqEVd3/gUeqoh00Y9BTfDgCmFPJyC28pfo
1iU5W5qSIJcD5CjeVkYGWikHU+cJXto0f46NTGL178VofwDKE0tDxwpu3PDv+paT7Hy+nejwy8n5K0nO0Hhsk3GPjHOF0L1Bw7X9rn8OScmtZYFr5LQhPMZr4mJB1NqDlnq+uopVZHqJI1KEnObt/HkwhlpyNyc8GV8y0XEM/HPbRcAWx/OTNus5abQtwrRSZWqEU+Y/s/6fVMwEty64mSHiQIHL3fxbgbj/uhMPn0I8Qa6+fE2PlmIi9yYmbcfmuAUvZ9LwcuI9YwHy5FvFRHTKTZ88xCNAVkkQNr5MDTfvRO2/+WtC3BvTi81D1cY+05USyomqT8dsC2wB3AGcSmb3uiQDmesAWxkDjU9j/TQmKf6XxNMzCVqJXwTvgcITlZsvh9uaSLCVlICMWTyzLiTUEacHelk6S/kvDjzCW9FZUXHgPUlPaG8H0vTlyQtBTwO9tHSVofeJukv9j+Z2bcWs/IlsC3Etv0RNulZFRPJkgn+6bXvyQkSYsTcGxvDyCphPTyZkRC6A7bOyQi0emjfGY0HEssLrsW24VwvaTlbd9XOO5Ew1cJaeXZgFlLBnbI0W6YNmOTbE/ODHnYSKcj35LkNoV6wauBz6XnosR995xtK8nJp++jCNImbA/gFcCdhPriDeR/FxAdMbvSkSe/mrAofVFimAJAbvft/MC9km6mO4GVZTNQ4dkDhidaki+pm60OMobxP0DMz/MTybYZCPwEuIZY05dcKxe1lGkINmmd+XngH8Qa66wSRT2G7nP+j0L7HKhDuKy4f4JITO9l+4p0rvWJpqQ3FIhdDGORxC1NJrP9plZRdg3CznAZST+gTFF2ExJ5OJ3v4Zz9Qvr//JekLwCP2H423Q8rSTo1d7+XsAtwavquIcboETtzpxKStA6RhN0xHZupQNyucxSMVWM9O0nS54l7bK/eNwsQ96tYykjaj5g/msLgSZLOsl3COqS5t9q501y79iZRvz9Du73Hpapzm2SjUOHKJt204vUWm46UlF1sot7c9LSkN/aQALNsDCVdSlhZ/pcgWjwu6Q7g03Tmk0FjT1Em01CrtutzYgOHtv77BeAPth8a7penBZI2IvICixIkuCWA+wkrikHxKSJnOFy+YQGFuth2GeeogZp2XLcTFq2Pp7jzAY9I+ith0XHbtAa0Xc2OVD124pS1Xzy2/be3/RdJ3yCs4QZGIvbsBSxu+6MKi6BlbZ+fGbeWPQsp3mN055sGtdn9HnAhFcnZxDq5UWl7gSDK7DjiJ6YCtfItCa8o9Az34llFc8ivJH2cEGOYq1Dsy4ENidwywOzE85E7r/7N9k8zY3RB0ibAz5s9WNpPrW/73AKx+5JPc+Mm7AOs6rAabrgH1xP5kmlGym8uRlgkrUpnLzIPMMegF5mIlm2izazE2ngzSaUIl02e4p+JVP0IoWCbixo58KOBlSWtTKw1jifyp30VLwdA+bVnIaJUVSj8kJclGPUn2X6k9d6ttlcf9sPTESXZ4zMwdpB0PZHwvo1Wwtv22QVi30wUVnqZprkSch8gZJwBrrZ9zki/Pw1xjwdmoVt26z+2P5IZt6i8cC2MBXs8neck4JDSxAWFSsTqBFHmZ8BPgdfZfndm3JrPyDyE0tkOxALjJOD7OYtvSbfYXkOtTsVc9mordjXmrZKMp0KmfQNi43u/M+xVVNEOUWEhsxSxAXuWwooLEwWSfmF7hUqxiyZBJG1h+0xJS6YNb1GkzegqwG8TIXkBYDFnyk4rfL2XJsiRBxGd6t+zfWSBa76HKLzdaHsVhYrY15whNznMeV5KJALGrQR3TVTsNhkTmwFJ7ycKnjdlxrmHDtFy5YZoafttJa5zIkDSfURy6UKCjNRVNC2YLJxQKLVOmZq4Jc8laSWCUL0p8JDtDQvE3ITufU52Mi/F7Uu4dKYiZ639U4p9l3tUdfsdezFAheWhW0XZrYE1W0XZ9wPr2c4iwbfW9Y36y5zE/ZZLiGjv9y4gyHtZ+70eQoiI7n
qIYqRzySFprv4UsZ8+WNH1vGdmU1LvOb5i+wuFYhVfz0paFtgY2JOWxVAD53dlV7GUUXROr2z7mfR6dmIvnNv5PgnY3PYPc+IME/sBwi67N3fxWOlzlUbpvXv6+72ht9hU4O9XZW5KxZVTCZtLESTfD9m+KyPmHISV+umE/d1ChKrjgUS3960Zsasrk9WApLuI4v9ljg74DQjlieyi+ijnvcT222ueY1pRa+xMsY8DfmT74vT67cR6+SQih7hWgXMsQqgLZVmdpFhV7MRT7C41CIW6xz22l8+M+0NirP+g7RXS83597j5HlexZakKV1OpqoSbhS9LBhCNAUYUlhfr5/QSZ7kBirvqG7RsLxK6lAPdWouZyOd3CAIMQvoa9rlLrl578Xmny6fUEUei59HpWQulyIJKTQsHwQ8S+rL2WeAI4Jec7ro2UFzkbWJFoYp8L2M/2sQXPUSQH3tpLfxH4s+0TeueUzPirEPmFYmvPiaKA812CZbousLrCY+9o28+MR/KNhrLHFycm6hz2+AyMHeZwQW/eHsxie0iHUy7SIF5jIF+jZ9P887Q5y8V5kj7G+JcXHgv2OETC/06FX21J4sJ/bb+QCFrftn1kSiTnotozYvsJST8i2N17Et2i+0g6IqO4/u+UXGrUMtYGBuqO7YOazNtbFczx44hN5JOEEkcOLpT0UeA8yj97RTxY/wdwgaS3l97cJXzY9uEpCdIoA57G4J1CnyW6B35EbHBLw0SHwnuJRMWctIjJg0CSCHWF5YiNzLLAF21fmnepU/CM7WckofDpfSAVSbIh6UpgI2L9fRvwqKTrbX+yRPwJhqLdJg1KE21GwFrAipJmdp4v+dO2/yvphURAfZTojCyKRJAEOMr2t0vHz8QxRAJoSeK5aCAKdL5PYJwv6d22Lygct3hHTw8eJTqmHiOja0rStbbfqE7HV0PM2knSf4lEyCG2v5NxrXvQIVxu0BAuM+I1qLV/AvitQoHitPR6W6A4gXaCYCnbm7ZefzmRUQbF+4mi7EcJ1ZeFCBWATYnGg4GR1i7nSzoWmE/STgR5+LicuAnt/d6RhfZ7TUf9ssQz8hPiGdwWuDkzdjNXX5UKY03XcxHyTUqeLwP8QNIsLqA85bBHbheuZgUezIz5IHCwQur9wpxYw8SvpUL1MLGWfya9fgnR8Z2FtBbah1jjl8a/anzHY4TS9mSPEcXNBpPTsVxUmZtSsWPltEbG9hMFYj5FqOs9Yvt9MIUQ/ydChWlgAo47ymSHEySIySn+PJLWyiHua2gHPERO61bgU85rpnne9mOSJkmaZPsKSd/KiNc0iQ4L2z8eb+QbqK7gt7btnVrnukTSobZ3lvSSQuc4jVBgOtv23pmxmrX3u4FTbd+b1jODB5Q+Ryhmzi7pidY5niNqf7lYyvaWkraCeN5zrznhNbY3l/R+26dI+h6Za8MGkpYh8soLJdLQSsBGzlCWU121OiS9gaGK+LkKvqVznW3cCJyTyL7PQxmrNnes6p4kmohL4t+SXt8QpyStRpk9+w5ELnUWOsIAgyouNZjU51gRzkHl/N6vgZsUNvYm9oJ3KzUieBobDhziCqdI2tQFGtRHQ0nCJfGsbUo8101zy0K5QSvlwCenuWRbYL30XM+Se60NbN9J4bXnRCHg7EAUWY5Ir7cmbozNp9sVjYwDiYJ6F3t8Ol/TDEw9aiW8oWDxu09iuhePkZ+Y/o+kpWz/Jp1zScrI4FeRFy6NZgMNbNXLHicS/6VQi7jwfNp4fBB4XzpWYlKq8owoFAU+RHQgnUp0nz6akrP3AYMScPYi1H+WknQdsCBh71QCL9h2uvZvJ+ZtkU4h2x9L/3mMpIuAeXKZwgTTHcIOb8qpKPDsVU5WTCTsCuwt6VkKbu4SSidBHlOS1Jc0RIbUmbY9wHeITd1bCALOZIJVv8agAdPzdoHDY7kU6aaNhxLx7VzgUkmPA6Xu7XkTyfAjxN/vS+qWJ38xoVYBoAuSLiOew6
OcKT3dhu3PFwpVg2g5BLZfm0hOWR7ONWD7COAISUfb3nV6X884wh7A59Nc8kI6VmIuaVvKTOnoyYxJItZvQayxziKk9AdWdrT9xvSzr7R+i7SXs8+pRbistX+CIG18mUiSmkj+l5C/n4goSiarXJS1pM2JPUlp8nDx/Z6T+oqkq4HXt4rI+5NvFYnCfuoEortycUUjw86tvc+gcdcnksa/J8a3V0ra3pm2cn1QssD5c0lbM7SQdUBm3C4UJOL+i7D6vJQYg94G3CzpCMi2Vr9MoXL5Q7ptRHPzLldIOoQYN9s5uHGrBiDp1cBfmv93hdLQQrZ/nxm6aLGphfbcBGExkD03JVJCUxCaudn2Fno+2gTOa23/iGhKKYGj6W5uebLPsWnFt4CHiCZBEZacSxHkwBMJFclB8U9JcxF/tzMkPUrrGRwQzXz0csIy5efp9QbE+m3cKgG0UXDsBPiLpM8AP0ivtwT+qlB/KWIPb3vDlB/KUpJJKG6/aPsg4CBJB9n+3KgfmHY8l8bLpvlyKVrjfgZq2bNA5AH2AY4FsH13IvjkkGW2pVut7uuE2mc2AUfSacTYcyed/Y3Jt9AuTvhq4f8RJPt77HwrGHXbIg1BgTwqREPyWZIeJr6bhYlxPxdrOFP1rg9ulfT/gKPS64/T3Vg1MCqTT39Dt9vFT9LPXGu/6ySdACxq+12SlgfWsX1CZtxelNyP/IT4Xm+jzJjZoEYOfEuCG7Kj7UckLQ4cknuhkra1fbp6rIFba8+BFWAnigXVfe6Roet3bLxAyRZL0em2aurmeFFKQ09EpMF9TmLAKVo8VSic9MKu4D+tAnKyCmm6k4juFRFdgDs4eTu/WNCHPb4xUIw9Xgtpkt+FkDf/fkrkbGH74My4VZ4RSScDJ/ZLkkp6q+3LM2LPTCS7BTzoAt2QKe5VwEVEgulNROf3XYkcUCL+B+gQv651pr2cpNmaTdhIx2ZgfEJhV7cYkQRZGZiJkMgcyNJP0SX8emLhPsQaI7fbQB1pyLb9Wwk58lMIwtsto/5y3nneTMhOXuQkS5oZ7x7g7URxaF/btyg6n19UNm0Akk4l5E27CgDpX9bmpuc8iwKLEB2HR432+6PEWoFIZrbtZXMTTe34r6IM0RJJB7tHqa7fsRl48UIFO3pSvIOAH6aOoTGBpEVs/yXj8+cQjT57EkTRxwm10lyr1hn7pzGAKliTpLhTbDlLEgNrrV1q7fdS7AeBlZw6K1Mx/O7cxL2km4hmiJ+21ofZFq4K296tHeoyTVf59wddJ49yLgHL2743M85FdBLebXukw/KusO+5FiDWQwOTqBTy+sPCGdbqtXJlkvqNvXam3WBNSLqVsIpq2yJcZ3vgJoYU50sjve9M67N0jpmAOUusL2o+H2lM3sP2P9Pr+YHDbJcgDvWz48ja8/XbQzfnyd1fKywRnyHm0m2IefUMF7BpSwSO7Zv1mqJj/2Tb78iNPVYoMXamOC8DvkTk9yAaO79M3OOL2/51RuyX071H/WPGpTYxq9iJt+JvBKyXXl7pAs0yCluvfYk9+yWEk8aHbF+ZGbeaPYukW2yv0ZMzy7IaSvPeJq3xbT7gxyXmvURKW74EkaUnbtFcZ0/sqwmroSJENw1je94gN4+azvESgvDWrLkfBCY5U+0kfc+HOKNRpk/MOYH9CFtxiEbJr9jOJXIi6UCGJ5/uanv93HOUhqQLiVzAvg4bv5kJy/kidaKec5Xaj2Tvw4aJWzUHLum9JeaOFGtn28cOs1a2M8jfE4WAczqRrLgxvV4L2M32B6fvlfWHott2Y8K25mVEQXYND+ghNwNjD4Uv3dJ0L2BLTKBjWvzOTUynGC+hNeHnTvatuFULWSWhSl7nI5yvuFWEWtKFheIVfUZSouYy2xsUuLze2LsRCYR2cmUr56lDNbEXJpi3t9i+JjFv1y9xL0v6DqEG9P10aEvgN7Z3y4g5xBez37EZGBySLrf91tGODRi7ShJE0oK2/5Z7fX
3i3kR0vN2SiDgLApc40wtY0gPEs/EHojuvlG1fVSi63/cjyHQfU6giHOJuC40XBcaiAFAS6XrXJ9YtFwDvIv6OpdTUmvMUkZEdZqx/UZK9/hcgaX/b+xeK1dVN3hzPSSj8L6AC4bLK/qnPeb5GFG6OL1Eom4ioQCarUpTts3YBoOS4XGG/ty/RhNI0AGxMEO0Oyox7k+21VJ6gPWSeK5zkrVHgrJXwfpntv5eOOwNjg2EIHOO2qVOh1rALQZK5BZgHONx2VkdyrecjxZ4y/ox0bMDYPwauJFRvAD4GbGB744yYNwDfpKPSsxmwl+21c4v1NSHpftuvbb2eBNzbPjbekHJ8axJNIjfb/ut0vqRhkUgshwGLErWnJYD7bb+uUPz5GZr3zVaVUxD31wTOSIe2InJG2Sqz6qi+irCZHddzYSrWf5xo8n29pM0IZYeBba4lnUuoTnep1RFEhiy1OklnAZ/IrTX1iVuN8KVo9l0SuJBuJbwiTV/pHKXX4FVy96netBTwO+K7KJpLLUnCTfFqkk+XAfZmaF4ki6hWmlSX6m/DwvlKkUj6LmFlfE9urJ64VXPgNepZkta1fd1ox6YFE8WCajXgeknNBndx4MHEohqPBZf3E/LHn6TDHn9RJzUnEhKzeQ/gFYSk3tqERGYJ/+XrGSo72u/YwKhA3liNzmS0iqRsosxwhSzyJQtroYrX+XBwHauI4yl0n9V4Rmz/R9J/Jc3rsP4qiZ3cUj6w/bikncizLWhiPSLpDGANSe8lNuil7uO3AK91YsqmgsBArOaURFiM8FpeFabIe84DzFHgWl/0kDQb8V2+LCUq2t/xYiXO4ejYaDyAm4JsdsGtl3xTsKB3BFG0WUjSV4lE4Rcy4jUYs465wnPqz22f1bxwdNi/6Mg3UI9go7Ag2Z9IPs5MJ6GQqzS4GdGJdYftHSQtBJyeGbMfsmRkJe1KJPmXVLe069xEl+UMTExsRNzXJVBLXqXGmwAAIABJREFUXnhCo0SjRQ+K75+Gwc1EEvWbhAXRiwa9ZDKVsyZZqSHfpHiPp7VzLsZi7VJsvwdg+6upMPSmdGgH23cUCP0nSW8ALGkWYl95/yifmRrcKul4OvPzNmRYhzUYrsAJlChwXi9pxVIJb0mT0n7hEtK9IGkP24dnxj3T9hZNDrb3/RI5WYXt9F6EEsRHJS0NLOvMzlZJX+x3fJwTT/8maSPbPwVQ2F1nF5FrFZuI7usnJG1DFDg/S6wzci0Bij4fPZgkaX7bj8OUAlepGskuxD74C8Tzcjnw0cyY2wCHEzksAzcC2yqaAz8+SED1t/YAilpoXy7pYrqbyi4rELcKUq7zi4RlloAjJR1g+8QCsfvZ1jRWKsd6sMbcA4l87GW2V5W0AWE/lI1h8r43EHnKXLwHWCXNV02u8w4gi4CTvuPvEQp72eobI5zn/cAjtm8qEG434LvAcpL+TBAjtsmMeQ4d8jQEIbAUXgbcJ+lmusksWbZLtXKdCb9L/2ZN/2qgyBp8DHL37ywQowv9SLiSskm4CU9J2oJu8mkzVuaqipwFHEP87UrZRQP8O9X0mlrO2sRYPyhuS7FEcCIeT/89H/BH4NWDBm6t62cGdpD0W8oSs2rnwEvZ1LVxJEOf5X7HphoThYBTfHCojJ2JzqA/ExJLMzCxsAfBFL7R9gaSlgO+lhOwZwJtP7DFi98lyRuq5+05VoWsUqjpdQ4M22WRJXHae4qCsYo/IwlPAvek77ndGZr7/c4kSS0iy0wUWnSnheAhxIam2aDv4/AQz8WvicXVH9LrV6Zjg+AdwIeIzXOb5T+ZzE3uDEzBzoSNxaLEArl55p4AiihZ9aBkQbYXRQp6ts9Q2AE05LyNbWcXWGz/YfTfKoPChMgbJd1JyJFe2IxJL0ZULACcQBDgu6TqC+Bph6XsCwq1hUeJMbkobG+oqCIParP7PaLwcRBR/GgwuURnzAxMN5Rcw73C9kTbW48JShEuK+6fhsD2uaVjTi
DUIpNVKcqO0dqleBIydfMW6+hN2IUoJC9GNLpcTBSgcrFritPsHa+hQMMFFQuchBXJhxT2SyUS3ldJ+jewsKR3AvcA2xPfdw72SD/fmxlnJJxEPM+NcvifieJIrrR8uwg7G/H/UILwVRO7AGdIauaihyhDsqxVbJolkek2JtTrn5dUYp9T+vlo4zDgBoWaA8DmwFcLxMX2o4RNRjGkotX7hnn72gFjzj34FU31OT4uaRM6VkPfdaatemXsA6zaNCGlPMD1QDYBh7AmXZBuMtJkYBngOGC7AWI+b/sxSZMSAfMKSd8qcK1QL+/bYD7CPhSicb0EDiW+169LugX4AXD+gOSmkbAWsKKkmZ2hVANTnu0NFTY+k2xPzr04Z9hBTgX2rxi7QdFcZ9P8JWmu9PrJUrFbKLUGr5q7r7QfqUXChQrk0xZesH306L82zdgL+CnRWHcdMe4PrJxt+9UAko4DzrF9QXr9LmLdlYOa63qonwPfuVQgSesQe5AFJe3VemsewhJvYEwIAs5YFloKYW7gEkn/AH5IyMiNW8nCGRiCZ2w/IwlJL7H9gKRcq6H2BHpo6/hk4HM5gSUdbPszPYc/3efYIFidCt6ejFEhqyBqsserdlm0UFJtoMYzAvDj9K80LgJ+KKnx5905HSuBfQmLwUcBFBY7l9FhZ+dgbuD+1FlggqB1q6SfwrR1GKQN2CmSNrV9doFrm4EepM7SwyXtbvvIMThlsSKLeuQUbZ+bjh1WIPwcxGLVwOwF4g1BqaJpilVzTl2G8EX+MHCEpDOBk23/skDsiYZaBYB/2b6wYLwGtyq8048jkglPEh2ARaChthYDqZ05FOT+BWyVCN9vJJ696+gkOGdgAqBnXF6tz7FBUbObfEKjIOGy+P5J0pGM0OVXohlgAqIWmaxaUXYMMK7sG4eDww4it7u7X9xniWJFMUuBhJoFzqzCXS9svymtV24jCqcfAZaR9APgqkELDbb/kppYTnYFy+iEpWxvKWmrdM6nEik5C737GUmHEqSvcQvbvwHWrlAsrFVsOhb4PXAXcLWkJYgmlFwUfT7asH2qpFvpqHl8wPZ9JWKnRoOjgYVsryBpJWAj21/JiLkgsBNDmxey7BHHAolwM55JN208RuTpG0ymnArHG2yv0Xp9njpWJQPt/YB/pnHiaoK09yjdpMMc1Mr7QjSL3CHpCiKvtR7dzSMDwaFoeVWar95CPDMnEgXUYnABq6wGkn5DkAquSf8GvRfGRK3O5VVD+6EooVzSCoTS8EvT678DHxw05zIMiqzBxzp3XyiXWouEW4V8qo6l03mSdiPqUG01p6y8me3bFfbWyxL38oO2n8+JmbC27Z1a57lQ0jdyAo4B56JYDlzSB4Y5/goA27n1xFmBuYg1Vpug/AQZBCoAvYibb6sjLbK3JKSVHrK94XS+pBmYCkg6B9iBUDJ4CyHtNYvtdxeIvS2xCHoVnU2TnSF/q/6+kEW8zlXP2/M7BHP3/4BPEYWsO23vUPI8EwWSHiQ2Y11dFrYH2tyoW2VpCJzpS1r5GZmdkJ1+MDdWK+YkQvK3GYMvJax1sou+ku6xvWLPue5qH8uI/eaR3h9k46Meuf5WrPEswT3hoJDVfxXd33HRznd1JOZLxKrlL/xFonB1NrHx2JggJQ+cfBzhXAsQG5Is5bCac2pPzA0I5bc5iWT1Z20XI3SMd0i6zfZqFeJ+nSB89W6iS/pxvwqYx2X8yPvaWtjOsrWQtB+wBR1Sa7VnbwbqoPS4rG554aWJLtzivu8TDeqvQJkbs/j+SdL2I71fudN1XEKVvOpT7OXpFGV/XqooWxK193s1IWlJopt1beLZuwH4ZEqyDxKvasFJ0mXEPHoQYb3wKNGA8YYRPzhyzHkc3cIv7ff+oMl/hZLs9cDWwJoOC7U7gPcD69nOUh2WdDlBVChtGY2kxtL6Otuvl7QU8H3baxY+z/zALbZfUzJuTSjT7qR1n32CuH/PoWCxaZhzzmz7hd
JxJwIkXUUoqRxre9V07Be2V8iIeT1RnO9S+axRoC3Z3NIn9mXA8yl2rrpVcUg6FViRUNkzMXbenf5he2CCZ/pe32H7j+n14sDFiQB+R3OvTGPMOYGngUkEsXVe4Azn2Yg3savlfVP8RQiiKMQa/JFCcWcnCvZbEpYh59vevVDs1xUmbTQ52rUIu891iaL93bY3GSDWIokwu0S/92sU22s80yVznSne9cC+tq9Ir9cHvjboOm6s1uCS3kPYnbYbtYrn7nNzqZI+AXyGyGu+h1DyP932m0b84NTFLk4+VajqNZZO0LN3cL59fZWagMLO8Rq6rXbXsz0WNsfZyM2BSzop/efLCaWan6fXGxA11CJqPpKWKD1WziDgVERK6G1OkAzmfrEmNicyUgF8XuAi288ViHcxsWi9ne5N0zSrDEjaFfgYsCTwm9ZbcxNJi4FlkdXxpp0bWIWwIynm7dlzrldRqJBVGmPBHk/nuR5Yv7nHJM0KXJmxGLxihLftfIuP9rmKPSOS3kcoRM1q+9WSVgEOyL3fJC3fmziXtL7tK3PipjiHACvRLSN7t8uoZRSHpIvoyPVnjUEz0B8axnrCGd3pqtT5ro7E4p6E3VSDeYBNbK88SNxW/AeBlZ0kf1My5M5ByYWtuENUavodm8aY1ebU1jkWICwLPgg8Qtgl/ZSYZ89ykhb9X0ZPAeBvFO42GWb+KzLvpY6LRlHmWheQT5d0F5HQ7LK1sL1jZtwqz94M1EetcXm4RGyDGgnZ8Q4NVaB8M7HuHEiBciz3Ty9mzCCTBVrz3WyE6tJdxHewEnCr7XWm17WNBkk3AkfR2T/9H7C77bUGjFe14FSjwCnpfNvv7VMESJc8WPJf0hzAOkSC+1ZgIeA1hI3WNbZvHfSaU/yfAKsSDS0lLaOR9DbgC4QN5yVEIfJDuXv2nlzOTIQVwAGliQU1IelrBClgILuTYe6zBgPfb634Y9LkU5MYUhLqqJpMIVVIutP2Khkxsz4/wPmKNLf0ibsosEiKfVTJ2CUg6Usjve9kZTNg7HcTCrC/IZ7FVxP5hyuBnWxPs7Kawibjh7b/POh1TeV5itZGUsyN6FiTXWX7vAIxzySI9RcRjhRXFSZyZDep9Yk5M0FEejORa1iAyCkPZK2iUP+5zPXU6nrPl/VM18p19pzjrt49dL9j0xCv+hpc0jGEmvgGhGr0ZgRRLStPlGIXb0Dpc44iJNya5NOUI/sYnRzfNcAxtp/OjFu8JpDivhT4Ep1x82rgyzVI1KXQyoFvB/yVAjlwSZcA2zs1OyUy58mliEipmWFz2/9Mr+cHfpATf0JYUE00SPoY0XW6ICGxv1Nv8XcGJgZcXlpvsYLMxO8R3ooH0S3VOLnA4Hvo6L9SBrZ/L2kRhaTls6N/YkwxFl7nAL8GbkoJrSldFmkzNc1dFmO10E7nKvmM7E8sAq9Mse9UdEjm4szUyXIIsUD+BrFIzl4U295H0qZEghAqeloX6iyoJdc/Ax3UsO7LSpaPgGoSiwkPE89c47n9EqBEcuhtRIdFG+/qc2xaUHNObXADIX27UU+S7Na0wX4x4Da6CwCf6nk/a8yvNf8plPteQ6dYuLOkDW3vlhm6lq1FrWdvBuqjyrjcFKAlnWZ7u/Z7KUm0Xd8P/m9jH2BV9yhQElL1g6D6/qlF8umLFwnJp/a+bEKgme8k/Rh4vZMSkEJmf//peGlTgzlsn9Z6fbqkfQYN1iRgKxIJd6ZT4CyiNuXUpVmafG37KeBySY/Yfh9MIaD8Cdie/D1FLctobF8q6XZCGUnAHg67sly0x4wXgL+WKAqNJZxpdzIGJP+f0GnyqZbTczmryNr4u0LByQCSNgNyVfHOl/Ru2xdkX10fDFOQLUq+AbD9MLFPua107BLIIdhMRewLJC0NLJcOPdg0SwCD7v/mBi6R9A+CcHJWjWJ66dqIQrV2DeCMdOgTktbJGesUauR3Alu5gNr5cKepEPMJ4B7CPvO4HHIvgO3/SP
qvpHldQa2uz/lyn+lauc42fqtQCG7WntsSBP6BMEZr8DfYXkmhxv1lSYcR+cos9GlAOVLSwA0orbhD1HqAEiTcOVyvwfkU4vk7Ir3eOh3bIjNujZpA06y4x6i/OL7Q5MA3tv1Q63hODvyV7lYa/iuhulQKL2vINwAONdGX5wScoYBTAZIOIjbod07va5mB8QVVkspWyN81jM3rXE7urrjKwDDnuYxgh55te++SsXMxFuzxWl0WGsYfsRW3SvJsUEi60fbaPZ1C2dYvqWvxYGA1YoN6BnBwyU6IsUCJbqFaY9AMdKBK1n21kMa4M21vWiH2uURi5VJifnoboQjwEEx7N8tYqNSk89SaU9cg7BeXoLs79EXRrd9GxW6TeenuCrmK6HLOSj5JegB4bbOJTgm+e22/NjNucVuLFLfoszcDYw9VkL5Ncbs6N9MccI/t5Uufa7xDhRUoW3Gr7Z8kHQ4sTEd6eisi6XQuVGkeGbcYjkzWe+x/HZLudY9tYb9j4wmSDiZUgX9AzFFbAvMTzRLTrIYnaTIjE9PmGfhimbJX3wIoXuBMBMirCXWaB0rETHGXdLL0knS07V0LxZ0TeKYpbqY55CWJ+FMifnG1wYkMFbY7kTQb/dffz4z4wdHjZtkrjRB3TPKRpZGayL5LKBo+DvyOULj8fUbMyYRtw7NEU1aj+pY1vqXYRRUBe2KvSxSkm/1vc90lGu2KQ9IywN4MVXMqpiLeOtfqwMOJwJAbayViLt0UeMj2hrkx+5yjmAKVpLuBVZqcbJpL7iiQ+x3IymuUmF+i00C0M6FiBJRR+VJYDL6RIMA9RzQDXG378oyYNdXqJtQzDVMULL5M99z3ZduPZ8attgaXdJPttRSqkR8AHiPyT1n2mQql5Df0NqA4QylZddV6vpKurzj5VNJ9vTmQfscGiFu0JiDpW7b3HK4Rx+O4AUeSShORJH2bUMFtO1H82uWsBm8jlKcbu8glgHOcoX42g4BTEYkd1fbp++N0vJwZmI5QRansxOLdgk4n0sZEUugrWRfN0CR9OpZNiBjmXCIYokX9VEtAFb3Oa0LSz+jji0jYftgZnpk1IOkE4HJCfWJTwqJkFtu7ZMadFfgqUYCcC/iC7R9kxrzW9hv7JHyLJUJKouYYNAMBjYH1xHAL7txzSLrBFawKJG0/0vu2p6mLOJEr5qeiSk3lOfVBIqH3C2AKAbBGkX28QyEP/QSdrretgXltZ3WbSDqb+H6be2s7woppRELqVMQ9H9jNHRWRJYBvO3WXZ8QtbmuR4hZ99mZg7KHC0reSPkcQAGcHmkKpiETvd21/Lv+qJxYU6ogrEp37UxQo079pVqBsxa22f5J0q+3VRzv2YsAMMllA0veJ4kpDytoGmMv2VtPvqkaGwg5nOAxcxJF0IKEycRoxvm0DLGL7i4PE6xO/eIFTYT35pvRvKeAOovB2eGbcUwgFmfYcclju/j8Vgja0/WR6PRdwSS5xMcXqVRvcEviN89UGe88zIWyMoP98khnvTGAynfFia2A+25tnxq3VaDhm+cgaSOv8SbYnT+9rGQk1CrKt2A8An2SofUjWXqcWFPbAxzD0erMUe1K++xW2/9Q6dgphWfNL21tmxl8Y2JywdJy71jOiQtZkiYCzfpPDUVirXFmAgHMoobjw41JF35599QEEWQ0ou6eWtByh6rwn8HLbs2fE6psLKHG9tZ7pWrnOmqi5Bk95ySMJu/KmAfd42/tlxi3egNLMy62fcwEX2n5TzrWm2DXJp6cTOb0b0+u1iJzfBzPjXkHBmoCk1WzfprACHAKP4wacRDTdl6GEvdyx/gPE3gli31SMsC/pnQSJ+iriet8EfNT2xQPHnEHAKQ9J7yOk4xYlOlmXAO4vwYCcgYkJDeNH3iCn+JY2Sys7da0ousrvzGSvNioDSxH2SA1KqwxMCJJaTfZ4il+ly0KVfRFLQ+FZvy/wdmKSuxg40PkdWXcRxZUDCGvAY4DnchNNtVGys6
DmGDQDgeEWww1KLIpVqfNd0tHAYoRtZnuMy1LJSuuhn7mC2pTqqdQUn1Nbsa+1/cbcOP8LqNhtcqftVUY7Ng3xmkTQvISizM3prTWJrp71My4XhdVkY2sxAzMwBerTwdnv2ABxD3oxkm36QYUVKMdi/5QKx+9xR9liSWKezVLjmkiYQSbrhkLRYlc6ym9XA0fn7p8mIiTdZXvl0Y5lxK9S4EzksTWIZpldgKdtLzfyp0aNWWsOKbrO6olTRW1wmHMVKSLXRom/WU+8Wuvv+wjy1O8o0OSjMVI9LQ1J29o+Pa3ve2FCReunHlB1IRHplqY7h3r1QBfbHbeKImCKdZPttXLjjBUk3WZ7tUqx77G9Yp/jcw9K0pL0MaJ5aEEil3Om7fvyrnRK7Je5jA1gv9hbAV8HriDGi/WAz9r+YWbcplj/H6LJpWiDZGlSZIp5NrAyMdZdk/7dlLOOU0W1ulrPdK1cZ4pdtLGlFbfaGjzlIXcliv+Nak92bFVoQFEltZ5W/JcydO4rkV+/H1gWaGqRiwMPEnalOWuYakSZND8vk14+aPv53Jg1kfLr+xA2exOmCVXSy+jYnd6YOxfOPPqvzMAA+ArxR7rM9qqKrpZxuUGYgbFB5YHlYWISaibhlwC5hZzvEd6SVVQGJG0EHEYPSY3wixyPqOZ1nnAWQQo5nhaDvABq+yIWRdoM7Jv+lcROxKLq87YPkLQ7kMVoHiOcQJ/OgkEw3hc3/wsYI9b5uu7ucj9P0fn+ycy4sxGbpDbpz+SPe1sC30pJhRNdSFpfQ1VqTpJURKWGOnNqgy9JOp5Q+mp3QowrO8Axwu2S1u7pNinh//20pDfavjbFXZdIvg2KQ9PP1YkuiIdG+N1BMDdwiaQithaSzrS9hTqqZ10oVSycgTHB/2fvvcMsq6r0/8/byABKg4og+JOkoygKSEMTRlAQGDEBBkCCOhjG9EVQxzSmUVFUxATKDAYyCq0EBUVschxyEIFRcRwRAUVAQFHC+/tj79t1q6iqbu7Z+557qtbneeqpOud2rbu6qs45O6z1vg9LWsPjpW9LdM6sI+mlwGk1iiO7xKMtsFkCqs6fMvsCZ0u6KR+vBfxrodidwPb+wP5RTJbIC/Ffyh+dQEna+1vAsb3NkELcJ2kPxqytdqOvsHxQJtngfEvBDc4zSJuFF5E2V+bbvr1A6DmSntDb6M+bFyXWf++TNK9X+C5pI5qNs/r5JWmtojdvXZ3xxYwDMdkmslOn/kgW32i83cmTJfWrLTS1O6k1/n5JgRj9DON5WoPH5c9zp3h9bdKG6mZTvD4lSjZR+wBPBa7KMS5i/Px9UH4J/HduPFy0IdsrJBpkQ7aPsyQdQJq3989/izTPlCLfIyGtr7yTR+Zb4u/uCknzbV/af3LQ4pvM6sC+tq9qltoYkubkOcLpwLx8bh83VGbrx/Z3JJ1NKj418AHbtxaIO9W1VwpViLk/yX6r5F7AGcC2wL35eDnS77NxUR31rulaa50AT+ofb9q+MzeEN6LyGPwIkmLdV/Px7sCRpPFoE37F+MLWk/PnJtfOKZIeT7KSvYJ0TX+zQbxFTPHsuxDYpkD47QvEeAS19gYkbUX6u/hf0r1odUlvKFGIW5E/2P5BiUCq7EQh6Vm2b8jNvpD2BgDWyOtyA9/jQgGnAvkBsbGS6sKGth8u2XkTBP1IOok0aP0p6Qa0HalD+2ZoptIi6YskibsiC0x9ca8mTRTHFam5gD9kDWpWj+d4Vbos9EhfxNcCv3AhX8TSKMn0TbZh2FQJ6BBSpe2LbD87V7ufbnt+k7i1GUa3kDokwd1VJC0kSWV+zfYpDWN1rvNd0gqkDZC9SNf3YcB3miw0qa5KTc1n6tHAs4DrGKv+t0fMDnAYVOw22YC0MLEiaSL2J+BfbF/dMN+PkxY7ihTKTBK/iK2FpNVs/15TqJ5FMWZ3UAXp2xx3W9L9eDPSRvJhtm9smG4nUT0Fyirzpxx7Z5JC5NrADqSF9A
+P2kbWMJB0Amlxd1YWk3W54FLSP5LuQ7uSNv8PI83NGi1OSloL+ArwfLJCImlj8n8bxt2fpFZXbIOzL/aXgI1Im1gXkLqnL7LdqKhF0utJSlEL8qmdgU/bPqph3PmkAqdbSM+mVYFd3cCeRZOrDRrYlAZqg71N5H7FgtKbyDVQRbuTWuPvHHsL4Bm2D5O0MsmGYzq7uSWNW0X1tC0kfdID2OLle/18Uif285Tsaj7jhja7OXZRRcAJsc+aPGSzsVZplKwRe4VvMOG56gGtESe8xw0kpajfkIpDi9nBq6C6vKTzcn7rA28kKRf80OWVX17F2LV9vgtYiEjq2U+ubftTklYnWVFesphvXdL4TyxdBChpacarqJwD/KcbKFuorlpdlWt6krXOtYEflVjrzIXfr5zQ2HLioH/TwxiDq5JiXW0kLQMsa/vuQvGqPfuGiaRDbTdqmsl/x7v31m7yWsZ3auwnlkLSNqT9gJFvQpX0DdtvqXGPiwKcCuTNtp1IcnorkRQ+5ruAfGMQTERTeHv2aDJJz5Wme5EWpXubpo0fol0rUlMlr/O+Lot3AX+gQpeFpFfSJ4dYYlJTi9xB12NZ0kbkg7bf3zDuFbbnqU/CudTkoyaSPgssReVuIXVEgrurSHoKsBrpZ/y1xf37xcR6MfANYFznu+3TG8ZdFngTSYWsf+GmSGFI/ht7Halz/3rSwtNXbR80YLyzSBPonoTs40k+340X8io/U28sUSQ0E5iqQKRH00KRXPiF7T83iTNJ3CKFMpPELWZrkYuEF9reukRuQXuosPTthNgrkhZDPgz8lvRsObrJYm/XyPOQ/2SC0mCTjeQct8r8Kce+xvb6eaPzUySVro/VLtgeRWZ7MdlMKLhUshh6OXAI6Ro8DPhK6c2tUpTc4Jwk9lzgX0hFgavaXqZAzHUZU8c4s1RRYN4s7I1nG8vfq5KN77A2kWuiwnYntcbfuYBjY2Ad28/M898Ftp8/SLy+uBNVT3fKcUuonlYjN8l8hfR8Mkmp5t29jeUBY15qe76kq4BNbf9N0nW2R1VFvJPkxp53MFYYch6pGKKx0leN57WS5fcXmaAu3/TvIq+vXA4cTrJH2h44BTjH9iFNYuf4XyetCfWaRXcFfmX7nQ3jdq75UkkheWmSsgWkdbOHbL+5QcwLgL09Xq3uYNubN823Fn3NJzeRitPWpMBa54TYRRpbhjEGV2reO9jjFeveabuRmn+NBpS8/vSySWI2UU/rxZ4Rzz5JGxVYY7hm4jrhZOdGCVVqQlVqvNwyH55r+5om8WoTFlR12IFkXbAPyXpqBaC0xHUQAM27YBYT+5vANyWtQ1rgvCYP5L5he7KKwCXlrlzEci5wjKTbKSAPXZFle8U3ALbvlfTYAnEvZ3yXxXsnvN6oy0JJuecHtk/Mv8N1JC09qpsrkwxGLpBUolPhgTwg7PnJr8wkVeojSG8zpV+G0zSQGZb0OdsfmHD6/ZOcCwYkL4Kc6tyNbfsWUpdoo8F2ZgXguYzvfC+xIXsUcAPwYlKn5R6kQplGSNqRtJnwjyRVkk1s357vnz8HBirAAe4GrlPycl6kUiPpq9BMpabmMxW4UNK6pTZAukytjcHccfNq8uQ/NcEVkezvcTtwK8myrbFssSrYWth+SNLDklYstekftMZDpL+5ZYF1JeECEsN9RZF7AlcCx5A2Gt4AbNU0fod4sMQmwkQqzp9grFDoZTneqZJGeiOyFrYXAgv7iskWSpo1xWTONsO2fyPpyaTuUEiKISUsjKqSC1r3Al4KfJ+x+9CZwEBNEpIOY/JO5KaLvJNucFLAPlvS/yMtIG9EkpT/NmnDtzF5PFFayXhnkurUzyR9BJgnab8mDSKDFtgsQdwt+zaR5wNvBp4p6bsU2kQeAkXtTioW5r0S2JBkPYHtW3JRWVMA5V0yAAAgAElEQVT2ZLzq6WdJFhSj/tw7Fvga6ecCqcD+O4yt7wzCzfnv+STgp5LuZMyurRE1Nm
T7Yq8IfJzx6h6fHOE5yhHAnxlv+3IEzW1fal1/+5EKvcapyzcJmNdYLiRtlh7sZNdzJfB+xn6PTXkR8GzbvTXaI0gbtE3ZtNd8CYushv6hQFwAJH3f9qtLxcvMn9CMfGZuEmjCvsACSePU6hrGBOpd07ZPk/QM0mY9wA22/zbd9zzK2PMYa2zZt0ljS80xuMZUdZYmrR/+Xz5ek7Rm25QFpAaUb9LXgNKQH5L2wa9lrMiiFNWefbXJTYG2fU/T4pvM5blg7+h8vAdlbERrMr90E6qkfYC3MFacfYySwtCg+wu9uNOqKrmBak8U4BRE2YsMuI2xiX9vwrSfpD8BB9j+eisJBjMKDUl2OhcuPCt//BG4GniPpLfafu2AYXckeYW/m/TAWJG06TuqVPE6t712jjdpl0XT+KQCpy1z1f9ppAfzrqSf+cihMUUggDmkhcgVC4T+KnAisIqkTwOvAT5SIG5VKqkXbAdMLLZ5ySTngsHZFfiypO8D37ZdYpLU46O2F+QFzReROt8PodliHsA/2t5Z0o62j5B0LGUW/3cHvtS/YdwrApPUxHLwxPzR4+wGsYChPVM3A65Skrj+G5STnQ4WcTKpQOty+pTDmlKjUCazOmkBqLStxb3AtXkBdVGBc5PitGC4aHK/84toUISb455IUi44Cni57VvzS8dJGvUFnCL0jTd/KOmd1FGgrDF/AvidpP8ijec+l4sO5zTNt6tEMRlI2gU4gDQWEnCQpPfZ/l6riU2DkoT6XcC3gA/2bbD8t6Qmihn9Vq/Lkja/b2kQr0fxDc4+liUV91xu+8FCMWvSm4tsAWxDubkIku5hbAz+D6TNp/tsrzBgvGFsItdmm5rBVc6O+u+2Lam3mf645tkB6fpdlrSxB7AM8LtCsWvyWI+3ezta0vuaBLTdK+b5DyU12BVJa3wlqLEh2+PbwM8YK2B5HUntbFTtQ57r8RYvZ0ka5eaZB2zfIWmOku3eWZK+3DDmjsDmwL8CR+YCgzVJTS5FCkSBX5Is8Hob6avnc02ZrPmyZEFAYyuySXhI0tNt/woWKWg1ug5tX6pk1VNMra6PKtd0X4Hv1bnA9+NNC3z7yQU3p0j6D9unLPYbloBKY/CXl8htGmo0oDy11ppm5WdfFZTsWr8NzE2Hugt4Y4EinLcB7yS5aEC6H496jUGNJtQ3kYot74O0x0BaJ2tUgAO8In9ehdTsfGY+3po0nxi4ACcsqIZIXiC6sHTlVzA70XAk775EugGdAXzLfb6pamClIek9JB/1Lkyeew/Pol7nE+IfT+qyOCaf2h1Y0XajLguNWS/tDSxn+/MaYesljfddfhD4NamS/vwCsZ9FWsAScIbtxuoetSnZWSDp7aQir6cBv+p7aS7JS73UInLAokr33UidvWbMguKehnGvzAv/+wPX2j5WfdZqDeJeYnsTSeeS/k5uJXVvNFXheoRsukZUInNIz9TO2kR0BUk/s/3cCnH3J41bShfK9OIXtbXQFFZqrqvwFBRElfzOJb2EpNrwfNKC9PnAIb3u8tnAhPEmTCi6LPDsqzJ/yt//WJINwLW2fyFpNWA9F5Bn7xoTiskO6ysmQ9nuuLXkhkjukt6u13GbN5sWekStnSFtLnmCFYuktW3/uvD7zAHOd3Pb6E7ZZ9ek1lxkkvcRaRN4M9sfHDDGY0mbyEeTGpGeTFLl/BRwnu1ZUXS6OFTAjlrSvwHPIBWH7k+y/Dq2QDfySaSx0DjVU+BmGL3C8r4C3w8Ad5LWEE1q0HmC7Q+1ldt0SLrc9kaL/5cDxX7E+uOIr0lWsX2phaSFJGu2zwIrkVTa5jd97uXYi+7teV7yCeCFtvduEPOHpGtiRdK13Rsjb0Jaf9qqYc57kK63eSTlotcAH7G9oEHMNXpfAqeSGhgFZawoJW1DWi/st5nfyw1UM/uKWe7JxSzzgCLFLLWuaQ3JZneyNcoGsTozBu97Pr0L+AMFG1ByAcQZs3E+OhmSriE9N87Lx1
sAX2+yDp4LC6+z/azF/uMRIhd6P520v1ekCbW3TuYxdcRlgUttr1cgZSSdDrzBWekqr7ccbvvFg8YMBZwhkquSt2o7j2BmkDcKlyLdBGooZQBcQxqsTmYPtUmDuHOB05VUoY4j+Tjf1iBeVSpXj0O9LgtJ2pykeNNTm1iqQNwqOCsCVYp9A2XkGodJyc6CY4EfkxbE+hcx72ky0A4mx/afJX0PWI4k//pK4H2SvtpwIbJW5/uhSkpZHwF+ACwPfHTQYP0FX3ny0WMucEGDuNVUaobxTI1Cm6FwoaT1bF9bMmitBXPVs7X4HnC/7Yfy+yxF6hoOusP9tu+XhKRlbN+gZGnUlL14pLT+UcDOBWJ3AtdXoKw1f8L2X+jrvsoLQ79vErPDHMpYMdnGkhYVk82W4pvMHI+Xu7+D0VdF+h5pM2jiudIbwM+ggF0kY/bZ51HZPlvlFElqMRQVLqdO0ZMkfZzxc9dHE+MvwBmSbrX9Cli0aP9bkkrWyBfgqLDdierZUa9Muob/TFoz+xiwbcOYUEH1tDITLebf2veagWLziRL3Cg1BERD4q6Qtek11SipnjdXEK7IRY7YvkFRabuytP4xgI9EOJIWofUjKbCuQCmVK0H/vOd9J1aOput4XGn7/tNg+Rkllr9d8uVOB5ssjGLuu18zHyuca27SR1sf+i5TzXcBPSGoOTaimVke9a7qKzW4u6H2q7d/2TjWN2UeXxuATn0/vnfB6kwaUi4ETc+H7A4wVWQykYDgDeKhXfANg+3xJjVQunWzmb5S0RonCvyGyfYWYh5FUU3vjw51IqqqlWL1XfJO5jTQWGJhQwAmCjiPpDOBVruChmwcqr2RsYfp82ydO/12PKv76pOr0VwM32y4xSS9OzerxHL9Kl4WkF5C8nC+w/TklKct9R61TqIcq+i12kYqdBfMYu6YvKPV3HCQk7Qj8C6nD8kjgCNu35y7Mn9teq0Hsop3vSmpkjzidP9v2FweMuyLwBAoXfGk4KjXVnqlBPfqKsh5D2nC7iQ7YfOWuqRcxwdbCdhOLNiRdDGxr+958vDxweoluyGA45AWFvUhFnC8idVIvbfulDeP+fELR96TnZgOqp0BZdf4UJKb4/T3e9qwpJgOQdACwPvCdfGpX4JoCG+rFyQ0tzwE+D/TbsawAvM92o+JTjbcwgqTo+CHb328Y97GkDU4xtsF5TK0mhhKKJLUoPReZELt/LWAOsDFJbWHzhnEXKS5JOsT225vEGyYqrC40Wee/CqiT1oobTE/Te4UqKwLm99iAtCayYn6fPwH/YvvqprFrMNU6Q49RaaiRdL7tLSY893q/x4dJP+cDbA9sTyLpCGAf23fl4ycAB9p+Y4PUJ3ufl7ucJdDTSXsLf8sN8OsDR/b+DwXi11B8Kz6eVUW1ulrXtKRTSBaD25H2XP5KUkVqrCYj6VpndQwlq7YitmRdGoP3mKoBxfbARVT5WbIj6W9t1hcaKNkALkf6u+ip4N1PUmRk0D0YJcX6DUnKYf028zs0TLkz5CKvzUg/zy3y6fNsX1nwPQ4mrSn3X9e/dBP1t7gugqDbSDqZdAP+KeNvwI2LLCR9nbSJ3H/T+ZXtdzaNneOvSuq4fS0wd1Qn6KoshZi7V9YBxnVZkGyYRnbjsDSSTmVyn8U/kH4ORSd6o46ki0iL0f2dBV9osggp6aMkRZ1eMdNOJAWqxp0FQULS4cC3bZ87yWvb2D5j+FlNTu4qhXT/mU9Sv4FknXGJR9CaLCt5LHQllZqaz9SgHl1ZLJ2IKtla1CrgDNpB0gvJfue2/94wVqek9WtSqxip9vwpSEQx2VhXL2kM178IOZIFX7lIfSdSx/4P+l66B/iu7QtbSWwKhrTB+STbf2yY6tBRYevMHPMU0rwf0jrI/wL/Z/uohnGHsolcClWwO1ElO+qKcaupng4DSUsDb2fMSvxs4L/cQEVbk6gXTXZuwNjFN2QneY8VICkFl4oZTE0uzrrQzWxPH1GwUa
kApaQl0FWk4s21SPfPHwDPadrA0Be/xv+/+Hi2ZjFL33sUvaYrF/geQZr/Xto0Vl/MTo3Be9RoQMmFIVuVKmzqOpKms4+z7YGUs/Ka0GQBzxkkXhuojHpf8fvwJO/xSsbGcOc2va6jACcIOo6kN0x23vYRBWLfADy7V8GaKw2vs/3shnHfQSoCWBlYABxvu4TlUhVqVo/n+EPZOJT0GeBu4Ju27ygRsySq4LPYZWp0Fki6EdjAY16ZywFXNZmYB2PULg6pRZ4wvcz2Pfl4LnCq7RdM/53toLrKb9WeqUF9JB1l+3WLOzcqSFpI2oz8LLASyYZqvhsq1Ui6ANi7110jaSPSolOjLvJguOTC72fYPkzJU355279uGDOKvjO1ipFqzZ+C8UQxWaK/q7crSNrcdlN7hcninmF7m8WdK/h+A29w9jqw+zceJe1j+yvFEy2IpB2AAxmzzlwDuMEN1Yty7CtIawHX5uPdSOq9jZqehrWJXIq8cdNTJdkYuDR/3WTTppY6aWdVT2si6ZvA0iSrGkhW4g/ZfnODmNVUhmpsyPbFXoakeL4WSakUANufbBo7mJ7eddTg+68mbarfmY+fCJxTesxReG39CtvzJL0f+KvtgwrH/xfbh5eI1Rez+Hi2cjFL567pPDf7R+A3pCa7IirJHR2D1yj4OpxUiPtjxtsYDqSoHsxs1Fy97wskm74Teusto85jFv9PgiAYcb4H3G/7IVi0AbxModi/JC2q9Ca4q+dzTVmdtJhyVYFYw6Cq1/kQFxAuAZ4OfAkYxcXp4j6LXSYX2mxQuLPgFlK34v35eBlSZ0RQACdf1oclrVijOKQiTwb6VRX+ns+NKvcC10qqoVJT85ka1GfcBlD+/W3UUi5Lwg6k+/E+jNlafKJA3H2BBZJuIS0wrUpS4Qg6gpJC2cakYpnDSBs5RwPPbxi6hg93V9kIuFDSuGKkXsd9g0XZWvOnYDy1fn9d4wpJ80t29dZmYvFNbs65A/i+7QcfbTxJywKPBZ6U1U16KjUrAP9fw3SnxPYdShYXg3COpPuAVSVtD1wLvAEY6QIckhrwZkywziwU+zWkscvupK7T1wP/XCDuHElPmLCJPLJr4f2NHHnjeKCimwkx7yY1Yu2mCXbUpCafxnGb5jgh7u/zGP7wrjW2ZOZPUJo4MxczPGrUpzIk6Zq+l+aSfn8leO6EzdezJJVqkDyZ9DdyOX0bs0F9mhTfZA4ELpK0IB/vDHy6YczJmtbe2jRmHw/k4s3Xk1SdIc2hGqExJbXD83FJJbXi41nbfwFOkLSKxlTVbiiQKwzpmi6hlNFHrabezo3BSTlvNqHg67KGMX+dP/4hf8xqJO1DWr+5B/gGSYXqg4MWwOmRFrvjsL3CIHGHhZL7ySak/8Mltm8jKZQNyluB9wAPSupZBLvWzyE3TT5AuhcNZJc4spOOIAiWmDOAbUkbkpB8Bk8nWfk0ZS5wvaRLSDfKTYDLJP0ABvcZtP0hoIp0cSV2IW1YfMH2Xbl6/H2L+Z6Rw/ZJbeewGM6Q9BPGS/YvbDGfVpnYWSClteSGnQV3A9flwgWTisoukfTVHDtsdppTszikFkeS/g56soo7AYe3l85iOYExG7XS1HymBpWQ9CHg34HlJPWKFUUqJju0tcSmQNnWglRoOtHWYj9JjWwtbF8q6Vmk4g2AG91A9j5ohVeS7PCuALB9S1Yna8Sod40PmVrFSFXmT8EjiGKyxKbAHpKKdvUOGZEKAvYgFaY+Wt5KKjx9CmlDqPc8/TPQdNNmWgbd4LS9paTHk/KdD7wZeKak75IUBg4pmGZJHsiFR3Oyis9Zkr5cIrDtm/LG6UkklbZ/dhkLnCqbyF1Ej7SjPkzSSNpRd7ixBeAhSU+3/SsASU8DHhow1rEkVYGiKkMTqLEh2+OptuN53UFsHynpMqBXBPgqF1Cun3ht276kacw+9gLeBnza9q8lrQ00sjHMrO9sYwhg+05JpVTUil8fU6nVMaFZaU
CGck3bfnZPKaNArN9oEmXZxkl2cwxeo+CrRAPZTOKNtr8i6cUklevXke5DAxXg2J4LIOlTwO9zLJHmTasVybgSkt4MfAw4k5TzQZI+afvbg8bs/TyGyOtJP+eB70VhQRUEHUfSVbaft7hzA8ae1F+whwf0GZT0CuCLjA0G1wSudwHp4pp0pWBI0kFMXx07soUAkl4FbJkPG/ssdhlJpzHWWbBowcb2gQ1iTmqv0xc7bHYaMtXPeNR/trkbsv/au7LNfKZD0uOYRKUmd/o0jV3tmRrUR9L+vSLfLqMGthb5+3cGTrN9j6SPkLpu9nO2pApGH0mX2N5EY1LqjwMuGvEFvYB686cgmAx1zKJFyZLtNbaPrxB7b9sHlY5bg1yofyHJ6mWTvJl3JbAj8ALbR7ea4BRozDpzf+BJFLDO7G369J1ahTQH/htAieeepHUZ20Q+s8Qm8jBQYbsTdcyOWtLJpGLkLjW2IGkbUvf7TaQNpzWBvWyf1TDuOPWiUuN6VbQnlXQocJCztVwQwHCubUnzCl4jQ7HjKkXO90VMUKuz/aYCsatc05KeZPuPJWP2xV6kLGv7mZKeAiyw3UhZtmtjcJg65x6D5J7HtDv3itSyQtR3bddSHhpplO0hJX0FONv2iSpghSfp6gnqepOeGyXyuPOfbN+Rj5vY986b7vVRXusMBZwg6D739Q8sJW0ElOgUqrlAvB/1pIuLU7l6vAalulWGju2ayhZdo3hnwagXgcwEbB+RFzPXsH1j2/ksKfkZMrID1gnUVKmp9kwNhsI6kl5KKj55uO1kBsXNbC0APmp7Qe702gb4AnAIqUsrGHGUJO9OUbI/fbyktwBvJEkYByNOFNgEwyBv/kCSN+8Mth+W9H6geAEOcKukuR0pPt0R2Bz4V+BISU8mbdK/GjivzcQWw46kcfG7SZ23KwJN1FkBXt40qcWRC246UXQDVe1OumZH3cm1IdtnSHoG45UoG1m1VFYvqqHA0Susewywl6SbSEV1XVCICOozjGv7m6RxQAm6pqRWXK2u1jWd83uYtJ43L5/bx3ZJS86iyrJdHYNDteKglSdRiFqlwvt0hcslnQ6sDXwo/62VWJu8T9IewHdJ1+Ju9BUwjih3MP46uSefG4ReM/qypIK6q0n3n/VJ+6CbDxh3HJKeD/wHaV72GMbucU8bNGYU4ARB99mX5JV9C+mmsCrJuqc4KuB7l6kmXVyJml7nxelakYWyFYem9rW8gwZWHB3mQknrlegskHS87V0m6TAEynQWBoms8PUFkvft2pKeB3zSYTlRkmVt94pvsH2vpMcWij20Z2pQha+TpKcPygtkh3WpEK4fD2hrkemppr0M+IbtUyWNnL1AMDm2nVWM3kOyUFkH+Jjtn7abWTAIBedPQdDP5aQxvUjNIXfmrx9PUjFYu73UFstCSf8GHMf4zvemdir9xafbAgcwosWnWbXxDEm32n4FLNrc+i3wBka3oeatwHG2fwcUWXMY5U7xFqlld9I1O+rvMYnqabspLRm54OYaAEkbS7rF9i0NQu7JePWizwJXkZobm+Za4xqsXlgXdJchNa1p8f9kyXAlO66K3CVpeeBc4BhJt9N8o77WNX2OpPuAVSVtD1xLGgeVLMD5e55fGxYpajehy2PwGjwkaQ1nl4issjObLXfeBDwPuMn2X7Lqy14F4u5Oui6+QlbCy+dGmV8C/51Vz0wq5L9G0nsAbH9xSQPZ3hpA0gnAvN5emaTnkgpmSvEtUqPBODeKJkQBThB0HNuXSnoW47srHqj0do197zK9weB5lBsM1qRrBUMASPoh01tRjURBgO0t8udJK9B7EnWkjdUZT6XOgn3y51gIqc9/AJsAZwPYvkrJ9z0oR03lt2E+U4PC2F5I2thbkdQRslDSb0nKIUfPot/l77J6ynbA5yQtA8xpOafg0XEFcJft97WdSNCYUvOnIFiE7bUBJH0DONH2j/LxS0gWQaNMr7D5nX3nDDQdL/cXnx
7akeLTV/d9fb7t75GKDkaVucDpkv5EKqBaYPu2lnOaicyR9IQJdicl1u9PzB89zi4QsyY1VU+Hyd7A+pL+x/agjR2dUi/qFfVIOsr26/pfk3QU8LpJvzGYFQypae0TBWN1TUmtuFpdrWva9paSHk/a7J4PvBl4pqTvkmy+DmmSd+b4SZRlvzlosI6PwWvwYeB8SeeQ9i22JCk8zkqy2uevSX/HyxaM+7+ka7tL/Cp/9Dg5fx5YgYpkJbeoUd32zyQ9u0G8idxt+8cF4yF7NhekBUH3yR2yp3VEahmArFRwP+nBvCewAnBMga63KqiC1/kwUPKbXBXoecjvBtwGnATdksiXtFpDNYDOoAqerDnuUiQVp60HSixYIiRdbHsz9Xm8KnvAtp3bTEHSfJLs5jiVGtuXF4jduWdqMJ5ctPk60vjiFuAYYAtgPdtbtZja0MjjrO2Ba23/QtJqpP//6S2nFiwhkm4A/hH4DeMVIuJZEgTBIiRda3u9xZ2bDUg6hbQhvR1p/PZX4BLbG7Sa2DRozGrornxcymqoKpLWJxVSvRq42fa2Lac0o5D0euDfgXF2J7aPai+r4SPpKtvPW9y5rqBskTfg955E2pwep14E3AwjqV4EgKQrbM/rO16KND9Zt8W0gpaRdDlJTebsvjWzn9l+7oDxprWamm1rOVldoqdWVzp20Ws6K7JdSFLy2CQrvl1JKjR4ge2jpw2wZO8hUjHnP5PWD38CnOvm1oAxBs9IehJjzSYX2/5jm/m0iaQ3k5qgn0pSqtsMuMj2i6b9xsXHXRl4C7AWfUXZoz5nKI2k75DWx3r3hj2A5W3vVij+Z4GlSDaJi+4RTZ4joYATBN2nX2p5G1IVeRGpZRX2vVO2GiIVgfSq/3qykPvlTqpRtBqq4XU+DJ5ve+O+4x9Kusz2u1vL6FEg6fr85ddsH9xqMkOkYmfBQ5IelrSi7bsLpBpMznWSdgeWUvJ+fxdpQhkUorJKTbVnalAfSSeS/i6OAl5u+9b80nFZNnpWkK0tTpC0iqQ18ukb2swpeNS8uO0EgsEoPX8KgsVwSy4Y7l+EbGJzMhSyXPi6JEUHIFk8NAy7C6n49Au278rFp6OuIlbLaqg2twO3kqyiV2k5lxlHabsTddeOuprqaU3yOOAq2/dJ2pNUEPiVhlZPnVIvkvQhUhHZcpL+3DsN/B04tLXEglHhAdt3p7qIRTzcIN6B07xmxu6ls4XianUVr+kdgc1JiilHSnoyaQ71apJrQgm+lYsUfgqQHRl+RFrva0Inx+ClyQVO2wNPs/1JSWtI2sT2JW3n1hL7kApmL7a9dV67/kyBuCeTromFFLJGqo2kZwL/xiOLhprck/cC3s6Y08O5pDX7UvTW/vv3Uxs9R0IBJwg6Tk9lQdL+pKrjY/uVFxrGvoFJfO9s39E09hTvtxJwoe11FvuPh0jN6vGa5AKWl9m+KR8/DTjVdklptqrkv4nNbJ/adi7Dpka3kJLv5oakiUd/R/1Idkx1kaw88WHGd1d8ytmvPWhOTZWams/UoD5Z9vc5wPNJi3jnA4fMtutP0g6khcinkDbJ1gBusP2cVhMLglnAsOdPwexGyZrm48AL8qlzgU+MqrIsgKSPA1uRCnB+BLyEZL/0moLv8a+2R36TV9LVwFYTrIbOGdXuaUnvIBU6rUxSZzm+SWFIMBx6asJTKe02LAypRk3V05pIugbYAFgfOJxkdbKL7Re2mVcbSNrf9ofaziMYLSR9i2Qx90FSocW7gKVtv63VxGYYNdTqal3TExTEryVZiL3Q9t4FYn8KWMn2O7LS4KnAN2wf1jBu58bgNZB0CGnt7UW2n51/xqfbnt9yaq0g6VLb8yVdBWxq+2+Srmu6FtdFBcA8z/lPHrkuMtLjuNKEAk4QdJ/fKXlZbgd8TtIywJxCsYv73k2H7TskbTWs93sUdNXrfF/gbEk35eO1GGEfTkmfs/2BCaffP8m5GU3lbqET8kdQiaw88eH8Ed
ShpkpNzWdqUJ+9gD8DX83Hu5PUcHZuLaN2+BRJ6nZhLijbmmTJFQRBfYY6fwpmN3mRf5/F/sPR4jWkzekrbe+Vu50bWwxM4G10Q2XhQOAiSeOshlrMZ3GsDuxr+6q2EwmWnFx8sxRwuDtkR11Z9bQmD9q2pB2Bg21/S9KbBgnUYfWiHutIeimpeaaJwkkws9ibtF72N+BYctNa06CSXjXd67Zn21poDbW6Wtf0q/u+Pt/294DvlQhs+6OSPi/pP4GNgM/a/n6BuF0cg9dgU9vzlKzDemqO/9B2Ui1ys6THAycBP5V0J8lWvCmnSHqp7R8ViDUsHrRdUp1mMrVhAEqpDUtakfGFdecAn2ziJBEKOEHQcbLawvakTv1fZKnl9WyfXiB2cd+7LlOjerwmWSXiJ8DawA7APwEfHtXf30TFl3zumg4sKlShRmeBpMcB99t+KB8vBSyTi0aCAkg6i8kXx2ab7G01Kiu/VXumBvWR9POJKmGTnZvpKNlNbpw7Tja0/bCkq21v0HZuQTDTiflTMEwqSXtXRdIltjeRdDmwNXAPcL3tZxV8j86oF0palzFZ8zO7oCgjaRXG24f9X4vpBEuIpDNIdladsKOuqXpaE0nnAKcBbwS2JG2CXz2IslVX1Yt6SNqW1CCxGUk16zDbN7abVdA2kna2vWBx5waIeypp3fvMfGprkh38H0h2sG9sEr8r1FSrq3VNSzoC2MfZljOrqBzY5Hc2oSBLwEeBS0j358YFWV0cg9dA0n+TrrtLcyHOyiQFnE6Mw2si6YXAiqSxzN8bxroHeBxpfeEBxmyuV2icaGGyOhQkdbM/8Mh1kYFVomqrDUv6PvAz4Ih86nXABranLYz/mFYAACAASURBVPCcjlDACYKOkzfOT5C0iqQ18ukbCoUv7nvXcbrmdd5TiZhL+p2VVIkohqS3A+8AnpblenvMBS5oJ6uRoEZnwRnAtsC9+Xg54HTSYDkow7/1fb0sqWDvwZZymalUU6mp/EwN6nOFpM1sXwwgaVPgspZzaoO7lLzNzwWOkXQ7fbaDQRBUJeZPwTBZQJL2/iZ9i5AjzmW5M/QbpMXTe4GLCr/HKwrHq0beFBv5ohsASa8AvsiYxeWawPUk+89g9LkXuFZSV+yoa6qe1mRXkgrnG23fmueUBwwSqKvqRT1sLwQW5o7y3fLXvyXd/4/uiKJRUJ4PkcYvizv3aFkaWNf27yEVsJGunb0axu0a1dTqKl7T6/eKb/L73CmpaQHHxLHglaS/kVeQ5mZNFZG6OAavwVeBE4FVJH2apHT5kXZTGg1sn1Mw1txc2PIM+orgR5TLSdeY8vF7J7zeRK2mttrw0233K3J9ItuJDUwo4ARBx5G0A0m6uLcIsgZwQ1NvwWCMmtXjNampElGSPHB/ArA/yQO4xz2zzTu1nxqdBZN5hnbRR7Rr9Dp9285jplBZ+S2eqR1G0vUkmfpeJ/YawI2kIjjPFkW1rHb2V1Jh2h6krptjSnWFBEEQBKOBpMttb9R2HoMiaS1gBdvXLOafThfjPdO9bvuLg8YOxpOV9V7EBItL2wPZ6wTDRdIbJjtv+4jJzrdNV9azJiNb683Ph5fYvr1hvE6pF/UjaSVSB/mewC3AMcAWpPn7Vi2mFgwZSS8BXkpaXz+u76UVSIUzjdbMJF1v+9l9x3OA6/rPzSZqqdXVuKbz+GIr23fm4ycC5wyiHDYsuj4GL4mSXeQ2pIKLM2xf33JKI0NeowT4mu2DG8R5M8ny7KnAVaS9ogttb9M8yzpIWo7UcL8FqSDnPOA/bf+1QcyqasOSLgLeZ/v8fPx84Au2Nx80ZijgBEH3+RTppjtuEaRE4Bq+dx2lq17n1VQiSpL/nu4GdpM0j7EH8wXArC3AqdRZcJ+keb2BiaSNSJu0QSH6pBYhXW8bkTa/g0JUVqmp9kwNhsL2bScwIrwVOM727xiTTg2CYAjE/CkYMj/MzSInUkjauxZ5njflaw0WTucO+H3Bo+
cB23dImiNpju2zJH257aSCJeZ7TGJH3W5K09KJ9ayJSNqFpHhzNmkz8iBJ77P9vQZhu6ZeBICkE0nNEUcBL7d9a37pOEmzUaV0tnMLSZ12B5JCQo97SJYiTTlD0k+A7+Tj1wILC8TtFDXV6ipe0wcCF0nqqSDtDHy6QbxF1LC3ynRmDF4b2zcQyuGTYvvZuWhts4ah9iEV9l5se+tc9PSZxgnW5QjgzySVJEjqgEeQijAHpbba8NuAI/Oajkj7kv/SJGAo4ARBx5F0me2Nc7XwhrYflnS17Q0KxC7ue9dlalWP16KmSkQNJH2U9BDuyUDuBCywvV97WbVL6c4CSfOB7+ZYAlYFdrV9+bTfGCwxkn7NmNTig8CvSRtv57ea2AyipkpNzWdqEAwLSR8nPU//ROouXGD7tnazCoLZQcyfgmGSx50Tse0m0t5VkHTWNC/bdti0jTiSFpLm6J8FViKNw+fbDjvjDiDpYmBb2/fm4+WB00f199e19aweeR65XU/1RtLKpOaOgeeTXVMv6pEVT54DPB94GDgfOMT2/a0mFrSKpPfb/vyEc/vY/kqB2K9krAj+XNsnNo3ZNWqq1dW8piWty9gm+pmlnAcmU04roabWpTH4MCml+NJVJD3J9h8rxL3U9vxsh7Sp7b9Jum6U1dol/dz2uos7N4pIWgHA9p8bx4oCnCDoNn2LIPsDT6LgIkjY1SSmqh4f5YdcF5F0I2mD4v58vBxwle112s2sHSZ0FhzW11mwqEhgwLhL57gANw6opBMErVF5QaHaMzUIho2k9YFdgVcDN9vetuWUgmDGE/OnIGgHScsCbyJtDPU3zTTtcA4yuSDiflKjwZ4k25BjZmO3dxfp6vOpg41w1/bbpmQbnKubWKko2cs+Qr0oK8OOLJKOJ3W/H5NP7Q483vbO7WUVtI2kK2zPm3CuREHEoutE0jqkdc8fz7Y1z8qN2p27ptVBe6uu01N8sX1q27kMi6wM+XD//a1UYWGOdSKwF7AvaT38TmBp2y8tEb8Gko4GDrZ9cT7eFHin7dc3jPsyHjnf+2STmH2xlyGtn65Fn3tUk/hhQRUE3WdHkoXMu4E9SFYnRW46wF8lbTHB92422tXsR1iSDINbSA/PXuX8MsDv2kundQ5lrLNgY0mLOgsaFN/sDJxm+2eSPgLMk7RfA8n3YAKSpu1wt33CdK8HS0RN+fuaz9QgGDa3A7cCdwCrtJxLEMwWYv4UDBVJzwXWZfwi5JHtZTQ9kiZddC2Q81Ek+fsXk8Zue5AsF4KGSDrf9hbAbSSlT0hFOAD7SfoTcIDtr7eSYLCkdMqOeirVUwrYqFTmtAk2OLsCP2oY8wxgW5IVFcBywOnAqDeJPHdCp/tZkoqoWgTdQ9JupIKNtSX9oO+luSTl1qacC2yZLYZOI9ld7UoaD8wm7soKZ+cBx0i6nT7ruoZ08Zrut7cS8BrK2Vt1agxeA0mfs/2BCaffP8m5mc45ku4DVpW0PXAt8AagSAGO7VfmL/8jq4quSLrPjTIbARdK6hVOrwHcKOlaklrU+o82oKT/BB4LbA18k3Q9X1IoX4CTgbtJNol/W8y/XSJCAScIOo6k9wDH2S5eqCBpA+BI0k19ke+d7atLv9coE5Ykw0HSSSQ/y5+SFva2Iz1Eb4bR97cuTY3OAknX2F5f0hbAp4AvAB+zvelivjVYQiSdSloIOzOf2hq4EPgDaYAZXbgNqaz8Vu2ZGgTDQsmLfBdgZWABcHwpCecgCKYn5k/BMFGyHNyKtPj/I+AlwPm2X9NmXtMh6aC+w2WBbYArmubc657vm+8sDZxne7MmcYPFkzudL5ytyrVdQR2zo66pelobSa8mNVJBug81ssHpsHpRle73oJtIWhNYm7SO88G+l+4BrrH9YMP4V9ieJ2lvYDnbn+/CdVKammp1Xb2mNWZvZeCsEmsjXRyD12AKRatrBimu6DqSHk8q3Dgc2IBko3kKSXHpkBZTa4V8z58S278ZIG
Zvntf7vDxJ6WzLgRMdH/9ntp9bIlaPUMAJgu4zFzg9dx0dByywfVuJwHmheAMV9L3rKDWrx4MxTswfPc5uKY9RoUZnwUP588uAb9g+VdJ+DWMG41kaWNf27wGUvOoPt71Xu2nNKGqq1FR7pgbBEFkd2Nf2VW0nEgSzjZg/BUPmNaQF3itt7yXpycDRLec0Lbb37j/Oi9XfLRC6ZzFxV+5IvpVQfxsKWZlyq7bzCKbH9qWSnkV37Khrqp5Wxfb3ge8XDNkp9aI+ine/B90lb7b+Bti80ltI0uakNaJeod5Sld5r5BiSWl1Xr+mlGftZLF0oZufG4CWR9HbgHcDTJF3T99Jc4IJ2smoPST8lNd8+TCpSu1PSlcD7gRe0mlxLDFJgswT0xj5/kfQUktr3agXjXyhpPdvXlgoYBThB0HFsfwL4hKT1SdKK50i62fa2TWNP9L2T1HvP2WbHsQOpenwfxqrHP9FqRjMQ20e0ncOIcYWkzSZ0FlzWMObvJP0XSV3oc/kan9MwZjCe1XvFN5nbSJPSoBxvZUylpuh9o+YzNQiGhe0PAUhahfFyyP835TcFQVCEmD8FQ+avWZ31wVz0dTupCLNL3EfqiG/Kodl24qPAD4DlgY8ViBssARPmP8EIou7ZUfca4c6lA41wvc1vSfcwtvkNadPXtldoEH5fYIGkcepFDeINi+3bTiAYHSpfI5DW7D8EnGj7OklPA85qGLMz5OIbbM+d7PWeWh3QpACnc9e0pH2At5CKIgUcLelQ2wdN/52LZSaMwZtwLPBjJlG0KqG21EF2JBUX/itwZC7IWpO0LnBem4nNME7JzRsHAFeQniXfaBq0V0RIqpfZS9JNJAuq3vNp4OLCsKAKghmCpFWBnYHXAnNLVB1LOo0x37uecga2D2wauwtMMTnoVUw/TJKUD6/zhkg63vYufQ+7cYxwBX1VJF1P6k4b11kAPMjgXpmPJU2YrrX9i6zOsp7t0wulPeuRdDDwDMZ7vv9yYrdvMDhZ6nUX0j24ikpNjWdqEAwLSa8Avgg8hbQQtCZwve3ntJpYEMwCZvv8KRgukr4O/DtpvPJe4F7gqlFWXpT0Q8bmfHNI0v3H2/7g1N8VBEFT1DE7akmPI3U6z2FM9fQY23e0mlhLZFu9rqgXBUEwgkhabbYVzGZ1ls1t35ePHwdc1HSNr4tj8FpImgdsQRrfXzDChb3VUbbEzV9fS2rgf2HsCZQnNz4ta/vuArGK22Utih0FOEHQbSS9g7QRuTKwgLR41djLMscu7ns3k1B4nRehNwGY6mFXSbJu5Kn68A9VhKpIehXQ8x89t6nnezA5fSo1rwZKKb9Ve6YGwbCQdDXJ43yh7Q0lbQ3saftNi/nWIAgaEvOnoC0krQWsYPuaxfzTVpH0wr7DB4Hf2L65QNxJ1W5CfSoIEr1NIUn7kxpyju3fKBo1JL2HMdXTWU2fetE9PfUiYJTVi4JgSiR9CjiHVARRTdVK0mdIRfHfnK2Fe8GiIoj5tu/Px8sCl9per+B7rEUHxuA1kPRR0hrqCfnUTqQmyf3ay6o9JD3N9k3560Nsv73tnGYykjYGbrF9S6F4R9l+3eLOPRrCgioIus/qwL62r6oQu7jv3UwivM7LkItvlgIOt7112/mMCjUKjyTtABzImCrCGsANQKgiFMT2CYxNPoJ63A7cSvJ8XaVQzJrP1CAYFg/kMcocSXNsnyXpy20nFQSzhJg/Ba1g+38lrSZpGdt/azufqbB9TqXQ/Zt4ywIvB66v9F5B0EW6Zkc9FzhdUjXV0w7xUdsLsnrRNiT1okOAkVQvCoLFcBOwO3BQVpw/j9S4dnLh97kEeDrwJeD1hWMH3eEw4L8l9RojdwK+VfINujIGr8SewAZ9BU6fBa4CZmUBDvBxSfvYvsv227M97oG239h2YjOUvYH1Jf2P7RLWnOP2x/J+5UZNAoYCThDMEEoqWkzwvXsGaXBcxPcuCKZC0hnAq0pIxwWTE6oI9Z
jGz7rHHYRlXRGGoVITKlFBl5G0kLSw9FlgJVKx2nzb/9RqYkEwg4n5UzAK5Pv/04Hv2/63tvOZjCnGyncDlwHv7XWNFnifZYCf2N6qRLwg6DpdtaOuoXraNbqmXhQES0K2/d4F+DfgCbbntpxSMEPps0gCOM/2lRXeY+TH4DWQdBbwStt35ePHAyfYflG7mbXDZM/meF7XR9Jc2/c0+P4PkSzllgP+0jsN/B041PaHBo4dBThB0G0kvQL4ImOKFmsC19seWNGipvVNEEyFpJOBDYGf0tfBaPtdrSU1w5B0me2NcyHOhrYflnS17Q3azm2mE5Z15cgLj8fVUKmp8UwNgmGTN1juJ00Y9wRWAI6x/adWEwuCGUzMn4JRQZKAdW1f13Yuk5GtJ24GjiU9p15L2rC4Anh7qYKZ3HF6qe1/LBEvCGYKXWs0yJv0O5PuFXNnY0GrpFOA35HUi+YBfwUuiXWcoItI+iawLnAbSf3mfOAK2w8OGO8gJm+CA2JNebYi6YnTvV5jbWTUx+A1kHQSMJ+0l2PSc+oS0lh/1l1/eb9lK9t35uMnAueUtDybjeQiuikpZckpaf8mxTaTERZUQdB99gM2Y4KiRZOAvQXiqXzvgIF974JgGsK2pz53SVoeOBc4RtLtjJdrDyoRlnXl6A2GKy0eF3+mBsGw6ClxkRYze4uQyp/3yxL+ocQVBBWI+VMwTJZgU2GUF/53mLBpfKikq2x/QNK/Dxq0T4UKYCmSUuInG+QZBDOKrtlRT6J6+pbSqqcdYheSetEXbN+V1Yve13JOQTAoK5Ge03cBfwL+OGjxTeayIlkFM43LSeNCkZ53d+avHw/8H7D2IEE7PgavwYn5o8fZLeUxKhwIXCRpQT7eGfh0i/nMFA7Mn5cFNgauJl3P65OeAZsXep91JL0UOM32wyUChgJOEHScmooWkq6wPa/veCmS3Om6TWMHwUQkPQ643/ZD+XgpYBnbf5n+O4MlJf+M/0ryet8DWJGkinBHq4nNYCRdn7/8mu2DW01mhlBTpSZUooKZTChxBUF9Yv4UDANJv2aaTQXbA20qDANJFwFfAr6XT70GeI/tzXIhzvMGjNuvQvUgcFvDDb0gmFF0zY66puppV+maelEQTIekZwMvBt4NLGX7qS2nFMxAJH0DONH2j/LxS4CdbL91wHidHYMHw0HSuqTxFsCZs7h4uDiSTgA+bvvafPxc4D9sv6ZQ/G2BvUiNuQuAw2zf2CRmKOAEQffpKVqcRyFFi37fO0l/7p0m+941iR0E03AGsC1wbz5eDjgd+KfWMpp5vJW0iPU74Ii2k5kN2H523vTerO1cZhA1VWqKP1ODYFQIJa4gqEfMn4Jh0lvcn2pToc3cloA9gK8AXydtYFwM7ClpOeD/NYj7GOBm23/Lz7pXSzrS9l1NEw6CGcIDeSw4R9Ic22dJ+nLbSU1FZdXTTtE19aIgmA5JLwe2BF5AKlo4k7T+0jTuD5neimqHpu8RdJLNbL+ld2D7x5I+P2iwjo/BiyHpeNu7TFCgXMRstIvskQtuouimDuv0im8AbP8sF3MWwfZCYKGkFYHd8te/Bb4BHG37gUcbMxRwgqDjSHoscD9pgXdPYAWSokVjL8savndBMBWTdTw26YIMHomkj5Pki/8EHAcssH1bu1nNLCR9zvYHFncuGJzKym/VnqlBEATBzCfmT8EwkXSt7fUWd242IOkqkiT5WsCPgJOB59h+aZt5BcGoIGkhaXNwf+BJpEKO+bZHsuGppupp1+iaelEQTIekg0kFN+fZvqVg3K8AqwJH51O7kayZTwKwfU6p9wq6g6SfkP7een8XewAvsP3ihnFn9Rhc0mq2fz9BgXIRPXvmICiJpO+QmmT7r+flbe9W8D1WItmH7wncAhwDbAGsZ3urRxtvTqnEgiAYLpLOz1/eRvJNvRM4GPgM8GtJv86eyU1YR9JLJcW9IhgG90nql+zfiGSXFBTC9ifygtU7gdWAc/JCXF
CO7SY595KhZzGzmahS8xWaK78N45kaBEEQzHxi/hQMk1skfUTSWvnjw6SFwpFF0sqS/l3SoZK+3fsoEPrhbDn1KuAg2+8jzXeCIEjsCPyFZPdyGvAr4BWtZjQ9PdXT/8mKA9uQFLNmIw9k2/BF6kWkgsMg6CL32T6uv/hG0ucKxH2+7V1t/zB/7A5safucKL6Z1ewGrAycCJyQvy6xWd+5MXhJcvHNUsDhtn8z8aPt/IIZy17AdcA++ePn+VwRJJ1I2mtYDni57R3y82pvYPmBYoYCThDMTHK13oW212kQo7jvXRBMhaT5wHdJA1aROhd2tX15q4nNQCStCuwMvBaYO5ulIUsh6e3AO4CnkRYze8wFLrBdyiJp1tOGSk2JZ2oQBEEw84n5UzBMJD0R+DjJxgHgXOATo6zcJ+lC0sLm5cBDvfO2v98w7n8DXwY+DLzC9q8l/cz2c5vEDYKZgqT3MGZHPfLUVD3tGl1TLwqC6ZB0he15E85d03RdUtL1wMts35SPnwacaruYPUkQ9OjiGLwGks4AXmX77rZzCYKmZCu55wDPBx4GzgcOsX3/wDGjACcIZi49ObgCcXq+dx8GGvneBcF0SFoa6G1w3xh/Y2XJCh67kCr+FwDHZ2/SoCH5PvkE0qLYB/teume2TcBqIel821tIuocxj2Hlzw+TrNUOsP31Su9f5JkaBEEQzHxi/hQEk1PLYljSusDbgItsf0fS2sAutkt01QdB5+maHXVf0clngZWYxUUnkh5HUoeeQ7JbWJHUgHJHq4kFwaOgdtOapBeTxts35VNrAf9q+/QmcYOZhaTPAHcD34x7aHMknQxsCPyUPmVy2+9qLalgxiHpeNu7SLqWsf2ARZRqLJd0PPBnku0UwO7A423vPHDMKMAJgmA6SvveBcFUSNoZOM32PZI+AswD9rN9RcupzRgk7U/qeruq7VxmMtlKbQvSoPCC+BseDqFSEwRBEIwCMX8KaiPpy7b3lfRDJl+E3KGFtJYISfuRxms/ajuXIJiNSFof2BV4NXCz7W1bTmlS2lA9HVW6pl4UBJNRu2ktryn/BFgb2AH4J+DDsR4X9CNpJ+DpwAa2Xz/A93d2DF4DSW+Y7LztI4adSzBz6TXESlpzstdL2Z5J+rntdRd37lHFjAKcIAimIvverQMcRZJPv7Xvtctsh+dwUIye5KikLYBPAV8APmZ705ZTm3FIWgVYtnds+/9aTGdGIemjpM7CE/KpnUjdhfu1l9XsIVRqgiAIgjaJ+VMwDCRtZPtySS+c7HXb5ww7pyUlKxk+Dvgb8ABpc922Vxgw3lQdkb24YbUbBH2Muh1126qno0jX1IuCoA1iTTmYiKSlgHfZ/lLBmJ0dg9cgK7Tdb/uhfLwUsIztv7SbWTBTkfRkYH4+vMT27QVjHw0cbPvifLwp8M5BivUWxYwCnCAIpqKG710QTIWkK21vmFVarrV9bO9c27nNFCS9Avgi8BSShPOawPW2n9NqYjMISTeSOinuz8fLAVeFKksQBEEQzHxi/hQMG0n/ADwzH3bCwlfSE4FnML4hYKANi76OyPcCFwM3979eqiMyCLrOTLGjns2qp11RLwqCR4Ok6/OXX7N9cIM4saYcPAJJl9jepFLszo3BSyPpYmBb2/fm4+WB02ejXWRQH0m7AAcAZ5OKs7cE3mf7e4XiX09qpuo1qq8B3Ag8yICNHY8pkVgQBDOWvUi+d1/Nx7uTujkH9r0Lgmn4naT/ArYDPidpGZLHdVCO/YDNgIV5Yro1Sc45KMctpM2E3kbbMkBIRQdBEATB7CDmT8HQkLQVcATwv6RFyNUlvcH2uW3mNR2S3gzsAzwVuIo0N7kQ2GaQeH3Kh8sDhxIKEUEwFasD+3bdjtr2HfneNxu5HbgVuANYpeVcgqAItp+dC+s2axgq1pSDybhA0sGkseF9vZNNrcm6OAavxLK94hsA2/dmC8kgqMGHgfk91RtJKwMLgSIFOMD2heIsIhRwgi
CYkhq+d0EwFXmAtj2pU+EXklYD1rN9esupzRh61geSrgY2tP2wpKttb9B2bjMFSSeRpBB/SpLM3g64hNyNa/td7WUXBEEQBEFNYv4UDBNJlwO7274xHz8T+I7tjdrNbGqyVdR84GLbz5P0LOAztl9VKH4oRATBNIQddfeYKepFQQCLLGv+mtcjnwk8C/hxU/WQWFMOJkPSWZOctu0XNYzbuTF4DSRdAOzdK2iStBHJwmfzdjMLZiKSrrW9Xt/xHODq/nOjRijgBEEwHVdI2myC791lLecUzFCyP+gJklaRtEY+fUObOc1A7spykOcBx0i6nb4OgKAIJ+aPHme3lEcQBEEQBMMn5k/BMFm6t/APYPt/JC3dZkJLwP2275eEpGVs3yCppJVMKEQEwSRMZUdNsk0MRpsZoV4UBJlzgS0lPQE4HbiUVDi7R5OgvTXlvuPfA7+f+juC2YDtrSuF7uIYvAb7Agsk3UJSAlqVdD0HQQ1Ok/QT4Dv5eFfgRy3ms1hCAScIgimp4XsXBFMhaQfgQMYWhNYAbrAdC0KFyB0h95MGxXsCKwDH2P5Tq4kFQRAEQRDMAGL+FAwTSYcBDwFH51N7AEvZfmN7WU2PpBNJVm37Ai8C7iRtYry0YdxQiAiCacgquC9igh217Te1nFqwhIR6UTATkHSF7XmS9gaWs/15SVfZfl7buQUzD0lPBj4DPMX2SyStC2xu+1sN43ZuDF6LXHjUK6a/samaVRBMhiSRLIznA1vk0+fZPnHq72qfKMAJgmBKJK053eu2fzOsXIKZTywI1UPS+ba3kHQPyRYJUhEOwMPAn4ADbH+9lQRnAJKOt71LltV/xOAqNtyCIAiCYOYT86dgmEhaBngnfYuQwNdt/629rJYcSS8EVgROs/33hrH2B44LhYggmJywo+4uU6kXRbNa0EUkXQm8A/gS8Cbb1020FQmCUkj6MXAY8GHbG0h6DHBl07+3ro/BSyFpZ9I4/h5JHwHmAfv1LKmCoCRdfFZEAU4QBEEwEsSCUHtIWgm40HZJ+fdZhaTVbP9+qo232HALgiAIgiAISiFpKeA6289qO5cgCEYfSQuBnYDPAiuRCjnm2/6nVhMLFks0qwUziVx8+17gAtufk/Q0ksXau1pOLZiBSLrU9nxJV9reMJ9rpLgUY/AxJF1je31JWwCfAr4AfMz2pi2nFsxAJB0BHGz70rZzWVIe03YCQRAEQZC5S9LyJD/gYyTdDtzXck6zAtt3SNqq7Ty6TC6+WQo4vKLHcBAEQRAEQRBg+yFJN0paI2xIgiBYAnYg2VHvw5gd9SdazShYUh7IazZzJM2xfZakL7edVBAMgu1zgHMAJM0B/hjFN0FF7stNpwaQtBlwd5OAMQYfx0P588uAb9g+VdJ+bSYUzGg2BfaQ9BvSnqEYcZvvKMAJgiAIRoUdgb8C7yZ5p64IfLLVjGYRtn/fdg5dJ0/CHpa0ou1GE7ogCIIgCIIgWAxPAK6TdAl9jQu2d2gvpSAIRomeHTVwG4+0o95PUthRjz69ZrXziGa1oONIOhZ4G2nj/lJgBUlfsX1Au5kFM5T3AD8Ani7pAmBl4DUF4sYYPPE7Sf8FbAd8LltzzWk5p2Dm8uK2E3i0hAVVEARBMBJIeg9wnO3ftZ1LEAyKpJOBDYGfMn4SFh09QRAEQRAEQTGyjcMjyN3lQRAEiyXsqEcfSY8lqReJMfWiY2z/qdXEgmAAevY/kvYA5gEfBC4fZQWDoLtIWhv4LbAO6R56I/C8phY2MQZP5OfT9sC1tn8haTVgPdunt5xaMIOQAX1QOQAADeFJREFU9MTpXh/l8VAU4ARBEAQjgaSPA7sAfwKOAxbYvq3drILg0SHpDZOdt33EsHMJgiAIgiAIgiAIgumQtFoo4o4ePfUiSffwSPWih///9u4+xtKyPuP49xppRV4WW8IKtAjYWFJRcCmkVJGK0LQWocY3NKyx1jYa/lBjTUxbW6IQTbUamxSssYmtuo
2wMdTXIiUlvEitRQVRtJoURSiyugsNriygXP3jnCm76zLLMrN7z5z9fpKTc+77nHnmymQmc5/n/J7fzeTcmd2LtKIk+TrwTOCfgL9te3WSm9oePziaZlCSLwFnz1/sm+RU4KK2zxibbLYkWQ3sOz92ay4tpSS3MlkHBXgycPf08ROB29oePTDegizAkSQtK0mOA84BXgzc3vaMwZGkRy3J/sCWtj+djh8HPL7tj8cmkyRJ0izY7sPYn9F21R6MI0kawO5FWomSvB54C3ATcCaTD1M/2vY5Q4NpJiU5CbgYOItJx6V3Ai9o+73HeDzX4FtJcjbwHuBwYAOTv+dvtj12aDDNpCQfBC5r+9np+PnAC9u+dmyyR2YBjiRpWUlyKPBS4OXAgbYh1UqS5AvAGW1/NB0fAFzR9lljk0mSJGmWJLkAuBP4CJOrAM8FDmv7l0ODSZL2CLsXaaVLEuBxbX8yOotmU5LfBD7AZCu/M9v+YAmO6RocSHIT8DzgyrZrkpwGrG37msHRNIOS3Lx996odzS0nFuBIkpaFJOcx2YLqEGA9cGnbW8amknbN/H7WO5uTJEmSFmNHWza4jYMkSVqukrwDeFfbe6bjXwD+pO1bxybTLEnyKbbtVPM0JgUzdwO0PXuRx3cNDiS5oe2J00KcNW0f2ht/DtozknwOuBb46HTqXODUtr8zLtXC9hkdQJKkqSOAN7a9cXQQaRE2Jzmh7ZcBkvw6cN/gTJIkSZo9m5OcC3yMyYcMrwA2j40kSZL0iJ7f9s/mB23vTvJ7gAU4Wkp/vZuP7xp84p5p5/drgHVJNrB3/hy0Z7wCOB+4bDq+Zjq3bNkBR5K0rCRZDew7P25728A40i6Z7i/8MeB/mLQhPRQ4p+2XhgaTJEnSTElyFPA3wLOZnPz/PJMLGr4zLpUkSdKOJfkqcFLb+6fjJwA3tD12bDLNoiRHA3e23TIdPwF40mLXyq7BJ5Lsz+Si0zkm3UgOAta13Tg0mLRMWIAjSVoWkpwFvBc4HNgAHAl8wzdhWmmS/BxwzHT4X20fHJlHkiRJkiRJGinJW4CzgA9Np14NfLLtu8al0qxKcgPwrLYPTMc/D3y+7Uljk82GJG8CLml7x+gsmn1JfhV4M3AUW+3u1PZ5ozLtjFtQSZKWiwuBk4Er265JchqwdnAmaZckeSlweduvJXkrcEKSC+e3pJIkSZKWQpJDgD/mZ09C/uGoTJIkSY+k7V8luQk4Yzp1QdvPjcykmbbPfPENQNsHpkU4i+Ia/P8dCFyRZBNwCbC+7V2DM2l2rQf+Dvh74KeDszwqFuBIkpaLB9tuTDKXZK7tVUneNzqUtIv+ou36JKcApzPZd/j9wG+MjSVJkqQZ8wngWuBKVshJSEmStHdrezlw+egc2iv8IMnZbT8JkOT3gR8uwXFdgwNt3wa8LclxwDnA1Ulub3vGTr5Ueix+0vb9o0PsCgtwJEnLxT1JDmCygF2XZAOweXAmaVfNv/E6E/hg288kuXBkIEmSJM2k/dq+ZXQISZKkxyLJlcCDwEVtPz06j2bO65h8xnDRdPw94JVLcFzX4NvaAHwf2AisHpxFs+tTSc4DLgPun59su2lcpIWl7egMkiSRZD9gCxAmW0+tAtYt53+i0vaSfBq4A/ht4ATgPuCLbY8fGkySJEkzZVrkfX3bz47OIkmStKuSHA4cBpzc9qKdvV56LKYX/NL2R0t0PNfgwLQY4mXAIUy2B7q07S1jU2lWJbl1B9Nt+5Q9HuZRsgBHkjRUkuvanpLkXmD+n1Km9w8Bm4B3t714SEBpF0wLyX4XuLntt5McBjyj7RWDo0mSJGmGTN8/7c/kCsAHmbyHattVQ4NJkiRJgyU5CDgfOHU6dTXw9rb/u8jjugYHkrwTuKTtjaOzSMuRBTiSpGUtycFMqsqPGZ1FerSSrAb2nR+3vW1gHEmSJM2gJL8IPJVt151Xj0skSZK0rSQ38/BFl9s8xaRw4bg9HEl7gSQfB7
4G/ON06pXA8W1ftATHdg0+5Tlw7SlJng48jW1/3z48LtHCLMCRJC17SQ5re+foHNLOJDkbeA9wOJM9cJ8MfLPtsUODSZIkaaYk+SPgDcAvAzcCJzO5cOH0ocEkSZK2kuTIhZ5v+909lUV7jyQ3tn3mzuYew3FdgwNJzgLey8PnwI8EvuE5cO0OSc4HnsukAOezwPOB69q+ZGSuhcyNDiBJ0s5YfKMV5AImb7y+1fZo4AzgC2MjSZIkaQa9ATgJ+G7b04A1wKJa6kuSJC21tt/d+gbcDdy71U3aHe5Lcsr8IMmzgfuW4LiuwScuZNtz4KfjOXDtPi9h8jv2/bavBo4HDhobaWH7jA4gSZI0Qx5suzHJXJK5tlcled/oUJIkSZo5W9puSUKSx7f9ZhK37ZUkSctSktcCbwO28PCWVAWeMiyUZtnrgA8nmf+Q/m7gVUtwXNfgE54D1550X9uHkvwkySomXZeOGB1qIRbgSJIkLZ17khwAXAOsS7IB2Dw4kyRJkmbP7UmeCPwz8K9J7gbcwkGSJC1Xbwae3vaHo4NodiV501bDDwP7Tx9vZtKp/KuL/BauwSfmz4Ffi+fAtfvdMP27+yDwJeBHwL+PjbSwtN35qyRJkrRTSfZn0s50DjiXSSvEdW03Dg0mSZKkmZXkt5isOy9v+8DoPJIkSdtLcjnworY/Hp1FsyvJ+dOHxzDZKuoTQICzgC+2XbuE32uvXYMn2Y9JN6sAa4FVTM6BbxoaTDMvyVHAqraLLabbrSzAkSRJWiLTqywuaXvH6CySJEmSJEnScpBkDfAh4D+A++fn275+WCjNrCTXAGe2vXc6PhD4TNtTxyZb2ZJc1/aUJPfy8FZymd4/BGwC3t324iEBtVdIchiwqe39O33xIG5BJUmStHQOBK5Isgm4BFjf9q7BmSRJkiRJkqSRPgD8G3Azkw/qpd3pScDWXWkemM5pEdqeMr0/cEfPJzkYuB6wAEe700eAX0ny8bZvHh1mR+yAI0mStMSSHAecA7wYuL3tGYMjSZIkSZIkSUMk+UrbNaNzaO+Q5M+BlwGXTadeyKRr+TvHpdo7JDms7Z2jc2i2JQnwtLZfH51lRyzAkSRJWmJJDgVeCrwcOLDtcYMjSZIkSZIkSUMkeQfwHeBTbLsF1aZRmTTbkpwAPGc6vKbtV0bmkbQ4SVYD+86P2942MM6CLMCRJElaIknOY3J1xSHAeuDStreMTSVJkiRJkiSNk+TWHUy37VP2eBhJ0oqR5GzgPcDhwAbgSOAbbY8dGmwB+4wOIEmSNEOOAN7Y9sbRQSRJkiRJkqRl4tfabtl6Ism+j/RiSZKmLgBOBq5suybJacDawZkWNDc6gCRJ0qxo+6dtb0yyOsmT52+jc0mSJEmSJEkDXf8o5yRJ2tqDbTcCc0nm2l4FnDg61ELsgCNJkrREkpwFvJft2iECy7YdoiRJkiRJkrQ7JDkU+CXgCUlO2OqpVcB+Y1JJklaQe5IcAFwDrEuyAdg8ONOC0nZ0BkmSpJmQ5CbgeWzXDrHtawZHkyRJkiRJkvaoJK8C/oBJt4L/3Oqpe4F/aHvZiFySpJUhyf7AfUx2djoXOAhYN+2KsyxZgCNJkrREktzQ9sRpIc6atg8luant8aOzSZIkSZIkSSMkWQsUOIqHd+do27cPCyVJWvaSvAm4pO0do7M8Wm5BJUmStHTm2yFeywpphyhJkiRJkiTtZq8E7ga+DGwZnEWStHIcCFyRZBNwCbC+7V2DMy3IDjiSJElLJMl+TE4iBFjLZD/rdW03DQ0mSZIkSZIkDZLka22fPjqHJGllSnIccA7wYuD2tmcMjvSI5kYHkCRJWumSXDd9eBdwD5Mrev4WeAdwa5Jbk5w3Kp8kSZIkSZI00PVJnjE6hCRpxdoAfB/YCKwenGVBdsCRJEnazZIcDFzf9pjRWSRJkiRJkqQ9IcnNQIF9gKcC/w3cz6R7dNseNzCeJGmZm17Y/DLgEGA9cGnbW8amWt
g+owNIkiTNurYbkzx3dA5JkiRJkiRpD3rB6ACSpBXtCOCNbW8cHeTRsgOOJEmSJEmSJEmSJEmSlp0kq4F958dtbxsYZ0FzowNIkiRJkiRJkiRJkiRJ85KcleTbwK3A1cB3gH8ZGmonLMCRJEmSJEmSJEmSJEnScnIhcDLwrbZHA6cDXxgbaWEW4EiSJEmSJEmSJEmSJGk5ebDtRmAuyVzbq4ATR4dayD6jA0iSJEmSJEmSJEmSJElbuSfJAcC1wLokG4DNgzMtKG1HZ5AkSZIkSZIkSZIkSZIASLIfsAUIsBZYBaxru2losAVYgCNJkiRJkiRJkiRJkqThklzX9pQk9wLzBS2Z3j8EbALe3fbiIQEXYAGOJEmSJEmSJEmSJEmSlr0kBwPXtz1mdJbtWYAjSZIkSZIkSZIkSZKkFSHJYW3vHJ1jexbgSJIkSZIkSZIkSZIkSYswNzqAJEmSJEmSJEmSJEmStJJZgCNJkiRJkiRJkiRJkiQtggU4kiRJkiRJkiRJkiRJ0iJYgCNJkiRJkiRJkiRJkiQtwv8B8fLlZPf8gywAAAAASUVORK5CYII=\n","text/plain":["<Figure size 2880x576 with 1 Axes>"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":573},"id":"73UChGrePhr1","outputId":"af8b97e5-cec0-469e-c55d-433364ee31a5"},"source":["exp = train_df.y.str.split(',').explode().value_counts()\n","top_100_tags = list(exp[0:25].index)\n","# z = lambda r : True if r.split(',') in top_100_tags else False\n","z = lambda r : True if all(x in top_100_tags for x in r.split(',') ) else False\n","top_100_idx = train_df.y.map(z)\n","train_df = train_df[top_100_idx]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Id</th>\n","      <th>Title</th>\n","      <th>Body</th>\n","      <th>Tags</th>\n","      <th>CreationDate</th>\n","      <th>Y</th>\n","      <th>y</th>\n","      <th>text</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>13</th>\n","      
<td>34556906</td>\n","      <td>output FILE ,is this a fault?</td>\n","      <td>\\r\\nmy code here\\r\\n\\r\\n        #include &lt;stdi...</td>\n","      <td>&lt;c++&gt;</td>\n","      <td>2016-01-01 14:20:01</td>\n","      <td>LQ_EDIT</td>\n","      <td>c++</td>\n","      <td>output FILE ,is this a fault?</td>\n","    </tr>\n","    <tr>\n","      <th>24</th>\n","      <td>34560768</td>\n","      <td>Can I throw from class init() in Swift with co...</td>\n","      <td>&lt;p&gt;I'd like my class &lt;em&gt;init()&lt;/em&gt; in Swift ...</td>\n","      <td>&lt;swift&gt;</td>\n","      <td>2016-01-01 22:42:24</td>\n","      <td>HQ</td>\n","      <td>swift</td>\n","      <td>Can I throw from class init() in Swift with co...</td>\n","    </tr>\n","    <tr>\n","      <th>25</th>\n","      <td>34560942</td>\n","      <td>C# - Count a specific word in richTextBox1 and...</td>\n","      <td>&lt;p&gt;I'm not sure, if this question is unique, b...</td>\n","      <td>&lt;c#&gt;</td>\n","      <td>2016-01-01 23:06:53</td>\n","      <td>LQ_CLOSE</td>\n","      <td>c#</td>\n","      <td>C# - Count a specific word in richTextBox1 and...</td>\n","    </tr>\n","    <tr>\n","      <th>30</th>\n","      <td>34562551</td>\n","      <td>c++ vector type function implemetation</td>\n","      <td>class City\\r\\n    {\\r\\n       private:\\r\\n...</td>\n","      <td>&lt;c++&gt;</td>\n","      <td>2016-01-02 04:17:27</td>\n","      <td>LQ_EDIT</td>\n","      <td>c++</td>\n","      <td>c++ vector type function implemetation</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>34566364</td>\n","      <td>japanese and portuguese language cannot support</td>\n","      <td>My site Japanese supported. 
But Portuguese  la...</td>\n","      <td>&lt;php&gt;</td>\n","      <td>2016-01-02 13:20:49</td>\n","      <td>LQ_EDIT</td>\n","      <td>php</td>\n","      <td>japanese and portuguese language cannot support</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>44992</th>\n","      <td>60458575</td>\n","      <td>MySQL how to query five tables in one SELECT</td>\n","      <td>&lt;p&gt;I have 5 tables as follows:&lt;/p&gt;\\n\\n&lt;ul&gt;\\n&lt;l...</td>\n","      <td>&lt;mysql&gt;</td>\n","      <td>2020-02-28 20:07:09</td>\n","      <td>LQ_CLOSE</td>\n","      <td>mysql</td>\n","      <td>MySQL how to query five tables in one SELECT</td>\n","    </tr>\n","    <tr>\n","      <th>44993</th>\n","      <td>60460748</td>\n","      <td>Copy value of list not reference</td>\n","      <td>&lt;p&gt;I have a list that i want to compare to aft...</td>\n","      <td>&lt;python&gt;</td>\n","      <td>2020-02-28 23:54:33</td>\n","      <td>LQ_CLOSE</td>\n","      <td>python</td>\n","      <td>Copy value of list not reference</td>\n","    </tr>\n","    <tr>\n","      <th>44994</th>\n","      <td>60461193</td>\n","      <td>Weird question, but how do I make a python scr...</td>\n","      <td>&lt;p&gt;Before you get confused, I am going to comp...</td>\n","      <td>&lt;python&gt;&lt;python-3.x&gt;</td>\n","      <td>2020-02-29 01:25:40</td>\n","      <td>LQ_CLOSE</td>\n","      <td>python,python-3.x</td>\n","      <td>Weird question, but how do I make a python scr...</td>\n","    </tr>\n","    <tr>\n","      <th>44996</th>\n","      <td>60461754</td>\n","      <td>Does Python execute code from the top or botto...</td>\n","      <td>&lt;p&gt;I am working on learning Python and was won...</td>\n","      <td>&lt;python&gt;</td>\n","      <td>2020-02-29 
03:33:59</td>\n","      <td>LQ_CLOSE</td>\n","      <td>python</td>\n","      <td>Does Python execute code from the top or botto...</td>\n","    </tr>\n","    <tr>\n","      <th>44998</th>\n","      <td>60465318</td>\n","      <td>how to implement fill in the blank in Swift</td>\n","      <td>&lt;p&gt;\"I _____ any questions.\"&lt;/p&gt;\\n\\n&lt;p&gt;I want t...</td>\n","      <td>&lt;ios&gt;&lt;swift&gt;</td>\n","      <td>2020-02-29 12:50:43</td>\n","      <td>LQ_CLOSE</td>\n","      <td>ios,swift</td>\n","      <td>how to implement fill in the blank in Swift</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>9968 rows × 8 columns</p>\n","</div>"],"text/plain":["             Id  ...                                               text\n","13     34556906  ...                      output FILE ,is this a fault?\n","24     34560768  ...  Can I throw from class init() in Swift with co...\n","25     34560942  ...  C# - Count a specific word in richTextBox1 and...\n","30     34562551  ...             c++ vector type function implemetation\n","48     34566364  ...    japanese and portuguese language cannot support\n","...         ...  ...                                                ...\n","44992  60458575  ...       MySQL how to query five tables in one SELECT\n","44993  60460748  ...                   Copy value of list not reference\n","44994  60461193  ...  Weird question, but how do I make a python scr...\n","44996  60461754  ...  Does Python execute code from the top or botto...\n","44998  60465318  ...        
how to implement fill in the blank in Swift\n","\n","[9968 rows x 8 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":653},"id":"e_z1IU-XT0a0","outputId":"dc80c79e-11a0-4e63-bd40-8d933dbbb6aa"},"source":["import nlu\n","# load a trainable pipeline by specifying the train  prefix \n","\n","unfitted_pipe = nlu.load('train.multi_classifier')\n","#configure epochs\n","unfitted_pipe['multi_classifier'].setMaxEpochs(100)\n","unfitted_pipe['multi_classifier'].setLr(0.005)      \n","#  fit it on a dataset with label='y' and text columns. Labels separated by ','\n","fitted_pipe = unfitted_pipe.fit(train_df[['y','text']], label_seperator=',')\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df[['y','text']])\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>y</th>\n","      <th>text</th>\n","      <th>multi_classifier_confidences</th>\n","      <th>sentence</th>\n","      <th>default_name_embeddings</th>\n","      <th>multi_classifier_classes</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      
<th>13</th>\n","      <td>c++</td>\n","      <td>output FILE ,is this a fault?</td>\n","      <td>[]</td>\n","      <td>output FILE ,is this a fault?</td>\n","      <td>[0.04620636999607086, -0.04046135023236275, -0...</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>24</th>\n","      <td>swift</td>\n","      <td>Can I throw from class init() in Swift with co...</td>\n","      <td>[0.86285734, 0.98327714]</td>\n","      <td>Can I throw from class init() in Swift with co...</td>\n","      <td>[0.053270746022462845, -0.00784565694630146, -...</td>\n","      <td>[swift, c]</td>\n","    </tr>\n","    <tr>\n","      <th>25</th>\n","      <td>c#</td>\n","      <td>C# - Count a specific word in richTextBox1 and...</td>\n","      <td>[0.64955217]</td>\n","      <td>C# - Count a specific word in richTextBox1 and...</td>\n","      <td>[-0.005682709161192179, -0.023547030985355377,...</td>\n","      <td>[regex]</td>\n","    </tr>\n","    <tr>\n","      <th>30</th>\n","      <td>c++</td>\n","      <td>c++ vector type function implemetation</td>\n","      <td>[0.9755105, 0.77180904, 0.9789763]</td>\n","      <td>c++ vector type function implemetation</td>\n","      <td>[0.024628309532999992, -0.015623562969267368, ...</td>\n","      <td>[c++, python-3.x, python]</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>php</td>\n","      <td>japanese and portuguese language cannot support</td>\n","      <td>[0.55255216]</td>\n","      <td>japanese and portuguese language cannot support</td>\n","      <td>[0.038325726985931396, -0.005848723463714123, ...</td>\n","      <td>[php]</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>44992</th>\n","      <td>mysql</td>\n","      <td>MySQL how to query five tables in one SELECT</td>\n","      <td>[0.6404308, 
0.99544823]</td>\n","      <td>MySQL how to query five tables in one SELECT</td>\n","      <td>[0.006962132174521685, -0.03580842167139053, -...</td>\n","      <td>[sql, mysql]</td>\n","    </tr>\n","    <tr>\n","      <th>44993</th>\n","      <td>python</td>\n","      <td>Copy value of list not reference</td>\n","      <td>[0.591653]</td>\n","      <td>Copy value of list not reference</td>\n","      <td>[0.025995030999183655, 0.001833591377362609, -...</td>\n","      <td>[javascript]</td>\n","    </tr>\n","    <tr>\n","      <th>44994</th>\n","      <td>python,python-3.x</td>\n","      <td>Weird question, but how do I make a python scr...</td>\n","      <td>[0.7427199, 0.99999976, 0.70473063, 0.72811186...</td>\n","      <td>Weird question, but how do I make a python scr...</td>\n","      <td>[0.018493961542844772, -0.04660267382860184, -...</td>\n","      <td>[html, python, javascript, node.js, php]</td>\n","    </tr>\n","    <tr>\n","      <th>44996</th>\n","      <td>python</td>\n","      <td>Does Python execute code from the top or botto...</td>\n","      <td>[0.9977689, 0.794142]</td>\n","      <td>Does Python execute code from the top or botto...</td>\n","      <td>[0.01413149293512106, -0.02844131551682949, -0...</td>\n","      <td>[python, php]</td>\n","    </tr>\n","    <tr>\n","      <th>44998</th>\n","      <td>ios,swift</td>\n","      <td>how to implement fill in the blank in Swift</td>\n","      <td>[0.9999993]</td>\n","      <td>how to implement fill in the blank in Swift</td>\n","      <td>[0.019475314766168594, -0.022571099922060966, ...</td>\n","      <td>[swift]</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>10944 rows × 6 columns</p>\n","</div>"],"text/plain":["                              y  ...                  multi_classifier_classes\n","origin_index                     ...                                          \n","13                          c++  ...                                        
[]\n","24                        swift  ...                                [swift, c]\n","25                           c#  ...                                   [regex]\n","30                          c++  ...                 [c++, python-3.x, python]\n","48                          php  ...                                     [php]\n","...                         ...  ...                                       ...\n","44992                     mysql  ...                              [sql, mysql]\n","44993                    python  ...                              [javascript]\n","44994         python,python-3.x  ...  [html, python, javascript, node.js, php]\n","44996                    python  ...                             [python, php]\n","44998                 ios,swift  ...                                   [swift]\n","\n","[10944 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"markdown","metadata":{"id":"DL_5aY9b3jSd"},"source":["# 4. Evaluate the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"0YDA2KunCeqQ","outputId":"8f72b51d-8e4c-49e8-884e-af5b0fdfa1ac"},"source":["from sklearn.preprocessing import MultiLabelBinarizer\n","from sklearn.metrics import classification_report\n","from sklearn.metrics import f1_score\n","from sklearn.metrics import roc_auc_score\n","mlb = MultiLabelBinarizer()\n","mlb = mlb.fit(preds.y.str.split(','))\n","y_true = mlb.transform(preds['y'].str.split(','))\n","y_pred = mlb.transform(preds.multi_classifier_classes.str.join(',').str.split(','))\n","print(\"Classification report: \\n\", (classification_report(y_true, y_pred)))\n","print(\"F1 micro averaging:\",(f1_score(y_true, y_pred, average='micro')))\n","print(\"ROC: \",(roc_auc_score(y_true, y_pred, average=\"micro\")))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Classification report: \n","               precision    recall  f1-score   support\n","\n","           0       
0.67      0.80      0.73       840\n","           1       0.22      0.62      0.32       237\n","           2       0.37      0.47      0.41       467\n","           3       0.38      0.67      0.49       561\n","           4       0.48      0.54      0.51       831\n","           5       0.54      0.58      0.56       697\n","           6       0.49      0.73      0.59       792\n","           7       0.58      0.39      0.47      1352\n","           8       0.20      0.18      0.19       158\n","           9       0.49      0.77      0.60      1431\n","          10       0.57      0.75      0.65      2343\n","          11       0.36      0.56      0.43       833\n","          12       0.34      0.24      0.28       300\n","          13       0.51      0.74      0.60       539\n","          14       0.19      0.28      0.23       106\n","          15       0.63      0.67      0.65      1283\n","          16       0.61      0.74      0.67      1402\n","          17       0.21      0.25      0.23       411\n","          18       0.38      0.47      0.42       261\n","          19       0.90      0.10      0.19       183\n","          20       0.56      0.75      0.64       451\n","          21       0.56      0.73      0.63       485\n","          22       0.45      0.60      0.51       340\n","          23       0.34      0.13      0.19       220\n","          24       0.53      0.73      0.61       268\n","\n","   micro avg       0.50      0.63      0.56     16791\n","   macro avg       0.46      0.54      0.47     16791\n","weighted avg       0.51      0.63      0.55     16791\n"," samples avg       0.54      0.65      0.55     16791\n","\n","F1 micro averaging: 0.5556585043017869\n","ROC:  0.7920968190895907\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"mhFKVN93o1ZO"},"source":["# 5. 
Let's try different Sentence Embeddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"CzJd8omao0gt","outputId":"c3903ffc-ee61-47c1-87cf-bb1876436e25"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possible sentence embedding we could use. Let's use BERT!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language <en> NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model 
sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP 
model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language <fi> NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language <xx> NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model 
labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"0ofYHpu7sloS","outputId":"ea715585-daa2-433d-d281-02b9e61222a4"},"source":["pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.multi_classifier')\n","pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['en_embed_sentence_small_bert_L12_768'] has settable params:\n","pipe['en_embed_sentence_small_bert_L12_768'].setBatchSize(32)  | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['en_embed_sentence_small_bert_L12_768'].setIsLong(False)  | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['en_embed_sentence_small_bert_L12_768'].setMaxSentenceLength(128)  | Info: Max sentence length to process | Currently set to : 128\n","pipe['en_embed_sentence_small_bert_L12_768'].setDimension(768)  | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['en_embed_sentence_small_bert_L12_768'].setCaseSensitive(False)  | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['en_embed_sentence_small_bert_L12_768'].setStorageRef('sent_small_bert_L12_768')  | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+')   | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"])  | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0)           | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999)       | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True)  | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True)      | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([])       | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False)  | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0)           | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999)       | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')  | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['multi_classifier'] has settable params:\n","pipe['multi_classifier'].setMaxEpochs(2)            | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['multi_classifier'].setLr(0.001)               | Info: Learning Rate | Currently set to : 0.001\n","pipe['multi_classifier'].setBatchSize(64)           | Info: Batch size | Currently set to : 64\n","pipe['multi_classifier'].setValidationSplit(0.0)    | Info: Choose the proportion of training dataset to be validated against the model on each Epoch. The value should be between 0.0 and 1.0 and by default it is 0.0 and off. | Currently set to : 0.0\n","pipe['multi_classifier'].setThreshold(0.5)          | Info: The minimum threshold for each label to be accepted. Default is 0.5 | Currently set to : 0.5\n","pipe['multi_classifier'].setRandomSeed(44)          | Info: Random seed | Currently set to : 44\n","pipe['multi_classifier'].setShufflePerEpoch(False)  | Info: whether to shuffle the training data on each Epoch | Currently set to : False\n","pipe['multi_classifier'].setEnableOutputLogs(True)  | Info: Whether to use stdout in addition to Spark logs. 
| Currently set to : True\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":570},"id":"ABHLgirmG1n9","outputId":"60e9995e-080c-4213-cf03-c7baba89bd6a"},"source":["# Load pipe with bert embeds\n","# using large embeddings can take a few hours..\n","pipe['multi_classifier'].setMaxEpochs(120)            \n","pipe['multi_classifier'].setLr(0.0005)  \n","fitted_pipe = pipe.fit(train_df[['y','text']],label_seperator=',')\n","preds = fitted_pipe.predict(train_df[['y','text']])\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>y</th>\n","      <th>text</th>\n","      <th>multi_classifier_confidences</th>\n","      <th>en_embed_sentence_small_bert_L12_768_embeddings</th>\n","      <th>document</th>\n","      <th>multi_classifier_classes</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>13</th>\n","      <td>c++</td>\n","      <td>output FILE ,is this a fault?</td>\n","      <td>[]</td>\n","      <td>[-0.0598912313580513, 0.429191917181015, -0.25...</td>\n","      <td>output FILE ,is this a fault?</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>24</th>\n","      <td>swift</td>\n","      <td>Can I throw from class init() in Swift with co...</td>\n","      <td>[0.61310124]</td>\n","      <td>[-0.45358699560165405, 0.1986018270254135, 
-0....</td>\n","      <td>Can I throw from class init() in Swift with co...</td>\n","      <td>[java]</td>\n","    </tr>\n","    <tr>\n","      <th>25</th>\n","      <td>c#</td>\n","      <td>C# - Count a specific word in richTextBox1 and...</td>\n","      <td>[0.8172003]</td>\n","      <td>[-0.592096209526062, 0.0025841565802693367, -0...</td>\n","      <td>C# - Count a specific word in richTextBox1 and...</td>\n","      <td>[c#]</td>\n","    </tr>\n","    <tr>\n","      <th>30</th>\n","      <td>c++</td>\n","      <td>c++ vector type function implemetation</td>\n","      <td>[0.98100495]</td>\n","      <td>[-0.6645137071609497, 0.34700289368629456, 0.1...</td>\n","      <td>c++ vector type function implemetation</td>\n","      <td>[c++]</td>\n","    </tr>\n","    <tr>\n","      <th>48</th>\n","      <td>php</td>\n","      <td>japanese and portuguese language cannot support</td>\n","      <td>[]</td>\n","      <td>[-0.30820634961128235, 0.5732622742652893, 0.5...</td>\n","      <td>japanese and portuguese language cannot support</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>44992</th>\n","      <td>mysql</td>\n","      <td>MySQL how to query five tables in one SELECT</td>\n","      <td>[0.94582915]</td>\n","      <td>[-0.6759300231933594, 0.1323285549879074, 0.56...</td>\n","      <td>MySQL how to query five tables in one SELECT</td>\n","      <td>[mysql]</td>\n","    </tr>\n","    <tr>\n","      <th>44993</th>\n","      <td>python</td>\n","      <td>Copy value of list not reference</td>\n","      <td>[0.71518165]</td>\n","      <td>[-0.7307966947555542, 0.3146328032016754, -0.5...</td>\n","      <td>Copy value of list not reference</td>\n","      <td>[python]</td>\n","    </tr>\n","    <tr>\n","      <th>44994</th>\n","      <td>python,python-3.x</td>\n","  
    <td>Weird question, but how do I make a python scr...</td>\n","      <td>[0.9938545]</td>\n","      <td>[-0.478365957736969, -0.015336859039962292, 0....</td>\n","      <td>Weird question, but how do I make a python scr...</td>\n","      <td>[python]</td>\n","    </tr>\n","    <tr>\n","      <th>44996</th>\n","      <td>python</td>\n","      <td>Does Python execute code from the top or botto...</td>\n","      <td>[0.998447]</td>\n","      <td>[-0.7976136803627014, -0.17537403106689453, 0....</td>\n","      <td>Does Python execute code from the top or botto...</td>\n","      <td>[python]</td>\n","    </tr>\n","    <tr>\n","      <th>44998</th>\n","      <td>ios,swift</td>\n","      <td>how to implement fill in the blank in Swift</td>\n","      <td>[0.6266076, 0.9772264]</td>\n","      <td>[-0.4111633598804474, 0.04349775239825249, 0.2...</td>\n","      <td>how to implement fill in the blank in Swift</td>\n","      <td>[ios, swift]</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>9968 rows × 6 columns</p>\n","</div>"],"text/plain":["                              y  ... multi_classifier_classes\n","origin_index                     ...                         \n","13                          c++  ...                       []\n","24                        swift  ...                   [java]\n","25                           c#  ...                     [c#]\n","30                          c++  ...                    [c++]\n","48                          php  ...                       []\n","...                         ...  ...                      ...\n","44992                     mysql  ...                  [mysql]\n","44993                    python  ...                 [python]\n","44994         python,python-3.x  ...                 [python]\n","44996                    python  ...                 [python]\n","44998                 ios,swift  ...             
[ios, swift]\n","\n","[9968 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":10}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"E7ah2LM6tIhG","outputId":"edaa6235-c8d2-474a-9cc1-331e0967086c"},"source":["from sklearn.preprocessing import MultiLabelBinarizer\n","from sklearn.metrics import classification_report\n","from sklearn.metrics import f1_score\n","from sklearn.metrics import roc_auc_score\n","mlb = MultiLabelBinarizer()\n","mlb = mlb.fit(preds.y.str.split(','))\n","y_true = mlb.transform(preds['y'].str.split(','))\n","y_pred = mlb.transform(preds.multi_classifier_classes.str.join(',').str.split(','))\n","print(\"Classification report: \\n\", (classification_report(y_true, y_pred)))\n","print(\"F1 micro averaging:\",(f1_score(y_true, y_pred, average='micro')))\n","print(\"ROC: \",(roc_auc_score(y_true, y_pred, average=\"micro\")))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Classification report: \n","               precision    recall  f1-score   support\n","\n","           0       0.96      0.67      0.79       738\n","           1       0.95      0.71      0.82       228\n","           2       0.70      0.53      0.60       440\n","           3       0.91      0.63      0.75       508\n","           4       0.95      0.57      0.71       733\n","           5       0.91      0.58      0.71       621\n","           6       0.88      0.70      0.78       736\n","           7       0.81      0.65      0.72      1254\n","           8       0.86      0.58      0.69       145\n","           9       0.89      0.58      0.70      1288\n","          10       0.87      0.73      0.80      2164\n","          11       0.89      0.58      0.70       754\n","          12       0.84      0.67      0.74       277\n","          13       0.89      0.59      0.71       511\n","          14       0.96      0.27      0.42        96\n","          15       0.94      0.70      0.80      1193\n","         
 16       0.93      0.70      0.80      1265\n","          17       0.74      0.22      0.34       365\n","          18       0.97      0.70      0.82       246\n","          19       1.00      0.55      0.71       172\n","          20       0.92      0.71      0.81       427\n","          21       0.82      0.67      0.74       458\n","          22       0.81      0.66      0.73       319\n","          23       0.83      0.23      0.36       211\n","          24       0.97      0.64      0.77       242\n","\n","   micro avg       0.89      0.64      0.74     15391\n","   macro avg       0.89      0.59      0.70     15391\n","weighted avg       0.89      0.64      0.73     15391\n"," samples avg       0.70      0.64      0.65     15391\n","\n","F1 micro averaging: 0.7401884721644023\n","ROC:  0.8150061228796474\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","outputId":"bbf99f56-d4b1-4440-ecb7-fe9d61935c62"},"source":["stored_model_path = './models/multi_classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/multi_classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible!   
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Tesla plans to invest 10M into the ML sector')\n","preds"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"M1LjAwJVJxun"},"source":[""],"execution_count":null,"outputs":[]}]}
\ No newline at end of file
diff --git a/examples/colab/Training/named_entity_recognition/NLU_training_NER_demo.ipynb b/examples/colab/Training/named_entity_recognition/NLU_training_NER_demo.ipynb
index d521e8d8..ddabd159 100644
--- a/examples/colab/Training/named_entity_recognition/NLU_training_NER_demo.ipynb
+++ b/examples/colab/Training/named_entity_recognition/NLU_training_NER_demo.ipynb
@@ -1 +1 @@
-{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_NER_demo.ipynb","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyOOijIjq9lJM7l/lkKS+Vv4"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/collab/Training/named_entity_recognition/NLU_training_demo.ipynb)\n","\n","\n","\n","# Training a Named Entity Recognition (NER) model with NLU \n","With the [NER_DL model](https://nlp.johnsnowlabs.com/docs/en/annotators#ner-dl-named-entity-recognition-deep-learning-annotator) from Spark NLP you can achieve State Of the Art results on any NER problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu > /dev/null\n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download conll2003 dataset"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1606719395651,"user_tz":-60,"elapsed":59783,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"29d77c28-a56f-4e18-afab-0b23b69a7b5c"},"source":["! wget https://github.com/patverga/torch-ner-nlp-from-scratch/raw/master/data/conll2003/eng.train"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2020-11-30 06:56:34--  https://github.com/patverga/torch-ner-nlp-from-scratch/raw/master/data/conll2003/eng.train\n","Resolving github.com (github.com)... 140.82.121.4\n","Connecting to github.com (github.com)|140.82.121.4|:443... connected.\n","HTTP request sent, awaiting response... 302 Found\n","Location: https://raw.githubusercontent.com/patverga/torch-ner-nlp-from-scratch/master/data/conll2003/eng.train [following]\n","--2020-11-30 06:56:34--  https://raw.githubusercontent.com/patverga/torch-ner-nlp-from-scratch/master/data/conll2003/eng.train\n","Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n","Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 3283420 (3.1M) [text/plain]\n","Saving to: ‘eng.train’\n","\n","eng.train           100%[===================>]   3.13M  --.-KB/s    in 0.06s   \n","\n","2020-11-30 06:56:35 (55.8 MB/s) - ‘eng.train’ saved [3283420/3283420]\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. 
Train Deep Learning Classifier using nlu.load('train.ner')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":244},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1606720185735,"user_tz":-60,"elapsed":849845,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"785fa952-bce5-4ba6-9647-0dc4db57bab0"},"source":["import nlu\n","# load a trainable pipeline by specifying the train. prefix  and fit it on a datset with label and text columns\n","# Since there are no\n","train_path = '/content/eng.train'\n","trainable_pipe = nlu.load('train.ner')\n","fitted_pipe = trainable_pipe.fit(dataset_path=train_path)\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict('Donald Trump and Angela Merkel dont share many oppinions')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["pos_anc download started this may take some time.\n","Approximate size to download 4.3 MB\n","[OK!]\n","glove_100d download started this may take some time.\n","Approximate size to download 145.3 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>pos</th>\n","      <th>entities</th>\n","      <th>entities_confidence</th>\n","      <th>ner_confidence</th>\n","      
<th>default_name_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>[NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS]</td>\n","      <td>Donald Trump</td>\n","      <td>PER</td>\n","      <td>[0.9993000030517578, 0.9976000189781189, 0.999...</td>\n","      <td>[[-0.5496799945831299, -0.488319993019104, 0.5...</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>[NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS]</td>\n","      <td>Angela Merkel</td>\n","      <td>PER</td>\n","      <td>[0.9993000030517578, 0.9976000189781189, 0.999...</td>\n","      <td>[[-0.5496799945831299, -0.488319993019104, 0.5...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                                    pos  ...                            default_name_embeddings\n","origin_index                                             ...                                                   \n","0             [NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS]  ...  [[-0.5496799945831299, -0.488319993019104, 0.5...\n","0             [NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS]  ...  
[[-0.5496799945831299, -0.488319993019104, 0.5...\n","\n","[2 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"owFhjKqzQiv5","executionInfo":{"status":"ok","timestamp":1606720185739,"user_tz":-60,"elapsed":849824,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"fcfc6b10-79c7-453c-f2af-b4d2622d4e91"},"source":["# Check out the Parameters of the NER model we can configure\n","trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['named_entity_recognizer_dl'] has settable params:\n","pipe['named_entity_recognizer_dl'].setMinEpochs(0)   | Info: Minimum number of epochs to train | Currently set to : 0\n","pipe['named_entity_recognizer_dl'].setMaxEpochs(2)   | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['named_entity_recognizer_dl'].setLr(0.001)      | Info: Learning Rate | Currently set to : 0.001\n","pipe['named_entity_recognizer_dl'].setPo(0.005)      | Info: Learning rate decay coefficient. Real Learning Rage = lr / (1 + po * epoch) | Currently set to : 0.005\n","pipe['named_entity_recognizer_dl'].setBatchSize(8)   | Info: Batch size | Currently set to : 8\n","pipe['named_entity_recognizer_dl'].setDropout(0.5)   | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['named_entity_recognizer_dl'].setVerbose(0)     | Info: Level of verbosity during training | Currently set to : 0\n","pipe['named_entity_recognizer_dl'].setUseContrib(True)  | Info: whether to use contrib LSTM Cells. Not compatible with Windows. Might slightly improve accuracy. 
| Currently set to : True\n","pipe['named_entity_recognizer_dl'].setValidationSplit(0.0)  | Info: Choose the proportion of training dataset to be validated against the model on each Epoch. The value should be between 0.0 and 1.0 and by default it is 0.0 and off. | Currently set to : 0.0\n","pipe['named_entity_recognizer_dl'].setEvaluationLogExtended(False)  | Info: Choose the proportion of training dataset to be validated against the model on each Epoch. The value should be between 0.0 and 1.0 and by default it is 0.0 and off. | Currently set to : False\n","pipe['named_entity_recognizer_dl'].setIncludeConfidence(True)  | Info: whether to include confidence scores in annotation metadata | Currently set to : True\n","pipe['named_entity_recognizer_dl'].setEnableOutputLogs(False)  | Info: Whether to use stdout in addition to Spark logs. | Currently set to : False\n","pipe['named_entity_recognizer_dl'].setEnableMemoryOptimizer(False)  | Info: Whether to optimize for large datasets or not. Enabling this option can slow down training. | Currently set to : False\n",">>> pipe['pos'] has settable params:\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True)  | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True)       | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([])        | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False)  | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. 
| Currently set to : False\n","pipe['sentence_detector'].setMinLength(0)            | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999)        | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setIncludeStorage(True)         | Info: whether to include indexed storage in trained model | Currently set to : True\n","pipe['default_name'].setCaseSensitive(False)         | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['default_name'].setDimension(100)               | Info: Number of embedding dimensions | Currently set to : 100\n","pipe['default_name'].setStorageRef('glove_100d')     | Info: unique reference name for identification | Currently set to : glove_100d\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+')    | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"])  | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0)            | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999)        | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['NerToChunkConverter'] has settable params:\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')  | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"25RTuUXMFyEA"},"source":["# 4. Lets use BERT embeddings instead of the default Glove_100d ones!"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"QMxPpeiDGNVi","executionInfo":{"status":"ok","timestamp":1606720185740,"user_tz":-60,"elapsed":849801,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"cb469930-5b2a-4706-b4f4-3c931be59799"},"source":["# We can use nlu.print_components(action='embed') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language <en> NLU provides the following Models : \n","nlu.load('en.embed') returns Spark NLP model glove_100d\n","nlu.load('en.embed.glove') returns Spark NLP model glove_100d\n","nlu.load('en.embed.glove.100d') returns Spark NLP model glove_100d\n","nlu.load('en.embed.bert') returns Spark NLP model bert_base_uncased\n","nlu.load('en.embed.bert.base_uncased') returns Spark NLP model bert_base_uncased\n","nlu.load('en.embed.bert.base_cased') returns Spark NLP model bert_base_cased\n","nlu.load('en.embed.bert.large_uncased') returns Spark NLP model bert_large_uncased\n","nlu.load('en.embed.bert.large_cased') returns Spark NLP model bert_large_cased\n","nlu.load('en.embed.biobert') returns Spark NLP model biobert_pubmed_base_cased\n","nlu.load('en.embed.biobert.pubmed_base_cased') returns Spark NLP model biobert_pubmed_base_cased\n","nlu.load('en.embed.biobert.pubmed_large_cased') returns Spark NLP model biobert_pubmed_large_cased\n","nlu.load('en.embed.biobert.pmc_base_cased') returns Spark NLP model biobert_pmc_base_cased\n","nlu.load('en.embed.biobert.pubmed_pmc_base_cased') returns Spark NLP model biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed.biobert.clinical_base_cased') returns Spark NLP model biobert_clinical_base_cased\n","nlu.load('en.embed.biobert.discharge_base_cased') returns Spark NLP model biobert_discharge_base_cased\n","nlu.load('en.embed.elmo') returns Spark NLP model elmo\n","nlu.load('en.embed.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed.albert.base_uncased') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed.albert.large_uncased') returns Spark NLP model albert_large_uncased\n","nlu.load('en.embed.albert.xlarge_uncased') returns Spark NLP model 
albert_xlarge_uncased\n","nlu.load('en.embed.albert.xxlarge_uncased') returns Spark NLP model albert_xxlarge_uncased\n","nlu.load('en.embed.xlnet') returns Spark NLP model xlnet_base_cased\n","nlu.load('en.embed.xlnet_base_cased') returns Spark NLP model xlnet_base_cased\n","nlu.load('en.embed.xlnet_large_cased') returns Spark NLP model xlnet_large_cased\n","nlu.load('en.embed.electra') returns Spark NLP model electra_small_uncased\n","nlu.load('en.embed.electra.small_uncased') returns Spark NLP model electra_small_uncased\n","nlu.load('en.embed.electra.base_uncased') returns Spark NLP model electra_base_uncased\n","nlu.load('en.embed.electra.large_uncased') returns Spark NLP model electra_large_uncased\n","nlu.load('en.embed.covidbert') returns Spark NLP model covidbert_large_uncased\n","nlu.load('en.embed.covidbert.large_uncased') returns Spark NLP model covidbert_large_uncased\n","nlu.load('en.embed.bert.small_L2_128') returns Spark NLP model small_bert_L2_128\n","nlu.load('en.embed.bert.small_L4_128') returns Spark NLP model small_bert_L4_128\n","nlu.load('en.embed.bert.small_L6_128') returns Spark NLP model small_bert_L6_128\n","nlu.load('en.embed.bert.small_L8_128') returns Spark NLP model small_bert_L8_128\n","nlu.load('en.embed.bert.small_L10_128') returns Spark NLP model small_bert_L10_128\n","nlu.load('en.embed.bert.small_L12_128') returns Spark NLP model small_bert_L12_128\n","nlu.load('en.embed.bert.small_L2_256') returns Spark NLP model small_bert_L2_256\n","nlu.load('en.embed.bert.small_L4_256') returns Spark NLP model small_bert_L4_256\n","nlu.load('en.embed.bert.small_L6_256') returns Spark NLP model small_bert_L6_256\n","nlu.load('en.embed.bert.small_L8_256') returns Spark NLP model small_bert_L8_256\n","nlu.load('en.embed.bert.small_L10_256') returns Spark NLP model small_bert_L10_256\n","nlu.load('en.embed.bert.small_L12_256') returns Spark NLP model small_bert_L12_256\n","nlu.load('en.embed.bert.small_L2_512') returns Spark NLP model 
small_bert_L2_512\n","nlu.load('en.embed.bert.small_L4_512') returns Spark NLP model small_bert_L4_512\n","nlu.load('en.embed.bert.small_L6_512') returns Spark NLP model small_bert_L6_512\n","nlu.load('en.embed.bert.small_L8_512') returns Spark NLP model small_bert_L8_512\n","nlu.load('en.embed.bert.small_L10_512') returns Spark NLP model small_bert_L10_512\n","nlu.load('en.embed.bert.small_L12_512') returns Spark NLP model small_bert_L12_512\n","nlu.load('en.embed.bert.small_L2_768') returns Spark NLP model small_bert_L2_768\n","nlu.load('en.embed.bert.small_L4_768') returns Spark NLP model small_bert_L4_768\n","nlu.load('en.embed.bert.small_L6_768') returns Spark NLP model small_bert_L6_768\n","nlu.load('en.embed.bert.small_L8_768') returns Spark NLP model small_bert_L8_768\n","nlu.load('en.embed.bert.small_L10_768') returns Spark NLP model small_bert_L10_768\n","nlu.load('en.embed.bert.small_L12_768') returns Spark NLP model small_bert_L12_768\n","For language <fi> NLU provides the following Models : \n","nlu.load('fi.embed.bert.') returns Spark NLP model bert_finnish_cased\n","nlu.load('fi.embed.bert.cased.') returns Spark NLP model bert_finnish_cased\n","nlu.load('fi.embed.bert.uncased.') returns Spark NLP model bert_finnish_uncased\n","For language <xx> NLU provides the following Models : \n","nlu.load('xx.embed') returns Spark NLP model glove_840B_300\n","nlu.load('xx.embed.glove.840B_300') returns Spark NLP model glove_840B_300\n","nlu.load('xx.embed.glove.6B_300') returns Spark NLP model glove_6B_300\n","nlu.load('xx.embed.bert_multi_cased') returns Spark NLP model bert_multi_cased\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":244},"id":"Xz7xnvbCFxE3","executionInfo":{"status":"ok","timestamp":1606721019713,"user_tz":-60,"elapsed":1683756,"user":{"displayName":"Christian Kasim 
Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"b8b15ce7-149e-427a-9aad-37d76e074154"},"source":["# Add bert word embeddings to pipe \n","fitted_pipe = nlu.load('bert train.ner').fit(dataset_path=train_path)\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict('Donald Trump and Angela Merkel dont share many oppinions')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["small_bert_L2_128 download started this may take some time.\n","Approximate size to download 16.1 MB\n","[OK!]\n","pos_anc download started this may take some time.\n","Approximate size to download 4.3 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>bert_embeddings</th>\n","      <th>pos</th>\n","      <th>entities_confidence</th>\n","      <th>ner_confidence</th>\n","      <th>entities</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>[[-0.447601854801178, 1.0348625183105469, 0.51...</td>\n","      <td>[NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS]</td>\n","      <td>PER</td>\n","      <td>[0.7784000039100647, 0.9710999727249146, 0.997...</td>\n","      <td>Donald Trump</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>[[-0.447601854801178, 
1.0348625183105469, 0.51...</td>\n","      <td>[NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS]</td>\n","      <td>PER</td>\n","      <td>[0.7784000039100647, 0.9710999727249146, 0.997...</td>\n","      <td>Angela Merkel dont</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                                bert_embeddings  ...            entities\n","origin_index                                                     ...                    \n","0             [[-0.447601854801178, 1.0348625183105469, 0.51...  ...        Donald Trump\n","0             [[-0.447601854801178, 1.0348625183105469, 0.51...  ...  Angela Merkel dont\n","\n","[2 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1606721039475,"user_tz":-60,"elapsed":1703498,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"b4b22c80-3318-4070-e516-36847a66e88e"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible!   
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":137},"executionInfo":{"status":"ok","timestamp":1606721049691,"user_tz":-60,"elapsed":1713703,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"cdd160bf-462e-4dbe-b618-db259feb3987"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Donald Trump and Angela Merkel dont share many oppinions on laws about cheeseburgers')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>bert_embeddings</th>\n","      <th>pos</th>\n","      <th>entities_confidence</th>\n","      <th>ner_confidence</th>\n","      <th>entities</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>[[-0.6870577335357666, 1.1118954420089722, 0.5...</td>\n","      <td>[NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS, IN, ...</td>\n","      <td>PER</td>\n","      <td>[0.7975000143051147, 0.9343000054359436, 0.995...</td>\n","      <td>Donald Trump</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>[[-0.6870577335357666, 1.1118954420089722, 0.5...</td>\n","      
<td>[NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS, IN, ...</td>\n","      <td>PER</td>\n","      <td>[0.7975000143051147, 0.9343000054359436, 0.995...</td>\n","      <td>Angela Merkel dont</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                                bert_embeddings  ...            entities\n","origin_index                                                     ...                    \n","0             [[-0.6870577335357666, 1.1118954420089722, 0.5...  ...        Donald Trump\n","0             [[-0.6870577335357666, 1.1118954420089722, 0.5...  ...  Angela Merkel dont\n","\n","[2 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1606721049695,"user_tz":-60,"elapsed":1713695,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"90c19529-41ab-4533-fba6-6107dac7c23e"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')    | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])          | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)         | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False)   | Info: whether to 
explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)          | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)              | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)    | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')        | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)            | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)                | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentiment_dl'] has settable params:\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32)                         | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False)                  | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(128)                        | Info: Number of embedding dimensions | Currently set to : 128\n","pipe['glove'].setMaxSentenceLength(128)                | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setStorageRef('small_bert_L2_128')       | Info: unique reference name for identification | Currently set to : small_bert_L2_128\n",">>> pipe['named_entity_recognizer_dl'] has settable params:\n","pipe['named_entity_recognizer_dl'].setIncludeConfidence(True)  | Info: whether to include confidence scores in annotation metadata | Currently set to : True\n","pipe['named_entity_recognizer_dl'].setBatchSize(8)     | Info: Size of every batch. | Currently set to : 8\n","pipe['named_entity_recognizer_dl'].setClasses(['O', 'B-ORG', 'I-ORG', 'I-MISC', 'I-PER', 'B-LOC', 'B-MISC', 'I-LOC'])  | Info: get the tags used to trained this NerDLModel | Currently set to : ['O', 'B-ORG', 'I-ORG', 'I-MISC', 'I-PER', 'B-LOC', 'B-MISC', 'I-LOC']\n","pipe['named_entity_recognizer_dl'].setStorageRef('small_bert_L2_128')  | Info: unique reference name for identification | Currently set to : small_bert_L2_128\n",">>> pipe['NerToChunkConverter'] has settable params:\n","pipe['NerToChunkConverter'].setPreservePosition(True)  | Info: Whether to preserve the original position of the tokens in the original document or use the modified tokens | Currently set to : True\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"o3jCHbIsMZrn","colab":{"base_uri":"https://localhost:8080/","height":299},"executionInfo":{"status":"error","timestamp":1607488227274,"user_tz":-60,"elapsed":919,"user":{"displayName":"Christian Kasim 
Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"f00d563f-c7ea-4f33-85d7-b9d45f35b1f0"},"source":["from varname import nameof\n"],"execution_count":null,"outputs":[{"output_type":"error","ename":"ModuleNotFoundError","evalue":"ignored","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)","\u001b[0;32m<ipython-input-1-5b67bbfd897c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mvarname\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnameof\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m","\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'varname'","","\u001b[0;31m---------------------------------------------------------------------------\u001b[0;32m\nNOTE: If your import is failing due to a missing package, you can\nmanually install dependencies using either !pip or !apt.\n\nTo view examples of installing some common dependencies, click the\n\"Open Examples\" button below.\n\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n"]}]},{"cell_type":"code","metadata":{"id":"USD6d66Sw6_P"},"source":[""],"execution_count":null,"outputs":[]}]}
\ No newline at end of file
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_NER_demo.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/named_entity_recognition/NLU_training_NER_demo.ipynb)\n","\n","\n","\n","# Training a Named Entity Recognition (NER) model with NLU \n","With the [NER_DL model](https://nlp.johnsnowlabs.com/docs/en/annotators#ner-dl-named-entity-recognition-deep-learning-annotator) from Spark NLP you can achieve State Of the Art results on any NER problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu > /dev/null\n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download conll2003 dataset"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1606719395651,"user_tz":-60,"elapsed":59783,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"29d77c28-a56f-4e18-afab-0b23b69a7b5c"},"source":["! wget https://github.com/patverga/torch-ner-nlp-from-scratch/raw/master/data/conll2003/eng.train"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2020-11-30 06:56:34--  https://github.com/patverga/torch-ner-nlp-from-scratch/raw/master/data/conll2003/eng.train\n","Resolving github.com (github.com)... 140.82.121.4\n","Connecting to github.com (github.com)|140.82.121.4|:443... connected.\n","HTTP request sent, awaiting response... 302 Found\n","Location: https://raw.githubusercontent.com/patverga/torch-ner-nlp-from-scratch/master/data/conll2003/eng.train [following]\n","--2020-11-30 06:56:34--  https://raw.githubusercontent.com/patverga/torch-ner-nlp-from-scratch/master/data/conll2003/eng.train\n","Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n","Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 3283420 (3.1M) [text/plain]\n","Saving to: ‘eng.train’\n","\n","eng.train           100%[===================>]   3.13M  --.-KB/s    in 0.06s   \n","\n","2020-11-30 06:56:35 (55.8 MB/s) - ‘eng.train’ saved [3283420/3283420]\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. 
Train Deep Learning Classifier using nlu.load('train.ner')\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":244},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1606720185735,"user_tz":-60,"elapsed":849845,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"785fa952-bce5-4ba6-9647-0dc4db57bab0"},"source":["import nlu\n","# load a trainable pipeline by specifying the train. prefix and fit it on the training data\n","# Since there are no 'y' and 'text' columns in a CoNLL file, its path is passed via dataset_path\n","train_path = '/content/eng.train'\n","trainable_pipe = nlu.load('train.ner')\n","fitted_pipe = trainable_pipe.fit(dataset_path=train_path)\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict('Donald Trump and Angela Merkel dont share many oppinions')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["pos_anc download started this may take some time.\n","Approximate size to download 4.3 MB\n","[OK!]\n","glove_100d download started this may take some time.\n","Approximate size to download 145.3 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>pos</th>\n","      <th>entities</th>\n","      <th>entities_confidence</th>\n","      <th>ner_confidence</th>\n","      
<th>default_name_embeddings</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>[NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS]</td>\n","      <td>Donald Trump</td>\n","      <td>PER</td>\n","      <td>[0.9993000030517578, 0.9976000189781189, 0.999...</td>\n","      <td>[[-0.5496799945831299, -0.488319993019104, 0.5...</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>[NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS]</td>\n","      <td>Angela Merkel</td>\n","      <td>PER</td>\n","      <td>[0.9993000030517578, 0.9976000189781189, 0.999...</td>\n","      <td>[[-0.5496799945831299, -0.488319993019104, 0.5...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                                    pos  ...                            default_name_embeddings\n","origin_index                                             ...                                                   \n","0             [NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS]  ...  [[-0.5496799945831299, -0.488319993019104, 0.5...\n","0             [NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS]  ...  
[[-0.5496799945831299, -0.488319993019104, 0.5...\n","\n","[2 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"owFhjKqzQiv5","executionInfo":{"status":"ok","timestamp":1606720185739,"user_tz":-60,"elapsed":849824,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"fcfc6b10-79c7-453c-f2af-b4d2622d4e91"},"source":["# Check out the Parameters of the NER model we can configure\n","trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['named_entity_recognizer_dl'] has settable params:\n","pipe['named_entity_recognizer_dl'].setMinEpochs(0)   | Info: Minimum number of epochs to train | Currently set to : 0\n","pipe['named_entity_recognizer_dl'].setMaxEpochs(2)   | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['named_entity_recognizer_dl'].setLr(0.001)      | Info: Learning Rate | Currently set to : 0.001\n","pipe['named_entity_recognizer_dl'].setPo(0.005)      | Info: Learning rate decay coefficient. Real Learning Rage = lr / (1 + po * epoch) | Currently set to : 0.005\n","pipe['named_entity_recognizer_dl'].setBatchSize(8)   | Info: Batch size | Currently set to : 8\n","pipe['named_entity_recognizer_dl'].setDropout(0.5)   | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['named_entity_recognizer_dl'].setVerbose(0)     | Info: Level of verbosity during training | Currently set to : 0\n","pipe['named_entity_recognizer_dl'].setUseContrib(True)  | Info: whether to use contrib LSTM Cells. Not compatible with Windows. Might slightly improve accuracy. 
| Currently set to : True\n","pipe['named_entity_recognizer_dl'].setValidationSplit(0.0)  | Info: Choose the proportion of training dataset to be validated against the model on each Epoch. The value should be between 0.0 and 1.0 and by default it is 0.0 and off. | Currently set to : 0.0\n","pipe['named_entity_recognizer_dl'].setEvaluationLogExtended(False)  | Info: Choose the proportion of training dataset to be validated against the model on each Epoch. The value should be between 0.0 and 1.0 and by default it is 0.0 and off. | Currently set to : False\n","pipe['named_entity_recognizer_dl'].setIncludeConfidence(True)  | Info: whether to include confidence scores in annotation metadata | Currently set to : True\n","pipe['named_entity_recognizer_dl'].setEnableOutputLogs(False)  | Info: Whether to use stdout in addition to Spark logs. | Currently set to : False\n","pipe['named_entity_recognizer_dl'].setEnableMemoryOptimizer(False)  | Info: Whether to optimize for large datasets or not. Enabling this option can slow down training. | Currently set to : False\n",">>> pipe['pos'] has settable params:\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True)  | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True)       | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([])        | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False)  | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. 
| Currently set to : False\n","pipe['sentence_detector'].setMinLength(0)            | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999)        | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setIncludeStorage(True)         | Info: whether to include indexed storage in trained model | Currently set to : True\n","pipe['default_name'].setCaseSensitive(False)         | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['default_name'].setDimension(100)               | Info: Number of embedding dimensions | Currently set to : 100\n","pipe['default_name'].setStorageRef('glove_100d')     | Info: unique reference name for identification | Currently set to : glove_100d\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+')    | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"])  | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0)            | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999)        | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['NerToChunkConverter'] has settable params:\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')  | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"25RTuUXMFyEA"},"source":["# 4. Let's use BERT embeddings instead of the default Glove_100d ones!"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"QMxPpeiDGNVi","executionInfo":{"status":"ok","timestamp":1606720185740,"user_tz":-60,"elapsed":849801,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"cb469930-5b2a-4706-b4f4-3c931be59799"},"source":["# We can use nlu.print_components(action='embed') to see every possible embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language <en> NLU provides the following Models : \n","nlu.load('en.embed') returns Spark NLP model glove_100d\n","nlu.load('en.embed.glove') returns Spark NLP model glove_100d\n","nlu.load('en.embed.glove.100d') returns Spark NLP model glove_100d\n","nlu.load('en.embed.bert') returns Spark NLP model bert_base_uncased\n","nlu.load('en.embed.bert.base_uncased') returns Spark NLP model bert_base_uncased\n","nlu.load('en.embed.bert.base_cased') returns Spark NLP model bert_base_cased\n","nlu.load('en.embed.bert.large_uncased') returns Spark NLP model bert_large_uncased\n","nlu.load('en.embed.bert.large_cased') returns Spark NLP model bert_large_cased\n","nlu.load('en.embed.biobert') returns Spark NLP model biobert_pubmed_base_cased\n","nlu.load('en.embed.biobert.pubmed_base_cased') returns Spark NLP model biobert_pubmed_base_cased\n","nlu.load('en.embed.biobert.pubmed_large_cased') returns Spark NLP model biobert_pubmed_large_cased\n","nlu.load('en.embed.biobert.pmc_base_cased') returns Spark NLP model biobert_pmc_base_cased\n","nlu.load('en.embed.biobert.pubmed_pmc_base_cased') returns Spark NLP model biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed.biobert.clinical_base_cased') returns Spark NLP model biobert_clinical_base_cased\n","nlu.load('en.embed.biobert.discharge_base_cased') returns Spark NLP model biobert_discharge_base_cased\n","nlu.load('en.embed.elmo') returns Spark NLP model elmo\n","nlu.load('en.embed.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed.albert.base_uncased') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed.albert.large_uncased') returns Spark NLP model albert_large_uncased\n","nlu.load('en.embed.albert.xlarge_uncased') returns Spark NLP model 
albert_xlarge_uncased\n","nlu.load('en.embed.albert.xxlarge_uncased') returns Spark NLP model albert_xxlarge_uncased\n","nlu.load('en.embed.xlnet') returns Spark NLP model xlnet_base_cased\n","nlu.load('en.embed.xlnet_base_cased') returns Spark NLP model xlnet_base_cased\n","nlu.load('en.embed.xlnet_large_cased') returns Spark NLP model xlnet_large_cased\n","nlu.load('en.embed.electra') returns Spark NLP model electra_small_uncased\n","nlu.load('en.embed.electra.small_uncased') returns Spark NLP model electra_small_uncased\n","nlu.load('en.embed.electra.base_uncased') returns Spark NLP model electra_base_uncased\n","nlu.load('en.embed.electra.large_uncased') returns Spark NLP model electra_large_uncased\n","nlu.load('en.embed.covidbert') returns Spark NLP model covidbert_large_uncased\n","nlu.load('en.embed.covidbert.large_uncased') returns Spark NLP model covidbert_large_uncased\n","nlu.load('en.embed.bert.small_L2_128') returns Spark NLP model small_bert_L2_128\n","nlu.load('en.embed.bert.small_L4_128') returns Spark NLP model small_bert_L4_128\n","nlu.load('en.embed.bert.small_L6_128') returns Spark NLP model small_bert_L6_128\n","nlu.load('en.embed.bert.small_L8_128') returns Spark NLP model small_bert_L8_128\n","nlu.load('en.embed.bert.small_L10_128') returns Spark NLP model small_bert_L10_128\n","nlu.load('en.embed.bert.small_L12_128') returns Spark NLP model small_bert_L12_128\n","nlu.load('en.embed.bert.small_L2_256') returns Spark NLP model small_bert_L2_256\n","nlu.load('en.embed.bert.small_L4_256') returns Spark NLP model small_bert_L4_256\n","nlu.load('en.embed.bert.small_L6_256') returns Spark NLP model small_bert_L6_256\n","nlu.load('en.embed.bert.small_L8_256') returns Spark NLP model small_bert_L8_256\n","nlu.load('en.embed.bert.small_L10_256') returns Spark NLP model small_bert_L10_256\n","nlu.load('en.embed.bert.small_L12_256') returns Spark NLP model small_bert_L12_256\n","nlu.load('en.embed.bert.small_L2_512') returns Spark NLP model 
small_bert_L2_512\n","nlu.load('en.embed.bert.small_L4_512') returns Spark NLP model small_bert_L4_512\n","nlu.load('en.embed.bert.small_L6_512') returns Spark NLP model small_bert_L6_512\n","nlu.load('en.embed.bert.small_L8_512') returns Spark NLP model small_bert_L8_512\n","nlu.load('en.embed.bert.small_L10_512') returns Spark NLP model small_bert_L10_512\n","nlu.load('en.embed.bert.small_L12_512') returns Spark NLP model small_bert_L12_512\n","nlu.load('en.embed.bert.small_L2_768') returns Spark NLP model small_bert_L2_768\n","nlu.load('en.embed.bert.small_L4_768') returns Spark NLP model small_bert_L4_768\n","nlu.load('en.embed.bert.small_L6_768') returns Spark NLP model small_bert_L6_768\n","nlu.load('en.embed.bert.small_L8_768') returns Spark NLP model small_bert_L8_768\n","nlu.load('en.embed.bert.small_L10_768') returns Spark NLP model small_bert_L10_768\n","nlu.load('en.embed.bert.small_L12_768') returns Spark NLP model small_bert_L12_768\n","For language <fi> NLU provides the following Models : \n","nlu.load('fi.embed.bert.') returns Spark NLP model bert_finnish_cased\n","nlu.load('fi.embed.bert.cased.') returns Spark NLP model bert_finnish_cased\n","nlu.load('fi.embed.bert.uncased.') returns Spark NLP model bert_finnish_uncased\n","For language <xx> NLU provides the following Models : \n","nlu.load('xx.embed') returns Spark NLP model glove_840B_300\n","nlu.load('xx.embed.glove.840B_300') returns Spark NLP model glove_840B_300\n","nlu.load('xx.embed.glove.6B_300') returns Spark NLP model glove_6B_300\n","nlu.load('xx.embed.bert_multi_cased') returns Spark NLP model bert_multi_cased\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":244},"id":"Xz7xnvbCFxE3","executionInfo":{"status":"ok","timestamp":1606721019713,"user_tz":-60,"elapsed":1683756,"user":{"displayName":"Christian Kasim 
Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"b8b15ce7-149e-427a-9aad-37d76e074154"},"source":["# Add bert word embeddings to pipe \n","fitted_pipe = nlu.load('bert train.ner').fit(dataset_path=train_path)\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict('Donald Trump and Angela Merkel dont share many oppinions')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["small_bert_L2_128 download started this may take some time.\n","Approximate size to download 16.1 MB\n","[OK!]\n","pos_anc download started this may take some time.\n","Approximate size to download 4.3 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>bert_embeddings</th>\n","      <th>pos</th>\n","      <th>entities_confidence</th>\n","      <th>ner_confidence</th>\n","      <th>entities</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>[[-0.447601854801178, 1.0348625183105469, 0.51...</td>\n","      <td>[NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS]</td>\n","      <td>PER</td>\n","      <td>[0.7784000039100647, 0.9710999727249146, 0.997...</td>\n","      <td>Donald Trump</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>[[-0.447601854801178, 
1.0348625183105469, 0.51...</td>\n","      <td>[NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS]</td>\n","      <td>PER</td>\n","      <td>[0.7784000039100647, 0.9710999727249146, 0.997...</td>\n","      <td>Angela Merkel dont</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                                bert_embeddings  ...            entities\n","origin_index                                                     ...                    \n","0             [[-0.447601854801178, 1.0348625183105469, 0.51...  ...        Donald Trump\n","0             [[-0.447601854801178, 1.0348625183105469, 0.51...  ...  Angela Merkel dont\n","\n","[2 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Let's save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1606721039475,"user_tz":-60,"elapsed":1703498,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"b4b22c80-3318-4070-e516-36847a66e88e"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Let's load the model from HDD.\n","This makes Offline NLU usage possible!   
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":137},"executionInfo":{"status":"ok","timestamp":1606721049691,"user_tz":-60,"elapsed":1713703,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"cdd160bf-462e-4dbe-b618-db259feb3987"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Donald Trump and Angela Merkel dont share many oppinions on laws about cheeseburgers')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>bert_embeddings</th>\n","      <th>pos</th>\n","      <th>entities_confidence</th>\n","      <th>ner_confidence</th>\n","      <th>entities</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>[[-0.6870577335357666, 1.1118954420089722, 0.5...</td>\n","      <td>[NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS, IN, ...</td>\n","      <td>PER</td>\n","      <td>[0.7975000143051147, 0.9343000054359436, 0.995...</td>\n","      <td>Donald Trump</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>[[-0.6870577335357666, 1.1118954420089722, 0.5...</td>\n","      
<td>[NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS, IN, ...</td>\n","      <td>PER</td>\n","      <td>[0.7975000143051147, 0.9343000054359436, 0.995...</td>\n","      <td>Angela Merkel dont</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                                bert_embeddings  ...            entities\n","origin_index                                                     ...                    \n","0             [[-0.6870577335357666, 1.1118954420089722, 0.5...  ...        Donald Trump\n","0             [[-0.6870577335357666, 1.1118954420089722, 0.5...  ...  Angela Merkel dont\n","\n","[2 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1606721049695,"user_tz":-60,"elapsed":1713695,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"90c19529-41ab-4533-fba6-6107dac7c23e"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')    | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])          | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)         | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False)   | Info: whether to 
explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)          | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)              | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)    | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')        | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)            | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)                | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentiment_dl'] has settable params:\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32)                         | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False)                  | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(128)                        | Info: Number of embedding dimensions | Currently set to : 128\n","pipe['glove'].setMaxSentenceLength(128)                | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setStorageRef('small_bert_L2_128')       | Info: unique reference name for identification | Currently set to : small_bert_L2_128\n",">>> pipe['named_entity_recognizer_dl'] has settable params:\n","pipe['named_entity_recognizer_dl'].setIncludeConfidence(True)  | Info: whether to include confidence scores in annotation metadata | Currently set to : True\n","pipe['named_entity_recognizer_dl'].setBatchSize(8)     | Info: Size of every batch. | Currently set to : 8\n","pipe['named_entity_recognizer_dl'].setClasses(['O', 'B-ORG', 'I-ORG', 'I-MISC', 'I-PER', 'B-LOC', 'B-MISC', 'I-LOC'])  | Info: get the tags used to trained this NerDLModel | Currently set to : ['O', 'B-ORG', 'I-ORG', 'I-MISC', 'I-PER', 'B-LOC', 'B-MISC', 'I-LOC']\n","pipe['named_entity_recognizer_dl'].setStorageRef('small_bert_L2_128')  | Info: unique reference name for identification | Currently set to : small_bert_L2_128\n",">>> pipe['NerToChunkConverter'] has settable params:\n","pipe['NerToChunkConverter'].setPreservePosition(True)  | Info: Whether to preserve the original position of the tokens in the original document or use the modified tokens | Currently set to : True\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"o3jCHbIsMZrn","colab":{"base_uri":"https://localhost:8080/","height":299},"executionInfo":{"status":"error","timestamp":1607488227274,"user_tz":-60,"elapsed":919,"user":{"displayName":"Christian Kasim 
Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"f00d563f-c7ea-4f33-85d7-b9d45f35b1f0"},"source":["from varname import nameof\n"],"execution_count":null,"outputs":[{"output_type":"error","ename":"ModuleNotFoundError","evalue":"ignored","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)","\u001b[0;32m<ipython-input-1-5b67bbfd897c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mvarname\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnameof\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m","\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'varname'","","\u001b[0;31m---------------------------------------------------------------------------\u001b[0;32m\nNOTE: If your import is failing due to a missing package, you can\nmanually install dependencies using either !pip or !apt.\n\nTo view examples of installing some common dependencies, click the\n\"Open Examples\" button below.\n\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n"]}]},{"cell_type":"code","metadata":{"id":"USD6d66Sw6_P"},"source":[""],"execution_count":null,"outputs":[]}]}
\ No newline at end of file
diff --git a/examples/colab/Training/part_of_speech/NLU_training_POS_demo.ipynb b/examples/colab/Training/part_of_speech/NLU_training_POS_demo.ipynb
index 8a736050..a6a23e1a 100644
--- a/examples/colab/Training/part_of_speech/NLU_training_POS_demo.ipynb
+++ b/examples/colab/Training/part_of_speech/NLU_training_POS_demo.ipynb
@@ -1 +1 @@
-{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_POS_demo.ipynb","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyNu3YIWBC0UgJMtKu9Hxgqm"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/collab/Training/part_of_speech/NLU_training_demo.ipynb)\n","\n","\n","\n","# Training a Named Entity Recognition (POS) model with NLU \n","With the [POS tagger](https://nlp.johnsnowlabs.com/docs/en/annotators#postagger-part-of-speech-tagger) from Spark NLP you can achieve State Of the Art results on any POS problem.\n","It uses an Averaged Percetron Model approach under the hood.\n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning POS classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu  pyspark==2.4.7  > /dev/null \n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"IWp5LbydCkqC"},"source":[""]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download French POS dataset"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1607932039873,"user_tz":-60,"elapsed":80981,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"76f3b769-a646-444b-fdfc-d764d4b74e45"},"source":["! wget https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/fr/pos/UD_French/UD_French-GSD_2.3.txt"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2020-12-14 07:47:19--  https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/fr/pos/UD_French/UD_French-GSD_2.3.txt\n","Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.143.238\n","Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.143.238|:443... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 3565213 (3.4M) [text/plain]\n","Saving to: ‘UD_French-GSD_2.3.txt’\n","\n","UD_French-GSD_2.3.t 100%[===================>]   3.40M  15.8MB/s    in 0.2s    \n","\n","2020-12-14 07:47:19 (15.8 MB/s) - ‘UD_French-GSD_2.3.txt’ saved [3565213/3565213]\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. 
Train Deep Learning Classifier using nlu.load('train.pos')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1607932112061,"user_tz":-60,"elapsed":153158,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"c6032381-0446-484a-8c4e-0ad9fc500c48"},"source":["import nlu\n","# load a trainable pipeline by specifying the train. prefix  and fit it on a datset with label and text columns\n","# Since there are no\n","train_path = '/content/UD_French-GSD_2.3.txt'\n","trainable_pipe = nlu.load('train.pos')\n","fitted_pipe = trainable_pipe.fit(dataset_path=train_path)\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict('Donald Trump and Angela Merkel dont share many oppinions')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>token</th>\n","      <th>pos</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Donald</td>\n","      <td>PROPN</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>Trump</td>\n","      <td>PROPN</td>\n","    </tr>\n","    <tr>\n","     
 <th>0</th>\n","      <td>and</td>\n","      <td>CCONJ</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>Angela</td>\n","      <td>PROPN</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>Merkel</td>\n","      <td>PROPN</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>dont</td>\n","      <td>PRON</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>share</td>\n","      <td>VERB</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>many</td>\n","      <td>ADJ</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>oppinions</td>\n","      <td>NOUN</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                  token    pos\n","origin_index                  \n","0                Donald  PROPN\n","0                 Trump  PROPN\n","0                   and  CCONJ\n","0                Angela  PROPN\n","0                Merkel  PROPN\n","0                  dont   PRON\n","0                 share   VERB\n","0                  many    ADJ\n","0             oppinions   NOUN"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 4. Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607932114637,"user_tz":-60,"elapsed":155726,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"24d34ea2-dcc1-42b2-a5c6-10d345b76a3c"},"source":["stored_model_path = './models/pos_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/pos_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 5. 
Lets load the model from HDD.\n","This makes Offlien NLU usage possible!   \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":485},"executionInfo":{"status":"ok","timestamp":1607932120301,"user_tz":-60,"elapsed":161383,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"db790b35-a51d-4226-8a0b-bb3e9e39e368"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Donald Trump and Angela Merkel dont share many oppinions on laws about cheeseburgers')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>token</th>\n","      <th>pos</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Donald</td>\n","      <td>PROPN</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>Trump</td>\n","      <td>PROPN</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>and</td>\n","      <td>CCONJ</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>Angela</td>\n","      <td>PROPN</td>\n","    </tr>\n","    
<tr>\n","      <th>0</th>\n","      <td>Merkel</td>\n","      <td>PROPN</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>dont</td>\n","      <td>PRON</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>share</td>\n","      <td>VERB</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>many</td>\n","      <td>ADJ</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>oppinions</td>\n","      <td>NOUN</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>on</td>\n","      <td>PRON</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>laws</td>\n","      <td>VERB</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>about</td>\n","      <td>ADV</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>cheeseburgers</td>\n","      <td>NOUN</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                      token    pos\n","origin_index                      \n","0                    Donald  PROPN\n","0                     Trump  PROPN\n","0                       and  CCONJ\n","0                    Angela  PROPN\n","0                    Merkel  PROPN\n","0                      dont   PRON\n","0                     share   VERB\n","0                      many    ADJ\n","0                 oppinions   NOUN\n","0                        on   PRON\n","0                      laws   VERB\n","0                     about    ADV\n","0             cheeseburgers   NOUN"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607932120301,"user_tz":-60,"elapsed":161374,"user":{"displayName":"Christian Kasim 
Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"6bb7769e-f545-40b8-f0ef-90fd9f32c149"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')  | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])  | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)  | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False)  | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)  | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)  | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)  | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')  | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)  | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)  | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentiment_dl'] has settable params:\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"o3jCHbIsMZrn"},"source":[""],"execution_count":null,"outputs":[]}]}
\ No newline at end of file
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_POS_demo.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/part_of_speech/NLU_training_POS_demo.ipynb)\n","\n","\n","\n","# Training a Part of Speech (POS) model with NLU \n","With the [POS tagger](https://nlp.johnsnowlabs.com/docs/en/annotators#postagger-part-of-speech-tagger) from Spark NLP you can achieve State Of the Art results on any POS problem.\n","It uses an Averaged Perceptron Model approach under the hood.\n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning POS classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","! apt-get update -qq > /dev/null   \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu  pyspark==2.4.7  > /dev/null \n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"IWp5LbydCkqC"},"source":[""]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download French POS dataset"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1607932039873,"user_tz":-60,"elapsed":80981,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"76f3b769-a646-444b-fdfc-d764d4b74e45"},"source":["! wget https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/fr/pos/UD_French/UD_French-GSD_2.3.txt"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2020-12-14 07:47:19--  https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/fr/pos/UD_French/UD_French-GSD_2.3.txt\n","Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.143.238\n","Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.143.238|:443... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 3565213 (3.4M) [text/plain]\n","Saving to: ‘UD_French-GSD_2.3.txt’\n","\n","UD_French-GSD_2.3.t 100%[===================>]   3.40M  15.8MB/s    in 0.2s    \n","\n","2020-12-14 07:47:19 (15.8 MB/s) - ‘UD_French-GSD_2.3.txt’ saved [3565213/3565213]\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. 
Train Deep Learning Classifier using nlu.load('train.pos')\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1607932112061,"user_tz":-60,"elapsed":153158,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"c6032381-0446-484a-8c4e-0ad9fc500c48"},"source":["import nlu\n","# load a trainable pipeline by specifying the train. prefix  and fit it on a dataset with label and text columns\n","# Since there are no\n","train_path = '/content/UD_French-GSD_2.3.txt'\n","trainable_pipe = nlu.load('train.pos')\n","fitted_pipe = trainable_pipe.fit(dataset_path=train_path)\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict('Donald Trump and Angela Merkel dont share many oppinions')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>token</th>\n","      <th>pos</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Donald</td>\n","      <td>PROPN</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>Trump</td>\n","      <td>PROPN</td>\n","    </tr>\n","    <tr>\n","     
 <th>0</th>\n","      <td>and</td>\n","      <td>CCONJ</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>Angela</td>\n","      <td>PROPN</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>Merkel</td>\n","      <td>PROPN</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>dont</td>\n","      <td>PRON</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>share</td>\n","      <td>VERB</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>many</td>\n","      <td>ADJ</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>oppinions</td>\n","      <td>NOUN</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                  token    pos\n","origin_index                  \n","0                Donald  PROPN\n","0                 Trump  PROPN\n","0                   and  CCONJ\n","0                Angela  PROPN\n","0                Merkel  PROPN\n","0                  dont   PRON\n","0                 share   VERB\n","0                  many    ADJ\n","0             oppinions   NOUN"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 4. Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607932114637,"user_tz":-60,"elapsed":155726,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"24d34ea2-dcc1-42b2-a5c6-10d345b76a3c"},"source":["stored_model_path = './models/pos_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/pos_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 5. 
Lets load the model from HDD.\n","This makes Offline NLU usage possible!   \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":485},"executionInfo":{"status":"ok","timestamp":1607932120301,"user_tz":-60,"elapsed":161383,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"db790b35-a51d-4226-8a0b-bb3e9e39e368"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Donald Trump and Angela Merkel dont share many oppinions on laws about cheeseburgers')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>token</th>\n","      <th>pos</th>\n","    </tr>\n","    <tr>\n","      <th>origin_index</th>\n","      <th></th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Donald</td>\n","      <td>PROPN</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>Trump</td>\n","      <td>PROPN</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>and</td>\n","      <td>CCONJ</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>Angela</td>\n","      <td>PROPN</td>\n","    </tr>\n","    
<tr>\n","      <th>0</th>\n","      <td>Merkel</td>\n","      <td>PROPN</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>dont</td>\n","      <td>PRON</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>share</td>\n","      <td>VERB</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>many</td>\n","      <td>ADJ</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>oppinions</td>\n","      <td>NOUN</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>on</td>\n","      <td>PRON</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>laws</td>\n","      <td>VERB</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>about</td>\n","      <td>ADV</td>\n","    </tr>\n","    <tr>\n","      <th>0</th>\n","      <td>cheeseburgers</td>\n","      <td>NOUN</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                      token    pos\n","origin_index                      \n","0                    Donald  PROPN\n","0                     Trump  PROPN\n","0                       and  CCONJ\n","0                    Angela  PROPN\n","0                    Merkel  PROPN\n","0                      dont   PRON\n","0                     share   VERB\n","0                      many    ADJ\n","0                 oppinions   NOUN\n","0                        on   PRON\n","0                      laws   VERB\n","0                     about    ADV\n","0             cheeseburgers   NOUN"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607932120301,"user_tz":-60,"elapsed":161374,"user":{"displayName":"Christian Kasim 
Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"6bb7769e-f545-40b8-f0ef-90fd9f32c149"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink')  | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([])  | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True)  | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False)  | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999)  | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0)  | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True)  | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False)  | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True)  | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+')  | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999)  | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0)  | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentiment_dl'] has settable params:\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"o3jCHbIsMZrn"},"source":[""],"execution_count":null,"outputs":[]}]}
\ No newline at end of file