From 5c250ed748cc5cafb14602fc86621d3157879442 Mon Sep 17 00:00:00 2001 From: C-K-Loan Date: Thu, 21 Jan 2021 05:18:36 +0100 Subject: [PATCH] notebook link updates --- .../NLU_training_sentiment_classifier_demo.ipynb | 2 +- .../NLU_training_sentiment_classifier_demo_IMDB.ipynb | 2 +- .../NLU_training_sentiment_classifier_demo_apple_twitter.ipynb | 2 +- .../NLU_training_sentiment_classifier_demo_finanical_news.ipynb | 2 +- .../NLU_training_sentiment_classifier_demo_reddit.ipynb | 2 +- .../NLU_training_sentiment_classifier_demo_twitter.ipynb | 2 +- .../NLU_training_multi_class_text_classifier_demo.ipynb | 2 +- .../NLU_training_multi_class_text_classifier_demo_amazon.ipynb | 2 +- ...raining_multi_class_text_classifier_demo_hotel_reviews.ipynb | 2 +- ...g_multi_class_text_classifier_demo_musical_instruments.ipynb | 2 +- .../NLU_training_multi_class_text_classifier_demo_wine.ipynb | 2 +- .../NLU_traing_multi_label_classifier_E2e.ipynb | 2 +- ...g_multi_token_label_text_classifier_stackoverflow_tags.ipynb | 2 +- .../named_entity_recognition/NLU_training_NER_demo.ipynb | 2 +- .../colab/Training/part_of_speech/NLU_training_POS_demo.ipynb | 2 +- 15 files changed, 15 insertions(+), 15 deletions(-) diff --git a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo.ipynb b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo.ipynb index 4c56b531..666b7068 100644 --- a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo.ipynb +++ b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo.ipynb @@ -1 +1 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 
3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/collab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo.ipynb)\n","\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu pyspark==2.4.7 > /dev/null \n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download Stock Market Sentiment dataset \n","https://www.kaggle.com/yash612/stockmarket-sentiment-dataset"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1608771929986,"user_tz":480,"elapsed":2813,"user":{"displayName":"Adam Morgan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh9F8KQIFyVYLTpwx5Hjc1H-jOwcRnh58ghzE2E8Q=s64","userId":"13394261526394139327"}},"outputId":"968a492a-fed0-4a7b-9eba-ebef9ff9ab47"},"source":["! wget http://ckl-it.de/wp-content/uploads/2020/11/stock_data.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2020-12-24 01:05:27-- http://ckl-it.de/wp-content/uploads/2020/11/stock_data.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 479973 (469K) [text/csv]\n","Saving to: ‘stock_data.csv.1’\n","\n","stock_data.csv.1 100%[===================>] 468.72K 324KB/s in 1.4s \n","\n","2020-12-24 01:05:29 (324 KB/s) - ‘stock_data.csv.1’ saved [479973/479973]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"x-rbTZLm_Uqs","executionInfo":{"status":"ok","timestamp":1608771936564,"user_tz":480,"elapsed":3170,"user":{"displayName":"Adam Morgan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh9F8KQIFyVYLTpwx5Hjc1H-jOwcRnh58ghzE2E8Q=s64","userId":"13394261526394139327"}},"outputId":"2d69e18e-285e-4337-cb61-372099f47bc3"},"source":["! 
pip install nlu pyspark==2.4.7"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Requirement already satisfied: nlu in /usr/local/lib/python3.6/dist-packages (1.0.5)\n","Requirement already satisfied: pyspark==2.4.7 in /usr/local/lib/python3.6/dist-packages (2.4.7)\n","Requirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from nlu) (1.1.5)\n","Requirement already satisfied: spark-nlp<2.7,>=2.6.2 in /usr/local/lib/python3.6/dist-packages (from nlu) (2.6.5)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from nlu) (1.19.4)\n","Requirement already satisfied: dataclasses in /usr/local/lib/python3.6/dist-packages (from nlu) (0.8)\n","Requirement already satisfied: pyarrow>=0.16.0 in /usr/local/lib/python3.6/dist-packages (from nlu) (2.0.0)\n","Requirement already satisfied: py4j==0.10.7 in /usr/local/lib/python3.6/dist-packages (from pyspark==2.4.7) (0.10.7)\n","Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.6/dist-packages (from pandas->nlu) (2.8.1)\n","Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas->nlu) (2018.9)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.6/dist-packages (from python-dateutil>=2.7.3->pandas->nlu) (1.15.0)\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"uDGIOASY_fRj","executionInfo":{"status":"ok","timestamp":1608771969641,"user_tz":480,"elapsed":26360,"user":{"displayName":"Adam Morgan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh9F8KQIFyVYLTpwx5Hjc1H-jOwcRnh58ghzE2E8Q=s64","userId":"13394261526394139327"}},"outputId":"caa60b4b-8819-4046-c0e8-d029434a4155"},"source":["import nlu\r\n","sentiment = nlu.load('sentiment')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["analyze_sentiment download started this may take some time.\n","Approx size to download 4.9 
MB\n","[OK!]\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":128},"id":"U0ENiuMc_kyb","executionInfo":{"status":"ok","timestamp":1608771986728,"user_tz":480,"elapsed":9983,"user":{"displayName":"Adam Morgan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh9F8KQIFyVYLTpwx5Hjc1H-jOwcRnh58ghzE2E8Q=s64","userId":"13394261526394139327"}},"outputId":"1b96fe79-8ae8-4ec3-e3fd-8c234ec15322"},"source":["sentiment.predict(\"I'm very very not at all happy\")"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentencesentiment_confidencecheckedsentiment
origin_index
0I'm very very not at all happy0.304300[I'm, very, very, not, at, all, happy]positive
\n","
"],"text/plain":[" sentence ... sentiment\n","origin_index ... \n","0 I'm very very not at all happy ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":406},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1607932042536,"user_tz":-60,"elapsed":82460,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"5c8fa3de-3b18-4427-ee0f-d26128fb7012"},"source":["import pandas as pd\n","train_path = '/content/stock_data.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must have name 'y' name be of type str\n","train_df.columns=['text','y']\n","train_df.y = train_df.y.astype(str)\n","train_df.y = train_df.y.str.replace('-1','negative')\n","train_df.y = train_df.y.str.replace('1','positive')\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
texty
0Kickers on my watchlist XIDE TIT SOQ PNK CPW B...positive
1user: AAP MOVIE. 55% return for the FEA/GEED i...positive
2user I'd be afraid to short AMZN - they are lo...positive
3MNTA Over 12.00positive
4OI Over 21.37positive
.........
5786Industry body CII said #discoms are likely to ...negative
5787#Gold prices slip below Rs 46,000 as #investor...negative
5788Workers at Bajaj Auto have agreed to a 10% wag...positive
5789#Sharemarket LIVE: Sensex off day’s high, up 6...positive
5790#Sensex, #Nifty climb off day's highs, still u...positive
\n","

5791 rows × 2 columns

\n","
"],"text/plain":[" text y\n","0 Kickers on my watchlist XIDE TIT SOQ PNK CPW B... positive\n","1 user: AAP MOVIE. 55% return for the FEA/GEED i... positive\n","2 user I'd be afraid to short AMZN - they are lo... positive\n","3 MNTA Over 12.00 positive\n","4 OI Over 21.37 positive\n","... ... ...\n","5786 Industry body CII said #discoms are likely to ... negative\n","5787 #Gold prices slip below Rs 46,000 as #investor... negative\n","5788 Workers at Bajaj Auto have agreed to a 10% wag... positive\n","5789 #Sharemarket LIVE: Sensex off day’s high, up 6... positive\n","5790 #Sensex, #Nifty climb off day's highs, still u... positive\n","\n","[5791 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":667},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1607932220223,"user_tz":-60,"elapsed":260138,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"4baf0d66-f257-4c2b-8887-fe7dfe9dadd4"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix and fit it on a datset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df)\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.71 0.43 0.54 2106\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.77 0.83 0.80 3685\n","\n"," accuracy 0.69 5791\n"," macro avg 0.49 0.42 0.45 5791\n","weighted avg 0.75 0.69 0.70 5791\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textsentiment_confidencedocumentsentimentydefault_name_embeddings
origin_index
0Kickers on my watchlist XIDE TIT SOQ PNK CPW B...0.982228Kickers on my watchlist XIDE TIT SOQ PNK CPW B...positivepositive[0.006487144622951746, -0.042024899274110794, ...
1user: AAP MOVIE. 55% return for the FEA/GEED i...0.880183user: AAP MOVIE. 55% return for the FEA/GEED i...positivepositive[-0.03017628937959671, -0.0627138689160347, -0...
2user I'd be afraid to short AMZN - they are lo...0.837914user I'd be afraid to short AMZN - they are lo...positivepositive[0.05556508153676987, -0.016491785645484924, 0...
3MNTA Over 12.000.905505MNTA Over 12.00positivepositive[-0.01097656786441803, -0.02980119362473488, -...
4OI Over 21.370.532368OI Over 21.37neutralpositive[0.024849386885762215, 0.04679658263921738, -0...
.....................
5786Industry body CII said #discoms are likely to ...0.785020Industry body CII said #discoms are likely to ...negativenegative[0.020985644310712814, -0.03145354613661766, -...
5787#Gold prices slip below Rs 46,000 as #investor...0.861554#Gold prices slip below Rs 46,000 as #investor...negativenegative[0.05627664923667908, 0.012842322699725628, -0...
5788Workers at Bajaj Auto have agreed to a 10% wag...0.794606Workers at Bajaj Auto have agreed to a 10% wag...negativepositive[0.01210737880319357, -0.02798214927315712, -0...
5789#Sharemarket LIVE: Sensex off day’s high, up 6...0.966394#Sharemarket LIVE: Sensex off day’s high, up 6...positivepositive[0.0031773506198078394, -0.04296385496854782, ...
5790#Sensex, #Nifty climb off day's highs, still u...0.987555#Sensex, #Nifty climb off day's highs, still u...positivepositive[0.04964913800358772, -0.04634825885295868, -0...
\n","

5791 rows × 6 columns

\n","
"],"text/plain":[" text ... default_name_embeddings\n","origin_index ... \n","0 Kickers on my watchlist XIDE TIT SOQ PNK CPW B... ... [0.006487144622951746, -0.042024899274110794, ...\n","1 user: AAP MOVIE. 55% return for the FEA/GEED i... ... [-0.03017628937959671, -0.0627138689160347, -0...\n","2 user I'd be afraid to short AMZN - they are lo... ... [0.05556508153676987, -0.016491785645484924, 0...\n","3 MNTA Over 12.00 ... [-0.01097656786441803, -0.02980119362473488, -...\n","4 OI Over 21.37 ... [0.024849386885762215, 0.04679658263921738, -0...\n","... ... ... ...\n","5786 Industry body CII said #discoms are likely to ... ... [0.020985644310712814, -0.03145354613661766, -...\n","5787 #Gold prices slip below Rs 46,000 as #investor... ... [0.05627664923667908, 0.012842322699725628, -0...\n","5788 Workers at Bajaj Auto have agreed to a 10% wag... ... [0.01210737880319357, -0.02798214927315712, -0...\n","5789 #Sharemarket LIVE: Sensex off day’s high, up 6... ... [0.0031773506198078394, -0.04296385496854782, ...\n","5790 #Sensex, #Nifty climb off day's highs, still u... ... [0.04964913800358772, -0.04634825885295868, -0...\n","\n","[5791 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":107},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1607932222650,"user_tz":-60,"elapsed":262555,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"f38cb015-a48f-47a1-e2b7-5d5bb488beb9"},"source":["fitted_pipe.predict(\"Bitcoin is going to the moon!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentsentimentdefault_name_embeddings
origin_index
00.918913Bitcoin is going to the moon!positive[0.06468033790588379, -0.040837567299604416, -...
\n","
"],"text/plain":[" sentiment_confidence ... default_name_embeddings\n","origin_index ... \n","0 0.918913 ... [0.06468033790588379, -0.040837567299604416, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1607932222651,"user_tz":-60,"elapsed":262549,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"cf184e49-084a-42d4-c95a-4de7c21cae16"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. 
| Currently set to : neutral\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. 
| Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":614},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1607932301821,"user_tz":-60,"elapsed":341713,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"0c0022d0-2bf6-44db-e737-30892668621f"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.79 0.67 0.72 2106\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.84 0.87 0.85 3685\n","\n"," accuracy 0.80 5791\n"," macro avg 0.54 0.51 0.53 5791\n","weighted avg 0.82 0.80 0.81 5791\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textsentiment_confidencedocumentsentimentydefault_name_embeddings
origin_index
0Kickers on my watchlist XIDE TIT SOQ PNK CPW B...0.999146Kickers on my watchlist XIDE TIT SOQ PNK CPW B...positivepositive[0.006487144622951746, -0.042024899274110794, ...
1user: AAP MOVIE. 55% return for the FEA/GEED i...0.941052user: AAP MOVIE. 55% return for the FEA/GEED i...positivepositive[-0.03017628937959671, -0.0627138689160347, -0...
2user I'd be afraid to short AMZN - they are lo...0.648649user I'd be afraid to short AMZN - they are lo...negativepositive[0.05556508153676987, -0.016491785645484924, 0...
3MNTA Over 12.000.988186MNTA Over 12.00positivepositive[-0.01097656786441803, -0.02980119362473488, -...
4OI Over 21.370.783930OI Over 21.37positivepositive[0.024849386885762215, 0.04679658263921738, -0...
.....................
5786Industry body CII said #discoms are likely to ...0.990443Industry body CII said #discoms are likely to ...negativenegative[0.020985644310712814, -0.03145354613661766, -...
5787#Gold prices slip below Rs 46,000 as #investor...0.999385#Gold prices slip below Rs 46,000 as #investor...negativenegative[0.05627664923667908, 0.012842322699725628, -0...
5788Workers at Bajaj Auto have agreed to a 10% wag...0.728881Workers at Bajaj Auto have agreed to a 10% wag...negativepositive[0.01210737880319357, -0.02798214927315712, -0...
5789#Sharemarket LIVE: Sensex off day’s high, up 6...0.987245#Sharemarket LIVE: Sensex off day’s high, up 6...positivepositive[0.0031773506198078394, -0.04296385496854782, ...
5790#Sensex, #Nifty climb off day's highs, still u...0.999714#Sensex, #Nifty climb off day's highs, still u...positivepositive[0.04964913800358772, -0.04634825885295868, -0...
\n","

5791 rows × 6 columns

\n","
"],"text/plain":[" text ... default_name_embeddings\n","origin_index ... \n","0 Kickers on my watchlist XIDE TIT SOQ PNK CPW B... ... [0.006487144622951746, -0.042024899274110794, ...\n","1 user: AAP MOVIE. 55% return for the FEA/GEED i... ... [-0.03017628937959671, -0.0627138689160347, -0...\n","2 user I'd be afraid to short AMZN - they are lo... ... [0.05556508153676987, -0.016491785645484924, 0...\n","3 MNTA Over 12.00 ... [-0.01097656786441803, -0.02980119362473488, -...\n","4 OI Over 21.37 ... [0.024849386885762215, 0.04679658263921738, -0...\n","... ... ... ...\n","5786 Industry body CII said #discoms are likely to ... ... [0.020985644310712814, -0.03145354613661766, -...\n","5787 #Gold prices slip below Rs 46,000 as #investor... ... [0.05627664923667908, 0.012842322699725628, -0...\n","5788 Workers at Bajaj Auto have agreed to a 10% wag... ... [0.01210737880319357, -0.02798214927315712, -0...\n","5789 #Sharemarket LIVE: Sensex off day’s high, up 6... ... [0.0031773506198078394, -0.04296385496854782, ...\n","5790 #Sensex, #Nifty climb off day's highs, still u... ... [0.04964913800358772, -0.04634825885295868, -0...\n","\n","[5791 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nxWFzQOhjWC8","executionInfo":{"status":"ok","timestamp":1607932301823,"user_tz":-60,"elapsed":341709,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"4dfe0938-a01e-4469-c4fa-8909deb02a2a"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":667},"id":"IKK_Ii_gjJfF","executionInfo":{"status":"ok","timestamp":1607932444818,"user_tz":-60,"elapsed":484698,"user":{"displayName":"Christian Kasim 
Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"dc1943bb-8f1f-4503-c7e4-8f4938ddf4aa"},"source":["trainable_pipe = nlu.load('embed_sentence.bert train.sentiment')\n","# We need to train longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(40) \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L2_128 download started this may take some time.\n","Approximate size to download 16.1 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.68 0.25 0.36 2106\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.72 0.84 0.77 3685\n","\n"," accuracy 0.63 5791\n"," macro avg 0.47 0.36 0.38 5791\n","weighted avg 0.71 0.63 0.63 5791\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textsentiment_confidencedocumentembed_sentence_bert_embeddingssentimenty
origin_index
0Kickers on my watchlist XIDE TIT SOQ PNK CPW B...0.874224Kickers on my watchlist XIDE TIT SOQ PNK CPW B...[-0.9207571744918823, 0.21013416349887848, 0.1...positivepositive
1user: AAP MOVIE. 55% return for the FEA/GEED i...0.647704user: AAP MOVIE. 55% return for the FEA/GEED i...[-0.43004727363586426, 0.5101231336593628, -0....positivepositive
2user I'd be afraid to short AMZN - they are lo...0.780586user I'd be afraid to short AMZN - they are lo...[0.3040030300617218, 0.22862982749938965, -0.5...positivepositive
3MNTA Over 12.000.978046MNTA Over 12.00[-1.810348391532898, -0.4799138903617859, -0.7...positivepositive
4OI Over 21.370.961256OI Over 21.37[-2.4639298915863037, 0.3879590630531311, -0.6...positivepositive
.....................
5786Industry body CII said #discoms are likely to ...0.759879Industry body CII said #discoms are likely to ...[-0.09503911435604095, 0.6293947696685791, 0.0...negativenegative
5787#Gold prices slip below Rs 46,000 as #investor...0.759041#Gold prices slip below Rs 46,000 as #investor...[-0.1287938952445984, 0.28170245885849, 0.0280...negativenegative
5788Workers at Bajaj Auto have agreed to a 10% wag...0.750849Workers at Bajaj Auto have agreed to a 10% wag...[-0.3395587205886841, 0.912406325340271, -0.32...negativepositive
5789#Sharemarket LIVE: Sensex off day’s high, up 6...0.567143#Sharemarket LIVE: Sensex off day’s high, up 6...[-0.6081283092498779, 0.2732301354408264, 0.25...neutralpositive
5790#Sensex, #Nifty climb off day's highs, still u...0.545603#Sensex, #Nifty climb off day's highs, still u...[-0.44862690567970276, 0.43264657258987427, 0....neutralpositive
\n","

5791 rows × 6 columns

\n","
"],"text/plain":[" text ... y\n","origin_index ... \n","0 Kickers on my watchlist XIDE TIT SOQ PNK CPW B... ... positive\n","1 user: AAP MOVIE. 55% return for the FEA/GEED i... ... positive\n","2 user I'd be afraid to short AMZN - they are lo... ... positive\n","3 MNTA Over 12.00 ... positive\n","4 OI Over 21.37 ... positive\n","... ... ... ...\n","5786 Industry body CII said #discoms are likely to ... ... negative\n","5787 #Gold prices slip below Rs 46,000 as #investor... ... negative\n","5788 Workers at Bajaj Auto have agreed to a 10% wag... ... positive\n","5789 #Sharemarket LIVE: Sensex off day’s high, up 6... ... positive\n","5790 #Sensex, #Nifty climb off day's highs, still u... ... positive\n","\n","[5791 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":10}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607932457549,"user_tz":-60,"elapsed":497423,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"c39e2854-fd34-4576-ebb2-352bc80fb3c8"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! 
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":125},"executionInfo":{"status":"ok","timestamp":1607932462254,"user_tz":-60,"elapsed":502122,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"22426d96-3b57-4450-8af1-7a0c69de879e"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Tesla plans to invest 10M into the ML sector')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentembed_sentence_bert_embeddingssentiment
origin_index
00.974726Tesla plans to invest 10M into the ML sector[-0.07111635059118271, 0.9532930850982666, -1....positive
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.974726 ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":12}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607932462257,"user_tz":-60,"elapsed":502119,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"921f5de2-eeb7-4115-a427-1671e3390f1c"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(128) | Info: Number of embedding dimensions | Currently set to : 128\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L2_128') | Info: unique reference name for identification | Currently set to : sent_small_bert_L2_128\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L2_128') | Info: unique reference name for identification | Currently set to : sent_small_bert_L2_128\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"73rQbUy-KLpb"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo.ipynb)\n","\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features 
: \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu pyspark==2.4.7 > /dev/null \n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download Stock Market Sentiment dataset \n","https://www.kaggle.com/yash612/stockmarket-sentiment-dataset"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1608771929986,"user_tz":480,"elapsed":2813,"user":{"displayName":"Adam Morgan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh9F8KQIFyVYLTpwx5Hjc1H-jOwcRnh58ghzE2E8Q=s64","userId":"13394261526394139327"}},"outputId":"968a492a-fed0-4a7b-9eba-ebef9ff9ab47"},"source":["! wget http://ckl-it.de/wp-content/uploads/2020/11/stock_data.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2020-12-24 01:05:27-- http://ckl-it.de/wp-content/uploads/2020/11/stock_data.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 479973 (469K) [text/csv]\n","Saving to: ‘stock_data.csv.1’\n","\n","stock_data.csv.1 100%[===================>] 468.72K 324KB/s in 1.4s \n","\n","2020-12-24 01:05:29 (324 KB/s) - ‘stock_data.csv.1’ saved [479973/479973]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"x-rbTZLm_Uqs","executionInfo":{"status":"ok","timestamp":1608771936564,"user_tz":480,"elapsed":3170,"user":{"displayName":"Adam Morgan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh9F8KQIFyVYLTpwx5Hjc1H-jOwcRnh58ghzE2E8Q=s64","userId":"13394261526394139327"}},"outputId":"2d69e18e-285e-4337-cb61-372099f47bc3"},"source":["! pip install nlu pyspark==2.4.7"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Requirement already satisfied: nlu in /usr/local/lib/python3.6/dist-packages (1.0.5)\n","Requirement already satisfied: pyspark==2.4.7 in /usr/local/lib/python3.6/dist-packages (2.4.7)\n","Requirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from nlu) (1.1.5)\n","Requirement already satisfied: spark-nlp<2.7,>=2.6.2 in /usr/local/lib/python3.6/dist-packages (from nlu) (2.6.5)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from nlu) (1.19.4)\n","Requirement already satisfied: dataclasses in /usr/local/lib/python3.6/dist-packages (from nlu) (0.8)\n","Requirement already satisfied: pyarrow>=0.16.0 in /usr/local/lib/python3.6/dist-packages (from nlu) (2.0.0)\n","Requirement already satisfied: py4j==0.10.7 in /usr/local/lib/python3.6/dist-packages (from pyspark==2.4.7) (0.10.7)\n","Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.6/dist-packages (from pandas->nlu) (2.8.1)\n","Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas->nlu) (2018.9)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.6/dist-packages (from 
python-dateutil>=2.7.3->pandas->nlu) (1.15.0)\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"uDGIOASY_fRj","executionInfo":{"status":"ok","timestamp":1608771969641,"user_tz":480,"elapsed":26360,"user":{"displayName":"Adam Morgan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh9F8KQIFyVYLTpwx5Hjc1H-jOwcRnh58ghzE2E8Q=s64","userId":"13394261526394139327"}},"outputId":"caa60b4b-8819-4046-c0e8-d029434a4155"},"source":["import nlu\r\n","sentiment = nlu.load('sentiment')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["analyze_sentiment download started this may take some time.\n","Approx size to download 4.9 MB\n","[OK!]\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":128},"id":"U0ENiuMc_kyb","executionInfo":{"status":"ok","timestamp":1608771986728,"user_tz":480,"elapsed":9983,"user":{"displayName":"Adam Morgan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh9F8KQIFyVYLTpwx5Hjc1H-jOwcRnh58ghzE2E8Q=s64","userId":"13394261526394139327"}},"outputId":"1b96fe79-8ae8-4ec3-e3fd-8c234ec15322"},"source":["sentiment.predict(\"I'm very very not at all happy\")"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentencesentiment_confidencecheckedsentiment
origin_index
0I'm very very not at all happy0.304300[I'm, very, very, not, at, all, happy]positive
\n","
"],"text/plain":[" sentence ... sentiment\n","origin_index ... \n","0 I'm very very not at all happy ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":406},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1607932042536,"user_tz":-60,"elapsed":82460,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"5c8fa3de-3b18-4427-ee0f-d26128fb7012"},"source":["import pandas as pd\n","train_path = '/content/stock_data.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must be named 'y' and be of type str\n","train_df.columns=['text','y']\n","train_df.y = train_df.y.astype(str)\n","train_df.y = train_df.y.str.replace('-1','negative')\n","train_df.y = train_df.y.str.replace('1','positive')\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
texty
0Kickers on my watchlist XIDE TIT SOQ PNK CPW B...positive
1user: AAP MOVIE. 55% return for the FEA/GEED i...positive
2user I'd be afraid to short AMZN - they are lo...positive
3MNTA Over 12.00positive
4OI Over 21.37positive
.........
5786Industry body CII said #discoms are likely to ...negative
5787#Gold prices slip below Rs 46,000 as #investor...negative
5788Workers at Bajaj Auto have agreed to a 10% wag...positive
5789#Sharemarket LIVE: Sensex off day’s high, up 6...positive
5790#Sensex, #Nifty climb off day's highs, still u...positive
\n","

5791 rows × 2 columns

\n","
"],"text/plain":[" text y\n","0 Kickers on my watchlist XIDE TIT SOQ PNK CPW B... positive\n","1 user: AAP MOVIE. 55% return for the FEA/GEED i... positive\n","2 user I'd be afraid to short AMZN - they are lo... positive\n","3 MNTA Over 12.00 positive\n","4 OI Over 21.37 positive\n","... ... ...\n","5786 Industry body CII said #discoms are likely to ... negative\n","5787 #Gold prices slip below Rs 46,000 as #investor... negative\n","5788 Workers at Bajaj Auto have agreed to a 10% wag... positive\n","5789 #Sharemarket LIVE: Sensex off day’s high, up 6... positive\n","5790 #Sensex, #Nifty climb off day's highs, still u... positive\n","\n","[5791 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":667},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1607932220223,"user_tz":-60,"elapsed":260138,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"4baf0d66-f257-4c2b-8887-fe7dfe9dadd4"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix and fit it on a dataset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df)\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","#sentence detector that is part of the pipe generates some NaNs. 
Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.71 0.43 0.54 2106\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.77 0.83 0.80 3685\n","\n"," accuracy 0.69 5791\n"," macro avg 0.49 0.42 0.45 5791\n","weighted avg 0.75 0.69 0.70 5791\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textsentiment_confidencedocumentsentimentydefault_name_embeddings
origin_index
0Kickers on my watchlist XIDE TIT SOQ PNK CPW B...0.982228Kickers on my watchlist XIDE TIT SOQ PNK CPW B...positivepositive[0.006487144622951746, -0.042024899274110794, ...
1user: AAP MOVIE. 55% return for the FEA/GEED i...0.880183user: AAP MOVIE. 55% return for the FEA/GEED i...positivepositive[-0.03017628937959671, -0.0627138689160347, -0...
2user I'd be afraid to short AMZN - they are lo...0.837914user I'd be afraid to short AMZN - they are lo...positivepositive[0.05556508153676987, -0.016491785645484924, 0...
3MNTA Over 12.000.905505MNTA Over 12.00positivepositive[-0.01097656786441803, -0.02980119362473488, -...
4OI Over 21.370.532368OI Over 21.37neutralpositive[0.024849386885762215, 0.04679658263921738, -0...
.....................
5786Industry body CII said #discoms are likely to ...0.785020Industry body CII said #discoms are likely to ...negativenegative[0.020985644310712814, -0.03145354613661766, -...
5787#Gold prices slip below Rs 46,000 as #investor...0.861554#Gold prices slip below Rs 46,000 as #investor...negativenegative[0.05627664923667908, 0.012842322699725628, -0...
5788Workers at Bajaj Auto have agreed to a 10% wag...0.794606Workers at Bajaj Auto have agreed to a 10% wag...negativepositive[0.01210737880319357, -0.02798214927315712, -0...
5789#Sharemarket LIVE: Sensex off day’s high, up 6...0.966394#Sharemarket LIVE: Sensex off day’s high, up 6...positivepositive[0.0031773506198078394, -0.04296385496854782, ...
5790#Sensex, #Nifty climb off day's highs, still u...0.987555#Sensex, #Nifty climb off day's highs, still u...positivepositive[0.04964913800358772, -0.04634825885295868, -0...
\n","

5791 rows × 6 columns

\n","
"],"text/plain":[" text ... default_name_embeddings\n","origin_index ... \n","0 Kickers on my watchlist XIDE TIT SOQ PNK CPW B... ... [0.006487144622951746, -0.042024899274110794, ...\n","1 user: AAP MOVIE. 55% return for the FEA/GEED i... ... [-0.03017628937959671, -0.0627138689160347, -0...\n","2 user I'd be afraid to short AMZN - they are lo... ... [0.05556508153676987, -0.016491785645484924, 0...\n","3 MNTA Over 12.00 ... [-0.01097656786441803, -0.02980119362473488, -...\n","4 OI Over 21.37 ... [0.024849386885762215, 0.04679658263921738, -0...\n","... ... ... ...\n","5786 Industry body CII said #discoms are likely to ... ... [0.020985644310712814, -0.03145354613661766, -...\n","5787 #Gold prices slip below Rs 46,000 as #investor... ... [0.05627664923667908, 0.012842322699725628, -0...\n","5788 Workers at Bajaj Auto have agreed to a 10% wag... ... [0.01210737880319357, -0.02798214927315712, -0...\n","5789 #Sharemarket LIVE: Sensex off day’s high, up 6... ... [0.0031773506198078394, -0.04296385496854782, ...\n","5790 #Sensex, #Nifty climb off day's highs, still u... ... [0.04964913800358772, -0.04634825885295868, -0...\n","\n","[5791 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":107},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1607932222650,"user_tz":-60,"elapsed":262555,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"f38cb015-a48f-47a1-e2b7-5d5bb488beb9"},"source":["fitted_pipe.predict(\"Bitcoin is going to the moon!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentsentimentdefault_name_embeddings
origin_index
00.918913Bitcoin is going to the moon!positive[0.06468033790588379, -0.040837567299604416, -...
\n","
"],"text/plain":[" sentiment_confidence ... default_name_embeddings\n","origin_index ... \n","0 0.918913 ... [0.06468033790588379, -0.040837567299604416, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1607932222651,"user_tz":-60,"elapsed":262549,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"cf184e49-084a-42d4-c95a-4de7c21cae16"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. 
| Currently set to : neutral\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. 
| Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":614},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1607932301821,"user_tz":-60,"elapsed":341713,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"0c0022d0-2bf6-44db-e737-30892668621f"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.79 0.67 0.72 2106\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.84 0.87 0.85 3685\n","\n"," accuracy 0.80 5791\n"," macro avg 0.54 0.51 0.53 5791\n","weighted avg 0.82 0.80 0.81 5791\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textsentiment_confidencedocumentsentimentydefault_name_embeddings
origin_index
0Kickers on my watchlist XIDE TIT SOQ PNK CPW B...0.999146Kickers on my watchlist XIDE TIT SOQ PNK CPW B...positivepositive[0.006487144622951746, -0.042024899274110794, ...
1user: AAP MOVIE. 55% return for the FEA/GEED i...0.941052user: AAP MOVIE. 55% return for the FEA/GEED i...positivepositive[-0.03017628937959671, -0.0627138689160347, -0...
2user I'd be afraid to short AMZN - they are lo...0.648649user I'd be afraid to short AMZN - they are lo...negativepositive[0.05556508153676987, -0.016491785645484924, 0...
3MNTA Over 12.000.988186MNTA Over 12.00positivepositive[-0.01097656786441803, -0.02980119362473488, -...
4OI Over 21.370.783930OI Over 21.37positivepositive[0.024849386885762215, 0.04679658263921738, -0...
.....................
5786Industry body CII said #discoms are likely to ...0.990443Industry body CII said #discoms are likely to ...negativenegative[0.020985644310712814, -0.03145354613661766, -...
5787#Gold prices slip below Rs 46,000 as #investor...0.999385#Gold prices slip below Rs 46,000 as #investor...negativenegative[0.05627664923667908, 0.012842322699725628, -0...
5788Workers at Bajaj Auto have agreed to a 10% wag...0.728881Workers at Bajaj Auto have agreed to a 10% wag...negativepositive[0.01210737880319357, -0.02798214927315712, -0...
5789#Sharemarket LIVE: Sensex off day’s high, up 6...0.987245#Sharemarket LIVE: Sensex off day’s high, up 6...positivepositive[0.0031773506198078394, -0.04296385496854782, ...
5790#Sensex, #Nifty climb off day's highs, still u...0.999714#Sensex, #Nifty climb off day's highs, still u...positivepositive[0.04964913800358772, -0.04634825885295868, -0...
\n","

5791 rows × 6 columns

\n","
"],"text/plain":[" text ... default_name_embeddings\n","origin_index ... \n","0 Kickers on my watchlist XIDE TIT SOQ PNK CPW B... ... [0.006487144622951746, -0.042024899274110794, ...\n","1 user: AAP MOVIE. 55% return for the FEA/GEED i... ... [-0.03017628937959671, -0.0627138689160347, -0...\n","2 user I'd be afraid to short AMZN - they are lo... ... [0.05556508153676987, -0.016491785645484924, 0...\n","3 MNTA Over 12.00 ... [-0.01097656786441803, -0.02980119362473488, -...\n","4 OI Over 21.37 ... [0.024849386885762215, 0.04679658263921738, -0...\n","... ... ... ...\n","5786 Industry body CII said #discoms are likely to ... ... [0.020985644310712814, -0.03145354613661766, -...\n","5787 #Gold prices slip below Rs 46,000 as #investor... ... [0.05627664923667908, 0.012842322699725628, -0...\n","5788 Workers at Bajaj Auto have agreed to a 10% wag... ... [0.01210737880319357, -0.02798214927315712, -0...\n","5789 #Sharemarket LIVE: Sensex off day’s high, up 6... ... [0.0031773506198078394, -0.04296385496854782, ...\n","5790 #Sensex, #Nifty climb off day's highs, still u... ... [0.04964913800358772, -0.04634825885295868, -0...\n","\n","[5791 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nxWFzQOhjWC8","executionInfo":{"status":"ok","timestamp":1607932301823,"user_tz":-60,"elapsed":341709,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"4dfe0938-a01e-4469-c4fa-8909deb02a2a"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":667},"id":"IKK_Ii_gjJfF","executionInfo":{"status":"ok","timestamp":1607932444818,"user_tz":-60,"elapsed":484698,"user":{"displayName":"Christian Kasim 
Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"dc1943bb-8f1f-4503-c7e4-8f4938ddf4aa"},"source":["trainable_pipe = nlu.load('embed_sentence.bert train.sentiment')\n","# We need to train longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(40) \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L2_128 download started this may take some time.\n","Approximate size to download 16.1 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.68 0.25 0.36 2106\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.72 0.84 0.77 3685\n","\n"," accuracy 0.63 5791\n"," macro avg 0.47 0.36 0.38 5791\n","weighted avg 0.71 0.63 0.63 5791\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textsentiment_confidencedocumentembed_sentence_bert_embeddingssentimenty
origin_index
0Kickers on my watchlist XIDE TIT SOQ PNK CPW B...0.874224Kickers on my watchlist XIDE TIT SOQ PNK CPW B...[-0.9207571744918823, 0.21013416349887848, 0.1...positivepositive
1user: AAP MOVIE. 55% return for the FEA/GEED i...0.647704user: AAP MOVIE. 55% return for the FEA/GEED i...[-0.43004727363586426, 0.5101231336593628, -0....positivepositive
2user I'd be afraid to short AMZN - they are lo...0.780586user I'd be afraid to short AMZN - they are lo...[0.3040030300617218, 0.22862982749938965, -0.5...positivepositive
3MNTA Over 12.000.978046MNTA Over 12.00[-1.810348391532898, -0.4799138903617859, -0.7...positivepositive
4OI Over 21.370.961256OI Over 21.37[-2.4639298915863037, 0.3879590630531311, -0.6...positivepositive
.....................
5786Industry body CII said #discoms are likely to ...0.759879Industry body CII said #discoms are likely to ...[-0.09503911435604095, 0.6293947696685791, 0.0...negativenegative
5787#Gold prices slip below Rs 46,000 as #investor...0.759041#Gold prices slip below Rs 46,000 as #investor...[-0.1287938952445984, 0.28170245885849, 0.0280...negativenegative
5788Workers at Bajaj Auto have agreed to a 10% wag...0.750849Workers at Bajaj Auto have agreed to a 10% wag...[-0.3395587205886841, 0.912406325340271, -0.32...negativepositive
5789#Sharemarket LIVE: Sensex off day’s high, up 6...0.567143#Sharemarket LIVE: Sensex off day’s high, up 6...[-0.6081283092498779, 0.2732301354408264, 0.25...neutralpositive
5790#Sensex, #Nifty climb off day's highs, still u...0.545603#Sensex, #Nifty climb off day's highs, still u...[-0.44862690567970276, 0.43264657258987427, 0....neutralpositive
\n","

5791 rows × 6 columns

\n","
"],"text/plain":[" text ... y\n","origin_index ... \n","0 Kickers on my watchlist XIDE TIT SOQ PNK CPW B... ... positive\n","1 user: AAP MOVIE. 55% return for the FEA/GEED i... ... positive\n","2 user I'd be afraid to short AMZN - they are lo... ... positive\n","3 MNTA Over 12.00 ... positive\n","4 OI Over 21.37 ... positive\n","... ... ... ...\n","5786 Industry body CII said #discoms are likely to ... ... negative\n","5787 #Gold prices slip below Rs 46,000 as #investor... ... negative\n","5788 Workers at Bajaj Auto have agreed to a 10% wag... ... positive\n","5789 #Sharemarket LIVE: Sensex off day’s high, up 6... ... positive\n","5790 #Sensex, #Nifty climb off day's highs, still u... ... positive\n","\n","[5791 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":10}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607932457549,"user_tz":-60,"elapsed":497423,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"c39e2854-fd34-4576-ebb2-352bc80fb3c8"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! 
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":125},"executionInfo":{"status":"ok","timestamp":1607932462254,"user_tz":-60,"elapsed":502122,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"22426d96-3b57-4450-8af1-7a0c69de879e"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Tesla plans to invest 10M into the ML sector')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentembed_sentence_bert_embeddingssentiment
origin_index
00.974726Tesla plans to invest 10M into the ML sector[-0.07111635059118271, 0.9532930850982666, -1....positive
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.974726 ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":12}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607932462257,"user_tz":-60,"elapsed":502119,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"921f5de2-eeb7-4115-a427-1671e3390f1c"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(128) | Info: Number of embedding dimensions | Currently set to : 128\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L2_128') | Info: unique reference name for identification | Currently set to : sent_small_bert_L2_128\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L2_128') | Info: unique reference name for identification | Currently set to : sent_small_bert_L2_128\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"73rQbUy-KLpb"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file diff --git a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_IMDB.ipynb b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_IMDB.ipynb index af53f8a5..490dc371 100644 --- a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_IMDB.ipynb +++ b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_IMDB.ipynb @@ -1 +1 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_IMDB.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/collab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_IMDB.ipynb)\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu pyspark==2.4.7 > /dev/null \n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download IMDB dataset\n","https://www.kaggle.com/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews\n","\n","IMDB dataset having 50K movie reviews for natural language processing or Text analytics.\n","This is a dataset for binary sentiment classification containing substantially more data than previous benchmark datasets. We provide a set of 25,000 highly polar movie reviews for training and 25,000 for testing. 
So, predict the number of positive and negative reviews using either classification or deep learning algorithms.\n","For more dataset information, please go through the following link,\n","http://ai.stanford.edu/~amaas/data/sentiment/"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610788112062,"user_tz":-300,"elapsed":2594,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a99c604d-fad2-4ace-c9b3-13dcb5893e03"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/IMDB-Dataset.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:07:54-- http://ckl-it.de/wp-content/uploads/2021/01/IMDB-Dataset.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 3288450 (3.1M) [text/csv]\n","Saving to: ‘IMDB-Dataset.csv’\n","\n","IMDB-Dataset.csv 100%[===================>] 3.14M 2.29MB/s in 1.4s \n","\n","2021-01-16 09:07:56 (2.29 MB/s) - ‘IMDB-Dataset.csv’ saved [3288450/3288450]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610788130009,"user_tz":-300,"elapsed":1019,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"fa40642c-aeea-4506-b40e-3542a49a2ee9"},"source":["import pandas as pd\n","train_path = '/content/IMDB-Dataset.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must have name 'y' name be of type str\n","columns=['text','y']\n","train_df = 
train_df[columns]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
texty
0One of the other reviewers has mentioned that ...positive
1A wonderful little production. <br /><br />The...positive
2I thought this was a wonderful way to spend ti...positive
3Basically there's a family where a little boy ...negative
4Petter Mattei's \"Love in the Time of Money\" is...positive
.........
2495Another great movie by Costa-Gavras. It's a gr...negative
2496Though structured totally different from the b...positive
2497Handsome and dashing British airline pilot Geo...positive
2498This film breeches the fine line between satir...negative
2499Mardi Gras: Made in China provides a wonderful...positive
\n","

2500 rows × 2 columns

\n","
"],"text/plain":[" text y\n","0 One of the other reviewers has mentioned that ... positive\n","1 A wonderful little production.

The... positive\n","2 I thought this was a wonderful way to spend ti... positive\n","3 Basically there's a family where a little boy ... negative\n","4 Petter Mattei's \"Love in the Time of Money\" is... positive\n","... ... ...\n","2495 Another great movie by Costa-Gavras. It's a gr... negative\n","2496 Though structured totally different from the b... positive\n","2497 Handsome and dashing British airline pilot Geo... positive\n","2498 This film breeches the fine line between satir... negative\n","2499 Mardi Gras: Made in China provides a wonderful... positive\n","\n","[2500 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609464660630,"user_tz":-300,"elapsed":19440,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3be85e48-38c8-4a7c-bbd7-226e204fa739"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix and fit it on a datset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.70 0.70 0.70 27\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.79 0.65 0.71 23\n","\n"," accuracy 0.68 50\n"," macro avg 0.50 0.45 0.47 50\n","weighted avg 0.74 0.68 0.71 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textdefault_name_embeddingssentimentsentiment_confidenceydocument
origin_index
0One of the other reviewers has mentioned that ...[-0.04935329407453537, -0.01034686528146267, -...positive0.968638positiveOne of the other reviewers has mentioned that ...
1A wonderful little production. <br /><br />The...[0.040489643812179565, -0.054199717938899994, ...negative0.990273positiveA wonderful little production. <br /><br />The...
2I thought this was a wonderful way to spend ti...[0.026364900171756744, 0.07112795859575272, 0....negative0.957352positiveI thought this was a wonderful way to spend ti...
3Basically there's a family where a little boy ...[-0.05151151493191719, 0.008207003585994244, -...negative0.958503negativeBasically there's a family where a little boy ...
4Petter Mattei's \"Love in the Time of Money\" is...[0.06880538165569305, 0.019250543788075447, -0...positive0.999108positivePetter Mattei's \"Love in the Time of Money\" is...
5Probably my all-time favorite movie, a story o...[0.004764211364090443, 0.027671916410326958, -...positive0.993937positiveProbably my all-time favorite movie, a story o...
6I sure would like to see a resurrection of a u...[-0.03813941031694412, -0.03322296217083931, 0...positive0.974884positiveI sure would like to see a resurrection of a u...
7This show was an amazing, fresh & innovative i...[0.010670202784240246, -0.04322813078761101, -...negative0.721451negativeThis show was an amazing, fresh & innovative i...
8Encouraged by the positive comments about this...[0.010801736265420914, -0.07724311947822571, -...positive0.884824negativeEncouraged by the positive comments about this...
9If you like original gut wrenching laughter yo...[-0.0245585348457098, 0.0005475765210576355, -...negative0.850509positiveIf you like original gut wrenching laughter yo...
10Phil the Alien is one of those quirky films wh...[0.023403573781251907, 0.017464609816670418, -...negative0.836944negativePhil the Alien is one of those quirky films wh...
11I saw this movie when I was about 12 when it c...[-0.046517230570316315, -0.025949953123927116,...negative0.999218negativeI saw this movie when I was about 12 when it c...
12So im not a big fan of Boll's work but then ag...[0.0032458826899528503, -0.013339877128601074,...negative0.999841negativeSo im not a big fan of Boll's work but then ag...
13The cast played Shakespeare.<br /><br />Shakes...[0.044309284538030624, 0.061706289649009705, -...neutral0.504574negativeThe cast played Shakespeare.<br /><br />Shakes...
14This a fantastic movie of three prisoners who ...[0.005487383343279362, -0.005359508562833071, ...positive0.956110positiveThis a fantastic movie of three prisoners who ...
15Kind of drawn in by the erotic scenes, only to...[0.04357790946960449, -0.034652918577194214, -...negative0.990112negativeKind of drawn in by the erotic scenes, only to...
16Some films just simply should not be remade. T...[0.006823724135756493, -0.0692802369594574, -0...negative0.996081positiveSome films just simply should not be remade. T...
17This movie made it into one of my top 10 most ...[-0.013747279532253742, -0.0038213622756302357...negative0.999338negativeThis movie made it into one of my top 10 most ...
18I remember this film,it was the first film i h...[-0.005101265385746956, 0.022435873746871948, ...positive0.986708positiveI remember this film,it was the first film i h...
19An awful film! It must have been up against so...[0.011224010959267616, -0.007102800067514181, ...negative0.998881negativeAn awful film! It must have been up against so...
20After the success of Die Hard and it's sequels...[0.022048521786928177, -0.020497862249612808, ...negative0.650546positiveAfter the success of Die Hard and it's sequels...
21I had the terrible misfortune of having to vie...[-0.010102338157594204, -0.05102328583598137, ...negative0.999930negativeI had the terrible misfortune of having to vie...
22What an absolutely stunning movie, if you have...[-0.016428396105766296, 0.007074637804180384, ...positive0.981123positiveWhat an absolutely stunning movie, if you have...
23First of all, let's get a few things straight ...[-0.06437410414218903, -0.029181038960814476, ...negative0.957470negativeFirst of all, let's get a few things straight ...
24This was the worst movie I saw at WorldFest an...[0.03901044651865959, 0.06355303525924683, -0....negative0.999471negativeThis was the worst movie I saw at WorldFest an...
25The Karen Carpenter Story shows a little more ...[-0.021897025406360626, 0.04400184750556946, 0...positive0.997247positiveThe Karen Carpenter Story shows a little more ...
26\"The Cell\" is an exotic masterpiece, a dizzyin...[0.0439823754131794, -0.007468021009117365, -0...positive0.996351positive\"The Cell\" is an exotic masterpiece, a dizzyin...
27This film tried to be too many things all at o...[-0.004155139438807964, -0.03771881386637688, ...neutral0.570219negativeThis film tried to be too many things all at o...
28This movie was so frustrating. Everything seem...[0.015594013035297394, -0.007509331218898296, ...negative0.999906negativeThis movie was so frustrating. Everything seem...
29'War movie' is a Hollywood genre that has been...[-0.036022596061229706, -0.006816706154495478,...negative0.733068positive'War movie' is a Hollywood genre that has been...
30Taut and organically gripping, Edward Dmytryk'...[0.0312348585575819, -0.04670163244009018, -0....positive0.995882positiveTaut and organically gripping, Edward Dmytryk'...
31\"Ardh Satya\" is one of the finest film ever ma...[0.060114260762929916, -0.0590929239988327, -0...positive0.999671positive\"Ardh Satya\" is one of the finest film ever ma...
32My first exposure to the Templarios & not a go...[0.013515714555978775, -0.004898980725556612, ...negative0.999994negativeMy first exposure to the Templarios & not a go...
33One of the most significant quotes from the en...[0.022280631586909294, -0.00839739479124546, -...positive0.997032positiveOne of the most significant quotes from the en...
34I watched this film not really expecting much,...[0.009434111416339874, -0.046402934938669205, ...negative0.992625negativeI watched this film not really expecting much,...
35I bought this film at Blockbuster for $3.00, b...[0.011683089658617973, -0.047437384724617004, ...negative0.999485negativeI bought this film at Blockbuster for $3.00, b...
36The plot is about the death of little children...[-0.0348515659570694, 0.01680166646838188, -0....neutral0.537487negativeThe plot is about the death of little children...
37Ever watched a movie that lost the plot? Well,...[-0.02899913117289543, 0.0164097361266613, -0....negative0.998984negativeEver watched a movie that lost the plot? Well,...
38Okay, so this series kind of takes the route o...[0.002110496163368225, 0.02887572906911373, -0...positive0.965860positiveOkay, so this series kind of takes the route o...
39After sitting through this pile of dung, my hu...[0.013781447894871235, -0.010363072156906128, ...positive0.697232negativeAfter sitting through this pile of dung, my hu...
40It had all the clichés of movies of this type ...[0.03799372911453247, -0.038665950298309326, -...negative0.998307negativeIt had all the clichés of movies of this type ...
41This movie is based on the book, \"A Many Splen...[-0.00033091730438172817, -0.05126418545842171...positive0.998454positiveThis movie is based on the book, \"A Many Splen...
42Of all the films I have seen, this one, The Ra...[0.014630819670855999, -0.04907294735312462, -...negative0.999938negativeOf all the films I have seen, this one, The Ra...
43I had heard good things about \"States of Grace...[0.027017194777727127, 0.002088379580527544, 0...positive0.884466negativeI had heard good things about \"States of Grace...
44This movie struck home for me. Being 29, I rem...[-0.0009387845057062805, -0.048219360411167145...negative0.969341positiveThis movie struck home for me. Being 29, I rem...
45As a disclaimer, I've seen the movie 5-6 times...[0.0065035647712647915, 0.00230638706125319, 0...negative0.967124positiveAs a disclaimer, I've seen the movie 5-6 times...
46Protocol is an implausible movie whose only sa...[0.05113476142287254, 0.04671141505241394, -0....neutral0.593109negativeProtocol is an implausible movie whose only sa...
47How this film could be classified as Drama, I ...[0.011419376358389854, -0.0828876867890358, -0...negative0.991421negativeHow this film could be classified as Drama, I ...
48Preston Sturgis' THE POWER AND THE GLORY was u...[0.024031344801187515, 0.03399205952882767, 0....positive0.994996positivePreston Sturgis' THE POWER AND THE GLORY was u...
49Average (and surprisingly tame) Fulci giallo w...[0.015038557350635529, -0.0037642912939190865,...positive0.996770negativeAverage (and surprisingly tame) Fulci giallo w...
\n","
"],"text/plain":[" text ... document\n","origin_index ... \n","0 One of the other reviewers has mentioned that ... ... One of the other reviewers has mentioned that ...\n","1 A wonderful little production.

The... ... A wonderful little production.

The...\n","2 I thought this was a wonderful way to spend ti... ... I thought this was a wonderful way to spend ti...\n","3 Basically there's a family where a little boy ... ... Basically there's a family where a little boy ...\n","4 Petter Mattei's \"Love in the Time of Money\" is... ... Petter Mattei's \"Love in the Time of Money\" is...\n","5 Probably my all-time favorite movie, a story o... ... Probably my all-time favorite movie, a story o...\n","6 I sure would like to see a resurrection of a u... ... I sure would like to see a resurrection of a u...\n","7 This show was an amazing, fresh & innovative i... ... This show was an amazing, fresh & innovative i...\n","8 Encouraged by the positive comments about this... ... Encouraged by the positive comments about this...\n","9 If you like original gut wrenching laughter yo... ... If you like original gut wrenching laughter yo...\n","10 Phil the Alien is one of those quirky films wh... ... Phil the Alien is one of those quirky films wh...\n","11 I saw this movie when I was about 12 when it c... ... I saw this movie when I was about 12 when it c...\n","12 So im not a big fan of Boll's work but then ag... ... So im not a big fan of Boll's work but then ag...\n","13 The cast played Shakespeare.

Shakes... ... The cast played Shakespeare.

Shakes...\n","14 This a fantastic movie of three prisoners who ... ... This a fantastic movie of three prisoners who ...\n","15 Kind of drawn in by the erotic scenes, only to... ... Kind of drawn in by the erotic scenes, only to...\n","16 Some films just simply should not be remade. T... ... Some films just simply should not be remade. T...\n","17 This movie made it into one of my top 10 most ... ... This movie made it into one of my top 10 most ...\n","18 I remember this film,it was the first film i h... ... I remember this film,it was the first film i h...\n","19 An awful film! It must have been up against so... ... An awful film! It must have been up against so...\n","20 After the success of Die Hard and it's sequels... ... After the success of Die Hard and it's sequels...\n","21 I had the terrible misfortune of having to vie... ... I had the terrible misfortune of having to vie...\n","22 What an absolutely stunning movie, if you have... ... What an absolutely stunning movie, if you have...\n","23 First of all, let's get a few things straight ... ... First of all, let's get a few things straight ...\n","24 This was the worst movie I saw at WorldFest an... ... This was the worst movie I saw at WorldFest an...\n","25 The Karen Carpenter Story shows a little more ... ... The Karen Carpenter Story shows a little more ...\n","26 \"The Cell\" is an exotic masterpiece, a dizzyin... ... \"The Cell\" is an exotic masterpiece, a dizzyin...\n","27 This film tried to be too many things all at o... ... This film tried to be too many things all at o...\n","28 This movie was so frustrating. Everything seem... ... This movie was so frustrating. Everything seem...\n","29 'War movie' is a Hollywood genre that has been... ... 'War movie' is a Hollywood genre that has been...\n","30 Taut and organically gripping, Edward Dmytryk'... ... Taut and organically gripping, Edward Dmytryk'...\n","31 \"Ardh Satya\" is one of the finest film ever ma... ... 
\"Ardh Satya\" is one of the finest film ever ma...\n","32 My first exposure to the Templarios & not a go... ... My first exposure to the Templarios & not a go...\n","33 One of the most significant quotes from the en... ... One of the most significant quotes from the en...\n","34 I watched this film not really expecting much,... ... I watched this film not really expecting much,...\n","35 I bought this film at Blockbuster for $3.00, b... ... I bought this film at Blockbuster for $3.00, b...\n","36 The plot is about the death of little children... ... The plot is about the death of little children...\n","37 Ever watched a movie that lost the plot? Well,... ... Ever watched a movie that lost the plot? Well,...\n","38 Okay, so this series kind of takes the route o... ... Okay, so this series kind of takes the route o...\n","39 After sitting through this pile of dung, my hu... ... After sitting through this pile of dung, my hu...\n","40 It had all the clichés of movies of this type ... ... It had all the clichés of movies of this type ...\n","41 This movie is based on the book, \"A Many Splen... ... This movie is based on the book, \"A Many Splen...\n","42 Of all the films I have seen, this one, The Ra... ... Of all the films I have seen, this one, The Ra...\n","43 I had heard good things about \"States of Grace... ... I had heard good things about \"States of Grace...\n","44 This movie struck home for me. Being 29, I rem... ... This movie struck home for me. Being 29, I rem...\n","45 As a disclaimer, I've seen the movie 5-6 times... ... As a disclaimer, I've seen the movie 5-6 times...\n","46 Protocol is an implausible movie whose only sa... ... Protocol is an implausible movie whose only sa...\n","47 How this film could be classified as Drama, I ... ... How this film could be classified as Drama, I ...\n","48 Preston Sturgis' THE POWER AND THE GLORY was u... ... Preston Sturgis' THE POWER AND THE GLORY was u...\n","49 Average (and surprisingly tame) Fulci giallo w... 
... Average (and surprisingly tame) Fulci giallo w...\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609464663328,"user_tz":-300,"elapsed":2733,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ae604bdd-49fb-4b5e-978e-5190dd03b227"},"source":["fitted_pipe.predict('It was one of the best films i have ever watched in my entire life !!')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
default_name_embeddingssentimentsentiment_confidencedocument
origin_index
0[0.06468033790588379, -0.040837567299604416, -...positive0.982375Bitcoin is going to the moon!
\n","
"],"text/plain":[" default_name_embeddings ... document\n","origin_index ... \n","0 [0.06468033790588379, -0.040837567299604416, -... ... Bitcoin is going to the moon!\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609464663334,"user_tz":-300,"elapsed":31,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a9059197-9e1c-4afe-ca3b-97c6d310f60c"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609464673090,"user_tz":-300,"elapsed":9777,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"164e4f03-f48a-4347-95e8-fd3509bf146e"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.81 0.96 0.88 27\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.94 0.70 0.80 23\n","\n"," accuracy 0.84 50\n"," macro avg 0.58 0.55 0.56 50\n","weighted avg 0.87 0.84 0.84 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textdefault_name_embeddingssentimentsentiment_confidenceydocument
origin_index
0One of the other reviewers has mentioned that ...[-0.04935329407453537, -0.01034686528146267, -...positive0.966858positiveOne of the other reviewers has mentioned that ...
1A wonderful little production. <br /><br />The...[0.040489643812179565, -0.054199717938899994, ...negative0.985679positiveA wonderful little production. <br /><br />The...
2I thought this was a wonderful way to spend ti...[0.026364900171756744, 0.07112795859575272, 0....negative0.988745positiveI thought this was a wonderful way to spend ti...
3Basically there's a family where a little boy ...[-0.05151151493191719, 0.008207003585994244, -...negative0.999291negativeBasically there's a family where a little boy ...
4Petter Mattei's \"Love in the Time of Money\" is...[0.06880538165569305, 0.019250543788075447, -0...positive0.999684positivePetter Mattei's \"Love in the Time of Money\" is...
5Probably my all-time favorite movie, a story o...[0.004764211364090443, 0.027671916410326958, -...positive0.996598positiveProbably my all-time favorite movie, a story o...
6I sure would like to see a resurrection of a u...[-0.03813941031694412, -0.03322296217083931, 0...positive0.960203positiveI sure would like to see a resurrection of a u...
7This show was an amazing, fresh & innovative i...[0.010670202784240246, -0.04322813078761101, -...negative0.753273negativeThis show was an amazing, fresh & innovative i...
8Encouraged by the positive comments about this...[0.010801736265420914, -0.07724311947822571, -...negative0.958928negativeEncouraged by the positive comments about this...
9If you like original gut wrenching laughter yo...[-0.0245585348457098, 0.0005475765210576355, -...neutral0.536441positiveIf you like original gut wrenching laughter yo...
10Phil the Alien is one of those quirky films wh...[0.023403573781251907, 0.017464609816670418, -...negative0.959978negativePhil the Alien is one of those quirky films wh...
11I saw this movie when I was about 12 when it c...[-0.046517230570316315, -0.025949953123927116,...negative0.999949negativeI saw this movie when I was about 12 when it c...
12So im not a big fan of Boll's work but then ag...[0.0032458826899528503, -0.013339877128601074,...negative0.999997negativeSo im not a big fan of Boll's work but then ag...
13The cast played Shakespeare.<br /><br />Shakes...[0.044309284538030624, 0.061706289649009705, -...negative0.984033negativeThe cast played Shakespeare.<br /><br />Shakes...
14This a fantastic movie of three prisoners who ...[0.005487383343279362, -0.005359508562833071, ...positive0.775998positiveThis a fantastic movie of three prisoners who ...
15Kind of drawn in by the erotic scenes, only to...[0.04357790946960449, -0.034652918577194214, -...negative0.999683negativeKind of drawn in by the erotic scenes, only to...
16Some films just simply should not be remade. T...[0.006823724135756493, -0.0692802369594574, -0...negative0.999245positiveSome films just simply should not be remade. T...
17This movie made it into one of my top 10 most ...[-0.013747279532253742, -0.0038213622756302357...negative0.999970negativeThis movie made it into one of my top 10 most ...
18I remember this film,it was the first film i h...[-0.005101265385746956, 0.022435873746871948, ...positive0.975574positiveI remember this film,it was the first film i h...
19An awful film! It must have been up against so...[0.011224010959267616, -0.007102800067514181, ...negative0.999990negativeAn awful film! It must have been up against so...
20After the success of Die Hard and it's sequels...[0.022048521786928177, -0.020497862249612808, ...positive0.951596positiveAfter the success of Die Hard and it's sequels...
21I had the terrible misfortune of having to vie...[-0.010102338157594204, -0.05102328583598137, ...negative0.999999negativeI had the terrible misfortune of having to vie...
22What an absolutely stunning movie, if you have...[-0.016428396105766296, 0.007074637804180384, ...positive0.931946positiveWhat an absolutely stunning movie, if you have...
23First of all, let's get a few things straight ...[-0.06437410414218903, -0.029181038960814476, ...negative0.990350negativeFirst of all, let's get a few things straight ...
24This was the worst movie I saw at WorldFest an...[0.03901044651865959, 0.06355303525924683, -0....negative0.999986negativeThis was the worst movie I saw at WorldFest an...
25The Karen Carpenter Story shows a little more ...[-0.021897025406360626, 0.04400184750556946, 0...positive0.999463positiveThe Karen Carpenter Story shows a little more ...
26\"The Cell\" is an exotic masterpiece, a dizzyin...[0.0439823754131794, -0.007468021009117365, -0...positive0.998291positive\"The Cell\" is an exotic masterpiece, a dizzyin...
27This film tried to be too many things all at o...[-0.004155139438807964, -0.03771881386637688, ...negative0.865707negativeThis film tried to be too many things all at o...
28This movie was so frustrating. Everything seem...[0.015594013035297394, -0.007509331218898296, ...negative0.999998negativeThis movie was so frustrating. Everything seem...
29'War movie' is a Hollywood genre that has been...[-0.036022596061229706, -0.006816706154495478,...negative0.993793positive'War movie' is a Hollywood genre that has been...
30Taut and organically gripping, Edward Dmytryk'...[0.0312348585575819, -0.04670163244009018, -0....positive0.997460positiveTaut and organically gripping, Edward Dmytryk'...
31\"Ardh Satya\" is one of the finest film ever ma...[0.060114260762929916, -0.0590929239988327, -0...positive0.999880positive\"Ardh Satya\" is one of the finest film ever ma...
32My first exposure to the Templarios & not a go...[0.013515714555978775, -0.004898980725556612, ...negative1.000000negativeMy first exposure to the Templarios & not a go...
33One of the most significant quotes from the en...[0.022280631586909294, -0.00839739479124546, -...positive0.999292positiveOne of the most significant quotes from the en...
34I watched this film not really expecting much,...[0.009434111416339874, -0.046402934938669205, ...negative0.999848negativeI watched this film not really expecting much,...
35I bought this film at Blockbuster for $3.00, b...[0.011683089658617973, -0.047437384724617004, ...negative0.999993negativeI bought this film at Blockbuster for $3.00, b...
36The plot is about the death of little children...[-0.0348515659570694, 0.01680166646838188, -0....negative0.997690negativeThe plot is about the death of little children...
37Ever watched a movie that lost the plot? Well,...[-0.02899913117289543, 0.0164097361266613, -0....negative0.999995negativeEver watched a movie that lost the plot? Well,...
38Okay, so this series kind of takes the route o...[0.002110496163368225, 0.02887572906911373, -0...positive0.993408positiveOkay, so this series kind of takes the route o...
39After sitting through this pile of dung, my hu...[0.013781447894871235, -0.010363072156906128, ...negative0.905860negativeAfter sitting through this pile of dung, my hu...
40It had all the clichés of movies of this type ...[0.03799372911453247, -0.038665950298309326, -...negative0.999892negativeIt had all the clichés of movies of this type ...
41This movie is based on the book, \"A Many Splen...[-0.00033091730438172817, -0.05126418545842171...positive0.999837positiveThis movie is based on the book, \"A Many Splen...
42Of all the films I have seen, this one, The Ra...[0.014630819670855999, -0.04907294735312462, -...negative1.000000negativeOf all the films I have seen, this one, The Ra...
43I had heard good things about \"States of Grace...[0.027017194777727127, 0.002088379580527544, 0...negative0.978662negativeI had heard good things about \"States of Grace...
44This movie struck home for me. Being 29, I rem...[-0.0009387845057062805, -0.048219360411167145...negative0.993965positiveThis movie struck home for me. Being 29, I rem...
45As a disclaimer, I've seen the movie 5-6 times...[0.0065035647712647915, 0.00230638706125319, 0...negative0.999341positiveAs a disclaimer, I've seen the movie 5-6 times...
46Protocol is an implausible movie whose only sa...[0.05113476142287254, 0.04671141505241394, -0....negative0.913287negativeProtocol is an implausible movie whose only sa...
47How this film could be classified as Drama, I ...[0.011419376358389854, -0.0828876867890358, -0...negative0.999841negativeHow this film could be classified as Drama, I ...
48Preston Sturgis' THE POWER AND THE GLORY was u...[0.024031344801187515, 0.03399205952882767, 0....positive0.998516positivePreston Sturgis' THE POWER AND THE GLORY was u...
49Average (and surprisingly tame) Fulci giallo w...[0.015038557350635529, -0.0037642912939190865,...positive0.995483negativeAverage (and surprisingly tame) Fulci giallo w...
\n","
"],"text/plain":[" text ... document\n","origin_index ... \n","0 One of the other reviewers has mentioned that ... ... One of the other reviewers has mentioned that ...\n","1 A wonderful little production.

The... ... A wonderful little production.

The...\n","2 I thought this was a wonderful way to spend ti... ... I thought this was a wonderful way to spend ti...\n","3 Basically there's a family where a little boy ... ... Basically there's a family where a little boy ...\n","4 Petter Mattei's \"Love in the Time of Money\" is... ... Petter Mattei's \"Love in the Time of Money\" is...\n","5 Probably my all-time favorite movie, a story o... ... Probably my all-time favorite movie, a story o...\n","6 I sure would like to see a resurrection of a u... ... I sure would like to see a resurrection of a u...\n","7 This show was an amazing, fresh & innovative i... ... This show was an amazing, fresh & innovative i...\n","8 Encouraged by the positive comments about this... ... Encouraged by the positive comments about this...\n","9 If you like original gut wrenching laughter yo... ... If you like original gut wrenching laughter yo...\n","10 Phil the Alien is one of those quirky films wh... ... Phil the Alien is one of those quirky films wh...\n","11 I saw this movie when I was about 12 when it c... ... I saw this movie when I was about 12 when it c...\n","12 So im not a big fan of Boll's work but then ag... ... So im not a big fan of Boll's work but then ag...\n","13 The cast played Shakespeare.

Shakes... ... The cast played Shakespeare.

Shakes...\n","14 This a fantastic movie of three prisoners who ... ... This a fantastic movie of three prisoners who ...\n","15 Kind of drawn in by the erotic scenes, only to... ... Kind of drawn in by the erotic scenes, only to...\n","16 Some films just simply should not be remade. T... ... Some films just simply should not be remade. T...\n","17 This movie made it into one of my top 10 most ... ... This movie made it into one of my top 10 most ...\n","18 I remember this film,it was the first film i h... ... I remember this film,it was the first film i h...\n","19 An awful film! It must have been up against so... ... An awful film! It must have been up against so...\n","20 After the success of Die Hard and it's sequels... ... After the success of Die Hard and it's sequels...\n","21 I had the terrible misfortune of having to vie... ... I had the terrible misfortune of having to vie...\n","22 What an absolutely stunning movie, if you have... ... What an absolutely stunning movie, if you have...\n","23 First of all, let's get a few things straight ... ... First of all, let's get a few things straight ...\n","24 This was the worst movie I saw at WorldFest an... ... This was the worst movie I saw at WorldFest an...\n","25 The Karen Carpenter Story shows a little more ... ... The Karen Carpenter Story shows a little more ...\n","26 \"The Cell\" is an exotic masterpiece, a dizzyin... ... \"The Cell\" is an exotic masterpiece, a dizzyin...\n","27 This film tried to be too many things all at o... ... This film tried to be too many things all at o...\n","28 This movie was so frustrating. Everything seem... ... This movie was so frustrating. Everything seem...\n","29 'War movie' is a Hollywood genre that has been... ... 'War movie' is a Hollywood genre that has been...\n","30 Taut and organically gripping, Edward Dmytryk'... ... Taut and organically gripping, Edward Dmytryk'...\n","31 \"Ardh Satya\" is one of the finest film ever ma... ... 
\"Ardh Satya\" is one of the finest film ever ma...\n","32 My first exposure to the Templarios & not a go... ... My first exposure to the Templarios & not a go...\n","33 One of the most significant quotes from the en... ... One of the most significant quotes from the en...\n","34 I watched this film not really expecting much,... ... I watched this film not really expecting much,...\n","35 I bought this film at Blockbuster for $3.00, b... ... I bought this film at Blockbuster for $3.00, b...\n","36 The plot is about the death of little children... ... The plot is about the death of little children...\n","37 Ever watched a movie that lost the plot? Well,... ... Ever watched a movie that lost the plot? Well,...\n","38 Okay, so this series kind of takes the route o... ... Okay, so this series kind of takes the route o...\n","39 After sitting through this pile of dung, my hu... ... After sitting through this pile of dung, my hu...\n","40 It had all the clichés of movies of this type ... ... It had all the clichés of movies of this type ...\n","41 This movie is based on the book, \"A Many Splen... ... This movie is based on the book, \"A Many Splen...\n","42 Of all the films I have seen, this one, The Ra... ... Of all the films I have seen, this one, The Ra...\n","43 I had heard good things about \"States of Grace... ... I had heard good things about \"States of Grace...\n","44 This movie struck home for me. Being 29, I rem... ... This movie struck home for me. Being 29, I rem...\n","45 As a disclaimer, I've seen the movie 5-6 times... ... As a disclaimer, I've seen the movie 5-6 times...\n","46 Protocol is an implausible movie whose only sa... ... Protocol is an implausible movie whose only sa...\n","47 How this film could be classified as Drama, I ... ... How this film could be classified as Drama, I ...\n","48 Preston Sturgis' THE POWER AND THE GLORY was u... ... Preston Sturgis' THE POWER AND THE GLORY was u...\n","49 Average (and surprisingly tame) Fulci giallo w... 
... Average (and surprisingly tame) Fulci giallo w...\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"IKK_Ii_gjJfF","executionInfo":{"status":"ok","timestamp":1609469926255,"user_tz":-300,"elapsed":140492,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"64f54fdd-699a-4559-f6e4-74b7b5f3e92e"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(120) \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.85 0.81 0.83 1234\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.87 0.79 0.83 1266\n","\n"," accuracy 0.80 2500\n"," macro avg 0.57 0.54 0.55 2500\n","weighted avg 0.86 0.80 0.83 2500\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","executionInfo":{"status":"ok","timestamp":1609470097011,"user_tz":-300,"elapsed":170766,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"59899be4-e33a-4b5e-ff37-df6a9a3994b2"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! 
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":127},"id":"SO4uz45MoRgp","executionInfo":{"status":"ok","timestamp":1609470112616,"user_tz":-300,"elapsed":15622,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"cbe7fc37-7794-4c28-d1de-5ba88d3db58b"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('It was one of the best films i have ever watched in my entire life !!')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentimenten_embed_sentence_small_bert_L12_768_embeddingssentiment_confidencedocument
origin_index
0positive[0.09222018718719482, 0.11720675230026245, 0.1...0.999543It was one of the best films i have ever watch...
\n","
"],"text/plain":[" sentiment ... document\n","origin_index ... \n","0 positive ... It was one of the best films i have ever watch...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"e0CVlkk9v6Qi","executionInfo":{"status":"ok","timestamp":1609470112618,"user_tz":-300,"elapsed":17,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c7467f93-d619-470f-fd40-c2be1805b83f"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_IMDB.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_IMDB.ipynb)\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","## 2 class IMDB Movie sentiment classifier training\n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How 
to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu pyspark==2.4.7 > /dev/null \n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download IMDB dataset\n","https://www.kaggle.com/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews\n","\n","IMDB dataset having 50K movie reviews for natural language processing or Text analytics.\n","This is a dataset for binary sentiment classification containing substantially more data than previous benchmark datasets. We provide a set of 25,000 highly polar movie reviews for training and 25,000 for testing. So, predict the number of positive and negative reviews using either classification or deep learning algorithms.\n","For more dataset information, please go through the following link,\n","http://ai.stanford.edu/~amaas/data/sentiment/"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610788112062,"user_tz":-300,"elapsed":2594,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a99c604d-fad2-4ace-c9b3-13dcb5893e03"},"source":["! 
wget http://ckl-it.de/wp-content/uploads/2021/01/IMDB-Dataset.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:07:54-- http://ckl-it.de/wp-content/uploads/2021/01/IMDB-Dataset.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 3288450 (3.1M) [text/csv]\n","Saving to: ‘IMDB-Dataset.csv’\n","\n","IMDB-Dataset.csv 100%[===================>] 3.14M 2.29MB/s in 1.4s \n","\n","2021-01-16 09:07:56 (2.29 MB/s) - ‘IMDB-Dataset.csv’ saved [3288450/3288450]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610788130009,"user_tz":-300,"elapsed":1019,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"fa40642c-aeea-4506-b40e-3542a49a2ee9"},"source":["import pandas as pd\n","train_path = '/content/IMDB-Dataset.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must be named 'y' and be of type str\n","columns=['text','y']\n","train_df = train_df[columns]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
texty
0One of the other reviewers has mentioned that ...positive
1A wonderful little production. <br /><br />The...positive
2I thought this was a wonderful way to spend ti...positive
3Basically there's a family where a little boy ...negative
4Petter Mattei's \"Love in the Time of Money\" is...positive
.........
2495Another great movie by Costa-Gavras. It's a gr...negative
2496Though structured totally different from the b...positive
2497Handsome and dashing British airline pilot Geo...positive
2498This film breeches the fine line between satir...negative
2499Mardi Gras: Made in China provides a wonderful...positive
\n","

2500 rows × 2 columns

\n","
"],"text/plain":[" text y\n","0 One of the other reviewers has mentioned that ... positive\n","1 A wonderful little production.

The... positive\n","2 I thought this was a wonderful way to spend ti... positive\n","3 Basically there's a family where a little boy ... negative\n","4 Petter Mattei's \"Love in the Time of Money\" is... positive\n","... ... ...\n","2495 Another great movie by Costa-Gavras. It's a gr... negative\n","2496 Though structured totally different from the b... positive\n","2497 Handsome and dashing British airline pilot Geo... positive\n","2498 This film breeches the fine line between satir... negative\n","2499 Mardi Gras: Made in China provides a wonderful... positive\n","\n","[2500 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609464660630,"user_tz":-300,"elapsed":19440,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3be85e48-38c8-4a7c-bbd7-226e204fa739"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix and fit it on a dataset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates some NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.70 0.70 0.70 27\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.79 0.65 0.71 23\n","\n"," accuracy 0.68 50\n"," macro avg 0.50 0.45 0.47 50\n","weighted avg 0.74 0.68 0.71 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textdefault_name_embeddingssentimentsentiment_confidenceydocument
origin_index
0One of the other reviewers has mentioned that ...[-0.04935329407453537, -0.01034686528146267, -...positive0.968638positiveOne of the other reviewers has mentioned that ...
1A wonderful little production. <br /><br />The...[0.040489643812179565, -0.054199717938899994, ...negative0.990273positiveA wonderful little production. <br /><br />The...
2I thought this was a wonderful way to spend ti...[0.026364900171756744, 0.07112795859575272, 0....negative0.957352positiveI thought this was a wonderful way to spend ti...
3Basically there's a family where a little boy ...[-0.05151151493191719, 0.008207003585994244, -...negative0.958503negativeBasically there's a family where a little boy ...
4Petter Mattei's \"Love in the Time of Money\" is...[0.06880538165569305, 0.019250543788075447, -0...positive0.999108positivePetter Mattei's \"Love in the Time of Money\" is...
5Probably my all-time favorite movie, a story o...[0.004764211364090443, 0.027671916410326958, -...positive0.993937positiveProbably my all-time favorite movie, a story o...
6I sure would like to see a resurrection of a u...[-0.03813941031694412, -0.03322296217083931, 0...positive0.974884positiveI sure would like to see a resurrection of a u...
7This show was an amazing, fresh & innovative i...[0.010670202784240246, -0.04322813078761101, -...negative0.721451negativeThis show was an amazing, fresh & innovative i...
8Encouraged by the positive comments about this...[0.010801736265420914, -0.07724311947822571, -...positive0.884824negativeEncouraged by the positive comments about this...
9If you like original gut wrenching laughter yo...[-0.0245585348457098, 0.0005475765210576355, -...negative0.850509positiveIf you like original gut wrenching laughter yo...
10Phil the Alien is one of those quirky films wh...[0.023403573781251907, 0.017464609816670418, -...negative0.836944negativePhil the Alien is one of those quirky films wh...
11I saw this movie when I was about 12 when it c...[-0.046517230570316315, -0.025949953123927116,...negative0.999218negativeI saw this movie when I was about 12 when it c...
12So im not a big fan of Boll's work but then ag...[0.0032458826899528503, -0.013339877128601074,...negative0.999841negativeSo im not a big fan of Boll's work but then ag...
13The cast played Shakespeare.<br /><br />Shakes...[0.044309284538030624, 0.061706289649009705, -...neutral0.504574negativeThe cast played Shakespeare.<br /><br />Shakes...
14This a fantastic movie of three prisoners who ...[0.005487383343279362, -0.005359508562833071, ...positive0.956110positiveThis a fantastic movie of three prisoners who ...
15Kind of drawn in by the erotic scenes, only to...[0.04357790946960449, -0.034652918577194214, -...negative0.990112negativeKind of drawn in by the erotic scenes, only to...
16Some films just simply should not be remade. T...[0.006823724135756493, -0.0692802369594574, -0...negative0.996081positiveSome films just simply should not be remade. T...
17This movie made it into one of my top 10 most ...[-0.013747279532253742, -0.0038213622756302357...negative0.999338negativeThis movie made it into one of my top 10 most ...
18I remember this film,it was the first film i h...[-0.005101265385746956, 0.022435873746871948, ...positive0.986708positiveI remember this film,it was the first film i h...
19An awful film! It must have been up against so...[0.011224010959267616, -0.007102800067514181, ...negative0.998881negativeAn awful film! It must have been up against so...
20After the success of Die Hard and it's sequels...[0.022048521786928177, -0.020497862249612808, ...negative0.650546positiveAfter the success of Die Hard and it's sequels...
21I had the terrible misfortune of having to vie...[-0.010102338157594204, -0.05102328583598137, ...negative0.999930negativeI had the terrible misfortune of having to vie...
22What an absolutely stunning movie, if you have...[-0.016428396105766296, 0.007074637804180384, ...positive0.981123positiveWhat an absolutely stunning movie, if you have...
23First of all, let's get a few things straight ...[-0.06437410414218903, -0.029181038960814476, ...negative0.957470negativeFirst of all, let's get a few things straight ...
24This was the worst movie I saw at WorldFest an...[0.03901044651865959, 0.06355303525924683, -0....negative0.999471negativeThis was the worst movie I saw at WorldFest an...
25The Karen Carpenter Story shows a little more ...[-0.021897025406360626, 0.04400184750556946, 0...positive0.997247positiveThe Karen Carpenter Story shows a little more ...
26\"The Cell\" is an exotic masterpiece, a dizzyin...[0.0439823754131794, -0.007468021009117365, -0...positive0.996351positive\"The Cell\" is an exotic masterpiece, a dizzyin...
27This film tried to be too many things all at o...[-0.004155139438807964, -0.03771881386637688, ...neutral0.570219negativeThis film tried to be too many things all at o...
28This movie was so frustrating. Everything seem...[0.015594013035297394, -0.007509331218898296, ...negative0.999906negativeThis movie was so frustrating. Everything seem...
29'War movie' is a Hollywood genre that has been...[-0.036022596061229706, -0.006816706154495478,...negative0.733068positive'War movie' is a Hollywood genre that has been...
30Taut and organically gripping, Edward Dmytryk'...[0.0312348585575819, -0.04670163244009018, -0....positive0.995882positiveTaut and organically gripping, Edward Dmytryk'...
31\"Ardh Satya\" is one of the finest film ever ma...[0.060114260762929916, -0.0590929239988327, -0...positive0.999671positive\"Ardh Satya\" is one of the finest film ever ma...
32My first exposure to the Templarios & not a go...[0.013515714555978775, -0.004898980725556612, ...negative0.999994negativeMy first exposure to the Templarios & not a go...
33One of the most significant quotes from the en...[0.022280631586909294, -0.00839739479124546, -...positive0.997032positiveOne of the most significant quotes from the en...
34I watched this film not really expecting much,...[0.009434111416339874, -0.046402934938669205, ...negative0.992625negativeI watched this film not really expecting much,...
35I bought this film at Blockbuster for $3.00, b...[0.011683089658617973, -0.047437384724617004, ...negative0.999485negativeI bought this film at Blockbuster for $3.00, b...
36The plot is about the death of little children...[-0.0348515659570694, 0.01680166646838188, -0....neutral0.537487negativeThe plot is about the death of little children...
37Ever watched a movie that lost the plot? Well,...[-0.02899913117289543, 0.0164097361266613, -0....negative0.998984negativeEver watched a movie that lost the plot? Well,...
38Okay, so this series kind of takes the route o...[0.002110496163368225, 0.02887572906911373, -0...positive0.965860positiveOkay, so this series kind of takes the route o...
39After sitting through this pile of dung, my hu...[0.013781447894871235, -0.010363072156906128, ...positive0.697232negativeAfter sitting through this pile of dung, my hu...
40It had all the clichés of movies of this type ...[0.03799372911453247, -0.038665950298309326, -...negative0.998307negativeIt had all the clichés of movies of this type ...
41This movie is based on the book, \"A Many Splen...[-0.00033091730438172817, -0.05126418545842171...positive0.998454positiveThis movie is based on the book, \"A Many Splen...
42Of all the films I have seen, this one, The Ra...[0.014630819670855999, -0.04907294735312462, -...negative0.999938negativeOf all the films I have seen, this one, The Ra...
43I had heard good things about \"States of Grace...[0.027017194777727127, 0.002088379580527544, 0...positive0.884466negativeI had heard good things about \"States of Grace...
44This movie struck home for me. Being 29, I rem...[-0.0009387845057062805, -0.048219360411167145...negative0.969341positiveThis movie struck home for me. Being 29, I rem...
45As a disclaimer, I've seen the movie 5-6 times...[0.0065035647712647915, 0.00230638706125319, 0...negative0.967124positiveAs a disclaimer, I've seen the movie 5-6 times...
46Protocol is an implausible movie whose only sa...[0.05113476142287254, 0.04671141505241394, -0....neutral0.593109negativeProtocol is an implausible movie whose only sa...
47How this film could be classified as Drama, I ...[0.011419376358389854, -0.0828876867890358, -0...negative0.991421negativeHow this film could be classified as Drama, I ...
48Preston Sturgis' THE POWER AND THE GLORY was u...[0.024031344801187515, 0.03399205952882767, 0....positive0.994996positivePreston Sturgis' THE POWER AND THE GLORY was u...
49Average (and surprisingly tame) Fulci giallo w...[0.015038557350635529, -0.0037642912939190865,...positive0.996770negativeAverage (and surprisingly tame) Fulci giallo w...
\n","
"],"text/plain":[" text ... document\n","origin_index ... \n","0 One of the other reviewers has mentioned that ... ... One of the other reviewers has mentioned that ...\n","1 A wonderful little production.

The... ... A wonderful little production.

The...\n","2 I thought this was a wonderful way to spend ti... ... I thought this was a wonderful way to spend ti...\n","3 Basically there's a family where a little boy ... ... Basically there's a family where a little boy ...\n","4 Petter Mattei's \"Love in the Time of Money\" is... ... Petter Mattei's \"Love in the Time of Money\" is...\n","5 Probably my all-time favorite movie, a story o... ... Probably my all-time favorite movie, a story o...\n","6 I sure would like to see a resurrection of a u... ... I sure would like to see a resurrection of a u...\n","7 This show was an amazing, fresh & innovative i... ... This show was an amazing, fresh & innovative i...\n","8 Encouraged by the positive comments about this... ... Encouraged by the positive comments about this...\n","9 If you like original gut wrenching laughter yo... ... If you like original gut wrenching laughter yo...\n","10 Phil the Alien is one of those quirky films wh... ... Phil the Alien is one of those quirky films wh...\n","11 I saw this movie when I was about 12 when it c... ... I saw this movie when I was about 12 when it c...\n","12 So im not a big fan of Boll's work but then ag... ... So im not a big fan of Boll's work but then ag...\n","13 The cast played Shakespeare.

Shakes... ... The cast played Shakespeare.

Shakes...\n","14 This a fantastic movie of three prisoners who ... ... This a fantastic movie of three prisoners who ...\n","15 Kind of drawn in by the erotic scenes, only to... ... Kind of drawn in by the erotic scenes, only to...\n","16 Some films just simply should not be remade. T... ... Some films just simply should not be remade. T...\n","17 This movie made it into one of my top 10 most ... ... This movie made it into one of my top 10 most ...\n","18 I remember this film,it was the first film i h... ... I remember this film,it was the first film i h...\n","19 An awful film! It must have been up against so... ... An awful film! It must have been up against so...\n","20 After the success of Die Hard and it's sequels... ... After the success of Die Hard and it's sequels...\n","21 I had the terrible misfortune of having to vie... ... I had the terrible misfortune of having to vie...\n","22 What an absolutely stunning movie, if you have... ... What an absolutely stunning movie, if you have...\n","23 First of all, let's get a few things straight ... ... First of all, let's get a few things straight ...\n","24 This was the worst movie I saw at WorldFest an... ... This was the worst movie I saw at WorldFest an...\n","25 The Karen Carpenter Story shows a little more ... ... The Karen Carpenter Story shows a little more ...\n","26 \"The Cell\" is an exotic masterpiece, a dizzyin... ... \"The Cell\" is an exotic masterpiece, a dizzyin...\n","27 This film tried to be too many things all at o... ... This film tried to be too many things all at o...\n","28 This movie was so frustrating. Everything seem... ... This movie was so frustrating. Everything seem...\n","29 'War movie' is a Hollywood genre that has been... ... 'War movie' is a Hollywood genre that has been...\n","30 Taut and organically gripping, Edward Dmytryk'... ... Taut and organically gripping, Edward Dmytryk'...\n","31 \"Ardh Satya\" is one of the finest film ever ma... ... 
\"Ardh Satya\" is one of the finest film ever ma...\n","32 My first exposure to the Templarios & not a go... ... My first exposure to the Templarios & not a go...\n","33 One of the most significant quotes from the en... ... One of the most significant quotes from the en...\n","34 I watched this film not really expecting much,... ... I watched this film not really expecting much,...\n","35 I bought this film at Blockbuster for $3.00, b... ... I bought this film at Blockbuster for $3.00, b...\n","36 The plot is about the death of little children... ... The plot is about the death of little children...\n","37 Ever watched a movie that lost the plot? Well,... ... Ever watched a movie that lost the plot? Well,...\n","38 Okay, so this series kind of takes the route o... ... Okay, so this series kind of takes the route o...\n","39 After sitting through this pile of dung, my hu... ... After sitting through this pile of dung, my hu...\n","40 It had all the clichés of movies of this type ... ... It had all the clichés of movies of this type ...\n","41 This movie is based on the book, \"A Many Splen... ... This movie is based on the book, \"A Many Splen...\n","42 Of all the films I have seen, this one, The Ra... ... Of all the films I have seen, this one, The Ra...\n","43 I had heard good things about \"States of Grace... ... I had heard good things about \"States of Grace...\n","44 This movie struck home for me. Being 29, I rem... ... This movie struck home for me. Being 29, I rem...\n","45 As a disclaimer, I've seen the movie 5-6 times... ... As a disclaimer, I've seen the movie 5-6 times...\n","46 Protocol is an implausible movie whose only sa... ... Protocol is an implausible movie whose only sa...\n","47 How this film could be classified as Drama, I ... ... How this film could be classified as Drama, I ...\n","48 Preston Sturgis' THE POWER AND THE GLORY was u... ... Preston Sturgis' THE POWER AND THE GLORY was u...\n","49 Average (and surprisingly tame) Fulci giallo w... 
... Average (and surprisingly tame) Fulci giallo w...\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609464663328,"user_tz":-300,"elapsed":2733,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ae604bdd-49fb-4b5e-978e-5190dd03b227"},"source":["fitted_pipe.predict('It was one of the best films i have ever watched in my entire life !!')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
default_name_embeddingssentimentsentiment_confidencedocument
origin_index
0[0.06468033790588379, -0.040837567299604416, -...positive0.982375Bitcoin is going to the moon!
\n","
"],"text/plain":[" default_name_embeddings ... document\n","origin_index ... \n","0 [0.06468033790588379, -0.040837567299604416, -... ... Bitcoin is going to the moon!\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609464663334,"user_tz":-300,"elapsed":31,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a9059197-9e1c-4afe-ca3b-97c6d310f60c"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609464673090,"user_tz":-300,"elapsed":9777,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"164e4f03-f48a-4347-95e8-fd3509bf146e"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.81 0.96 0.88 27\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.94 0.70 0.80 23\n","\n"," accuracy 0.84 50\n"," macro avg 0.58 0.55 0.56 50\n","weighted avg 0.87 0.84 0.84 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textdefault_name_embeddingssentimentsentiment_confidenceydocument
origin_index
0One of the other reviewers has mentioned that ...[-0.04935329407453537, -0.01034686528146267, -...positive0.966858positiveOne of the other reviewers has mentioned that ...
1A wonderful little production. <br /><br />The...[0.040489643812179565, -0.054199717938899994, ...negative0.985679positiveA wonderful little production. <br /><br />The...
2I thought this was a wonderful way to spend ti...[0.026364900171756744, 0.07112795859575272, 0....negative0.988745positiveI thought this was a wonderful way to spend ti...
3Basically there's a family where a little boy ...[-0.05151151493191719, 0.008207003585994244, -...negative0.999291negativeBasically there's a family where a little boy ...
4Petter Mattei's \"Love in the Time of Money\" is...[0.06880538165569305, 0.019250543788075447, -0...positive0.999684positivePetter Mattei's \"Love in the Time of Money\" is...
5Probably my all-time favorite movie, a story o...[0.004764211364090443, 0.027671916410326958, -...positive0.996598positiveProbably my all-time favorite movie, a story o...
6I sure would like to see a resurrection of a u...[-0.03813941031694412, -0.03322296217083931, 0...positive0.960203positiveI sure would like to see a resurrection of a u...
7This show was an amazing, fresh & innovative i...[0.010670202784240246, -0.04322813078761101, -...negative0.753273negativeThis show was an amazing, fresh & innovative i...
8Encouraged by the positive comments about this...[0.010801736265420914, -0.07724311947822571, -...negative0.958928negativeEncouraged by the positive comments about this...
9If you like original gut wrenching laughter yo...[-0.0245585348457098, 0.0005475765210576355, -...neutral0.536441positiveIf you like original gut wrenching laughter yo...
10Phil the Alien is one of those quirky films wh...[0.023403573781251907, 0.017464609816670418, -...negative0.959978negativePhil the Alien is one of those quirky films wh...
11I saw this movie when I was about 12 when it c...[-0.046517230570316315, -0.025949953123927116,...negative0.999949negativeI saw this movie when I was about 12 when it c...
12So im not a big fan of Boll's work but then ag...[0.0032458826899528503, -0.013339877128601074,...negative0.999997negativeSo im not a big fan of Boll's work but then ag...
13The cast played Shakespeare.<br /><br />Shakes...[0.044309284538030624, 0.061706289649009705, -...negative0.984033negativeThe cast played Shakespeare.<br /><br />Shakes...
14This a fantastic movie of three prisoners who ...[0.005487383343279362, -0.005359508562833071, ...positive0.775998positiveThis a fantastic movie of three prisoners who ...
15Kind of drawn in by the erotic scenes, only to...[0.04357790946960449, -0.034652918577194214, -...negative0.999683negativeKind of drawn in by the erotic scenes, only to...
16Some films just simply should not be remade. T...[0.006823724135756493, -0.0692802369594574, -0...negative0.999245positiveSome films just simply should not be remade. T...
17This movie made it into one of my top 10 most ...[-0.013747279532253742, -0.0038213622756302357...negative0.999970negativeThis movie made it into one of my top 10 most ...
18I remember this film,it was the first film i h...[-0.005101265385746956, 0.022435873746871948, ...positive0.975574positiveI remember this film,it was the first film i h...
19An awful film! It must have been up against so...[0.011224010959267616, -0.007102800067514181, ...negative0.999990negativeAn awful film! It must have been up against so...
20After the success of Die Hard and it's sequels...[0.022048521786928177, -0.020497862249612808, ...positive0.951596positiveAfter the success of Die Hard and it's sequels...
21I had the terrible misfortune of having to vie...[-0.010102338157594204, -0.05102328583598137, ...negative0.999999negativeI had the terrible misfortune of having to vie...
22What an absolutely stunning movie, if you have...[-0.016428396105766296, 0.007074637804180384, ...positive0.931946positiveWhat an absolutely stunning movie, if you have...
23First of all, let's get a few things straight ...[-0.06437410414218903, -0.029181038960814476, ...negative0.990350negativeFirst of all, let's get a few things straight ...
24This was the worst movie I saw at WorldFest an...[0.03901044651865959, 0.06355303525924683, -0....negative0.999986negativeThis was the worst movie I saw at WorldFest an...
25The Karen Carpenter Story shows a little more ...[-0.021897025406360626, 0.04400184750556946, 0...positive0.999463positiveThe Karen Carpenter Story shows a little more ...
26\"The Cell\" is an exotic masterpiece, a dizzyin...[0.0439823754131794, -0.007468021009117365, -0...positive0.998291positive\"The Cell\" is an exotic masterpiece, a dizzyin...
27This film tried to be too many things all at o...[-0.004155139438807964, -0.03771881386637688, ...negative0.865707negativeThis film tried to be too many things all at o...
28This movie was so frustrating. Everything seem...[0.015594013035297394, -0.007509331218898296, ...negative0.999998negativeThis movie was so frustrating. Everything seem...
29'War movie' is a Hollywood genre that has been...[-0.036022596061229706, -0.006816706154495478,...negative0.993793positive'War movie' is a Hollywood genre that has been...
30Taut and organically gripping, Edward Dmytryk'...[0.0312348585575819, -0.04670163244009018, -0....positive0.997460positiveTaut and organically gripping, Edward Dmytryk'...
31\"Ardh Satya\" is one of the finest film ever ma...[0.060114260762929916, -0.0590929239988327, -0...positive0.999880positive\"Ardh Satya\" is one of the finest film ever ma...
32My first exposure to the Templarios & not a go...[0.013515714555978775, -0.004898980725556612, ...negative1.000000negativeMy first exposure to the Templarios & not a go...
33One of the most significant quotes from the en...[0.022280631586909294, -0.00839739479124546, -...positive0.999292positiveOne of the most significant quotes from the en...
34I watched this film not really expecting much,...[0.009434111416339874, -0.046402934938669205, ...negative0.999848negativeI watched this film not really expecting much,...
35I bought this film at Blockbuster for $3.00, b...[0.011683089658617973, -0.047437384724617004, ...negative0.999993negativeI bought this film at Blockbuster for $3.00, b...
36The plot is about the death of little children...[-0.0348515659570694, 0.01680166646838188, -0....negative0.997690negativeThe plot is about the death of little children...
37Ever watched a movie that lost the plot? Well,...[-0.02899913117289543, 0.0164097361266613, -0....negative0.999995negativeEver watched a movie that lost the plot? Well,...
38Okay, so this series kind of takes the route o...[0.002110496163368225, 0.02887572906911373, -0...positive0.993408positiveOkay, so this series kind of takes the route o...
39After sitting through this pile of dung, my hu...[0.013781447894871235, -0.010363072156906128, ...negative0.905860negativeAfter sitting through this pile of dung, my hu...
40It had all the clichés of movies of this type ...[0.03799372911453247, -0.038665950298309326, -...negative0.999892negativeIt had all the clichés of movies of this type ...
41This movie is based on the book, \"A Many Splen...[-0.00033091730438172817, -0.05126418545842171...positive0.999837positiveThis movie is based on the book, \"A Many Splen...
42Of all the films I have seen, this one, The Ra...[0.014630819670855999, -0.04907294735312462, -...negative1.000000negativeOf all the films I have seen, this one, The Ra...
43I had heard good things about \"States of Grace...[0.027017194777727127, 0.002088379580527544, 0...negative0.978662negativeI had heard good things about \"States of Grace...
44This movie struck home for me. Being 29, I rem...[-0.0009387845057062805, -0.048219360411167145...negative0.993965positiveThis movie struck home for me. Being 29, I rem...
45As a disclaimer, I've seen the movie 5-6 times...[0.0065035647712647915, 0.00230638706125319, 0...negative0.999341positiveAs a disclaimer, I've seen the movie 5-6 times...
46Protocol is an implausible movie whose only sa...[0.05113476142287254, 0.04671141505241394, -0....negative0.913287negativeProtocol is an implausible movie whose only sa...
47How this film could be classified as Drama, I ...[0.011419376358389854, -0.0828876867890358, -0...negative0.999841negativeHow this film could be classified as Drama, I ...
48Preston Sturgis' THE POWER AND THE GLORY was u...[0.024031344801187515, 0.03399205952882767, 0....positive0.998516positivePreston Sturgis' THE POWER AND THE GLORY was u...
49Average (and surprisingly tame) Fulci giallo w...[0.015038557350635529, -0.0037642912939190865,...positive0.995483negativeAverage (and surprisingly tame) Fulci giallo w...
\n","
"],"text/plain":[" text ... document\n","origin_index ... \n","0 One of the other reviewers has mentioned that ... ... One of the other reviewers has mentioned that ...\n","1 A wonderful little production.

The... ... A wonderful little production.

The...\n","2 I thought this was a wonderful way to spend ti... ... I thought this was a wonderful way to spend ti...\n","3 Basically there's a family where a little boy ... ... Basically there's a family where a little boy ...\n","4 Petter Mattei's \"Love in the Time of Money\" is... ... Petter Mattei's \"Love in the Time of Money\" is...\n","5 Probably my all-time favorite movie, a story o... ... Probably my all-time favorite movie, a story o...\n","6 I sure would like to see a resurrection of a u... ... I sure would like to see a resurrection of a u...\n","7 This show was an amazing, fresh & innovative i... ... This show was an amazing, fresh & innovative i...\n","8 Encouraged by the positive comments about this... ... Encouraged by the positive comments about this...\n","9 If you like original gut wrenching laughter yo... ... If you like original gut wrenching laughter yo...\n","10 Phil the Alien is one of those quirky films wh... ... Phil the Alien is one of those quirky films wh...\n","11 I saw this movie when I was about 12 when it c... ... I saw this movie when I was about 12 when it c...\n","12 So im not a big fan of Boll's work but then ag... ... So im not a big fan of Boll's work but then ag...\n","13 The cast played Shakespeare.

Shakes... ... The cast played Shakespeare.

Shakes...\n","14 This a fantastic movie of three prisoners who ... ... This a fantastic movie of three prisoners who ...\n","15 Kind of drawn in by the erotic scenes, only to... ... Kind of drawn in by the erotic scenes, only to...\n","16 Some films just simply should not be remade. T... ... Some films just simply should not be remade. T...\n","17 This movie made it into one of my top 10 most ... ... This movie made it into one of my top 10 most ...\n","18 I remember this film,it was the first film i h... ... I remember this film,it was the first film i h...\n","19 An awful film! It must have been up against so... ... An awful film! It must have been up against so...\n","20 After the success of Die Hard and it's sequels... ... After the success of Die Hard and it's sequels...\n","21 I had the terrible misfortune of having to vie... ... I had the terrible misfortune of having to vie...\n","22 What an absolutely stunning movie, if you have... ... What an absolutely stunning movie, if you have...\n","23 First of all, let's get a few things straight ... ... First of all, let's get a few things straight ...\n","24 This was the worst movie I saw at WorldFest an... ... This was the worst movie I saw at WorldFest an...\n","25 The Karen Carpenter Story shows a little more ... ... The Karen Carpenter Story shows a little more ...\n","26 \"The Cell\" is an exotic masterpiece, a dizzyin... ... \"The Cell\" is an exotic masterpiece, a dizzyin...\n","27 This film tried to be too many things all at o... ... This film tried to be too many things all at o...\n","28 This movie was so frustrating. Everything seem... ... This movie was so frustrating. Everything seem...\n","29 'War movie' is a Hollywood genre that has been... ... 'War movie' is a Hollywood genre that has been...\n","30 Taut and organically gripping, Edward Dmytryk'... ... Taut and organically gripping, Edward Dmytryk'...\n","31 \"Ardh Satya\" is one of the finest film ever ma... ... 
\"Ardh Satya\" is one of the finest film ever ma...\n","32 My first exposure to the Templarios & not a go... ... My first exposure to the Templarios & not a go...\n","33 One of the most significant quotes from the en... ... One of the most significant quotes from the en...\n","34 I watched this film not really expecting much,... ... I watched this film not really expecting much,...\n","35 I bought this film at Blockbuster for $3.00, b... ... I bought this film at Blockbuster for $3.00, b...\n","36 The plot is about the death of little children... ... The plot is about the death of little children...\n","37 Ever watched a movie that lost the plot? Well,... ... Ever watched a movie that lost the plot? Well,...\n","38 Okay, so this series kind of takes the route o... ... Okay, so this series kind of takes the route o...\n","39 After sitting through this pile of dung, my hu... ... After sitting through this pile of dung, my hu...\n","40 It had all the clichés of movies of this type ... ... It had all the clichés of movies of this type ...\n","41 This movie is based on the book, \"A Many Splen... ... This movie is based on the book, \"A Many Splen...\n","42 Of all the films I have seen, this one, The Ra... ... Of all the films I have seen, this one, The Ra...\n","43 I had heard good things about \"States of Grace... ... I had heard good things about \"States of Grace...\n","44 This movie struck home for me. Being 29, I rem... ... This movie struck home for me. Being 29, I rem...\n","45 As a disclaimer, I've seen the movie 5-6 times... ... As a disclaimer, I've seen the movie 5-6 times...\n","46 Protocol is an implausible movie whose only sa... ... Protocol is an implausible movie whose only sa...\n","47 How this film could be classified as Drama, I ... ... How this film could be classified as Drama, I ...\n","48 Preston Sturgis' THE POWER AND THE GLORY was u... ... Preston Sturgis' THE POWER AND THE GLORY was u...\n","49 Average (and surprisingly tame) Fulci giallo w... 
... Average (and surprisingly tame) Fulci giallo w...\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"IKK_Ii_gjJfF","executionInfo":{"status":"ok","timestamp":1609469926255,"user_tz":-300,"elapsed":140492,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"64f54fdd-699a-4559-f6e4-74b7b5f3e92e"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(120) \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.85 0.81 0.83 1234\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.87 0.79 0.83 1266\n","\n"," accuracy 0.80 2500\n"," macro avg 0.57 0.54 0.55 2500\n","weighted avg 0.86 0.80 0.83 2500\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","executionInfo":{"status":"ok","timestamp":1609470097011,"user_tz":-300,"elapsed":170766,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"59899be4-e33a-4b5e-ff37-df6a9a3994b2"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! 
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":127},"id":"SO4uz45MoRgp","executionInfo":{"status":"ok","timestamp":1609470112616,"user_tz":-300,"elapsed":15622,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"cbe7fc37-7794-4c28-d1de-5ba88d3db58b"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('It was one of the best films i have ever watched in my entire life !!')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentimenten_embed_sentence_small_bert_L12_768_embeddingssentiment_confidencedocument
origin_index
0positive[0.09222018718719482, 0.11720675230026245, 0.1...0.999543It was one of the best films i have ever watch...
\n","
"],"text/plain":[" sentiment ... document\n","origin_index ... \n","0 positive ... It was one of the best films i have ever watch...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"e0CVlkk9v6Qi","executionInfo":{"status":"ok","timestamp":1609470112618,"user_tz":-300,"elapsed":17,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c7467f93-d619-470f-fd40-c2be1805b83f"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_apple_twitter.ipynb b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_apple_twitter.ipynb index e2b1cd02..3808340d 100644 --- a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_apple_twitter.ipynb +++ b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_apple_twitter.ipynb @@ -1 +1 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_apple_twitter.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"RIV-9vEqxTBB"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\r\n","\r\n","[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_apple_twitter.ipynb)\r\n","\r\n","\r\n","\r\n","# Training a Sentiment Analysis Classifier with NLU \r\n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \r\n","\r\n","This notebook showcases the following features : \r\n","\r\n","- How to train the deep learning classifier\r\n","- How to store a pipeline to disk\r\n","- How to load the pipeline from disk (Enables NLU offline mode)\r\n","\r\n"]},{"cell_type":"code","metadata":{"id":"05-mAOF6ol-0"},"source":["import os\r\n","from sklearn.metrics import classification_report\r\n","! apt-get update -qq > /dev/null \r\n","# Install java\r\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\r\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\r\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\r\n","! pip install nlu pyspark==2.4.7 > /dev/null \r\n","\r\n","\r\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download appple twitter Sentiment dataset \n","https://www.kaggle.com/seriousran/appletwittersentimenttexts\n","\n","this dataset contains tweets made towards apple and today we are going to train our model to predict whether the tweet contains sentiment!\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1609468082890,"user_tz":-300,"elapsed":77740,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a791d4cf-bfa3-4cc6-a60d-c885afe2e917"},"source":["! wget https://raw.githubusercontent.com/ahmedlone127/nlu-master/main/apple-twitter-sentiment-texts.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-01 02:27:38-- https://raw.githubusercontent.com/ahmedlone127/nlu-master/main/apple-twitter-sentiment-texts.csv\n","Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n","Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 31678 (31K) [text/plain]\n","Saving to: ‘apple-twitter-sentiment-texts.csv’\n","\n","apple-twitter-senti 100%[===================>] 30.94K --.-KB/s in 0.002s \n","\n","2021-01-01 02:27:39 (12.9 MB/s) - ‘apple-twitter-sentiment-texts.csv’ saved [31678/31678]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1609468083287,"user_tz":-300,"elapsed":78124,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1a23969f-abf0-4bc3-e2ec-0879b2b77cad"},"source":["import pandas as pd\n","train_path = '/content/apple-twitter-sentiment-texts.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must have name 'y' name be of type str\n","columns=['text','y']\n","train_df = train_df[columns]\n","train_df = train_df[~train_df[\"y\"].isin([\"neuteral\"])]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
texty
0@Apple you need to sort your phones out.negative
1Wow. Yall needa step it up @Apple RT @heynyla:...negative
2I'm surprised there isn't more talk about what...negative
3Realised the reason @apple make huge phones is...negative
4Apple Inc. CEO Donates $291K To Pennsylvania S...positive
.........
281@apple so thanks for being greedy assholes who...negative
282@apple iCal AGAIN!!! it reset all my recurring...negative
283Just did my first transaction with @Apple Pay ...positive
284RT @JPDesloges: Kantar Worldpanel: iPhone sale...positive
285Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...positive
\n","

286 rows × 2 columns

\n","
"],"text/plain":[" text y\n","0 @Apple you need to sort your phones out. negative\n","1 Wow. Yall needa step it up @Apple RT @heynyla:... negative\n","2 I'm surprised there isn't more talk about what... negative\n","3 Realised the reason @apple make huge phones is... negative\n","4 Apple Inc. CEO Donates $291K To Pennsylvania S... positive\n",".. ... ...\n","281 @apple so thanks for being greedy assholes who... negative\n","282 @apple iCal AGAIN!!! it reset all my recurring... negative\n","283 Just did my first transaction with @Apple Pay ... positive\n","284 RT @JPDesloges: Kantar Worldpanel: iPhone sale... positive\n","285 Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc... positive\n","\n","[286 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":845},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609468191792,"user_tz":-300,"elapsed":186618,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"41d18f44-64e1-4766-a8cf-4545813930d7"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix and fit it on a datset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.91 0.80 0.85 143\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.82 0.91 0.86 143\n","\n"," accuracy 0.86 286\n"," macro avg 0.58 0.57 0.57 286\n","weighted avg 0.86 0.86 0.86 286\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidenceydefault_name_embeddingstextdocumentsentiment
origin_index
00.998447negative[-0.01731022447347641, 0.010604134760797024, -...@Apple you need to sort your phones out.@Apple you need to sort your phones out.negative
10.990570negative[0.019931159913539886, -0.04991159215569496, -...Wow. Yall needa step it up @Apple RT @heynyla:...Wow. Yall needa step it up @Apple RT @heynyla:...positive
20.969844negative[0.01646081730723381, -0.02681073546409607, -0...I'm surprised there isn't more talk about what...I'm surprised there isn't more talk about what...negative
30.996128negative[0.04638500511646271, -0.037105873227119446, -...Realised the reason @apple make huge phones is...Realised the reason @apple make huge phones is...negative
40.959235positive[-0.028623634949326515, 0.03947276994585991, -...Apple Inc. CEO Donates $291K To Pennsylvania S...Apple Inc. CEO Donates $291K To Pennsylvania S...positive
.....................
2810.978435negative[0.03778046742081642, 0.03407461196184158, 0.0...@apple so thanks for being greedy assholes who...@apple so thanks for being greedy assholes who...negative
2820.623791negative[-0.013547728769481182, -0.001025827950797975,...@apple iCal AGAIN!!! it reset all my recurring...@apple iCal AGAIN!!! it reset all my recurring...positive
2830.999104positive[-0.0015363194979727268, -0.01644994132220745,...Just did my first transaction with @Apple Pay ...Just did my first transaction with @Apple Pay ...positive
2840.999854positive[0.0656985342502594, 0.028557728976011276, -0....RT @JPDesloges: Kantar Worldpanel: iPhone sale...RT @JPDesloges: Kantar Worldpanel: iPhone sale...positive
2850.983244positive[0.02311933971941471, 0.05785432830452919, -0....Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...positive
\n","

286 rows × 6 columns

\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.998447 ... negative\n","1 0.990570 ... positive\n","2 0.969844 ... negative\n","3 0.996128 ... negative\n","4 0.959235 ... positive\n","... ... ... ...\n","281 0.978435 ... negative\n","282 0.623791 ... positive\n","283 0.999104 ... positive\n","284 0.999854 ... positive\n","285 0.983244 ... positive\n","\n","[286 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"id":"qdCUg2MR0PD2","colab":{"base_uri":"https://localhost:8080/","height":110},"executionInfo":{"status":"ok","timestamp":1609468194339,"user_tz":-300,"elapsed":189158,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"00d8c7b6-22e1-4979-8c51-58471540a3dd"},"source":["fitted_pipe.predict('I hate the newest update')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedefault_name_embeddingsdocumentsentiment
origin_index
00.996097[0.06468033790588379, -0.040837567299604416, -...Bitcoin is going to the moon!positive
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.996097 ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"id":"UtsAUGTmOTms","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609468194341,"user_tz":-300,"elapsed":189154,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3ab00ec5-5894-400f-c6c9-e32099fed1f5"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. 
| Currently set to : neutral\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"id":"mptfvHx-MMMX","colab":{"base_uri":"https://localhost:8080/","height":793},"executionInfo":{"status":"ok","timestamp":1609468205048,"user_tz":-300,"elapsed":199854,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"9c9a1628-3034-4be0-94bc-7c109d2c3263"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:100])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = 
fitted_pipe.predict(train_df.iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.96 0.85 0.90 143\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.87 0.95 0.91 143\n","\n"," accuracy 0.90 286\n"," macro avg 0.61 0.60 0.60 286\n","weighted avg 0.92 0.90 0.91 286\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidenceydefault_name_embeddingstextdocumentsentiment
origin_index
00.999738negative[-0.01731022447347641, 0.010604134760797024, -...@Apple you need to sort your phones out.@Apple you need to sort your phones out.negative
10.937319negative[0.019931159913539886, -0.04991159215569496, -...Wow. Yall needa step it up @Apple RT @heynyla:...Wow. Yall needa step it up @Apple RT @heynyla:...positive
20.974594negative[0.01646081730723381, -0.02681073546409607, -0...I'm surprised there isn't more talk about what...I'm surprised there isn't more talk about what...negative
30.997196negative[0.04638500511646271, -0.037105873227119446, -...Realised the reason @apple make huge phones is...Realised the reason @apple make huge phones is...negative
40.709098positive[-0.028623634949326515, 0.03947276994585991, -...Apple Inc. CEO Donates $291K To Pennsylvania S...Apple Inc. CEO Donates $291K To Pennsylvania S...positive
.....................
2810.984257negative[0.03778046742081642, 0.03407461196184158, 0.0...@apple so thanks for being greedy assholes who...@apple so thanks for being greedy assholes who...negative
2820.904880negative[-0.013547728769481182, -0.001025827950797975,...@apple iCal AGAIN!!! it reset all my recurring...@apple iCal AGAIN!!! it reset all my recurring...negative
2830.995687positive[-0.0015363194979727268, -0.01644994132220745,...Just did my first transaction with @Apple Pay ...Just did my first transaction with @Apple Pay ...positive
2840.998746positive[0.0656985342502594, 0.028557728976011276, -0....RT @JPDesloges: Kantar Worldpanel: iPhone sale...RT @JPDesloges: Kantar Worldpanel: iPhone sale...positive
2850.710708positive[0.02311933971941471, 0.05785432830452919, -0....Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...positive
\n","

286 rows × 6 columns

\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.999738 ... negative\n","1 0.937319 ... positive\n","2 0.974594 ... negative\n","3 0.997196 ... negative\n","4 0.709098 ... positive\n","... ... ... ...\n","281 0.984257 ... negative\n","282 0.904880 ... negative\n","283 0.995687 ... positive\n","284 0.998746 ... positive\n","285 0.710708 ... positive\n","\n","[286 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609468205058,"user_tz":-300,"elapsed":199858,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"11560398-8fb9-4110-aed3-f7d9c1f71268"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609468415116,"user_tz":-300,"elapsed":409908,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b8e4f245-595a-40f3-9e1d-76f71e76b74e"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train longer 
and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(110) \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.96 0.85 0.90 143\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.92 0.92 0.92 143\n","\n"," accuracy 0.88 286\n"," macro avg 0.63 0.59 0.61 286\n","weighted avg 0.94 0.88 0.91 286\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"id":"bZZpObLOtqo8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609468632998,"user_tz":-300,"elapsed":627783,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e6a87d34-ce84-4968-c3a0-9aade476874b"},"source":["stored_model_path = './models/classifier_dl_trained' \r\n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! 
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":127},"executionInfo":{"status":"ok","timestamp":1609468646911,"user_tz":-300,"elapsed":641690,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"454b2c7d-7c32-4cc2-cf25-a52b5a879abd"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('I hate the newest update')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidenceen_embed_sentence_small_bert_L12_768_embeddingsdocumentsentiment
origin_index
00.974083[-0.058236218988895416, -0.3061041235923767, 0...I hate itnegative
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.974083 ... negative\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609468646914,"user_tz":-300,"elapsed":641685,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"80ce5918-3803-45f4-e10f-300144342295"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. 
| Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_apple_twitter.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"RIV-9vEqxTBB"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\r\n","\r\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_apple_twitter.ipynb)\r\n","\r\n","\r\n","\r\n","# Training a Sentiment Analysis Classifier with NLU \r\n","## 2 class Apple Tweets sentiment classifier training\r\n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \r\n","\r\n","This notebook showcases the following features : \r\n","\r\n","- How to train the deep learning classifier\r\n","- How to store a pipeline to disk\r\n","- How to load the pipeline from disk (Enables NLU offline mode)\r\n","\r\n"]},{"cell_type":"code","metadata":{"id":"05-mAOF6ol-0"},"source":["import os\r\n","from sklearn.metrics import classification_report\r\n","! apt-get update -qq > /dev/null \r\n","# Install java\r\n","! 
apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\r\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\r\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\r\n","! pip install nlu pyspark==2.4.7 > /dev/null \r\n","\r\n","\r\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download the Apple Twitter sentiment dataset \n","https://www.kaggle.com/seriousran/appletwittersentimenttexts\n","\n","This dataset contains tweets about Apple. We are going to train our model to predict whether a tweet's sentiment is positive or negative.\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1609468082890,"user_tz":-300,"elapsed":77740,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a791d4cf-bfa3-4cc6-a60d-c885afe2e917"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/apple-twitter-sentiment-texts.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-01 02:27:38-- https://raw.githubusercontent.com/ahmedlone127/nlu-master/main/apple-twitter-sentiment-texts.csv\n","Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n","Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 31678 (31K) [text/plain]\n","Saving to: ‘apple-twitter-sentiment-texts.csv’\n","\n","apple-twitter-senti 100%[===================>] 30.94K --.-KB/s in 0.002s \n","\n","2021-01-01 02:27:39 (12.9 MB/s) - ‘apple-twitter-sentiment-texts.csv’ saved [31678/31678]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1609468083287,"user_tz":-300,"elapsed":78124,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1a23969f-abf0-4bc3-e2ec-0879b2b77cad"},"source":["import pandas as pd\n","train_path = '/content/apple-twitter-sentiment-texts.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must be named 'y' and be of type str\n","columns=['text','y']\n","train_df = train_df[columns]\n","train_df = train_df[~train_df[\"y\"].isin([\"neuteral\"])]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
texty
0@Apple you need to sort your phones out.negative
1Wow. Yall needa step it up @Apple RT @heynyla:...negative
2I'm surprised there isn't more talk about what...negative
3Realised the reason @apple make huge phones is...negative
4Apple Inc. CEO Donates $291K To Pennsylvania S...positive
.........
281@apple so thanks for being greedy assholes who...negative
282@apple iCal AGAIN!!! it reset all my recurring...negative
283Just did my first transaction with @Apple Pay ...positive
284RT @JPDesloges: Kantar Worldpanel: iPhone sale...positive
285Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...positive
\n","

286 rows × 2 columns

\n","
"],"text/plain":[" text y\n","0 @Apple you need to sort your phones out. negative\n","1 Wow. Yall needa step it up @Apple RT @heynyla:... negative\n","2 I'm surprised there isn't more talk about what... negative\n","3 Realised the reason @apple make huge phones is... negative\n","4 Apple Inc. CEO Donates $291K To Pennsylvania S... positive\n",".. ... ...\n","281 @apple so thanks for being greedy assholes who... negative\n","282 @apple iCal AGAIN!!! it reset all my recurring... negative\n","283 Just did my first transaction with @Apple Pay ... positive\n","284 RT @JPDesloges: Kantar Worldpanel: iPhone sale... positive\n","285 Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc... positive\n","\n","[286 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":845},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609468191792,"user_tz":-300,"elapsed":186618,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"41d18f44-64e1-4766-a8cf-4545813930d7"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix and fit it on a dataset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates some NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.91 0.80 0.85 143\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.82 0.91 0.86 143\n","\n"," accuracy 0.86 286\n"," macro avg 0.58 0.57 0.57 286\n","weighted avg 0.86 0.86 0.86 286\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidenceydefault_name_embeddingstextdocumentsentiment
origin_index
00.998447negative[-0.01731022447347641, 0.010604134760797024, -...@Apple you need to sort your phones out.@Apple you need to sort your phones out.negative
10.990570negative[0.019931159913539886, -0.04991159215569496, -...Wow. Yall needa step it up @Apple RT @heynyla:...Wow. Yall needa step it up @Apple RT @heynyla:...positive
20.969844negative[0.01646081730723381, -0.02681073546409607, -0...I'm surprised there isn't more talk about what...I'm surprised there isn't more talk about what...negative
30.996128negative[0.04638500511646271, -0.037105873227119446, -...Realised the reason @apple make huge phones is...Realised the reason @apple make huge phones is...negative
40.959235positive[-0.028623634949326515, 0.03947276994585991, -...Apple Inc. CEO Donates $291K To Pennsylvania S...Apple Inc. CEO Donates $291K To Pennsylvania S...positive
.....................
2810.978435negative[0.03778046742081642, 0.03407461196184158, 0.0...@apple so thanks for being greedy assholes who...@apple so thanks for being greedy assholes who...negative
2820.623791negative[-0.013547728769481182, -0.001025827950797975,...@apple iCal AGAIN!!! it reset all my recurring...@apple iCal AGAIN!!! it reset all my recurring...positive
2830.999104positive[-0.0015363194979727268, -0.01644994132220745,...Just did my first transaction with @Apple Pay ...Just did my first transaction with @Apple Pay ...positive
2840.999854positive[0.0656985342502594, 0.028557728976011276, -0....RT @JPDesloges: Kantar Worldpanel: iPhone sale...RT @JPDesloges: Kantar Worldpanel: iPhone sale...positive
2850.983244positive[0.02311933971941471, 0.05785432830452919, -0....Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...positive
\n","

286 rows × 6 columns

\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.998447 ... negative\n","1 0.990570 ... positive\n","2 0.969844 ... negative\n","3 0.996128 ... negative\n","4 0.959235 ... positive\n","... ... ... ...\n","281 0.978435 ... negative\n","282 0.623791 ... positive\n","283 0.999104 ... positive\n","284 0.999854 ... positive\n","285 0.983244 ... positive\n","\n","[286 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"id":"qdCUg2MR0PD2","colab":{"base_uri":"https://localhost:8080/","height":110},"executionInfo":{"status":"ok","timestamp":1609468194339,"user_tz":-300,"elapsed":189158,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"00d8c7b6-22e1-4979-8c51-58471540a3dd"},"source":["fitted_pipe.predict('I hate the newest update')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedefault_name_embeddingsdocumentsentiment
origin_index
00.996097[0.06468033790588379, -0.040837567299604416, -...Bitcoin is going to the moon!positive
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.996097 ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"id":"UtsAUGTmOTms","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609468194341,"user_tz":-300,"elapsed":189154,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3ab00ec5-5894-400f-c6c9-e32099fed1f5"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. 
| Currently set to : neutral\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"id":"mptfvHx-MMMX","colab":{"base_uri":"https://localhost:8080/","height":793},"executionInfo":{"status":"ok","timestamp":1609468205048,"user_tz":-300,"elapsed":199854,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"9c9a1628-3034-4be0-94bc-7c109d2c3263"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:100])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = 
fitted_pipe.predict(train_df.iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.96 0.85 0.90 143\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.87 0.95 0.91 143\n","\n"," accuracy 0.90 286\n"," macro avg 0.61 0.60 0.60 286\n","weighted avg 0.92 0.90 0.91 286\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidenceydefault_name_embeddingstextdocumentsentiment
origin_index
00.999738negative[-0.01731022447347641, 0.010604134760797024, -...@Apple you need to sort your phones out.@Apple you need to sort your phones out.negative
10.937319negative[0.019931159913539886, -0.04991159215569496, -...Wow. Yall needa step it up @Apple RT @heynyla:...Wow. Yall needa step it up @Apple RT @heynyla:...positive
20.974594negative[0.01646081730723381, -0.02681073546409607, -0...I'm surprised there isn't more talk about what...I'm surprised there isn't more talk about what...negative
30.997196negative[0.04638500511646271, -0.037105873227119446, -...Realised the reason @apple make huge phones is...Realised the reason @apple make huge phones is...negative
40.709098positive[-0.028623634949326515, 0.03947276994585991, -...Apple Inc. CEO Donates $291K To Pennsylvania S...Apple Inc. CEO Donates $291K To Pennsylvania S...positive
.....................
2810.984257negative[0.03778046742081642, 0.03407461196184158, 0.0...@apple so thanks for being greedy assholes who...@apple so thanks for being greedy assholes who...negative
2820.904880negative[-0.013547728769481182, -0.001025827950797975,...@apple iCal AGAIN!!! it reset all my recurring...@apple iCal AGAIN!!! it reset all my recurring...negative
2830.995687positive[-0.0015363194979727268, -0.01644994132220745,...Just did my first transaction with @Apple Pay ...Just did my first transaction with @Apple Pay ...positive
2840.998746positive[0.0656985342502594, 0.028557728976011276, -0....RT @JPDesloges: Kantar Worldpanel: iPhone sale...RT @JPDesloges: Kantar Worldpanel: iPhone sale...positive
2850.710708positive[0.02311933971941471, 0.05785432830452919, -0....Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...positive
\n","

286 rows × 6 columns

\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.999738 ... negative\n","1 0.937319 ... positive\n","2 0.974594 ... negative\n","3 0.997196 ... negative\n","4 0.709098 ... positive\n","... ... ... ...\n","281 0.984257 ... negative\n","282 0.904880 ... negative\n","283 0.995687 ... positive\n","284 0.998746 ... positive\n","285 0.710708 ... positive\n","\n","[286 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609468205058,"user_tz":-300,"elapsed":199858,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"11560398-8fb9-4110-aed3-f7d9c1f71268"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possible sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609468415116,"user_tz":-300,"elapsed":409908,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b8e4f245-595a-40f3-9e1d-76f71e76b74e"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train longer 
and use a smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(110) \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.96 0.85 0.90 143\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.92 0.92 0.92 143\n","\n"," accuracy 0.88 286\n"," macro avg 0.63 0.59 0.61 286\n","weighted avg 0.94 0.88 0.91 286\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"id":"bZZpObLOtqo8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609468632998,"user_tz":-300,"elapsed":627783,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e6a87d34-ce84-4968-c3a0-9aade476874b"},"source":["stored_model_path = './models/classifier_dl_trained' \r\n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offline NLU usage possible! 
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":127},"executionInfo":{"status":"ok","timestamp":1609468646911,"user_tz":-300,"elapsed":641690,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"454b2c7d-7c32-4cc2-cf25-a52b5a879abd"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('I hate the newest update')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidenceen_embed_sentence_small_bert_L12_768_embeddingsdocumentsentiment
origin_index
00.974083[-0.058236218988895416, -0.3061041235923767, 0...I hate itnegative
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.974083 ... negative\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609468646914,"user_tz":-300,"elapsed":641685,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"80ce5918-3803-45f4-e10f-300144342295"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. 
| Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_finanical_news.ipynb b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_finanical_news.ipynb index 4d0dd044..81f918c0 100644 --- a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_finanical_news.ipynb +++ b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_finanical_news.ipynb @@ -1 +1 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_finanical_news.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_finanical_news.ipynb)\n","\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- 
How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu pyspark==2.4.7 > /dev/null \n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download Finanical News Sentiment dataset \n","https://www.kaggle.com/ankurzing/sentiment-analysis-for-financial-news\n","\n","This dataset contains the sentiments for financial news headlines from the perspective of a retail investor. Further details about the dataset can be found in: Malo, P., Sinha, A., Takala, P., Korhonen, P. and Wallenius, J. (2014): “Good debt or bad debt: Detecting semantic orientations in economic texts.” Journal of the American Society for Information Science and Technology."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610788018304,"user_tz":-300,"elapsed":2399,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"f494fab0-8f9c-4087-f554-31a21764a207"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/all-data.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:06:20-- http://ckl-it.de/wp-content/uploads/2021/01/all-data.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... 
connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 704799 (688K) [text/csv]\n","Saving to: ‘all-data.csv’\n","\n","all-data.csv 100%[===================>] 688.28K 1.09MB/s in 0.6s \n","\n","2021-01-16 09:06:21 (1.09 MB/s) - ‘all-data.csv’ saved [704799/704799]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610788018314,"user_tz":-300,"elapsed":660,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e1e2496a-8df8-4e5d-db53-63d62ef1f050"},"source":["import pandas as pd\n","train_path = '/content/all-data.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must have name 'y' name be of type str\n","columns=['text','y']\n","train_df = train_df[columns]\n","train_df = train_df[~train_df[\"y\"].isin([\"neutral\"])]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
texty
1The international electronic industry company ...negative
2With the new production plant the company woul...positive
3According to the company 's updated strategy f...positive
4FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...positive
5For the last quarter of 2010 , Componenta 's n...positive
.........
4839HELSINKI Thomson Financial - Shares in Cargote...negative
4840LONDON MarketWatch -- Share prices ended lower...negative
4842Operating profit fell to EUR 35.4 mn from EUR ...negative
4843Net sales of the Paper segment decreased to EU...negative
4844Sales in Finland decreased by 10.5 % in Januar...negative
\n","

1967 rows × 2 columns

\n","
"],"text/plain":[" text y\n","1 The international electronic industry company ... negative\n","2 With the new production plant the company woul... positive\n","3 According to the company 's updated strategy f... positive\n","4 FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag... positive\n","5 For the last quarter of 2010 , Componenta 's n... positive\n","... ... ...\n","4839 HELSINKI Thomson Financial - Shares in Cargote... negative\n","4840 LONDON MarketWatch -- Share prices ended lower... negative\n","4842 Operating profit fell to EUR 35.4 mn from EUR ... negative\n","4843 Net sales of the Paper segment decreased to EU... negative\n","4844 Sales in Finland decreased by 10.5 % in Januar... negative\n","\n","[1967 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609264914996,"user_tz":-300,"elapsed":191025,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"6dc536e4-252e-4324-e070-cd477a79330d"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix and fit it on a datset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.00 0.00 0.00 1\n"," positive 0.98 1.00 0.99 49\n","\n"," accuracy 0.98 50\n"," macro avg 0.49 0.50 0.49 50\n","weighted avg 0.96 0.98 0.97 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
documentsentimentysentiment_confidencetextdefault_name_embeddings
origin_index
1The international electronic industry company ...positivenegative1.000000The international electronic industry company ...[0.002136496128514409, 0.07194118946790695, -0...
2With the new production plant the company woul...positivepositive1.000000With the new production plant the company woul...[0.05198746547102928, 0.03577739745378494, -0....
3According to the company 's updated strategy f...positivepositive1.000000According to the company 's updated strategy f...[0.03416536748409271, 0.04053246229887009, -0....
4FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...positivepositive1.000000FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...[0.07730763405561447, -0.045694783329963684, -...
5For the last quarter of 2010 , Componenta 's n...positivepositive1.000000For the last quarter of 2010 , Componenta 's n...[0.05603468790650368, 0.04817350581288338, -0....
6In the third quarter of 2010 , net sales incre...positivepositive1.000000In the third quarter of 2010 , net sales incre...[0.037710510194301605, 0.037198420614004135, -...
7Operating profit rose to EUR 13.1 mn from EUR ...positivepositive1.000000Operating profit rose to EUR 13.1 mn from EUR ...[0.04557091370224953, 0.0453636609017849, -0.0...
8Operating profit totalled EUR 21.1 mn , up fro...positivepositive1.000000Operating profit totalled EUR 21.1 mn , up fro...[0.05191247910261154, 0.059505216777324677, -0...
9TeliaSonera TLSN said the offer is in line wit...positivepositive1.000000TeliaSonera TLSN said the offer is in line wit...[0.07441692799329758, -0.0487477071583271, -0....
10STORA ENSO , NORSKE SKOG , M-REAL , UPM-KYMMEN...positivepositive1.000000STORA ENSO , NORSKE SKOG , M-REAL , UPM-KYMMEN...[0.03200741112232208, 0.03773287683725357, -0....
11A purchase agreement for 7,200 tons of gasolin...positivepositive1.000000A purchase agreement for 7,200 tons of gasolin...[0.05590442568063736, 0.041032955050468445, -0...
12Finnish Talentum reports its operating profit ...positivepositive1.000000Finnish Talentum reports its operating profit ...[0.06596074998378754, 0.05897102504968643, -0....
13Clothing retail chain Sepp+Æl+Æ 's sales incre...positivepositive1.000000Clothing retail chain Sepp+Æl+Æ 's sales incre...[0.03395465016365051, 0.05171804875135422, 0.0...
14Consolidated net sales increased 16 % to reach...positivepositive1.000000Consolidated net sales increased 16 % to reach...[0.060446273535490036, 0.03799470514059067, -0...
15Foundries division reports its sales increased...positivepositive1.000000Foundries division reports its sales increased...[0.0494563989341259, 0.05158388614654541, -0.0...
16HELSINKI ( AFX ) - Shares closed higher , led ...positivepositive1.000000HELSINKI ( AFX ) - Shares closed higher , led ...[0.0629865899682045, -0.045351240783929825, -0...
17Incap Contract Manufacturing Services Pvt Ltd ...positivepositive1.000000Incap Contract Manufacturing Services Pvt Ltd ...[0.05365738272666931, -0.055247869342565536, -...
18Its board of directors will propose a dividend...positivepositive1.000000Its board of directors will propose a dividend...[0.0692642331123352, 0.02292279154062271, -0.0...
19Lifetree was founded in 2000 , and its revenue...positivepositive1.000000Lifetree was founded in 2000 , and its revenue...[0.0810408890247345, 0.039108917117118835, -0....
20( Filippova ) A trilateral agreement on invest...positivepositive0.999998( Filippova ) A trilateral agreement on invest...[0.05172618478536606, 0.02967883087694645, -0....
21MegaFon 's subscriber base increased 16.1 % in...positivepositive1.000000MegaFon 's subscriber base increased 16.1 % in...[0.03825156390666962, 0.001971189398318529, -0...
22Net income from life insurance doubled to EUR ...positivepositive1.000000Net income from life insurance doubled to EUR ...[0.05222763866186142, 0.05695151165127754, -0....
23Net sales increased to EUR193 .3 m from EUR179...positivepositive1.000000Net sales increased to EUR193 .3 m from EUR179...[0.02272764965891838, 0.016222774982452393, 0....
24Net sales surged by 18.5 % to EUR167 .8 m. Tel...positivepositive1.000000Net sales surged by 18.5 % to EUR167 .8 m. Tel...[0.05020830035209656, 0.03307913616299629, -0....
25Nordea Group 's operating profit increased in ...positivepositive1.000000Nordea Group 's operating profit increased in ...[0.0497022308409214, 0.023793146014213562, -0....
26Operating profit for the nine-month period inc...positivepositive1.000000Operating profit for the nine-month period inc...[0.04339126497507095, 0.024815633893013, -0.02...
27Operating profit for the nine-month period inc...positivepositive1.000000Operating profit for the nine-month period inc...[0.035663120448589325, 0.03037247434258461, -0...
28Operating profit for the three-month period in...positivepositive1.000000Operating profit for the three-month period in...[0.029575243592262268, 0.007764187641441822, -...
29The Brazilian unit of Finnish security solutio...positivepositive1.000000The Brazilian unit of Finnish security solutio...[0.047570426017045975, -0.023694489151239395, ...
30The company 's net profit rose 11.4 % on the y...positivepositive1.000000The company 's net profit rose 11.4 % on the y...[0.06896018236875534, 0.046189870685338974, -0...
31The Lithuanian beer market made up 14.41 milli...positivepositive0.999999The Lithuanian beer market made up 14.41 milli...[0.0020184037275612354, -0.044685497879981995,...
32Viking Line 's cargo revenue increased by 5.4 ...positivepositive1.000000Viking Line 's cargo revenue increased by 5.4 ...[-0.007756179664283991, -0.04868081212043762, ...
33The fair value of the property portfolio doubl...positivepositive1.000000The fair value of the property portfolio doubl...[0.06604734063148499, -0.025070184841752052, 0...
3410 February 2011 - Finnish media company Sanom...positivepositive1.00000010 February 2011 - Finnish media company Sanom...[0.05996786803007126, 0.03255663812160492, -0....
35A Helsinki : ELIiV today reported EPS of EUR1 ...positivepositive0.999999A Helsinki : ELIiV today reported EPS of EUR1 ...[0.051878154277801514, -0.03290269523859024, -...
36Aspo Plc STOCK EXCHANGE RELEASE February 11 , ...positivepositive1.000000Aspo Plc STOCK EXCHANGE RELEASE February 11 , ...[0.03545805439352989, -0.04956813529133797, -0...
37Commission income increased by 22 % to EUR 4.4...positivepositive1.000000Commission income increased by 22 % to EUR 4.4...[0.05664118379354477, 0.004533933009952307, -0...
38In January , traffic , measured in revenue pas...positivepositive1.000000In January , traffic , measured in revenue pas...[-0.026962362229824066, 0.010590712539851665, ...
39In January-September 2010 , Fiskars ' net prof...positivepositive1.000000In January-September 2010 , Fiskars ' net prof...[0.056088510900735855, 0.0369233600795269, -0....
40Net income from life insurance rose to EUR 16....positivepositive1.000000Net income from life insurance rose to EUR 16....[0.05793088302016258, 0.06312950700521469, -0....
41Nyrstar has also agreed to supply to Talvivaar...positivepositive1.000000Nyrstar has also agreed to supply to Talvivaar...[0.004785533994436264, 0.004442625679075718, -...
42Sales for both the Department Store Division a...positivepositive1.000000Sales for both the Department Store Division a...[-0.050088364630937576, 0.04885219410061836, 0...
43Sales have risen in other export markets .positivepositive1.000000Sales have risen in other export markets .[0.058916959911584854, 0.018443405628204346, -...
44Sales increased due to growing market rates an...positivepositive1.000000Sales increased due to growing market rates an...[0.047733016312122345, 0.010620158165693283, 0...
45The agreement strengthens our long-term partne...positivepositive1.000000The agreement strengthens our long-term partne...[0.06433788686990738, 0.027824176475405693, -0...
46The agreement was signed with Biohit Healthcar...positivepositive1.000000The agreement was signed with Biohit Healthcar...[0.03612205758690834, 0.038267459720373154, -0...
47The company also estimates the already carried...positivepositive1.000000The company also estimates the already carried...[0.04304526373744011, 0.023360760882496834, -0...
48The company 's order book stood at 1.5 bln eur...positivepositive1.000000The company 's order book stood at 1.5 bln eur...[0.036210183054208755, -0.010278576985001564, ...
49The company said that paper demand increased i...positivepositive1.000000The company said that paper demand increased i...[0.06558039039373398, 0.04877239838242531, -0....
50The world 's second largest stainless steel ma...positivepositive1.000000The world 's second largest stainless steel ma...[0.04267223924398422, 0.03184577450156212, -0....
\n","
"],"text/plain":[" document ... default_name_embeddings\n","origin_index ... \n","1 The international electronic industry company ... ... [0.002136496128514409, 0.07194118946790695, -0...\n","2 With the new production plant the company woul... ... [0.05198746547102928, 0.03577739745378494, -0....\n","3 According to the company 's updated strategy f... ... [0.03416536748409271, 0.04053246229887009, -0....\n","4 FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag... ... [0.07730763405561447, -0.045694783329963684, -...\n","5 For the last quarter of 2010 , Componenta 's n... ... [0.05603468790650368, 0.04817350581288338, -0....\n","6 In the third quarter of 2010 , net sales incre... ... [0.037710510194301605, 0.037198420614004135, -...\n","7 Operating profit rose to EUR 13.1 mn from EUR ... ... [0.04557091370224953, 0.0453636609017849, -0.0...\n","8 Operating profit totalled EUR 21.1 mn , up fro... ... [0.05191247910261154, 0.059505216777324677, -0...\n","9 TeliaSonera TLSN said the offer is in line wit... ... [0.07441692799329758, -0.0487477071583271, -0....\n","10 STORA ENSO , NORSKE SKOG , M-REAL , UPM-KYMMEN... ... [0.03200741112232208, 0.03773287683725357, -0....\n","11 A purchase agreement for 7,200 tons of gasolin... ... [0.05590442568063736, 0.041032955050468445, -0...\n","12 Finnish Talentum reports its operating profit ... ... [0.06596074998378754, 0.05897102504968643, -0....\n","13 Clothing retail chain Sepp+Æl+Æ 's sales incre... ... [0.03395465016365051, 0.05171804875135422, 0.0...\n","14 Consolidated net sales increased 16 % to reach... ... [0.060446273535490036, 0.03799470514059067, -0...\n","15 Foundries division reports its sales increased... ... [0.0494563989341259, 0.05158388614654541, -0.0...\n","16 HELSINKI ( AFX ) - Shares closed higher , led ... ... [0.0629865899682045, -0.045351240783929825, -0...\n","17 Incap Contract Manufacturing Services Pvt Ltd ... ... 
[0.05365738272666931, -0.055247869342565536, -...\n","18 Its board of directors will propose a dividend... ... [0.0692642331123352, 0.02292279154062271, -0.0...\n","19 Lifetree was founded in 2000 , and its revenue... ... [0.0810408890247345, 0.039108917117118835, -0....\n","20 ( Filippova ) A trilateral agreement on invest... ... [0.05172618478536606, 0.02967883087694645, -0....\n","21 MegaFon 's subscriber base increased 16.1 % in... ... [0.03825156390666962, 0.001971189398318529, -0...\n","22 Net income from life insurance doubled to EUR ... ... [0.05222763866186142, 0.05695151165127754, -0....\n","23 Net sales increased to EUR193 .3 m from EUR179... ... [0.02272764965891838, 0.016222774982452393, 0....\n","24 Net sales surged by 18.5 % to EUR167 .8 m. Tel... ... [0.05020830035209656, 0.03307913616299629, -0....\n","25 Nordea Group 's operating profit increased in ... ... [0.0497022308409214, 0.023793146014213562, -0....\n","26 Operating profit for the nine-month period inc... ... [0.04339126497507095, 0.024815633893013, -0.02...\n","27 Operating profit for the nine-month period inc... ... [0.035663120448589325, 0.03037247434258461, -0...\n","28 Operating profit for the three-month period in... ... [0.029575243592262268, 0.007764187641441822, -...\n","29 The Brazilian unit of Finnish security solutio... ... [0.047570426017045975, -0.023694489151239395, ...\n","30 The company 's net profit rose 11.4 % on the y... ... [0.06896018236875534, 0.046189870685338974, -0...\n","31 The Lithuanian beer market made up 14.41 milli... ... [0.0020184037275612354, -0.044685497879981995,...\n","32 Viking Line 's cargo revenue increased by 5.4 ... ... [-0.007756179664283991, -0.04868081212043762, ...\n","33 The fair value of the property portfolio doubl... ... [0.06604734063148499, -0.025070184841752052, 0...\n","34 10 February 2011 - Finnish media company Sanom... ... [0.05996786803007126, 0.03255663812160492, -0....\n","35 A Helsinki : ELIiV today reported EPS of EUR1 ... ... 
[0.051878154277801514, -0.03290269523859024, -...\n","36 Aspo Plc STOCK EXCHANGE RELEASE February 11 , ... ... [0.03545805439352989, -0.04956813529133797, -0...\n","37 Commission income increased by 22 % to EUR 4.4... ... [0.05664118379354477, 0.004533933009952307, -0...\n","38 In January , traffic , measured in revenue pas... ... [-0.026962362229824066, 0.010590712539851665, ...\n","39 In January-September 2010 , Fiskars ' net prof... ... [0.056088510900735855, 0.0369233600795269, -0....\n","40 Net income from life insurance rose to EUR 16.... ... [0.05793088302016258, 0.06312950700521469, -0....\n","41 Nyrstar has also agreed to supply to Talvivaar... ... [0.004785533994436264, 0.004442625679075718, -...\n","42 Sales for both the Department Store Division a... ... [-0.050088364630937576, 0.04885219410061836, 0...\n","43 Sales have risen in other export markets . ... [0.058916959911584854, 0.018443405628204346, -...\n","44 Sales increased due to growing market rates an... ... [0.047733016312122345, 0.010620158165693283, 0...\n","45 The agreement strengthens our long-term partne... ... [0.06433788686990738, 0.027824176475405693, -0...\n","46 The agreement was signed with Biohit Healthcar... ... [0.03612205758690834, 0.038267459720373154, -0...\n","47 The company also estimates the already carried... ... [0.04304526373744011, 0.023360760882496834, -0...\n","48 The company 's order book stood at 1.5 bln eur... ... [0.036210183054208755, -0.010278576985001564, ...\n","49 The company said that paper demand increased i... ... [0.06558039039373398, 0.04877239838242531, -0....\n","50 The world 's second largest stainless steel ma... ... 
[0.04267223924398422, 0.03184577450156212, -0....\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":107},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609264917602,"user_tz":-300,"elapsed":193623,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"8fe5b9aa-c87a-42d3-e00d-920e63ca6aa4"},"source":["fitted_pipe.predict('According to the most recent update there has been a major decrese in the rate of oil')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
documentsentimentsentiment_confidencedefault_name_embeddings
origin_index
0Bitcoin is going to the moon!positive0.999994[0.06468033790588379, -0.040837567299604416, -...
\n","
"],"text/plain":[" document ... default_name_embeddings\n","origin_index ... \n","0 Bitcoin is going to the moon! ... [0.06468033790588379, -0.040837567299604416, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609264917604,"user_tz":-300,"elapsed":193620,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ac9c8b1a-7fdd-4a6f-bdfd-1dbb823d9bf4"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. 
| Currently set to : neutral\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. 
| Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":753},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609264924472,"user_tz":-300,"elapsed":200484,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1dd94bc8-09c8-45db-ab81-bbd64acb8a4b"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:100])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.00 0.00 0.00 1\n"," positive 0.99 1.00 0.99 99\n","\n"," accuracy 0.99 100\n"," macro avg 0.49 0.50 0.50 100\n","weighted avg 0.98 0.99 0.99 100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
documentsentimentysentiment_confidencetextdefault_name_embeddings
origin_index
1The international electronic industry company ...positivenegative1.000000The international electronic industry company ...[0.002136496128514409, 0.07194118946790695, -0...
2With the new production plant the company woul...positivepositive1.000000With the new production plant the company woul...[0.05198746547102928, 0.03577739745378494, -0....
3According to the company 's updated strategy f...positivepositive1.000000According to the company 's updated strategy f...[0.03416536748409271, 0.04053246229887009, -0....
4FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...positivepositive1.000000FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...[0.07730763405561447, -0.045694783329963684, -...
5For the last quarter of 2010 , Componenta 's n...positivepositive1.000000For the last quarter of 2010 , Componenta 's n...[0.05603468790650368, 0.04817350581288338, -0....
.....................
116Operating profit margin increased from 11.2 % ...positivepositive1.000000Operating profit margin increased from 11.2 % ...[0.01058729737997055, -0.008798183873295784, -...
117Operating profit rose to EUR 3.11 mn from EUR ...positivepositive1.000000Operating profit rose to EUR 3.11 mn from EUR ...[0.03610285371541977, 0.04256380349397659, -0....
118Operating profit rose to EUR 5mn from EUR 2.8 ...positivepositive1.000000Operating profit rose to EUR 5mn from EUR 2.8 ...[0.04815328121185303, 0.050376053899526596, -0...
119Operating profit was EUR 24.5 mn , up from EUR...positivepositive1.000000Operating profit was EUR 24.5 mn , up from EUR...[0.048205215483903885, 0.05145161226391792, -0...
120Ramirent 's net sales in the second quarterend...positivepositive1.000000Ramirent 's net sales in the second quarterend...[0.0638015866279602, 0.0272374227643013, -0.04...
\n","

100 rows × 6 columns

\n","
"],"text/plain":[" document ... default_name_embeddings\n","origin_index ... \n","1 The international electronic industry company ... ... [0.002136496128514409, 0.07194118946790695, -0...\n","2 With the new production plant the company woul... ... [0.05198746547102928, 0.03577739745378494, -0....\n","3 According to the company 's updated strategy f... ... [0.03416536748409271, 0.04053246229887009, -0....\n","4 FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag... ... [0.07730763405561447, -0.045694783329963684, -...\n","5 For the last quarter of 2010 , Componenta 's n... ... [0.05603468790650368, 0.04817350581288338, -0....\n","... ... ... ...\n","116 Operating profit margin increased from 11.2 % ... ... [0.01058729737997055, -0.008798183873295784, -...\n","117 Operating profit rose to EUR 3.11 mn from EUR ... ... [0.03610285371541977, 0.04256380349397659, -0....\n","118 Operating profit rose to EUR 5mn from EUR 2.8 ... ... [0.04815328121185303, 0.050376053899526596, -0...\n","119 Operating profit was EUR 24.5 mn , up from EUR... ... [0.048205215483903885, 0.05145161226391792, -0...\n","120 Ramirent 's net sales in the second quarterend... ... [0.0638015866279602, 0.0272374227643013, -0.04...\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609264924477,"user_tz":-300,"elapsed":200483,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e259763c-470b-4d46-b3d1-28cf545f5dcd"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"IKK_Ii_gjJfF","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609266286092,"user_tz":-300,"elapsed":1562094,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"4237752f-4fbe-4235-b33d-5d7b8ba29d48"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train longer 
and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(70) \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.88 0.87 0.88 604\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.96 0.92 0.94 1363\n","\n"," accuracy 0.91 1967\n"," macro avg 0.62 0.60 0.61 1967\n","weighted avg 0.94 0.91 0.92 1967\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609266449598,"user_tz":-300,"elapsed":1725594,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b31b5e1e-3f09-4ab3-e97a-fb32ac87b319"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! 
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":124},"executionInfo":{"status":"ok","timestamp":1609266465229,"user_tz":-300,"elapsed":1741220,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"5d9cc34a-693c-44d7-e50a-6e0ca5d4e024"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('According to the most recent update there has been a major decrese in the rate of oil')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
documentsentimentsentiment_confidenceen_embed_sentence_small_bert_L12_768_embeddings
origin_index
0Tesla plans to invest 10M into the ML sectorpositive0.999980[0.15737222135066986, 0.2598555386066437, 0.85...
\n","
"],"text/plain":[" document ... en_embed_sentence_small_bert_L12_768_embeddings\n","origin_index ... \n","0 Tesla plans to invest 10M into the ML sector ... [0.15737222135066986, 0.2598555386066437, 0.85...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609266465232,"user_tz":-300,"elapsed":1741218,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ec54f7c0-8174-4fd4-9db8-51c1d15be3eb"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_finanical_news.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_finanical_news.ipynb)\n","\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","## 2 class Finance News sentiment classifier training\n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : 
\n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu pyspark==2.4.7 > /dev/null \n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download Financial News Sentiment dataset \n","https://www.kaggle.com/ankurzing/sentiment-analysis-for-financial-news\n","\n","This dataset contains the sentiments for financial news headlines from the perspective of a retail investor. Further details about the dataset can be found in: Malo, P., Sinha, A., Takala, P., Korhonen, P. and Wallenius, J. (2014): “Good debt or bad debt: Detecting semantic orientations in economic texts.” Journal of the American Society for Information Science and Technology."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610788018304,"user_tz":-300,"elapsed":2399,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"f494fab0-8f9c-4087-f554-31a21764a207"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/all-data.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:06:20-- http://ckl-it.de/wp-content/uploads/2021/01/all-data.csv\n","Resolving ckl-it.de (ckl-it.de)... 
217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 704799 (688K) [text/csv]\n","Saving to: ‘all-data.csv’\n","\n","all-data.csv 100%[===================>] 688.28K 1.09MB/s in 0.6s \n","\n","2021-01-16 09:06:21 (1.09 MB/s) - ‘all-data.csv’ saved [704799/704799]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610788018314,"user_tz":-300,"elapsed":660,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e1e2496a-8df8-4e5d-db53-63d62ef1f050"},"source":["import pandas as pd\n","train_path = '/content/all-data.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must be named 'y' and be of type str\n","columns=['text','y']\n","train_df = train_df[columns]\n","train_df = train_df[~train_df[\"y\"].isin([\"neutral\"])]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
texty
1The international electronic industry company ...negative
2With the new production plant the company woul...positive
3According to the company 's updated strategy f...positive
4FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...positive
5For the last quarter of 2010 , Componenta 's n...positive
.........
4839HELSINKI Thomson Financial - Shares in Cargote...negative
4840LONDON MarketWatch -- Share prices ended lower...negative
4842Operating profit fell to EUR 35.4 mn from EUR ...negative
4843Net sales of the Paper segment decreased to EU...negative
4844Sales in Finland decreased by 10.5 % in Januar...negative
\n","

1967 rows × 2 columns

\n","
"],"text/plain":[" text y\n","1 The international electronic industry company ... negative\n","2 With the new production plant the company woul... positive\n","3 According to the company 's updated strategy f... positive\n","4 FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag... positive\n","5 For the last quarter of 2010 , Componenta 's n... positive\n","... ... ...\n","4839 HELSINKI Thomson Financial - Shares in Cargote... negative\n","4840 LONDON MarketWatch -- Share prices ended lower... negative\n","4842 Operating profit fell to EUR 35.4 mn from EUR ... negative\n","4843 Net sales of the Paper segment decreased to EU... negative\n","4844 Sales in Finland decreased by 10.5 % in Januar... negative\n","\n","[1967 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609264914996,"user_tz":-300,"elapsed":191025,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"6dc536e4-252e-4324-e070-cd477a79330d"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix and fit it on a dataset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates some NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.00 0.00 0.00 1\n"," positive 0.98 1.00 0.99 49\n","\n"," accuracy 0.98 50\n"," macro avg 0.49 0.50 0.49 50\n","weighted avg 0.96 0.98 0.97 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
documentsentimentysentiment_confidencetextdefault_name_embeddings
origin_index
1The international electronic industry company ...positivenegative1.000000The international electronic industry company ...[0.002136496128514409, 0.07194118946790695, -0...
2With the new production plant the company woul...positivepositive1.000000With the new production plant the company woul...[0.05198746547102928, 0.03577739745378494, -0....
3According to the company 's updated strategy f...positivepositive1.000000According to the company 's updated strategy f...[0.03416536748409271, 0.04053246229887009, -0....
4FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...positivepositive1.000000FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...[0.07730763405561447, -0.045694783329963684, -...
5For the last quarter of 2010 , Componenta 's n...positivepositive1.000000For the last quarter of 2010 , Componenta 's n...[0.05603468790650368, 0.04817350581288338, -0....
6In the third quarter of 2010 , net sales incre...positivepositive1.000000In the third quarter of 2010 , net sales incre...[0.037710510194301605, 0.037198420614004135, -...
7Operating profit rose to EUR 13.1 mn from EUR ...positivepositive1.000000Operating profit rose to EUR 13.1 mn from EUR ...[0.04557091370224953, 0.0453636609017849, -0.0...
8Operating profit totalled EUR 21.1 mn , up fro...positivepositive1.000000Operating profit totalled EUR 21.1 mn , up fro...[0.05191247910261154, 0.059505216777324677, -0...
9TeliaSonera TLSN said the offer is in line wit...positivepositive1.000000TeliaSonera TLSN said the offer is in line wit...[0.07441692799329758, -0.0487477071583271, -0....
10STORA ENSO , NORSKE SKOG , M-REAL , UPM-KYMMEN...positivepositive1.000000STORA ENSO , NORSKE SKOG , M-REAL , UPM-KYMMEN...[0.03200741112232208, 0.03773287683725357, -0....
11A purchase agreement for 7,200 tons of gasolin...positivepositive1.000000A purchase agreement for 7,200 tons of gasolin...[0.05590442568063736, 0.041032955050468445, -0...
12Finnish Talentum reports its operating profit ...positivepositive1.000000Finnish Talentum reports its operating profit ...[0.06596074998378754, 0.05897102504968643, -0....
13Clothing retail chain Sepp+Æl+Æ 's sales incre...positivepositive1.000000Clothing retail chain Sepp+Æl+Æ 's sales incre...[0.03395465016365051, 0.05171804875135422, 0.0...
14Consolidated net sales increased 16 % to reach...positivepositive1.000000Consolidated net sales increased 16 % to reach...[0.060446273535490036, 0.03799470514059067, -0...
15Foundries division reports its sales increased...positivepositive1.000000Foundries division reports its sales increased...[0.0494563989341259, 0.05158388614654541, -0.0...
16HELSINKI ( AFX ) - Shares closed higher , led ...positivepositive1.000000HELSINKI ( AFX ) - Shares closed higher , led ...[0.0629865899682045, -0.045351240783929825, -0...
17Incap Contract Manufacturing Services Pvt Ltd ...positivepositive1.000000Incap Contract Manufacturing Services Pvt Ltd ...[0.05365738272666931, -0.055247869342565536, -...
18Its board of directors will propose a dividend...positivepositive1.000000Its board of directors will propose a dividend...[0.0692642331123352, 0.02292279154062271, -0.0...
19Lifetree was founded in 2000 , and its revenue...positivepositive1.000000Lifetree was founded in 2000 , and its revenue...[0.0810408890247345, 0.039108917117118835, -0....
20( Filippova ) A trilateral agreement on invest...positivepositive0.999998( Filippova ) A trilateral agreement on invest...[0.05172618478536606, 0.02967883087694645, -0....
21MegaFon 's subscriber base increased 16.1 % in...positivepositive1.000000MegaFon 's subscriber base increased 16.1 % in...[0.03825156390666962, 0.001971189398318529, -0...
22Net income from life insurance doubled to EUR ...positivepositive1.000000Net income from life insurance doubled to EUR ...[0.05222763866186142, 0.05695151165127754, -0....
23Net sales increased to EUR193 .3 m from EUR179...positivepositive1.000000Net sales increased to EUR193 .3 m from EUR179...[0.02272764965891838, 0.016222774982452393, 0....
24Net sales surged by 18.5 % to EUR167 .8 m. Tel...positivepositive1.000000Net sales surged by 18.5 % to EUR167 .8 m. Tel...[0.05020830035209656, 0.03307913616299629, -0....
25Nordea Group 's operating profit increased in ...positivepositive1.000000Nordea Group 's operating profit increased in ...[0.0497022308409214, 0.023793146014213562, -0....
26Operating profit for the nine-month period inc...positivepositive1.000000Operating profit for the nine-month period inc...[0.04339126497507095, 0.024815633893013, -0.02...
27Operating profit for the nine-month period inc...positivepositive1.000000Operating profit for the nine-month period inc...[0.035663120448589325, 0.03037247434258461, -0...
28Operating profit for the three-month period in...positivepositive1.000000Operating profit for the three-month period in...[0.029575243592262268, 0.007764187641441822, -...
29The Brazilian unit of Finnish security solutio...positivepositive1.000000The Brazilian unit of Finnish security solutio...[0.047570426017045975, -0.023694489151239395, ...
30The company 's net profit rose 11.4 % on the y...positivepositive1.000000The company 's net profit rose 11.4 % on the y...[0.06896018236875534, 0.046189870685338974, -0...
31The Lithuanian beer market made up 14.41 milli...positivepositive0.999999The Lithuanian beer market made up 14.41 milli...[0.0020184037275612354, -0.044685497879981995,...
32Viking Line 's cargo revenue increased by 5.4 ...positivepositive1.000000Viking Line 's cargo revenue increased by 5.4 ...[-0.007756179664283991, -0.04868081212043762, ...
33The fair value of the property portfolio doubl...positivepositive1.000000The fair value of the property portfolio doubl...[0.06604734063148499, -0.025070184841752052, 0...
3410 February 2011 - Finnish media company Sanom...positivepositive1.00000010 February 2011 - Finnish media company Sanom...[0.05996786803007126, 0.03255663812160492, -0....
35A Helsinki : ELIiV today reported EPS of EUR1 ...positivepositive0.999999A Helsinki : ELIiV today reported EPS of EUR1 ...[0.051878154277801514, -0.03290269523859024, -...
36Aspo Plc STOCK EXCHANGE RELEASE February 11 , ...positivepositive1.000000Aspo Plc STOCK EXCHANGE RELEASE February 11 , ...[0.03545805439352989, -0.04956813529133797, -0...
37Commission income increased by 22 % to EUR 4.4...positivepositive1.000000Commission income increased by 22 % to EUR 4.4...[0.05664118379354477, 0.004533933009952307, -0...
38In January , traffic , measured in revenue pas...positivepositive1.000000In January , traffic , measured in revenue pas...[-0.026962362229824066, 0.010590712539851665, ...
39In January-September 2010 , Fiskars ' net prof...positivepositive1.000000In January-September 2010 , Fiskars ' net prof...[0.056088510900735855, 0.0369233600795269, -0....
40Net income from life insurance rose to EUR 16....positivepositive1.000000Net income from life insurance rose to EUR 16....[0.05793088302016258, 0.06312950700521469, -0....
41Nyrstar has also agreed to supply to Talvivaar...positivepositive1.000000Nyrstar has also agreed to supply to Talvivaar...[0.004785533994436264, 0.004442625679075718, -...
42Sales for both the Department Store Division a...positivepositive1.000000Sales for both the Department Store Division a...[-0.050088364630937576, 0.04885219410061836, 0...
43Sales have risen in other export markets .positivepositive1.000000Sales have risen in other export markets .[0.058916959911584854, 0.018443405628204346, -...
44Sales increased due to growing market rates an...positivepositive1.000000Sales increased due to growing market rates an...[0.047733016312122345, 0.010620158165693283, 0...
45The agreement strengthens our long-term partne...positivepositive1.000000The agreement strengthens our long-term partne...[0.06433788686990738, 0.027824176475405693, -0...
46The agreement was signed with Biohit Healthcar...positivepositive1.000000The agreement was signed with Biohit Healthcar...[0.03612205758690834, 0.038267459720373154, -0...
47The company also estimates the already carried...positivepositive1.000000The company also estimates the already carried...[0.04304526373744011, 0.023360760882496834, -0...
48The company 's order book stood at 1.5 bln eur...positivepositive1.000000The company 's order book stood at 1.5 bln eur...[0.036210183054208755, -0.010278576985001564, ...
49The company said that paper demand increased i...positivepositive1.000000The company said that paper demand increased i...[0.06558039039373398, 0.04877239838242531, -0....
50The world 's second largest stainless steel ma...positivepositive1.000000The world 's second largest stainless steel ma...[0.04267223924398422, 0.03184577450156212, -0....
\n","
"],"text/plain":[" document ... default_name_embeddings\n","origin_index ... \n","1 The international electronic industry company ... ... [0.002136496128514409, 0.07194118946790695, -0...\n","2 With the new production plant the company woul... ... [0.05198746547102928, 0.03577739745378494, -0....\n","3 According to the company 's updated strategy f... ... [0.03416536748409271, 0.04053246229887009, -0....\n","4 FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag... ... [0.07730763405561447, -0.045694783329963684, -...\n","5 For the last quarter of 2010 , Componenta 's n... ... [0.05603468790650368, 0.04817350581288338, -0....\n","6 In the third quarter of 2010 , net sales incre... ... [0.037710510194301605, 0.037198420614004135, -...\n","7 Operating profit rose to EUR 13.1 mn from EUR ... ... [0.04557091370224953, 0.0453636609017849, -0.0...\n","8 Operating profit totalled EUR 21.1 mn , up fro... ... [0.05191247910261154, 0.059505216777324677, -0...\n","9 TeliaSonera TLSN said the offer is in line wit... ... [0.07441692799329758, -0.0487477071583271, -0....\n","10 STORA ENSO , NORSKE SKOG , M-REAL , UPM-KYMMEN... ... [0.03200741112232208, 0.03773287683725357, -0....\n","11 A purchase agreement for 7,200 tons of gasolin... ... [0.05590442568063736, 0.041032955050468445, -0...\n","12 Finnish Talentum reports its operating profit ... ... [0.06596074998378754, 0.05897102504968643, -0....\n","13 Clothing retail chain Sepp+Æl+Æ 's sales incre... ... [0.03395465016365051, 0.05171804875135422, 0.0...\n","14 Consolidated net sales increased 16 % to reach... ... [0.060446273535490036, 0.03799470514059067, -0...\n","15 Foundries division reports its sales increased... ... [0.0494563989341259, 0.05158388614654541, -0.0...\n","16 HELSINKI ( AFX ) - Shares closed higher , led ... ... [0.0629865899682045, -0.045351240783929825, -0...\n","17 Incap Contract Manufacturing Services Pvt Ltd ... ... 
[0.05365738272666931, -0.055247869342565536, -...\n","18 Its board of directors will propose a dividend... ... [0.0692642331123352, 0.02292279154062271, -0.0...\n","19 Lifetree was founded in 2000 , and its revenue... ... [0.0810408890247345, 0.039108917117118835, -0....\n","20 ( Filippova ) A trilateral agreement on invest... ... [0.05172618478536606, 0.02967883087694645, -0....\n","21 MegaFon 's subscriber base increased 16.1 % in... ... [0.03825156390666962, 0.001971189398318529, -0...\n","22 Net income from life insurance doubled to EUR ... ... [0.05222763866186142, 0.05695151165127754, -0....\n","23 Net sales increased to EUR193 .3 m from EUR179... ... [0.02272764965891838, 0.016222774982452393, 0....\n","24 Net sales surged by 18.5 % to EUR167 .8 m. Tel... ... [0.05020830035209656, 0.03307913616299629, -0....\n","25 Nordea Group 's operating profit increased in ... ... [0.0497022308409214, 0.023793146014213562, -0....\n","26 Operating profit for the nine-month period inc... ... [0.04339126497507095, 0.024815633893013, -0.02...\n","27 Operating profit for the nine-month period inc... ... [0.035663120448589325, 0.03037247434258461, -0...\n","28 Operating profit for the three-month period in... ... [0.029575243592262268, 0.007764187641441822, -...\n","29 The Brazilian unit of Finnish security solutio... ... [0.047570426017045975, -0.023694489151239395, ...\n","30 The company 's net profit rose 11.4 % on the y... ... [0.06896018236875534, 0.046189870685338974, -0...\n","31 The Lithuanian beer market made up 14.41 milli... ... [0.0020184037275612354, -0.044685497879981995,...\n","32 Viking Line 's cargo revenue increased by 5.4 ... ... [-0.007756179664283991, -0.04868081212043762, ...\n","33 The fair value of the property portfolio doubl... ... [0.06604734063148499, -0.025070184841752052, 0...\n","34 10 February 2011 - Finnish media company Sanom... ... [0.05996786803007126, 0.03255663812160492, -0....\n","35 A Helsinki : ELIiV today reported EPS of EUR1 ... ... 
[0.051878154277801514, -0.03290269523859024, -...\n","36 Aspo Plc STOCK EXCHANGE RELEASE February 11 , ... ... [0.03545805439352989, -0.04956813529133797, -0...\n","37 Commission income increased by 22 % to EUR 4.4... ... [0.05664118379354477, 0.004533933009952307, -0...\n","38 In January , traffic , measured in revenue pas... ... [-0.026962362229824066, 0.010590712539851665, ...\n","39 In January-September 2010 , Fiskars ' net prof... ... [0.056088510900735855, 0.0369233600795269, -0....\n","40 Net income from life insurance rose to EUR 16.... ... [0.05793088302016258, 0.06312950700521469, -0....\n","41 Nyrstar has also agreed to supply to Talvivaar... ... [0.004785533994436264, 0.004442625679075718, -...\n","42 Sales for both the Department Store Division a... ... [-0.050088364630937576, 0.04885219410061836, 0...\n","43 Sales have risen in other export markets . ... [0.058916959911584854, 0.018443405628204346, -...\n","44 Sales increased due to growing market rates an... ... [0.047733016312122345, 0.010620158165693283, 0...\n","45 The agreement strengthens our long-term partne... ... [0.06433788686990738, 0.027824176475405693, -0...\n","46 The agreement was signed with Biohit Healthcar... ... [0.03612205758690834, 0.038267459720373154, -0...\n","47 The company also estimates the already carried... ... [0.04304526373744011, 0.023360760882496834, -0...\n","48 The company 's order book stood at 1.5 bln eur... ... [0.036210183054208755, -0.010278576985001564, ...\n","49 The company said that paper demand increased i... ... [0.06558039039373398, 0.04877239838242531, -0....\n","50 The world 's second largest stainless steel ma... ... 
[0.04267223924398422, 0.03184577450156212, -0....\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":107},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609264917602,"user_tz":-300,"elapsed":193623,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"8fe5b9aa-c87a-42d3-e00d-920e63ca6aa4"},"source":["fitted_pipe.predict('According to the most recent update there has been a major decrese in the rate of oil')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
documentsentimentsentiment_confidencedefault_name_embeddings
origin_index
0Bitcoin is going to the moon!positive0.999994[0.06468033790588379, -0.040837567299604416, -...
\n","
"],"text/plain":[" document ... default_name_embeddings\n","origin_index ... \n","0 Bitcoin is going to the moon! ... [0.06468033790588379, -0.040837567299604416, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609264917604,"user_tz":-300,"elapsed":193620,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ac9c8b1a-7fdd-4a6f-bdfd-1dbb823d9bf4"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. 
| Currently set to : neutral\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. 
| Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":753},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609264924472,"user_tz":-300,"elapsed":200484,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1dd94bc8-09c8-45db-ab81-bbd64acb8a4b"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:100])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.00 0.00 0.00 1\n"," positive 0.99 1.00 0.99 99\n","\n"," accuracy 0.99 100\n"," macro avg 0.49 0.50 0.50 100\n","weighted avg 0.98 0.99 0.99 100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
documentsentimentysentiment_confidencetextdefault_name_embeddings
origin_index
1The international electronic industry company ...positivenegative1.000000The international electronic industry company ...[0.002136496128514409, 0.07194118946790695, -0...
2With the new production plant the company woul...positivepositive1.000000With the new production plant the company woul...[0.05198746547102928, 0.03577739745378494, -0....
3According to the company 's updated strategy f...positivepositive1.000000According to the company 's updated strategy f...[0.03416536748409271, 0.04053246229887009, -0....
4FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...positivepositive1.000000FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...[0.07730763405561447, -0.045694783329963684, -...
5For the last quarter of 2010 , Componenta 's n...positivepositive1.000000For the last quarter of 2010 , Componenta 's n...[0.05603468790650368, 0.04817350581288338, -0....
.....................
116Operating profit margin increased from 11.2 % ...positivepositive1.000000Operating profit margin increased from 11.2 % ...[0.01058729737997055, -0.008798183873295784, -...
117Operating profit rose to EUR 3.11 mn from EUR ...positivepositive1.000000Operating profit rose to EUR 3.11 mn from EUR ...[0.03610285371541977, 0.04256380349397659, -0....
118Operating profit rose to EUR 5mn from EUR 2.8 ...positivepositive1.000000Operating profit rose to EUR 5mn from EUR 2.8 ...[0.04815328121185303, 0.050376053899526596, -0...
119Operating profit was EUR 24.5 mn , up from EUR...positivepositive1.000000Operating profit was EUR 24.5 mn , up from EUR...[0.048205215483903885, 0.05145161226391792, -0...
120Ramirent 's net sales in the second quarterend...positivepositive1.000000Ramirent 's net sales in the second quarterend...[0.0638015866279602, 0.0272374227643013, -0.04...
\n","

100 rows × 6 columns

\n","
"],"text/plain":[" document ... default_name_embeddings\n","origin_index ... \n","1 The international electronic industry company ... ... [0.002136496128514409, 0.07194118946790695, -0...\n","2 With the new production plant the company woul... ... [0.05198746547102928, 0.03577739745378494, -0....\n","3 According to the company 's updated strategy f... ... [0.03416536748409271, 0.04053246229887009, -0....\n","4 FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag... ... [0.07730763405561447, -0.045694783329963684, -...\n","5 For the last quarter of 2010 , Componenta 's n... ... [0.05603468790650368, 0.04817350581288338, -0....\n","... ... ... ...\n","116 Operating profit margin increased from 11.2 % ... ... [0.01058729737997055, -0.008798183873295784, -...\n","117 Operating profit rose to EUR 3.11 mn from EUR ... ... [0.03610285371541977, 0.04256380349397659, -0....\n","118 Operating profit rose to EUR 5mn from EUR 2.8 ... ... [0.04815328121185303, 0.050376053899526596, -0...\n","119 Operating profit was EUR 24.5 mn , up from EUR... ... [0.048205215483903885, 0.05145161226391792, -0...\n","120 Ramirent 's net sales in the second quarterend... ... [0.0638015866279602, 0.0272374227643013, -0.04...\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609264924477,"user_tz":-300,"elapsed":200483,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e259763c-470b-4d46-b3d1-28cf545f5dcd"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possible sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"IKK_Ii_gjJfF","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609266286092,"user_tz":-300,"elapsed":1562094,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"4237752f-4fbe-4235-b33d-5d7b8ba29d48"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train longer 
and use a smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(70) \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.88 0.87 0.88 604\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.96 0.92 0.94 1363\n","\n"," accuracy 0.91 1967\n"," macro avg 0.62 0.60 0.61 1967\n","weighted avg 0.94 0.91 0.92 1967\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Let's save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609266449598,"user_tz":-300,"elapsed":1725594,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b31b5e1e-3f09-4ab3-e97a-fb32ac87b319"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Let's load the model from HDD.\n","This makes offline NLU usage possible! 
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":124},"executionInfo":{"status":"ok","timestamp":1609266465229,"user_tz":-300,"elapsed":1741220,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"5d9cc34a-693c-44d7-e50a-6e0ca5d4e024"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('According to the most recent update there has been a major decrese in the rate of oil')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
documentsentimentsentiment_confidenceen_embed_sentence_small_bert_L12_768_embeddings
origin_index
0Tesla plans to invest 10M into the ML sectorpositive0.999980[0.15737222135066986, 0.2598555386066437, 0.85...
\n","
"],"text/plain":[" document ... en_embed_sentence_small_bert_L12_768_embeddings\n","origin_index ... \n","0 Tesla plans to invest 10M into the ML sector ... [0.15737222135066986, 0.2598555386066437, 0.85...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609266465232,"user_tz":-300,"elapsed":1741218,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ec54f7c0-8174-4fd4-9db8-51c1d15be3eb"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_reddit.ipynb b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_reddit.ipynb index bcb938e7..f21ed5a0 100644 --- a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_reddit.ipynb +++ b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_reddit.ipynb @@ -1 +1 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_reddit.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_reddit.ipynb)\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install pyspark==2.4.7 \n","! pip install nlu > /dev/null \n","\n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download Reddit Sentiment dataset \n","https://www.kaggle.com/cosmos98/twitter-and-reddit-sentimental-analysis-dataset\n","#Context\n","\n","This is was a Dataset Created as a part of the university Project On Sentimental Analysis On Multi-Source Social Media Platforms using PySpark."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610788117464,"user_tz":-300,"elapsed":1827,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"69719e54-e5be-4a26-bcde-b913531cbcd9"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/Reddit_Data.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:08:00-- http://ckl-it.de/wp-content/uploads/2021/01/Reddit_Data.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 153265 (150K) [text/csv]\n","Saving to: ‘Reddit_Data.csv’\n","\n","Reddit_Data.csv 100%[===================>] 149.67K 402KB/s in 0.4s \n","\n","2021-01-16 09:08:01 (402 KB/s) - ‘Reddit_Data.csv’ saved [153265/153265]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610788135165,"user_tz":-300,"elapsed":1025,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"cdd2f3e1-3f96-4a5b-9291-34bce078fbf0"},"source":["import pandas as pd\n","train_path = '/content/Reddit_Data.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","columns=['text','y']\n","train_df = train_df[columns]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
texty
0its true they had cut the power what douchebag...positive
1fuck giroud better finishing like this monthpositive
2looks shit now but still proud madepositive
3pelor the burning hate the best evil godnegative
4can ask what you with something this powerfulpositive
.........
595bangali desh bechne main sabse aagepositive
596national media channels were gaged not cover t...positive
597been following these threads from the beginni...negative
598pretty sure this sarcasm satire the news 1500...positive
599much would love for namo our next hard imagin...positive
\n","

600 rows × 2 columns

\n","
"],"text/plain":[" text y\n","0 its true they had cut the power what douchebag... positive\n","1 fuck giroud better finishing like this month positive\n","2 looks shit now but still proud made positive\n","3 pelor the burning hate the best evil god negative\n","4 can ask what you with something this powerful positive\n",".. ... ...\n","595 bangali desh bechne main sabse aage positive\n","596 national media channels were gaged not cover t... positive\n","597 been following these threads from the beginni... negative\n","598 pretty sure this sarcasm satire the news 1500... positive\n","599 much would love for namo our next hard imagin... positive\n","\n","[600 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":2}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609522047844,"user_tz":-300,"elapsed":222057,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"83383a78-d68b-43a0-a253-318696580942"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix and fit it on a datset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.67 1.00 0.80 24\n"," neutral 0.00 0.00 0.00 0\n"," positive 1.00 0.23 0.38 26\n","\n"," accuracy 0.60 50\n"," macro avg 0.56 0.41 0.39 50\n","weighted avg 0.84 0.60 0.58 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
documentytextsentimentdefault_name_embeddingssentiment_confidence
origin_index
0its true they had cut the power what douchebag...positiveits true they had cut the power what douchebag...negative[0.033111296594142914, 0.053994592279195786, -...0.632922
1fuck giroud better finishing like this monthpositivefuck giroud better finishing like this monthneutral[0.0678204670548439, 0.01411951333284378, -0.0...0.558096
2looks shit now but still proud madepositivelooks shit now but still proud madeneutral[0.03247416764497757, -0.09844464808702469, -0...0.594104
3pelor the burning hate the best evil godnegativepelor the burning hate the best evil godnegative[0.04032062739133835, 0.07666623592376709, -0....0.699286
4can ask what you with something this powerfulpositivecan ask what you with something this powerfulpositive[0.015518003143370152, -0.05116305500268936, -...0.615222
5aap’ shazia ilmi from puram constituency lag...negativeaap’ shazia ilmi from puram constituency lag...negative[0.02478150464594364, -0.06508146971464157, -0...0.751383
6fuck yeahnegativefuck yeahnegative[0.04602408409118652, -0.025047995150089264, -...0.663185
7honestly really surprised alice ranked that lo...positivehonestly really surprised alice ranked that lo...positive[-0.035716041922569275, -0.04127982258796692, ...0.605483
8didn care about politics before now hatenegativedidn care about politics before now hatenegative[-0.006816444452852011, 0.06221264973282814, -...0.701191
9hard nips and goosebumpsnegativehard nips and goosebumpsnegative[-0.02919699251651764, -0.030449824407696724, ...0.629745
10varadabhai ndtv trying too well dilute bjp tre...negativevaradabhai ndtv trying too well dilute bjp tre...negative[0.04727796092629433, -0.06792476028203964, -0...0.756238
11old man has lost his mindpositiveold man has lost his mindneutral[0.039657335728406906, -0.04277808964252472, -...0.502476
12why this being downvoted you might ask both mo...negativewhy this being downvoted you might ask both mo...negative[0.06581216305494308, -0.06079106032848358, -0...0.710366
13hasnt changed all apolitical before simply don...positivehasnt changed all apolitical before simply do...negative[0.03509754315018654, -0.004639611579477787, -...0.603606
14for one campaign pretty much just snatched the...negativefor one campaign pretty much just snatched the...negative[0.017386479303240776, 0.0443551279604435, -0....0.631376
15vajpayee managed forge much broader coalition ...positivevajpayee managed forge much broader coalition ...negative[0.0372871570289135, -0.051079731434583664, -0...0.685135
16lol this only proves how desperate they are ge...positivelol this only proves how desperate they are ge...negative[0.05233633145689964, -0.03147873282432556, 0....0.624959
17dont hate aap but your questions are example w...negativedont hate aap but your questions are example ...negative[0.026356497779488564, -0.04044198617339134, -...0.769971
18what were the other policies you discussed not...negativewhat were the other policies you discussed not...negative[-0.07521010935306549, 0.008543566800653934, 0...0.669384
19wow lots favorites this bracket haqua tsukushi...positivewow lots favorites this bracket haqua tsukushi...neutral[-0.0693160742521286, -0.015458519570529461, -...0.593471
20sorry know this isn what you asked just ventin...negativesorry know this isn what you asked just ventin...negative[0.016777772456407547, -0.05478338897228241, -...0.745406
21coming out strongly against gujarat chief mini...positivecoming out strongly against gujarat chief min...negative[0.06856723129749298, -0.019821858033537865, -...0.694449
22there one tool bjp can use their manifesto whi...positivethere one tool bjp can use their manifesto whi...negative[0.057847339659929276, -0.05365725979208946, -...0.623127
23jakiro spotted the middle top maybepositivejakiro spotted the middle top maybeneutral[-0.011690962128341198, -0.024473998695611954,...0.575394
24family mormon have never tried explain them th...positivefamily mormon have never tried explain them t...positive[0.03987010195851326, -0.0009543427731841803, ...0.606252
25with these results would have grudgingly accep...negativewith these results would have grudgingly accep...negative[0.034668292850255966, -0.05392604321241379, -...0.736970
26tea partier expresses support for namo after e...negativetea partier expresses support for namo after ...negative[0.032365716993808746, -0.056087080389261246, ...0.760564
27politically would stupid move take stand right...negativepolitically would stupid move take stand right...negative[-0.00040777752292342484, -0.01262842211872339...0.674769
28wtf whynegativewtf whynegative[0.025807170197367668, -0.07080958038568497, -...0.635538
29have actually seen lot users views change duri...positivehave actually seen lot users views change dur...negative[-0.009333955124020576, 0.01388698909431696, -...0.662819
30truth told there not insignificant percentage ...positivetruth told there not insignificant percentage ...negative[0.03927519917488098, -0.05597652122378349, -0...0.713786
31was anti bjp and neutral cong became anti bjp ...positivewas anti bjp and neutral cong became anti bjp ...negative[0.03805134445428848, -0.030298737809062004, -...0.732909
32most religions have dogmatic orthodox well eso...positivemost religions have dogmatic orthodox well eso...positive[0.03939439728856087, -0.02040349319577217, -0...0.625969
33laureatte sen said christian schools are perfe...positivelaureatte sen said christian schools are perfe...neutral[0.05267934128642082, 0.05836360529065132, 0.0...0.510249
34need stop watching the garbage that you watch ...positiveneed stop watching the garbage that you watch...neutral[-0.012382612563669682, 0.01988200470805168, 0...0.552975
35gandhi mandela hitler mao plato chandragupt ma...negativegandhi mandela hitler mao plato chandragupt ma...negative[0.027552243322134018, 0.013075066730380058, 0...0.719779
36hate aap for the other thread points such the ...negativehate aap for the other thread points such the...negative[0.01461736112833023, -0.038017574697732925, -...0.756800
37absolutely agree with you subsidies the worst ...negativeabsolutely agree with you subsidies the worst ...negative[0.010974399745464325, 0.0033110962249338627, ...0.655372
38are you corrupt mind have you benefited throug...negativeare you corrupt mind have you benefited throu...negative[0.03834373503923416, -0.06521473079919815, -0...0.752354
39congress needs bogeyman modi without the bad g...positivecongress needs bogeyman modi without the bad g...negative[0.03138439729809761, -0.06221967190504074, -0...0.703794
40protip don type uppercase text all caps harder...negativeprotip don type uppercase text all caps harder...negative[0.044019922614097595, 0.025341013446450233, 0...0.673459
41brother trog very wrathful indeed but his will...positivebrother trog very wrathful indeed but his wil...neutral[-0.024625714868307114, 0.06193268671631813, 0...0.537965
42start off saying that the craftsmanship this p...positivestart off saying that the craftsmanship this ...positive[0.05780623108148575, -0.06291749328374863, -0...0.723931
43have made request unban namoarmy hell moron ho...negativehave made request unban namoarmy hell moron h...negative[0.015555822290480137, -0.012748800218105316, ...0.718607
44child modi worked his father’ tea shop and y...negativechild modi worked his father’ tea shop and ...negative[0.05774841830134392, -0.059567004442214966, -...0.743616
45namo tea yuupea horrible rhyme knownegativenamo tea yuupea horrible rhyme knownegative[0.025534288957715034, 0.004176765214651823, -...0.760347
46great agility from akpom cut back and bendpositivegreat agility from akpom cut back and bendpositive[0.06865684688091278, -0.02164856530725956, -0...0.670042
47from undecided pro aap they are not perfect bu...positivefrom undecided pro aap they are not perfect bu...negative[0.01590304635465145, -0.0683458000421524, -0....0.647296
48woah there don insane with pray mean you don w...negativewoah there don insane with pray mean you don w...negative[0.050547026097774506, -0.01725909113883972, 0...0.711541
49porngress wont announce their candidate cuz th...positiveporngress wont announce their candidate cuz th...negative[0.05935536324977875, -0.051609162241220474, -...0.671247
\n","
"],"text/plain":[" document ... sentiment_confidence\n","origin_index ... \n","0 its true they had cut the power what douchebag... ... 0.632922\n","1 fuck giroud better finishing like this month ... 0.558096\n","2 looks shit now but still proud made ... 0.594104\n","3 pelor the burning hate the best evil god ... 0.699286\n","4 can ask what you with something this powerful ... 0.615222\n","5 aap’ shazia ilmi from puram constituency lag... ... 0.751383\n","6 fuck yeah ... 0.663185\n","7 honestly really surprised alice ranked that lo... ... 0.605483\n","8 didn care about politics before now hate ... 0.701191\n","9 hard nips and goosebumps ... 0.629745\n","10 varadabhai ndtv trying too well dilute bjp tre... ... 0.756238\n","11 old man has lost his mind ... 0.502476\n","12 why this being downvoted you might ask both mo... ... 0.710366\n","13 hasnt changed all apolitical before simply don... ... 0.603606\n","14 for one campaign pretty much just snatched the... ... 0.631376\n","15 vajpayee managed forge much broader coalition ... ... 0.685135\n","16 lol this only proves how desperate they are ge... ... 0.624959\n","17 dont hate aap but your questions are example w... ... 0.769971\n","18 what were the other policies you discussed not... ... 0.669384\n","19 wow lots favorites this bracket haqua tsukushi... ... 0.593471\n","20 sorry know this isn what you asked just ventin... ... 0.745406\n","21 coming out strongly against gujarat chief mini... ... 0.694449\n","22 there one tool bjp can use their manifesto whi... ... 0.623127\n","23 jakiro spotted the middle top maybe ... 0.575394\n","24 family mormon have never tried explain them th... ... 0.606252\n","25 with these results would have grudgingly accep... ... 0.736970\n","26 tea partier expresses support for namo after e... ... 0.760564\n","27 politically would stupid move take stand right... ... 0.674769\n","28 wtf why ... 0.635538\n","29 have actually seen lot users views change duri... ... 
0.662819\n","30 truth told there not insignificant percentage ... ... 0.713786\n","31 was anti bjp and neutral cong became anti bjp ... ... 0.732909\n","32 most religions have dogmatic orthodox well eso... ... 0.625969\n","33 laureatte sen said christian schools are perfe... ... 0.510249\n","34 need stop watching the garbage that you watch ... ... 0.552975\n","35 gandhi mandela hitler mao plato chandragupt ma... ... 0.719779\n","36 hate aap for the other thread points such the ... ... 0.756800\n","37 absolutely agree with you subsidies the worst ... ... 0.655372\n","38 are you corrupt mind have you benefited throug... ... 0.752354\n","39 congress needs bogeyman modi without the bad g... ... 0.703794\n","40 protip don type uppercase text all caps harder... ... 0.673459\n","41 brother trog very wrathful indeed but his will... ... 0.537965\n","42 start off saying that the craftsmanship this p... ... 0.723931\n","43 have made request unban namoarmy hell moron ho... ... 0.718607\n","44 child modi worked his father’ tea shop and y... ... 0.743616\n","45 namo tea yuupea horrible rhyme know ... 0.760347\n","46 great agility from akpom cut back and bend ... 0.670042\n","47 from undecided pro aap they are not perfect bu... ... 0.647296\n","48 woah there don insane with pray mean you don w... ... 0.711541\n","49 porngress wont announce their candidate cuz th... ... 
0.671247\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on a new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":109},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609522047859,"user_tz":-300,"elapsed":222054,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a862e5e9-9580-4257-de54-dec3acfbdd6e"},"source":["fitted_pipe.predict(\"Indian prime minister was assinated!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
documentsentimentdefault_name_embeddingssentiment_confidence
origin_index
0Bitcoin is going to the moon!neutral[0.06468033790588379, -0.040837567299604416, -...0.524234
\n","
"],"text/plain":[" document ... sentiment_confidence\n","origin_index ... \n","0 Bitcoin is going to the moon! ... 0.524234\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609522047861,"user_tz":-300,"elapsed":222040,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"cf81e598-13e9-40fd-e8bb-937b8a8933f3"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609522047863,"user_tz":-300,"elapsed":222021,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"fe05a28d-53de-4ec5-e7fc-1ec49eaeddd6"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 1.00 0.83 0.91 24\n"," neutral 0.00 0.00 0.00 0\n"," positive 1.00 1.00 1.00 26\n","\n"," accuracy 0.92 50\n"," macro avg 0.67 0.61 0.64 50\n","weighted avg 1.00 0.92 0.96 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
documentytextsentimentdefault_name_embeddingssentiment_confidence
origin_index
0its true they had cut the power what douchebag...positiveits true they had cut the power what douchebag...positive[0.033111296594142914, 0.053994592279195786, -...0.761194
1fuck giroud better finishing like this monthpositivefuck giroud better finishing like this monthpositive[0.0678204670548439, 0.01411951333284378, -0.0...0.938677
2looks shit now but still proud madepositivelooks shit now but still proud madepositive[0.03247416764497757, -0.09844464808702469, -0...0.954937
3pelor the burning hate the best evil godnegativepelor the burning hate the best evil godnegative[0.04032062739133835, 0.07666623592376709, -0....0.810980
4can ask what you with something this powerfulpositivecan ask what you with something this powerfulpositive[0.015518003143370152, -0.05116305500268936, -...0.956043
5aap’ shazia ilmi from puram constituency lag...negativeaap’ shazia ilmi from puram constituency lag...negative[0.02478150464594364, -0.06508146971464157, -0...0.708917
6fuck yeahnegativefuck yeahnegative[0.04602408409118652, -0.025047995150089264, -...0.731940
7honestly really surprised alice ranked that lo...positivehonestly really surprised alice ranked that lo...positive[-0.035716041922569275, -0.04127982258796692, ...0.966494
8didn care about politics before now hatenegativedidn care about politics before now hatenegative[-0.006816444452852011, 0.06221264973282814, -...0.672320
9hard nips and goosebumpsnegativehard nips and goosebumpsnegative[-0.02919699251651764, -0.030449824407696724, ...0.604969
10varadabhai ndtv trying too well dilute bjp tre...negativevaradabhai ndtv trying too well dilute bjp tre...negative[0.04727796092629433, -0.06792476028203964, -0...0.639880
11old man has lost his mindpositiveold man has lost his mindpositive[0.039657335728406906, -0.04277808964252472, -...0.929136
12why this being downvoted you might ask both mo...negativewhy this being downvoted you might ask both mo...neutral[0.06581216305494308, -0.06079106032848358, -0...0.546161
13hasnt changed all apolitical before simply don...positivehasnt changed all apolitical before simply do...positive[0.03509754315018654, -0.004639611579477787, -...0.883017
14for one campaign pretty much just snatched the...negativefor one campaign pretty much just snatched the...negative[0.017386479303240776, 0.0443551279604435, -0....0.636396
15vajpayee managed forge much broader coalition ...positivevajpayee managed forge much broader coalition ...positive[0.0372871570289135, -0.051079731434583664, -0...0.848566
16lol this only proves how desperate they are ge...positivelol this only proves how desperate they are ge...positive[0.05233633145689964, -0.03147873282432556, 0....0.819890
17dont hate aap but your questions are example w...negativedont hate aap but your questions are example ...negative[0.026356497779488564, -0.04044198617339134, -...0.724538
18what were the other policies you discussed not...negativewhat were the other policies you discussed not...negative[-0.07521010935306549, 0.008543566800653934, 0...0.732422
19wow lots favorites this bracket haqua tsukushi...positivewow lots favorites this bracket haqua tsukushi...positive[-0.0693160742521286, -0.015458519570529461, -...0.971349
20sorry know this isn what you asked just ventin...negativesorry know this isn what you asked just ventin...negative[0.016777772456407547, -0.05478338897228241, -...0.623325
21coming out strongly against gujarat chief mini...positivecoming out strongly against gujarat chief min...positive[0.06856723129749298, -0.019821858033537865, -...0.736283
22there one tool bjp can use their manifesto whi...positivethere one tool bjp can use their manifesto whi...positive[0.057847339659929276, -0.05365725979208946, -...0.870023
23jakiro spotted the middle top maybepositivejakiro spotted the middle top maybepositive[-0.011690962128341198, -0.024473998695611954,...0.965604
24family mormon have never tried explain them th...positivefamily mormon have never tried explain them t...positive[0.03987010195851326, -0.0009543427731841803, ...0.964053
25with these results would have grudgingly accep...negativewith these results would have grudgingly accep...neutral[0.034668292850255966, -0.05392604321241379, -...0.521402
26tea partier expresses support for namo after e...negativetea partier expresses support for namo after ...negative[0.032365716993808746, -0.056087080389261246, ...0.837552
27politically would stupid move take stand right...negativepolitically would stupid move take stand right...neutral[-0.00040777752292342484, -0.01262842211872339...0.541656
28wtf whynegativewtf whynegative[0.025807170197367668, -0.07080958038568497, -...0.747054
29have actually seen lot users views change duri...positivehave actually seen lot users views change dur...positive[-0.009333955124020576, 0.01388698909431696, -...0.818759
30truth told there not insignificant percentage ...positivetruth told there not insignificant percentage ...positive[0.03927519917488098, -0.05597652122378349, -0...0.776765
31was anti bjp and neutral cong became anti bjp ...positivewas anti bjp and neutral cong became anti bjp ...positive[0.03805134445428848, -0.030298737809062004, -...0.630857
32most religions have dogmatic orthodox well eso...positivemost religions have dogmatic orthodox well eso...positive[0.03939439728856087, -0.02040349319577217, -0...0.972607
33laureatte sen said christian schools are perfe...positivelaureatte sen said christian schools are perfe...positive[0.05267934128642082, 0.05836360529065132, 0.0...0.911020
34need stop watching the garbage that you watch ...positiveneed stop watching the garbage that you watch...positive[-0.012382612563669682, 0.01988200470805168, 0...0.954440
35gandhi mandela hitler mao plato chandragupt ma...negativegandhi mandela hitler mao plato chandragupt ma...negative[0.027552243322134018, 0.013075066730380058, 0...0.767667
36hate aap for the other thread points such the ...negativehate aap for the other thread points such the...negative[0.01461736112833023, -0.038017574697732925, -...0.690414
37absolutely agree with you subsidies the worst ...negativeabsolutely agree with you subsidies the worst ...neutral[0.010974399745464325, 0.0033110962249338627, ...0.581476
38are you corrupt mind have you benefited throug...negativeare you corrupt mind have you benefited throu...negative[0.03834373503923416, -0.06521473079919815, -0...0.783217
39congress needs bogeyman modi without the bad g...positivecongress needs bogeyman modi without the bad g...positive[0.03138439729809761, -0.06221967190504074, -0...0.764358
40protip don type uppercase text all caps harder...negativeprotip don type uppercase text all caps harder...negative[0.044019922614097595, 0.025341013446450233, 0...0.738550
41brother trog very wrathful indeed but his will...positivebrother trog very wrathful indeed but his wil...positive[-0.024625714868307114, 0.06193268671631813, 0...0.923871
42start off saying that the craftsmanship this p...positivestart off saying that the craftsmanship this ...positive[0.05780623108148575, -0.06291749328374863, -0...0.985073
43have made request unban namoarmy hell moron ho...negativehave made request unban namoarmy hell moron h...negative[0.015555822290480137, -0.012748800218105316, ...0.796430
44child modi worked his father’ tea shop and y...negativechild modi worked his father’ tea shop and ...negative[0.05774841830134392, -0.059567004442214966, -...0.709697
45namo tea yuupea horrible rhyme knownegativenamo tea yuupea horrible rhyme knownegative[0.025534288957715034, 0.004176765214651823, -...0.851523
46great agility from akpom cut back and bendpositivegreat agility from akpom cut back and bendpositive[0.06865684688091278, -0.02164856530725956, -0...0.966416
47from undecided pro aap they are not perfect bu...positivefrom undecided pro aap they are not perfect bu...positive[0.01590304635465145, -0.0683458000421524, -0....0.891286
48woah there don insane with pray mean you don w...negativewoah there don insane with pray mean you don w...negative[0.050547026097774506, -0.01725909113883972, 0...0.798072
49porngress wont announce their candidate cuz th...positiveporngress wont announce their candidate cuz th...positive[0.05935536324977875, -0.051609162241220474, -...0.858501
\n","
"],"text/plain":[" document ... sentiment_confidence\n","origin_index ... \n","0 its true they had cut the power what douchebag... ... 0.761194\n","1 fuck giroud better finishing like this month ... 0.938677\n","2 looks shit now but still proud made ... 0.954937\n","3 pelor the burning hate the best evil god ... 0.810980\n","4 can ask what you with something this powerful ... 0.956043\n","5 aap’ shazia ilmi from puram constituency lag... ... 0.708917\n","6 fuck yeah ... 0.731940\n","7 honestly really surprised alice ranked that lo... ... 0.966494\n","8 didn care about politics before now hate ... 0.672320\n","9 hard nips and goosebumps ... 0.604969\n","10 varadabhai ndtv trying too well dilute bjp tre... ... 0.639880\n","11 old man has lost his mind ... 0.929136\n","12 why this being downvoted you might ask both mo... ... 0.546161\n","13 hasnt changed all apolitical before simply don... ... 0.883017\n","14 for one campaign pretty much just snatched the... ... 0.636396\n","15 vajpayee managed forge much broader coalition ... ... 0.848566\n","16 lol this only proves how desperate they are ge... ... 0.819890\n","17 dont hate aap but your questions are example w... ... 0.724538\n","18 what were the other policies you discussed not... ... 0.732422\n","19 wow lots favorites this bracket haqua tsukushi... ... 0.971349\n","20 sorry know this isn what you asked just ventin... ... 0.623325\n","21 coming out strongly against gujarat chief mini... ... 0.736283\n","22 there one tool bjp can use their manifesto whi... ... 0.870023\n","23 jakiro spotted the middle top maybe ... 0.965604\n","24 family mormon have never tried explain them th... ... 0.964053\n","25 with these results would have grudgingly accep... ... 0.521402\n","26 tea partier expresses support for namo after e... ... 0.837552\n","27 politically would stupid move take stand right... ... 0.541656\n","28 wtf why ... 0.747054\n","29 have actually seen lot users views change duri... ... 
0.818759\n","30 truth told there not insignificant percentage ... ... 0.776765\n","31 was anti bjp and neutral cong became anti bjp ... ... 0.630857\n","32 most religions have dogmatic orthodox well eso... ... 0.972607\n","33 laureatte sen said christian schools are perfe... ... 0.911020\n","34 need stop watching the garbage that you watch ... ... 0.954440\n","35 gandhi mandela hitler mao plato chandragupt ma... ... 0.767667\n","36 hate aap for the other thread points such the ... ... 0.690414\n","37 absolutely agree with you subsidies the worst ... ... 0.581476\n","38 are you corrupt mind have you benefited throug... ... 0.783217\n","39 congress needs bogeyman modi without the bad g... ... 0.764358\n","40 protip don type uppercase text all caps harder... ... 0.738550\n","41 brother trog very wrathful indeed but his will... ... 0.923871\n","42 start off saying that the craftsmanship this p... ... 0.985073\n","43 have made request unban namoarmy hell moron ho... ... 0.796430\n","44 child modi worked his father’ tea shop and y... ... 0.709697\n","45 namo tea yuupea horrible rhyme know ... 0.851523\n","46 great agility from akpom cut back and bend ... 0.966416\n","47 from undecided pro aap they are not perfect bu... ... 0.891286\n","48 woah there don insane with pray mean you don w... ... 0.798072\n","49 porngress wont announce their candidate cuz th... ... 
0.858501\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nxWFzQOhjWC8","executionInfo":{"status":"ok","timestamp":1609522047865,"user_tz":-300,"elapsed":221994,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"6f5e4138-03d4-495a-ce16-0be512588c81"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possible sentence embedding we could use. Let's use BERT!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model 
sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP 
model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model 
sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"IKK_Ii_gjJfF","executionInfo":{"status":"ok","timestamp":1609523959387,"user_tz":-300,"elapsed":476394,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1baf2085-bee8-48c3-fd11-401722536642"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train longer and use a smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(70) \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.84 0.77 0.80 300\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.91 0.72 0.80 300\n","\n"," accuracy 0.74 600\n"," macro avg 0.59 0.50 0.54 600\n","weighted avg 0.88 0.74 0.80 600\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. 
Let's save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","executionInfo":{"status":"ok","timestamp":1609522704715,"user_tz":-300,"elapsed":161180,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"bc27a985-38bf-4b98-f3fe-8e4955cd83cc"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Let's load the model from HDD.\n","This makes Offline NLU usage possible! \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":127},"id":"SO4uz45MoRgp","executionInfo":{"status":"ok","timestamp":1609522719523,"user_tz":-300,"elapsed":14825,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b14d7193-6b43-4c1d-eb42-326af88ffc0a"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Indian prime minister was assinated')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
en_embed_sentence_small_bert_L12_768_embeddingsdocumentsentimentsentiment_confidence
origin_index
0[0.15737222135066986, 0.2598555386066437, 0.85...Tesla plans to invest 10M into the ML sectorpositive0.638827
\n","
"],"text/plain":[" en_embed_sentence_small_bert_L12_768_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [0.15737222135066986, 0.2598555386066437, 0.85... ... 0.638827\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"e0CVlkk9v6Qi","executionInfo":{"status":"ok","timestamp":1609522719526,"user_tz":-300,"elapsed":24,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"acbf137f-60ff-4804-b903-bb88f00c78d0"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_reddit.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_reddit.ipynb)\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","## 2 class Reddit comment sentiment classifier training\n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to 
train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install pyspark==2.4.7 \n","! pip install nlu > /dev/null \n","\n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download Reddit Sentiment dataset \n","https://www.kaggle.com/cosmos98/twitter-and-reddit-sentimental-analysis-dataset\n","#Context\n","\n","This is was a Dataset Created as a part of the university Project On Sentimental Analysis On Multi-Source Social Media Platforms using PySpark."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610788117464,"user_tz":-300,"elapsed":1827,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"69719e54-e5be-4a26-bcde-b913531cbcd9"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/Reddit_Data.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:08:00-- http://ckl-it.de/wp-content/uploads/2021/01/Reddit_Data.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 153265 (150K) [text/csv]\n","Saving to: ‘Reddit_Data.csv’\n","\n","Reddit_Data.csv 100%[===================>] 149.67K 402KB/s in 0.4s \n","\n","2021-01-16 09:08:01 (402 KB/s) - ‘Reddit_Data.csv’ saved [153265/153265]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610788135165,"user_tz":-300,"elapsed":1025,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"cdd2f3e1-3f96-4a5b-9291-34bce078fbf0"},"source":["import pandas as pd\n","train_path = '/content/Reddit_Data.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","columns=['text','y']\n","train_df = train_df[columns]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
texty
0its true they had cut the power what douchebag...positive
1fuck giroud better finishing like this monthpositive
2looks shit now but still proud madepositive
3pelor the burning hate the best evil godnegative
4can ask what you with something this powerfulpositive
.........
595bangali desh bechne main sabse aagepositive
596national media channels were gaged not cover t...positive
597been following these threads from the beginni...negative
598pretty sure this sarcasm satire the news 1500...positive
599much would love for namo our next hard imagin...positive
\n","

600 rows × 2 columns

\n","
"],"text/plain":[" text y\n","0 its true they had cut the power what douchebag... positive\n","1 fuck giroud better finishing like this month positive\n","2 looks shit now but still proud made positive\n","3 pelor the burning hate the best evil god negative\n","4 can ask what you with something this powerful positive\n",".. ... ...\n","595 bangali desh bechne main sabse aage positive\n","596 national media channels were gaged not cover t... positive\n","597 been following these threads from the beginni... negative\n","598 pretty sure this sarcasm satire the news 1500... positive\n","599 much would love for namo our next hard imagin... positive\n","\n","[600 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":2}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609522047844,"user_tz":-300,"elapsed":222057,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"83383a78-d68b-43a0-a253-318696580942"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix and fit it on a datset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.67 1.00 0.80 24\n"," neutral 0.00 0.00 0.00 0\n"," positive 1.00 0.23 0.38 26\n","\n"," accuracy 0.60 50\n"," macro avg 0.56 0.41 0.39 50\n","weighted avg 0.84 0.60 0.58 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
documentytextsentimentdefault_name_embeddingssentiment_confidence
origin_index
0its true they had cut the power what douchebag...positiveits true they had cut the power what douchebag...negative[0.033111296594142914, 0.053994592279195786, -...0.632922
1fuck giroud better finishing like this monthpositivefuck giroud better finishing like this monthneutral[0.0678204670548439, 0.01411951333284378, -0.0...0.558096
2looks shit now but still proud madepositivelooks shit now but still proud madeneutral[0.03247416764497757, -0.09844464808702469, -0...0.594104
3pelor the burning hate the best evil godnegativepelor the burning hate the best evil godnegative[0.04032062739133835, 0.07666623592376709, -0....0.699286
4can ask what you with something this powerfulpositivecan ask what you with something this powerfulpositive[0.015518003143370152, -0.05116305500268936, -...0.615222
5aap’ shazia ilmi from puram constituency lag...negativeaap’ shazia ilmi from puram constituency lag...negative[0.02478150464594364, -0.06508146971464157, -0...0.751383
6fuck yeahnegativefuck yeahnegative[0.04602408409118652, -0.025047995150089264, -...0.663185
7honestly really surprised alice ranked that lo...positivehonestly really surprised alice ranked that lo...positive[-0.035716041922569275, -0.04127982258796692, ...0.605483
8didn care about politics before now hatenegativedidn care about politics before now hatenegative[-0.006816444452852011, 0.06221264973282814, -...0.701191
9hard nips and goosebumpsnegativehard nips and goosebumpsnegative[-0.02919699251651764, -0.030449824407696724, ...0.629745
10varadabhai ndtv trying too well dilute bjp tre...negativevaradabhai ndtv trying too well dilute bjp tre...negative[0.04727796092629433, -0.06792476028203964, -0...0.756238
11old man has lost his mindpositiveold man has lost his mindneutral[0.039657335728406906, -0.04277808964252472, -...0.502476
12why this being downvoted you might ask both mo...negativewhy this being downvoted you might ask both mo...negative[0.06581216305494308, -0.06079106032848358, -0...0.710366
13hasnt changed all apolitical before simply don...positivehasnt changed all apolitical before simply do...negative[0.03509754315018654, -0.004639611579477787, -...0.603606
14for one campaign pretty much just snatched the...negativefor one campaign pretty much just snatched the...negative[0.017386479303240776, 0.0443551279604435, -0....0.631376
15vajpayee managed forge much broader coalition ...positivevajpayee managed forge much broader coalition ...negative[0.0372871570289135, -0.051079731434583664, -0...0.685135
16lol this only proves how desperate they are ge...positivelol this only proves how desperate they are ge...negative[0.05233633145689964, -0.03147873282432556, 0....0.624959
17dont hate aap but your questions are example w...negativedont hate aap but your questions are example ...negative[0.026356497779488564, -0.04044198617339134, -...0.769971
18what were the other policies you discussed not...negativewhat were the other policies you discussed not...negative[-0.07521010935306549, 0.008543566800653934, 0...0.669384
19wow lots favorites this bracket haqua tsukushi...positivewow lots favorites this bracket haqua tsukushi...neutral[-0.0693160742521286, -0.015458519570529461, -...0.593471
20sorry know this isn what you asked just ventin...negativesorry know this isn what you asked just ventin...negative[0.016777772456407547, -0.05478338897228241, -...0.745406
21coming out strongly against gujarat chief mini...positivecoming out strongly against gujarat chief min...negative[0.06856723129749298, -0.019821858033537865, -...0.694449
22there one tool bjp can use their manifesto whi...positivethere one tool bjp can use their manifesto whi...negative[0.057847339659929276, -0.05365725979208946, -...0.623127
23jakiro spotted the middle top maybepositivejakiro spotted the middle top maybeneutral[-0.011690962128341198, -0.024473998695611954,...0.575394
24family mormon have never tried explain them th...positivefamily mormon have never tried explain them t...positive[0.03987010195851326, -0.0009543427731841803, ...0.606252
25with these results would have grudgingly accep...negativewith these results would have grudgingly accep...negative[0.034668292850255966, -0.05392604321241379, -...0.736970
26tea partier expresses support for namo after e...negativetea partier expresses support for namo after ...negative[0.032365716993808746, -0.056087080389261246, ...0.760564
27politically would stupid move take stand right...negativepolitically would stupid move take stand right...negative[-0.00040777752292342484, -0.01262842211872339...0.674769
28wtf whynegativewtf whynegative[0.025807170197367668, -0.07080958038568497, -...0.635538
29have actually seen lot users views change duri...positivehave actually seen lot users views change dur...negative[-0.009333955124020576, 0.01388698909431696, -...0.662819
30truth told there not insignificant percentage ...positivetruth told there not insignificant percentage ...negative[0.03927519917488098, -0.05597652122378349, -0...0.713786
31was anti bjp and neutral cong became anti bjp ...positivewas anti bjp and neutral cong became anti bjp ...negative[0.03805134445428848, -0.030298737809062004, -...0.732909
32most religions have dogmatic orthodox well eso...positivemost religions have dogmatic orthodox well eso...positive[0.03939439728856087, -0.02040349319577217, -0...0.625969
33laureatte sen said christian schools are perfe...positivelaureatte sen said christian schools are perfe...neutral[0.05267934128642082, 0.05836360529065132, 0.0...0.510249
34need stop watching the garbage that you watch ...positiveneed stop watching the garbage that you watch...neutral[-0.012382612563669682, 0.01988200470805168, 0...0.552975
35gandhi mandela hitler mao plato chandragupt ma...negativegandhi mandela hitler mao plato chandragupt ma...negative[0.027552243322134018, 0.013075066730380058, 0...0.719779
36hate aap for the other thread points such the ...negativehate aap for the other thread points such the...negative[0.01461736112833023, -0.038017574697732925, -...0.756800
37absolutely agree with you subsidies the worst ...negativeabsolutely agree with you subsidies the worst ...negative[0.010974399745464325, 0.0033110962249338627, ...0.655372
38are you corrupt mind have you benefited throug...negativeare you corrupt mind have you benefited throu...negative[0.03834373503923416, -0.06521473079919815, -0...0.752354
39congress needs bogeyman modi without the bad g...positivecongress needs bogeyman modi without the bad g...negative[0.03138439729809761, -0.06221967190504074, -0...0.703794
40protip don type uppercase text all caps harder...negativeprotip don type uppercase text all caps harder...negative[0.044019922614097595, 0.025341013446450233, 0...0.673459
41brother trog very wrathful indeed but his will...positivebrother trog very wrathful indeed but his wil...neutral[-0.024625714868307114, 0.06193268671631813, 0...0.537965
42start off saying that the craftsmanship this p...positivestart off saying that the craftsmanship this ...positive[0.05780623108148575, -0.06291749328374863, -0...0.723931
43have made request unban namoarmy hell moron ho...negativehave made request unban namoarmy hell moron h...negative[0.015555822290480137, -0.012748800218105316, ...0.718607
44child modi worked his father’ tea shop and y...negativechild modi worked his father’ tea shop and ...negative[0.05774841830134392, -0.059567004442214966, -...0.743616
45namo tea yuupea horrible rhyme knownegativenamo tea yuupea horrible rhyme knownegative[0.025534288957715034, 0.004176765214651823, -...0.760347
46great agility from akpom cut back and bendpositivegreat agility from akpom cut back and bendpositive[0.06865684688091278, -0.02164856530725956, -0...0.670042
47from undecided pro aap they are not perfect bu...positivefrom undecided pro aap they are not perfect bu...negative[0.01590304635465145, -0.0683458000421524, -0....0.647296
48woah there don insane with pray mean you don w...negativewoah there don insane with pray mean you don w...negative[0.050547026097774506, -0.01725909113883972, 0...0.711541
49porngress wont announce their candidate cuz th...positiveporngress wont announce their candidate cuz th...negative[0.05935536324977875, -0.051609162241220474, -...0.671247
\n","
"],"text/plain":[" document ... sentiment_confidence\n","origin_index ... \n","0 its true they had cut the power what douchebag... ... 0.632922\n","1 fuck giroud better finishing like this month ... 0.558096\n","2 looks shit now but still proud made ... 0.594104\n","3 pelor the burning hate the best evil god ... 0.699286\n","4 can ask what you with something this powerful ... 0.615222\n","5 aap’ shazia ilmi from puram constituency lag... ... 0.751383\n","6 fuck yeah ... 0.663185\n","7 honestly really surprised alice ranked that lo... ... 0.605483\n","8 didn care about politics before now hate ... 0.701191\n","9 hard nips and goosebumps ... 0.629745\n","10 varadabhai ndtv trying too well dilute bjp tre... ... 0.756238\n","11 old man has lost his mind ... 0.502476\n","12 why this being downvoted you might ask both mo... ... 0.710366\n","13 hasnt changed all apolitical before simply don... ... 0.603606\n","14 for one campaign pretty much just snatched the... ... 0.631376\n","15 vajpayee managed forge much broader coalition ... ... 0.685135\n","16 lol this only proves how desperate they are ge... ... 0.624959\n","17 dont hate aap but your questions are example w... ... 0.769971\n","18 what were the other policies you discussed not... ... 0.669384\n","19 wow lots favorites this bracket haqua tsukushi... ... 0.593471\n","20 sorry know this isn what you asked just ventin... ... 0.745406\n","21 coming out strongly against gujarat chief mini... ... 0.694449\n","22 there one tool bjp can use their manifesto whi... ... 0.623127\n","23 jakiro spotted the middle top maybe ... 0.575394\n","24 family mormon have never tried explain them th... ... 0.606252\n","25 with these results would have grudgingly accep... ... 0.736970\n","26 tea partier expresses support for namo after e... ... 0.760564\n","27 politically would stupid move take stand right... ... 0.674769\n","28 wtf why ... 0.635538\n","29 have actually seen lot users views change duri... ... 
0.662819\n","30 truth told there not insignificant percentage ... ... 0.713786\n","31 was anti bjp and neutral cong became anti bjp ... ... 0.732909\n","32 most religions have dogmatic orthodox well eso... ... 0.625969\n","33 laureatte sen said christian schools are perfe... ... 0.510249\n","34 need stop watching the garbage that you watch ... ... 0.552975\n","35 gandhi mandela hitler mao plato chandragupt ma... ... 0.719779\n","36 hate aap for the other thread points such the ... ... 0.756800\n","37 absolutely agree with you subsidies the worst ... ... 0.655372\n","38 are you corrupt mind have you benefited throug... ... 0.752354\n","39 congress needs bogeyman modi without the bad g... ... 0.703794\n","40 protip don type uppercase text all caps harder... ... 0.673459\n","41 brother trog very wrathful indeed but his will... ... 0.537965\n","42 start off saying that the craftsmanship this p... ... 0.723931\n","43 have made request unban namoarmy hell moron ho... ... 0.718607\n","44 child modi worked his father’ tea shop and y... ... 0.743616\n","45 namo tea yuupea horrible rhyme know ... 0.760347\n","46 great agility from akpom cut back and bend ... 0.670042\n","47 from undecided pro aap they are not perfect bu... ... 0.647296\n","48 woah there don insane with pray mean you don w... ... 0.711541\n","49 porngress wont announce their candidate cuz th... ... 
0.671247\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":109},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609522047859,"user_tz":-300,"elapsed":222054,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a862e5e9-9580-4257-de54-dec3acfbdd6e"},"source":["fitted_pipe.predict(\"Indian prime minister was assinated!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
documentsentimentdefault_name_embeddingssentiment_confidence
origin_index
0Bitcoin is going to the moon!neutral[0.06468033790588379, -0.040837567299604416, -...0.524234
\n","
"],"text/plain":[" document ... sentiment_confidence\n","origin_index ... \n","0 Bitcoin is going to the moon! ... 0.524234\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609522047861,"user_tz":-300,"elapsed":222040,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"cf81e598-13e9-40fd-e8bb-937b8a8933f3"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609522047863,"user_tz":-300,"elapsed":222021,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"fe05a28d-53de-4ec5-e7fc-1ec49eaeddd6"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 1.00 0.83 0.91 24\n"," neutral 0.00 0.00 0.00 0\n"," positive 1.00 1.00 1.00 26\n","\n"," accuracy 0.92 50\n"," macro avg 0.67 0.61 0.64 50\n","weighted avg 1.00 0.92 0.96 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
documentytextsentimentdefault_name_embeddingssentiment_confidence
origin_index
0its true they had cut the power what douchebag...positiveits true they had cut the power what douchebag...positive[0.033111296594142914, 0.053994592279195786, -...0.761194
1fuck giroud better finishing like this monthpositivefuck giroud better finishing like this monthpositive[0.0678204670548439, 0.01411951333284378, -0.0...0.938677
2looks shit now but still proud madepositivelooks shit now but still proud madepositive[0.03247416764497757, -0.09844464808702469, -0...0.954937
3pelor the burning hate the best evil godnegativepelor the burning hate the best evil godnegative[0.04032062739133835, 0.07666623592376709, -0....0.810980
4can ask what you with something this powerfulpositivecan ask what you with something this powerfulpositive[0.015518003143370152, -0.05116305500268936, -...0.956043
5aap’ shazia ilmi from puram constituency lag...negativeaap’ shazia ilmi from puram constituency lag...negative[0.02478150464594364, -0.06508146971464157, -0...0.708917
6fuck yeahnegativefuck yeahnegative[0.04602408409118652, -0.025047995150089264, -...0.731940
7honestly really surprised alice ranked that lo...positivehonestly really surprised alice ranked that lo...positive[-0.035716041922569275, -0.04127982258796692, ...0.966494
8didn care about politics before now hatenegativedidn care about politics before now hatenegative[-0.006816444452852011, 0.06221264973282814, -...0.672320
9hard nips and goosebumpsnegativehard nips and goosebumpsnegative[-0.02919699251651764, -0.030449824407696724, ...0.604969
10varadabhai ndtv trying too well dilute bjp tre...negativevaradabhai ndtv trying too well dilute bjp tre...negative[0.04727796092629433, -0.06792476028203964, -0...0.639880
11old man has lost his mindpositiveold man has lost his mindpositive[0.039657335728406906, -0.04277808964252472, -...0.929136
12why this being downvoted you might ask both mo...negativewhy this being downvoted you might ask both mo...neutral[0.06581216305494308, -0.06079106032848358, -0...0.546161
13hasnt changed all apolitical before simply don...positivehasnt changed all apolitical before simply do...positive[0.03509754315018654, -0.004639611579477787, -...0.883017
14for one campaign pretty much just snatched the...negativefor one campaign pretty much just snatched the...negative[0.017386479303240776, 0.0443551279604435, -0....0.636396
15vajpayee managed forge much broader coalition ...positivevajpayee managed forge much broader coalition ...positive[0.0372871570289135, -0.051079731434583664, -0...0.848566
16lol this only proves how desperate they are ge...positivelol this only proves how desperate they are ge...positive[0.05233633145689964, -0.03147873282432556, 0....0.819890
17dont hate aap but your questions are example w...negativedont hate aap but your questions are example ...negative[0.026356497779488564, -0.04044198617339134, -...0.724538
18what were the other policies you discussed not...negativewhat were the other policies you discussed not...negative[-0.07521010935306549, 0.008543566800653934, 0...0.732422
19wow lots favorites this bracket haqua tsukushi...positivewow lots favorites this bracket haqua tsukushi...positive[-0.0693160742521286, -0.015458519570529461, -...0.971349
20sorry know this isn what you asked just ventin...negativesorry know this isn what you asked just ventin...negative[0.016777772456407547, -0.05478338897228241, -...0.623325
21coming out strongly against gujarat chief mini...positivecoming out strongly against gujarat chief min...positive[0.06856723129749298, -0.019821858033537865, -...0.736283
22there one tool bjp can use their manifesto whi...positivethere one tool bjp can use their manifesto whi...positive[0.057847339659929276, -0.05365725979208946, -...0.870023
23jakiro spotted the middle top maybepositivejakiro spotted the middle top maybepositive[-0.011690962128341198, -0.024473998695611954,...0.965604
24family mormon have never tried explain them th...positivefamily mormon have never tried explain them t...positive[0.03987010195851326, -0.0009543427731841803, ...0.964053
25with these results would have grudgingly accep...negativewith these results would have grudgingly accep...neutral[0.034668292850255966, -0.05392604321241379, -...0.521402
26tea partier expresses support for namo after e...negativetea partier expresses support for namo after ...negative[0.032365716993808746, -0.056087080389261246, ...0.837552
27politically would stupid move take stand right...negativepolitically would stupid move take stand right...neutral[-0.00040777752292342484, -0.01262842211872339...0.541656
28wtf whynegativewtf whynegative[0.025807170197367668, -0.07080958038568497, -...0.747054
29have actually seen lot users views change duri...positivehave actually seen lot users views change dur...positive[-0.009333955124020576, 0.01388698909431696, -...0.818759
30truth told there not insignificant percentage ...positivetruth told there not insignificant percentage ...positive[0.03927519917488098, -0.05597652122378349, -0...0.776765
31was anti bjp and neutral cong became anti bjp ...positivewas anti bjp and neutral cong became anti bjp ...positive[0.03805134445428848, -0.030298737809062004, -...0.630857
32most religions have dogmatic orthodox well eso...positivemost religions have dogmatic orthodox well eso...positive[0.03939439728856087, -0.02040349319577217, -0...0.972607
33laureatte sen said christian schools are perfe...positivelaureatte sen said christian schools are perfe...positive[0.05267934128642082, 0.05836360529065132, 0.0...0.911020
34need stop watching the garbage that you watch ...positiveneed stop watching the garbage that you watch...positive[-0.012382612563669682, 0.01988200470805168, 0...0.954440
35gandhi mandela hitler mao plato chandragupt ma...negativegandhi mandela hitler mao plato chandragupt ma...negative[0.027552243322134018, 0.013075066730380058, 0...0.767667
36hate aap for the other thread points such the ...negativehate aap for the other thread points such the...negative[0.01461736112833023, -0.038017574697732925, -...0.690414
37absolutely agree with you subsidies the worst ...negativeabsolutely agree with you subsidies the worst ...neutral[0.010974399745464325, 0.0033110962249338627, ...0.581476
38are you corrupt mind have you benefited throug...negativeare you corrupt mind have you benefited throu...negative[0.03834373503923416, -0.06521473079919815, -0...0.783217
39congress needs bogeyman modi without the bad g...positivecongress needs bogeyman modi without the bad g...positive[0.03138439729809761, -0.06221967190504074, -0...0.764358
40protip don type uppercase text all caps harder...negativeprotip don type uppercase text all caps harder...negative[0.044019922614097595, 0.025341013446450233, 0...0.738550
41brother trog very wrathful indeed but his will...positivebrother trog very wrathful indeed but his wil...positive[-0.024625714868307114, 0.06193268671631813, 0...0.923871
42start off saying that the craftsmanship this p...positivestart off saying that the craftsmanship this ...positive[0.05780623108148575, -0.06291749328374863, -0...0.985073
43have made request unban namoarmy hell moron ho...negativehave made request unban namoarmy hell moron h...negative[0.015555822290480137, -0.012748800218105316, ...0.796430
44child modi worked his father’ tea shop and y...negativechild modi worked his father’ tea shop and ...negative[0.05774841830134392, -0.059567004442214966, -...0.709697
45namo tea yuupea horrible rhyme knownegativenamo tea yuupea horrible rhyme knownegative[0.025534288957715034, 0.004176765214651823, -...0.851523
46great agility from akpom cut back and bendpositivegreat agility from akpom cut back and bendpositive[0.06865684688091278, -0.02164856530725956, -0...0.966416
47from undecided pro aap they are not perfect bu...positivefrom undecided pro aap they are not perfect bu...positive[0.01590304635465145, -0.0683458000421524, -0....0.891286
48woah there don insane with pray mean you don w...negativewoah there don insane with pray mean you don w...negative[0.050547026097774506, -0.01725909113883972, 0...0.798072
49porngress wont announce their candidate cuz th...positiveporngress wont announce their candidate cuz th...positive[0.05935536324977875, -0.051609162241220474, -...0.858501
\n","
"],"text/plain":[" document ... sentiment_confidence\n","origin_index ... \n","0 its true they had cut the power what douchebag... ... 0.761194\n","1 fuck giroud better finishing like this month ... 0.938677\n","2 looks shit now but still proud made ... 0.954937\n","3 pelor the burning hate the best evil god ... 0.810980\n","4 can ask what you with something this powerful ... 0.956043\n","5 aap’ shazia ilmi from puram constituency lag... ... 0.708917\n","6 fuck yeah ... 0.731940\n","7 honestly really surprised alice ranked that lo... ... 0.966494\n","8 didn care about politics before now hate ... 0.672320\n","9 hard nips and goosebumps ... 0.604969\n","10 varadabhai ndtv trying too well dilute bjp tre... ... 0.639880\n","11 old man has lost his mind ... 0.929136\n","12 why this being downvoted you might ask both mo... ... 0.546161\n","13 hasnt changed all apolitical before simply don... ... 0.883017\n","14 for one campaign pretty much just snatched the... ... 0.636396\n","15 vajpayee managed forge much broader coalition ... ... 0.848566\n","16 lol this only proves how desperate they are ge... ... 0.819890\n","17 dont hate aap but your questions are example w... ... 0.724538\n","18 what were the other policies you discussed not... ... 0.732422\n","19 wow lots favorites this bracket haqua tsukushi... ... 0.971349\n","20 sorry know this isn what you asked just ventin... ... 0.623325\n","21 coming out strongly against gujarat chief mini... ... 0.736283\n","22 there one tool bjp can use their manifesto whi... ... 0.870023\n","23 jakiro spotted the middle top maybe ... 0.965604\n","24 family mormon have never tried explain them th... ... 0.964053\n","25 with these results would have grudgingly accep... ... 0.521402\n","26 tea partier expresses support for namo after e... ... 0.837552\n","27 politically would stupid move take stand right... ... 0.541656\n","28 wtf why ... 0.747054\n","29 have actually seen lot users views change duri... ... 
0.818759\n","30 truth told there not insignificant percentage ... ... 0.776765\n","31 was anti bjp and neutral cong became anti bjp ... ... 0.630857\n","32 most religions have dogmatic orthodox well eso... ... 0.972607\n","33 laureatte sen said christian schools are perfe... ... 0.911020\n","34 need stop watching the garbage that you watch ... ... 0.954440\n","35 gandhi mandela hitler mao plato chandragupt ma... ... 0.767667\n","36 hate aap for the other thread points such the ... ... 0.690414\n","37 absolutely agree with you subsidies the worst ... ... 0.581476\n","38 are you corrupt mind have you benefited throug... ... 0.783217\n","39 congress needs bogeyman modi without the bad g... ... 0.764358\n","40 protip don type uppercase text all caps harder... ... 0.738550\n","41 brother trog very wrathful indeed but his will... ... 0.923871\n","42 start off saying that the craftsmanship this p... ... 0.985073\n","43 have made request unban namoarmy hell moron ho... ... 0.796430\n","44 child modi worked his father’ tea shop and y... ... 0.709697\n","45 namo tea yuupea horrible rhyme know ... 0.851523\n","46 great agility from akpom cut back and bend ... 0.966416\n","47 from undecided pro aap they are not perfect bu... ... 0.891286\n","48 woah there don insane with pray mean you don w... ... 0.798072\n","49 porngress wont announce their candidate cuz th... ... 
0.858501\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nxWFzQOhjWC8","executionInfo":{"status":"ok","timestamp":1609522047865,"user_tz":-300,"elapsed":221994,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"6f5e4138-03d4-495a-ce16-0be512588c81"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model 
sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP 
model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model 
sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"IKK_Ii_gjJfF","executionInfo":{"status":"ok","timestamp":1609523959387,"user_tz":-300,"elapsed":476394,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1baf2085-bee8-48c3-fd11-401722536642"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(70) \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.84 0.77 0.80 300\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.91 0.72 0.80 300\n","\n"," accuracy 0.74 600\n"," macro avg 0.59 0.50 0.54 600\n","weighted avg 0.88 0.74 0.80 600\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. 
Lets save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","executionInfo":{"status":"ok","timestamp":1609522704715,"user_tz":-300,"elapsed":161180,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"bc27a985-38bf-4b98-f3fe-8e4955cd83cc"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":127},"id":"SO4uz45MoRgp","executionInfo":{"status":"ok","timestamp":1609522719523,"user_tz":-300,"elapsed":14825,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b14d7193-6b43-4c1d-eb42-326af88ffc0a"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Indian prime minister was assinated')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
en_embed_sentence_small_bert_L12_768_embeddingsdocumentsentimentsentiment_confidence
origin_index
0[0.15737222135066986, 0.2598555386066437, 0.85...Tesla plans to invest 10M into the ML sectorpositive0.638827
\n","
"],"text/plain":[" en_embed_sentence_small_bert_L12_768_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [0.15737222135066986, 0.2598555386066437, 0.85... ... 0.638827\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"e0CVlkk9v6Qi","executionInfo":{"status":"ok","timestamp":1609522719526,"user_tz":-300,"elapsed":24,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"acbf137f-60ff-4804-b903-bb88f00c78d0"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_twitter.ipynb b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_twitter.ipynb index 3aca40f1..3f5c3d66 100644 --- a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_twitter.ipynb +++ b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_twitter.ipynb @@ -1 +1 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_twitter.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_twitter.ipynb)\n","\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install pyspark==2.4.7 \n","! pip install nlu > /dev/null \n","\n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download twitter Sentiment dataset \n","https://www.kaggle.com/cosmos98/twitter-and-reddit-sentimental-analysis-dataset\n","#Context\n","\n","This is was a Dataset Created as a part of the university Project On Sentimental Analysis On Multi-Source Social Media Platforms using PySpark."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610788006096,"user_tz":-300,"elapsed":2486,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"610519c2-4cf5-4835-d1aa-8da2f83fadf7"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/Twitter_Data.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:06:08-- http://ckl-it.de/wp-content/uploads/2021/01/Twitter_Data.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 99657 (97K) [text/csv]\n","Saving to: ‘Twitter_Data.csv’\n","\n","Twitter_Data.csv 100%[===================>] 97.32K 122KB/s in 0.8s \n","\n","2021-01-16 09:06:10 (122 KB/s) - ‘Twitter_Data.csv’ saved [99657/99657]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610788009315,"user_tz":-300,"elapsed":1404,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b73ee311-b15e-4fc3-a5b5-5772f87dac99"},"source":["import pandas as pd\n","train_path = '/content/Twitter_Data.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must have name 'y' name be of type str\n","columns=['text','y']\n","train_df = train_df[columns]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
texty
0how narendra modi has almost killed the indian...negative
1you think was modi behind that accidentnegative
2kamal haasan takes chowkidar modi kamal haasan...negative
3connected name with surname not bcz religion c...negative
4anyone better than modi when nehruji expired s...positive
.........
595perception makes fool some call “foreign inv...negative
596when will see your tweet for justice for you a...negative
597haha congress going gaga over this after looti...positive
598this movie shows the life histiry narendra mod...negative
599modi left his year old wife and returned her r...positive
\n","

600 rows × 2 columns

\n","
"],"text/plain":[" text y\n","0 how narendra modi has almost killed the indian... negative\n","1 you think was modi behind that accident negative\n","2 kamal haasan takes chowkidar modi kamal haasan... negative\n","3 connected name with surname not bcz religion c... negative\n","4 anyone better than modi when nehruji expired s... positive\n",".. ... ...\n","595 perception makes fool some call “foreign inv... negative\n","596 when will see your tweet for justice for you a... negative\n","597 haha congress going gaga over this after looti... positive\n","598 this movie shows the life histiry narendra mod... negative\n","599 modi left his year old wife and returned her r... positive\n","\n","[600 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609553662416,"user_tz":-300,"elapsed":192414,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a077e55a-4805-43a2-fb11-46074b487e2e"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix and fit it on a dataset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates some NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.00 0.00 0.00 27\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.78 0.61 0.68 23\n","\n"," accuracy 0.28 50\n"," macro avg 0.26 0.20 0.23 50\n","weighted avg 0.36 0.28 0.31 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textdefault_name_embeddingssentimentsentiment_confidencedocumenty
origin_index
0how narendra modi has almost killed the indian...[0.060062434524297714, -0.05557167902588844, -...neutral0.590739how narendra modi has almost killed the indian...negative
1you think was modi behind that accident[0.05362718179821968, -0.004547705873847008, -...neutral0.577544you think was modi behind that accidentnegative
2kamal haasan takes chowkidar modi kamal haasan...[0.07274721562862396, -0.061593908816576004, -...neutral0.585589kamal haasan takes chowkidar modi kamal haasan...negative
3connected name with surname not bcz religion c...[0.06106054410338402, -0.060213156044483185, -...neutral0.554036connected name with surname not bcz religion c...negative
4anyone better than modi when nehruji expired s...[0.0737471655011177, 0.006071773823350668, -0....neutral0.595608anyone better than modi when nehruji expired s...positive
5\\r\\nmodiji wont tired crying foul\\r\\nmain chow...[0.05888385698199272, -0.0646616593003273, -0....neutral0.583403modiji wont tired crying foul main chowkidar h...negative
6poor chap modi hasn’ given him anything can ...[0.058948416262865067, -0.029682165011763573, ...neutral0.578272poor chap modi hasn’ given him anything can ...negative
7green underwear missing ive been doubting isi ...[0.05133155733346939, -0.06789954006671906, -0...neutral0.575918green underwear missing ive been doubting isi ...negative
8congress years wasnt able complete one rafale ...[0.044129759073257446, -0.06111813709139824, -...positive0.605829congress years wasnt able complete one rafale ...positive
9asked learn from how treat minority well does ...[0.03665374591946602, -0.03695330768823624, -0...neutral0.534121asked learn from how treat minority well does ...negative
10stop bull shitting worry about criminal vivek ...[0.07035735249519348, -0.06952506303787231, -0...neutral0.539481stop bull shitting worry about criminal vivek ...negative
11drswamys timesnow last year debate nearly mill...[0.013958276249468327, -0.030759528279304504, ...positive0.613331drswamys timesnow last year debate nearly mill...positive
12asshole bahujan radical marxist grow brain kno...[0.026277026161551476, -0.06238812580704689, -...neutral0.587796asshole bahujan radical marxist grow brain kno...negative
13from selling dreams 2014 selling tshirts 2019 ...[0.07457270473241806, -0.058670494705438614, -...neutral0.584601from selling dreams 2014 selling tshirts 2019 ...positive
14very true sir thats why they are against modi ...[0.061704088002443314, -0.04553354158997536, -...neutral0.584490very true sir thats why they are against modi ...positive
15they are giving jobs citizen india what you ar...[0.05342026799917221, -0.003889711806550622, -...neutral0.574127they are giving jobs citizen india what you ar...negative
16congress has always attempted empower people g...[0.027197618037462234, -0.036435648798942566, ...positive0.602392congress has always attempted empower people g...negative
17have never said that modi succeed yet even als...[0.06601183861494064, -0.020045211538672447, -...positive0.606807have never said that modi succeed yet even als...positive
18\\r\\nthe foundation for new india 2022 has alre...[0.04694363474845886, -0.06800008565187454, -0...neutral0.599807the foundation for new india 2022 has already ...positive
19only rahul gandhis politics love can defeat th...[0.05615750327706337, -0.002462629694491625, -...positive0.602275only rahul gandhis politics love can defeat th...negative
20one step time navigating thru looteyns when ev...[0.030352214351296425, -0.06195472553372383, 0...neutral0.570779one step time navigating thru looteyns when ev...negative
21why sir mam shabana azami hate much that have ...[0.07535804808139801, -0.05643236264586449, -0...neutral0.571882why sir mam shabana azami hate much that have ...negative
22modi will remain for next 510 years and till t...[0.05986170098185539, -0.0674145296216011, -0....neutral0.591540modi will remain for next 510 years and till t...negative
23pledge your first vote for modi[0.023959940299391747, -0.013972461223602295, ...positive0.606293pledge your first vote for modipositive
24why need modi lead bjp government again 2019 j...[0.04451165348291397, -0.06473662704229355, -0...positive0.609683why need modi lead bjp government again 2019 j...positive
25raghuram rajan sent list high profile bank fra...[0.06561190634965897, -0.0614917054772377, -0....neutral0.578591raghuram rajan sent list high profile bank fra...negative
26modi govts slashing indias education budget cl...[0.05217093601822853, -0.05785880982875824, -0...neutral0.594771modi govts slashing indias education budget cl...negative
27why are you hell bent manoj tiwari just her ph...[0.04579753428697586, -0.05176748335361481, -0...positive0.600511why are you hell bent manoj tiwari just her ph...positive
28know going into dirty details nehru family its...[0.047987841069698334, -0.050984784960746765, ...neutral0.533372know going into dirty details nehru family its...negative
29momota begum will let her state become total s...[0.04509664326906204, -0.05019481107592583, -0...neutral0.593740momota begum will let her state become total s...negative
30thanks anu sharma will vote and make sure peop...[0.04315190762281418, -0.04578147828578949, -0...positive0.601758thanks anu sharma will vote and make sure peop...positive
31those who themselves dont know how many father...[0.0144237345084548, -0.052222371101379395, -0...neutral0.589971those who themselves dont know how many father...positive
32the star campaigner myth bjp lost more than as...[0.02492097206413746, -0.0531931146979332, -0....positive0.607886the star campaigner myth bjp lost more than as...positive
33modi also live for few years only like you not...[0.040389616042375565, -0.06375984847545624, -...positive0.612952modi also live for few years only like you not...negative
34narendra modi more brainy than all the drswamy...[0.06742898374795914, -0.060488566756248474, -...positive0.621238narendra modi more brainy than all the drswamy...positive
35have started calling chowkidaar narendra modi ...[0.06360629200935364, -0.06786973774433136, -0...neutral0.593359have started calling chowkidaar narendra modi ...negative
36this the difference confident leaders call upo...[0.024233123287558556, -0.05243394151329994, -...positive0.610326this the difference confident leaders call upo...positive
37jawans killed the border\\r\\ncrimes against wom...[0.03928006440401077, -0.051466524600982666, -...neutral0.582484jawans killed the border crimes against women ...negative
38tag this fast growing youtuber cared abt this ...[0.05051109194755554, -0.0660049319267273, 0.0...neutral0.584719tag this fast growing youtuber cared abt this ...negative
39think hindus should back off and let them suff...[-0.010975896380841732, -0.059168506413698196,...neutral0.597051think hindus should back off and let them suff...positive
40yes cannot make any knee jerk moves drastic ac...[0.023108134046196938, -0.027600249275565147, ...positive0.618802yes cannot make any knee jerk moves drastic ac...positive
41why picked chairman the devious aadhaar isnt h...[0.043231260031461716, -0.07101075351238251, -...neutral0.581575why picked chairman the devious aadhaar isnt h...negative
42due automation and artificial intelligence fur...[0.04160398617386818, -0.06572042405605316, -0...neutral0.594700due automation and artificial intelligence fur...positive
43weak state capacity exacerbated excessive acco...[-0.00038854932063259184, -0.04599419981241226...neutral0.593749weak state capacity exacerbated excessive acco...positive
44our narendra modi ordered indian air force tak...[-0.02063656784594059, -0.07548005133867264, -...positive0.601453our narendra modi ordered indian air force tak...positive
45why vote modi dynasty visionary 3no high level...[0.01779576763510704, -0.06789527833461761, -0...neutral0.579034why vote modi dynasty visionary 3no high level...negative
46its modi chor corrupt maha thugbandhan janta w...[0.065566785633564, -0.04119298234581947, -0.0...positive0.602544its modi chor corrupt maha thugbandhan janta w...negative
47before modis arrival 2014 all supported him fo...[0.03988223522901535, -0.04965453967452049, -0...positive0.604502before modis arrival 2014 all supported him fo...positive
48think you forgot dollar india handled exceptio...[0.01084248349070549, 0.013633836060762405, -0...neutral0.598473think you forgot dollar india handled exceptio...positive
49tulsi gabbard rejected interviews with tyt but...[-0.01967957802116871, 0.05570048466324806, -0...positive0.621699tulsi gabbard rejected interviews with tyt but...positive
\n","
"],"text/plain":[" text ... y\n","origin_index ... \n","0 how narendra modi has almost killed the indian... ... negative\n","1 you think was modi behind that accident ... negative\n","2 kamal haasan takes chowkidar modi kamal haasan... ... negative\n","3 connected name with surname not bcz religion c... ... negative\n","4 anyone better than modi when nehruji expired s... ... positive\n","5 \\r\\nmodiji wont tired crying foul\\r\\nmain chow... ... negative\n","6 poor chap modi hasn’ given him anything can ... ... negative\n","7 green underwear missing ive been doubting isi ... ... negative\n","8 congress years wasnt able complete one rafale ... ... positive\n","9 asked learn from how treat minority well does ... ... negative\n","10 stop bull shitting worry about criminal vivek ... ... negative\n","11 drswamys timesnow last year debate nearly mill... ... positive\n","12 asshole bahujan radical marxist grow brain kno... ... negative\n","13 from selling dreams 2014 selling tshirts 2019 ... ... positive\n","14 very true sir thats why they are against modi ... ... positive\n","15 they are giving jobs citizen india what you ar... ... negative\n","16 congress has always attempted empower people g... ... negative\n","17 have never said that modi succeed yet even als... ... positive\n","18 \\r\\nthe foundation for new india 2022 has alre... ... positive\n","19 only rahul gandhis politics love can defeat th... ... negative\n","20 one step time navigating thru looteyns when ev... ... negative\n","21 why sir mam shabana azami hate much that have ... ... negative\n","22 modi will remain for next 510 years and till t... ... negative\n","23 pledge your first vote for modi ... positive\n","24 why need modi lead bjp government again 2019 j... ... positive\n","25 raghuram rajan sent list high profile bank fra... ... negative\n","26 modi govts slashing indias education budget cl... ... negative\n","27 why are you hell bent manoj tiwari just her ph... ... 
positive\n","28 know going into dirty details nehru family its... ... negative\n","29 momota begum will let her state become total s... ... negative\n","30 thanks anu sharma will vote and make sure peop... ... positive\n","31 those who themselves dont know how many father... ... positive\n","32 the star campaigner myth bjp lost more than as... ... positive\n","33 modi also live for few years only like you not... ... negative\n","34 narendra modi more brainy than all the drswamy... ... positive\n","35 have started calling chowkidaar narendra modi ... ... negative\n","36 this the difference confident leaders call upo... ... positive\n","37 jawans killed the border\\r\\ncrimes against wom... ... negative\n","38 tag this fast growing youtuber cared abt this ... ... negative\n","39 think hindus should back off and let them suff... ... positive\n","40 yes cannot make any knee jerk moves drastic ac... ... positive\n","41 why picked chairman the devious aadhaar isnt h... ... negative\n","42 due automation and artificial intelligence fur... ... positive\n","43 weak state capacity exacerbated excessive acco... ... positive\n","44 our narendra modi ordered indian air force tak... ... positive\n","45 why vote modi dynasty visionary 3no high level... ... negative\n","46 its modi chor corrupt maha thugbandhan janta w... ... negative\n","47 before modis arrival 2014 all supported him fo... ... positive\n","48 think you forgot dollar india handled exceptio... ... positive\n","49 tulsi gabbard rejected interviews with tyt but... ... 
positive\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on a new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":109},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609553664952,"user_tz":-300,"elapsed":194919,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"8f463f08-944f-45dc-f463-e381c05f89db"},"source":["fitted_pipe.predict('the president of india just died')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
default_name_embeddingssentimentsentiment_confidencedocument
origin_index
0[0.06468033790588379, -0.040837567299604416, -...neutral0.562996Bitcoin is going to the moon!
\n","
"],"text/plain":[" default_name_embeddings ... document\n","origin_index ... \n","0 [0.06468033790588379, -0.040837567299604416, -... ... Bitcoin is going to the moon!\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609553664954,"user_tz":-300,"elapsed":194907,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"7be62829-d712-4afd-900f-fd655e8282d7"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. 
| Currently set to : neutral\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609553671081,"user_tz":-300,"elapsed":201019,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"602947fd-13b8-438e-d5d1-64df15c2096b"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.79 0.96 0.87 27\n"," neutral 0.00 0.00 0.00 0\n"," positive 1.00 0.09 0.16 23\n","\n"," accuracy 0.56 50\n"," macro avg 0.60 0.35 0.34 50\n","weighted avg 0.89 0.56 0.54 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textdefault_name_embeddingssentimentsentiment_confidencedocumenty
origin_index
0how narendra modi has almost killed the indian...[0.060062434524297714, -0.05557167902588844, -...negative0.689142how narendra modi has almost killed the indian...negative
1you think was modi behind that accident[0.05362718179821968, -0.004547705873847008, -...negative0.689483you think was modi behind that accidentnegative
2kamal haasan takes chowkidar modi kamal haasan...[0.07274721562862396, -0.061593908816576004, -...negative0.707988kamal haasan takes chowkidar modi kamal haasan...negative
3connected name with surname not bcz religion c...[0.06106054410338402, -0.060213156044483185, -...negative0.675382connected name with surname not bcz religion c...negative
4anyone better than modi when nehruji expired s...[0.0737471655011177, 0.006071773823350668, -0....negative0.638730anyone better than modi when nehruji expired s...positive
5\\r\\nmodiji wont tired crying foul\\r\\nmain chow...[0.05888385698199272, -0.0646616593003273, -0....negative0.723110modiji wont tired crying foul main chowkidar h...negative
6poor chap modi hasn’ given him anything can ...[0.058948416262865067, -0.029682165011763573, ...negative0.690602poor chap modi hasn’ given him anything can ...negative
7green underwear missing ive been doubting isi ...[0.05133155733346939, -0.06789954006671906, -0...negative0.705077green underwear missing ive been doubting isi ...negative
8congress years wasnt able complete one rafale ...[0.044129759073257446, -0.06111813709139824, -...neutral0.561979congress years wasnt able complete one rafale ...positive
9asked learn from how treat minority well does ...[0.03665374591946602, -0.03695330768823624, -0...negative0.746584asked learn from how treat minority well does ...negative
10stop bull shitting worry about criminal vivek ...[0.07035735249519348, -0.06952506303787231, -0...negative0.768111stop bull shitting worry about criminal vivek ...negative
11drswamys timesnow last year debate nearly mill...[0.013958276249468327, -0.030759528279304504, ...neutral0.511294drswamys timesnow last year debate nearly mill...positive
12asshole bahujan radical marxist grow brain kno...[0.026277026161551476, -0.06238812580704689, -...negative0.689268asshole bahujan radical marxist grow brain kno...negative
13from selling dreams 2014 selling tshirts 2019 ...[0.07457270473241806, -0.058670494705438614, -...negative0.641822from selling dreams 2014 selling tshirts 2019 ...positive
14very true sir thats why they are against modi ...[0.061704088002443314, -0.04553354158997536, -...negative0.651231very true sir thats why they are against modi ...positive
15they are giving jobs citizen india what you ar...[0.05342026799917221, -0.003889711806550622, -...negative0.706768they are giving jobs citizen india what you ar...negative
16congress has always attempted empower people g...[0.027197618037462234, -0.036435648798942566, ...negative0.607062congress has always attempted empower people g...negative
17have never said that modi succeed yet even als...[0.06601183861494064, -0.020045211538672447, -...negative0.628577have never said that modi succeed yet even als...positive
18\\r\\nthe foundation for new india 2022 has alre...[0.04694363474845886, -0.06800008565187454, -0...neutral0.547697the foundation for new india 2022 has already ...positive
19only rahul gandhis politics love can defeat th...[0.05615750327706337, -0.002462629694491625, -...negative0.632572only rahul gandhis politics love can defeat th...negative
20one step time navigating thru looteyns when ev...[0.030352214351296425, -0.06195472553372383, 0...negative0.635106one step time navigating thru looteyns when ev...negative
21why sir mam shabana azami hate much that have ...[0.07535804808139801, -0.05643236264586449, -0...negative0.738669why sir mam shabana azami hate much that have ...negative
22modi will remain for next 510 years and till t...[0.05986170098185539, -0.0674145296216011, -0....negative0.659078modi will remain for next 510 years and till t...negative
23pledge your first vote for modi[0.023959940299391747, -0.013972461223602295, ...neutral0.555447pledge your first vote for modipositive
24why need modi lead bjp government again 2019 j...[0.04451165348291397, -0.06473662704229355, -0...neutral0.578395why need modi lead bjp government again 2019 j...positive
25raghuram rajan sent list high profile bank fra...[0.06561190634965897, -0.0614917054772377, -0....negative0.706507raghuram rajan sent list high profile bank fra...negative
26modi govts slashing indias education budget cl...[0.05217093601822853, -0.05785880982875824, -0...negative0.607360modi govts slashing indias education budget cl...negative
27why are you hell bent manoj tiwari just her ph...[0.04579753428697586, -0.05176748335361481, -0...neutral0.588993why are you hell bent manoj tiwari just her ph...positive
28know going into dirty details nehru family its...[0.047987841069698334, -0.050984784960746765, ...negative0.753084know going into dirty details nehru family its...negative
29momota begum will let her state become total s...[0.04509664326906204, -0.05019481107592583, -0...negative0.615988momota begum will let her state become total s...negative
30thanks anu sharma will vote and make sure peop...[0.04315190762281418, -0.04578147828578949, -0...neutral0.555271thanks anu sharma will vote and make sure peop...positive
31those who themselves dont know how many father...[0.0144237345084548, -0.052222371101379395, -0...negative0.631877those who themselves dont know how many father...positive
32the star campaigner myth bjp lost more than as...[0.02492097206413746, -0.0531931146979332, -0....neutral0.586682the star campaigner myth bjp lost more than as...positive
33modi also live for few years only like you not...[0.040389616042375565, -0.06375984847545624, -...neutral0.587196modi also live for few years only like you not...negative
34narendra modi more brainy than all the drswamy...[0.06742898374795914, -0.060488566756248474, -...neutral0.533663narendra modi more brainy than all the drswamy...positive
35have started calling chowkidaar narendra modi ...[0.06360629200935364, -0.06786973774433136, -0...negative0.672972have started calling chowkidaar narendra modi ...negative
36this the difference confident leaders call upo...[0.024233123287558556, -0.05243394151329994, -...neutral0.510922this the difference confident leaders call upo...positive
37jawans killed the border\\r\\ncrimes against wom...[0.03928006440401077, -0.051466524600982666, -...negative0.701794jawans killed the border crimes against women ...negative
38tag this fast growing youtuber cared abt this ...[0.05051109194755554, -0.0660049319267273, 0.0...negative0.714883tag this fast growing youtuber cared abt this ...negative
39think hindus should back off and let them suff...[-0.010975896380841732, -0.059168506413698196,...neutral0.553189think hindus should back off and let them suff...positive
40yes cannot make any knee jerk moves drastic ac...[0.023108134046196938, -0.027600249275565147, ...positive0.671809yes cannot make any knee jerk moves drastic ac...positive
41why picked chairman the devious aadhaar isnt h...[0.043231260031461716, -0.07101075351238251, -...negative0.709371why picked chairman the devious aadhaar isnt h...negative
42due automation and artificial intelligence fur...[0.04160398617386818, -0.06572042405605316, -0...neutral0.553482due automation and artificial intelligence fur...positive
43weak state capacity exacerbated excessive acco...[-0.00038854932063259184, -0.04599419981241226...negative0.609747weak state capacity exacerbated excessive acco...positive
44our narendra modi ordered indian air force tak...[-0.02063656784594059, -0.07548005133867264, -...neutral0.513191our narendra modi ordered indian air force tak...positive
45why vote modi dynasty visionary 3no high level...[0.01779576763510704, -0.06789527833461761, -0...negative0.635148why vote modi dynasty visionary 3no high level...negative
46its modi chor corrupt maha thugbandhan janta w...[0.065566785633564, -0.04119298234581947, -0.0...negative0.687171its modi chor corrupt maha thugbandhan janta w...negative
47before modis arrival 2014 all supported him fo...[0.03988223522901535, -0.04965453967452049, -0...neutral0.557571before modis arrival 2014 all supported him fo...positive
48think you forgot dollar india handled exceptio...[0.01084248349070549, 0.013633836060762405, -0...negative0.615532think you forgot dollar india handled exceptio...positive
49tulsi gabbard rejected interviews with tyt but...[-0.01967957802116871, 0.05570048466324806, -0...positive0.604604tulsi gabbard rejected interviews with tyt but...positive
\n","
"],"text/plain":[" text ... y\n","origin_index ... \n","0 how narendra modi has almost killed the indian... ... negative\n","1 you think was modi behind that accident ... negative\n","2 kamal haasan takes chowkidar modi kamal haasan... ... negative\n","3 connected name with surname not bcz religion c... ... negative\n","4 anyone better than modi when nehruji expired s... ... positive\n","5 \\r\\nmodiji wont tired crying foul\\r\\nmain chow... ... negative\n","6 poor chap modi hasn’ given him anything can ... ... negative\n","7 green underwear missing ive been doubting isi ... ... negative\n","8 congress years wasnt able complete one rafale ... ... positive\n","9 asked learn from how treat minority well does ... ... negative\n","10 stop bull shitting worry about criminal vivek ... ... negative\n","11 drswamys timesnow last year debate nearly mill... ... positive\n","12 asshole bahujan radical marxist grow brain kno... ... negative\n","13 from selling dreams 2014 selling tshirts 2019 ... ... positive\n","14 very true sir thats why they are against modi ... ... positive\n","15 they are giving jobs citizen india what you ar... ... negative\n","16 congress has always attempted empower people g... ... negative\n","17 have never said that modi succeed yet even als... ... positive\n","18 \\r\\nthe foundation for new india 2022 has alre... ... positive\n","19 only rahul gandhis politics love can defeat th... ... negative\n","20 one step time navigating thru looteyns when ev... ... negative\n","21 why sir mam shabana azami hate much that have ... ... negative\n","22 modi will remain for next 510 years and till t... ... negative\n","23 pledge your first vote for modi ... positive\n","24 why need modi lead bjp government again 2019 j... ... positive\n","25 raghuram rajan sent list high profile bank fra... ... negative\n","26 modi govts slashing indias education budget cl... ... negative\n","27 why are you hell bent manoj tiwari just her ph... ... 
positive\n","28 know going into dirty details nehru family its... ... negative\n","29 momota begum will let her state become total s... ... negative\n","30 thanks anu sharma will vote and make sure peop... ... positive\n","31 those who themselves dont know how many father... ... positive\n","32 the star campaigner myth bjp lost more than as... ... positive\n","33 modi also live for few years only like you not... ... negative\n","34 narendra modi more brainy than all the drswamy... ... positive\n","35 have started calling chowkidaar narendra modi ... ... negative\n","36 this the difference confident leaders call upo... ... positive\n","37 jawans killed the border\\r\\ncrimes against wom... ... negative\n","38 tag this fast growing youtuber cared abt this ... ... negative\n","39 think hindus should back off and let them suff... ... positive\n","40 yes cannot make any knee jerk moves drastic ac... ... positive\n","41 why picked chairman the devious aadhaar isnt h... ... negative\n","42 due automation and artificial intelligence fur... ... positive\n","43 weak state capacity exacerbated excessive acco... ... positive\n","44 our narendra modi ordered indian air force tak... ... positive\n","45 why vote modi dynasty visionary 3no high level... ... negative\n","46 its modi chor corrupt maha thugbandhan janta w... ... negative\n","47 before modis arrival 2014 all supported him fo... ... positive\n","48 think you forgot dollar india handled exceptio... ... positive\n","49 tulsi gabbard rejected interviews with tyt but... ... 
positive\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nxWFzQOhjWC8","executionInfo":{"status":"ok","timestamp":1609553671091,"user_tz":-300,"elapsed":200991,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"2f071682-e615-4556-b813-a56f405ff9c3"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model 
sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP 
model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model 
sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"IKK_Ii_gjJfF","executionInfo":{"status":"ok","timestamp":1609554113187,"user_tz":-300,"elapsed":140893,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"413eef4f-f423-439b-ad57-2ccfcf4bbe62"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(100) \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.81 0.66 0.73 300\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.85 0.59 0.69 300\n","\n"," accuracy 0.62 600\n"," macro avg 0.55 0.42 0.47 600\n","weighted avg 0.83 0.62 0.71 600\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. 
Lets save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","executionInfo":{"status":"ok","timestamp":1609554302650,"user_tz":-300,"elapsed":189472,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"858c8cf2-ba4d-48fc-b333-e4b2819dadb2"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('the president of india just died')\n","preds"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"e0CVlkk9v6Qi","executionInfo":{"status":"ok","timestamp":1609554388428,"user_tz":-300,"elapsed":879,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"212c87f7-8200-4646-cfcd-5bae608b3848"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions 
| Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. 
| Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_twitter.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_twitter.ipynb)\n","\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","## 2 class twitter classifier training\n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! 
apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install pyspark==2.4.7 \n","! pip install nlu > /dev/null \n","\n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download Twitter Sentiment dataset \n","https://www.kaggle.com/cosmos98/twitter-and-reddit-sentimental-analysis-dataset\n","## Context\n","\n","This dataset was created as part of a university project on sentiment analysis of multi-source social media platforms using PySpark."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610788006096,"user_tz":-300,"elapsed":2486,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"610519c2-4cf5-4835-d1aa-8da2f83fadf7"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/Twitter_Data.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:06:08-- http://ckl-it.de/wp-content/uploads/2021/01/Twitter_Data.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 99657 (97K) [text/csv]\n","Saving to: ‘Twitter_Data.csv’\n","\n","Twitter_Data.csv 100%[===================>] 97.32K 122KB/s in 0.8s \n","\n","2021-01-16 09:06:10 (122 KB/s) - ‘Twitter_Data.csv’ saved [99657/99657]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610788009315,"user_tz":-300,"elapsed":1404,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b73ee311-b15e-4fc3-a5b5-5772f87dac99"},"source":["import pandas as pd\n","train_path = '/content/Twitter_Data.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must be named 'y' and be of type str\n","columns=['text','y']\n","train_df = train_df[columns]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
texty
0how narendra modi has almost killed the indian...negative
1you think was modi behind that accidentnegative
2kamal haasan takes chowkidar modi kamal haasan...negative
3connected name with surname not bcz religion c...negative
4anyone better than modi when nehruji expired s...positive
.........
595perception makes fool some call “foreign inv...negative
596when will see your tweet for justice for you a...negative
597haha congress going gaga over this after looti...positive
598this movie shows the life histiry narendra mod...negative
599modi left his year old wife and returned her r...positive
\n","

600 rows × 2 columns

\n","
"],"text/plain":[" text y\n","0 how narendra modi has almost killed the indian... negative\n","1 you think was modi behind that accident negative\n","2 kamal haasan takes chowkidar modi kamal haasan... negative\n","3 connected name with surname not bcz religion c... negative\n","4 anyone better than modi when nehruji expired s... positive\n",".. ... ...\n","595 perception makes fool some call “foreign inv... negative\n","596 when will see your tweet for justice for you a... negative\n","597 haha congress going gaga over this after looti... positive\n","598 this movie shows the life histiry narendra mod... negative\n","599 modi left his year old wife and returned her r... positive\n","\n","[600 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609553662416,"user_tz":-300,"elapsed":192414,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a077e55a-4805-43a2-fb11-46074b487e2e"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix and fit it on a datset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.00 0.00 0.00 27\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.78 0.61 0.68 23\n","\n"," accuracy 0.28 50\n"," macro avg 0.26 0.20 0.23 50\n","weighted avg 0.36 0.28 0.31 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textdefault_name_embeddingssentimentsentiment_confidencedocumenty
origin_index
0how narendra modi has almost killed the indian...[0.060062434524297714, -0.05557167902588844, -...neutral0.590739how narendra modi has almost killed the indian...negative
1you think was modi behind that accident[0.05362718179821968, -0.004547705873847008, -...neutral0.577544you think was modi behind that accidentnegative
2kamal haasan takes chowkidar modi kamal haasan...[0.07274721562862396, -0.061593908816576004, -...neutral0.585589kamal haasan takes chowkidar modi kamal haasan...negative
3connected name with surname not bcz religion c...[0.06106054410338402, -0.060213156044483185, -...neutral0.554036connected name with surname not bcz religion c...negative
4anyone better than modi when nehruji expired s...[0.0737471655011177, 0.006071773823350668, -0....neutral0.595608anyone better than modi when nehruji expired s...positive
5\\r\\nmodiji wont tired crying foul\\r\\nmain chow...[0.05888385698199272, -0.0646616593003273, -0....neutral0.583403modiji wont tired crying foul main chowkidar h...negative
6poor chap modi hasn’ given him anything can ...[0.058948416262865067, -0.029682165011763573, ...neutral0.578272poor chap modi hasn’ given him anything can ...negative
7green underwear missing ive been doubting isi ...[0.05133155733346939, -0.06789954006671906, -0...neutral0.575918green underwear missing ive been doubting isi ...negative
8congress years wasnt able complete one rafale ...[0.044129759073257446, -0.06111813709139824, -...positive0.605829congress years wasnt able complete one rafale ...positive
9asked learn from how treat minority well does ...[0.03665374591946602, -0.03695330768823624, -0...neutral0.534121asked learn from how treat minority well does ...negative
10stop bull shitting worry about criminal vivek ...[0.07035735249519348, -0.06952506303787231, -0...neutral0.539481stop bull shitting worry about criminal vivek ...negative
11drswamys timesnow last year debate nearly mill...[0.013958276249468327, -0.030759528279304504, ...positive0.613331drswamys timesnow last year debate nearly mill...positive
12asshole bahujan radical marxist grow brain kno...[0.026277026161551476, -0.06238812580704689, -...neutral0.587796asshole bahujan radical marxist grow brain kno...negative
13from selling dreams 2014 selling tshirts 2019 ...[0.07457270473241806, -0.058670494705438614, -...neutral0.584601from selling dreams 2014 selling tshirts 2019 ...positive
14very true sir thats why they are against modi ...[0.061704088002443314, -0.04553354158997536, -...neutral0.584490very true sir thats why they are against modi ...positive
15they are giving jobs citizen india what you ar...[0.05342026799917221, -0.003889711806550622, -...neutral0.574127they are giving jobs citizen india what you ar...negative
16congress has always attempted empower people g...[0.027197618037462234, -0.036435648798942566, ...positive0.602392congress has always attempted empower people g...negative
17have never said that modi succeed yet even als...[0.06601183861494064, -0.020045211538672447, -...positive0.606807have never said that modi succeed yet even als...positive
18\\r\\nthe foundation for new india 2022 has alre...[0.04694363474845886, -0.06800008565187454, -0...neutral0.599807the foundation for new india 2022 has already ...positive
19only rahul gandhis politics love can defeat th...[0.05615750327706337, -0.002462629694491625, -...positive0.602275only rahul gandhis politics love can defeat th...negative
20one step time navigating thru looteyns when ev...[0.030352214351296425, -0.06195472553372383, 0...neutral0.570779one step time navigating thru looteyns when ev...negative
21why sir mam shabana azami hate much that have ...[0.07535804808139801, -0.05643236264586449, -0...neutral0.571882why sir mam shabana azami hate much that have ...negative
22modi will remain for next 510 years and till t...[0.05986170098185539, -0.0674145296216011, -0....neutral0.591540modi will remain for next 510 years and till t...negative
23pledge your first vote for modi[0.023959940299391747, -0.013972461223602295, ...positive0.606293pledge your first vote for modipositive
24why need modi lead bjp government again 2019 j...[0.04451165348291397, -0.06473662704229355, -0...positive0.609683why need modi lead bjp government again 2019 j...positive
25raghuram rajan sent list high profile bank fra...[0.06561190634965897, -0.0614917054772377, -0....neutral0.578591raghuram rajan sent list high profile bank fra...negative
26modi govts slashing indias education budget cl...[0.05217093601822853, -0.05785880982875824, -0...neutral0.594771modi govts slashing indias education budget cl...negative
27why are you hell bent manoj tiwari just her ph...[0.04579753428697586, -0.05176748335361481, -0...positive0.600511why are you hell bent manoj tiwari just her ph...positive
28know going into dirty details nehru family its...[0.047987841069698334, -0.050984784960746765, ...neutral0.533372know going into dirty details nehru family its...negative
29momota begum will let her state become total s...[0.04509664326906204, -0.05019481107592583, -0...neutral0.593740momota begum will let her state become total s...negative
30thanks anu sharma will vote and make sure peop...[0.04315190762281418, -0.04578147828578949, -0...positive0.601758thanks anu sharma will vote and make sure peop...positive
31those who themselves dont know how many father...[0.0144237345084548, -0.052222371101379395, -0...neutral0.589971those who themselves dont know how many father...positive
32the star campaigner myth bjp lost more than as...[0.02492097206413746, -0.0531931146979332, -0....positive0.607886the star campaigner myth bjp lost more than as...positive
33modi also live for few years only like you not...[0.040389616042375565, -0.06375984847545624, -...positive0.612952modi also live for few years only like you not...negative
34narendra modi more brainy than all the drswamy...[0.06742898374795914, -0.060488566756248474, -...positive0.621238narendra modi more brainy than all the drswamy...positive
35have started calling chowkidaar narendra modi ...[0.06360629200935364, -0.06786973774433136, -0...neutral0.593359have started calling chowkidaar narendra modi ...negative
36this the difference confident leaders call upo...[0.024233123287558556, -0.05243394151329994, -...positive0.610326this the difference confident leaders call upo...positive
37jawans killed the border\\r\\ncrimes against wom...[0.03928006440401077, -0.051466524600982666, -...neutral0.582484jawans killed the border crimes against women ...negative
38tag this fast growing youtuber cared abt this ...[0.05051109194755554, -0.0660049319267273, 0.0...neutral0.584719tag this fast growing youtuber cared abt this ...negative
39think hindus should back off and let them suff...[-0.010975896380841732, -0.059168506413698196,...neutral0.597051think hindus should back off and let them suff...positive
40yes cannot make any knee jerk moves drastic ac...[0.023108134046196938, -0.027600249275565147, ...positive0.618802yes cannot make any knee jerk moves drastic ac...positive
41why picked chairman the devious aadhaar isnt h...[0.043231260031461716, -0.07101075351238251, -...neutral0.581575why picked chairman the devious aadhaar isnt h...negative
42due automation and artificial intelligence fur...[0.04160398617386818, -0.06572042405605316, -0...neutral0.594700due automation and artificial intelligence fur...positive
43weak state capacity exacerbated excessive acco...[-0.00038854932063259184, -0.04599419981241226...neutral0.593749weak state capacity exacerbated excessive acco...positive
44our narendra modi ordered indian air force tak...[-0.02063656784594059, -0.07548005133867264, -...positive0.601453our narendra modi ordered indian air force tak...positive
45why vote modi dynasty visionary 3no high level...[0.01779576763510704, -0.06789527833461761, -0...neutral0.579034why vote modi dynasty visionary 3no high level...negative
46its modi chor corrupt maha thugbandhan janta w...[0.065566785633564, -0.04119298234581947, -0.0...positive0.602544its modi chor corrupt maha thugbandhan janta w...negative
47before modis arrival 2014 all supported him fo...[0.03988223522901535, -0.04965453967452049, -0...positive0.604502before modis arrival 2014 all supported him fo...positive
48think you forgot dollar india handled exceptio...[0.01084248349070549, 0.013633836060762405, -0...neutral0.598473think you forgot dollar india handled exceptio...positive
49tulsi gabbard rejected interviews with tyt but...[-0.01967957802116871, 0.05570048466324806, -0...positive0.621699tulsi gabbard rejected interviews with tyt but...positive
\n","
"],"text/plain":[" text ... y\n","origin_index ... \n","0 how narendra modi has almost killed the indian... ... negative\n","1 you think was modi behind that accident ... negative\n","2 kamal haasan takes chowkidar modi kamal haasan... ... negative\n","3 connected name with surname not bcz religion c... ... negative\n","4 anyone better than modi when nehruji expired s... ... positive\n","5 \\r\\nmodiji wont tired crying foul\\r\\nmain chow... ... negative\n","6 poor chap modi hasn’ given him anything can ... ... negative\n","7 green underwear missing ive been doubting isi ... ... negative\n","8 congress years wasnt able complete one rafale ... ... positive\n","9 asked learn from how treat minority well does ... ... negative\n","10 stop bull shitting worry about criminal vivek ... ... negative\n","11 drswamys timesnow last year debate nearly mill... ... positive\n","12 asshole bahujan radical marxist grow brain kno... ... negative\n","13 from selling dreams 2014 selling tshirts 2019 ... ... positive\n","14 very true sir thats why they are against modi ... ... positive\n","15 they are giving jobs citizen india what you ar... ... negative\n","16 congress has always attempted empower people g... ... negative\n","17 have never said that modi succeed yet even als... ... positive\n","18 \\r\\nthe foundation for new india 2022 has alre... ... positive\n","19 only rahul gandhis politics love can defeat th... ... negative\n","20 one step time navigating thru looteyns when ev... ... negative\n","21 why sir mam shabana azami hate much that have ... ... negative\n","22 modi will remain for next 510 years and till t... ... negative\n","23 pledge your first vote for modi ... positive\n","24 why need modi lead bjp government again 2019 j... ... positive\n","25 raghuram rajan sent list high profile bank fra... ... negative\n","26 modi govts slashing indias education budget cl... ... negative\n","27 why are you hell bent manoj tiwari just her ph... ... 
positive\n","28 know going into dirty details nehru family its... ... negative\n","29 momota begum will let her state become total s... ... negative\n","30 thanks anu sharma will vote and make sure peop... ... positive\n","31 those who themselves dont know how many father... ... positive\n","32 the star campaigner myth bjp lost more than as... ... positive\n","33 modi also live for few years only like you not... ... negative\n","34 narendra modi more brainy than all the drswamy... ... positive\n","35 have started calling chowkidaar narendra modi ... ... negative\n","36 this the difference confident leaders call upo... ... positive\n","37 jawans killed the border\\r\\ncrimes against wom... ... negative\n","38 tag this fast growing youtuber cared abt this ... ... negative\n","39 think hindus should back off and let them suff... ... positive\n","40 yes cannot make any knee jerk moves drastic ac... ... positive\n","41 why picked chairman the devious aadhaar isnt h... ... negative\n","42 due automation and artificial intelligence fur... ... positive\n","43 weak state capacity exacerbated excessive acco... ... positive\n","44 our narendra modi ordered indian air force tak... ... positive\n","45 why vote modi dynasty visionary 3no high level... ... negative\n","46 its modi chor corrupt maha thugbandhan janta w... ... negative\n","47 before modis arrival 2014 all supported him fo... ... positive\n","48 think you forgot dollar india handled exceptio... ... positive\n","49 tulsi gabbard rejected interviews with tyt but... ... 
positive\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on a new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":109},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609553664952,"user_tz":-300,"elapsed":194919,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"8f463f08-944f-45dc-f463-e381c05f89db"},"source":["fitted_pipe.predict('the president of india just died')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
default_name_embeddingssentimentsentiment_confidencedocument
origin_index
0[0.06468033790588379, -0.040837567299604416, -...neutral0.562996Bitcoin is going to the moon!
\n","
"],"text/plain":[" default_name_embeddings ... document\n","origin_index ... \n","0 [0.06468033790588379, -0.040837567299604416, -... ... Bitcoin is going to the moon!\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609553664954,"user_tz":-300,"elapsed":194907,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"7be62829-d712-4afd-900f-fd655e8282d7"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. 
| Currently set to : neutral\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609553671081,"user_tz":-300,"elapsed":201019,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"602947fd-13b8-438e-d5d1-64df15c2096b"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","# predict with the fitted pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.79 0.96 0.87 27\n"," neutral 0.00 0.00 0.00 0\n"," positive 1.00 0.09 0.16 23\n","\n"," accuracy 0.56 50\n"," macro avg 0.60 0.35 0.34 50\n","weighted avg 0.89 0.56 0.54 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textdefault_name_embeddingssentimentsentiment_confidencedocumenty
origin_index
0how narendra modi has almost killed the indian...[0.060062434524297714, -0.05557167902588844, -...negative0.689142how narendra modi has almost killed the indian...negative
1you think was modi behind that accident[0.05362718179821968, -0.004547705873847008, -...negative0.689483you think was modi behind that accidentnegative
2kamal haasan takes chowkidar modi kamal haasan...[0.07274721562862396, -0.061593908816576004, -...negative0.707988kamal haasan takes chowkidar modi kamal haasan...negative
3connected name with surname not bcz religion c...[0.06106054410338402, -0.060213156044483185, -...negative0.675382connected name with surname not bcz religion c...negative
4anyone better than modi when nehruji expired s...[0.0737471655011177, 0.006071773823350668, -0....negative0.638730anyone better than modi when nehruji expired s...positive
5\\r\\nmodiji wont tired crying foul\\r\\nmain chow...[0.05888385698199272, -0.0646616593003273, -0....negative0.723110modiji wont tired crying foul main chowkidar h...negative
6poor chap modi hasn’ given him anything can ...[0.058948416262865067, -0.029682165011763573, ...negative0.690602poor chap modi hasn’ given him anything can ...negative
7green underwear missing ive been doubting isi ...[0.05133155733346939, -0.06789954006671906, -0...negative0.705077green underwear missing ive been doubting isi ...negative
8congress years wasnt able complete one rafale ...[0.044129759073257446, -0.06111813709139824, -...neutral0.561979congress years wasnt able complete one rafale ...positive
9asked learn from how treat minority well does ...[0.03665374591946602, -0.03695330768823624, -0...negative0.746584asked learn from how treat minority well does ...negative
10stop bull shitting worry about criminal vivek ...[0.07035735249519348, -0.06952506303787231, -0...negative0.768111stop bull shitting worry about criminal vivek ...negative
11drswamys timesnow last year debate nearly mill...[0.013958276249468327, -0.030759528279304504, ...neutral0.511294drswamys timesnow last year debate nearly mill...positive
12asshole bahujan radical marxist grow brain kno...[0.026277026161551476, -0.06238812580704689, -...negative0.689268asshole bahujan radical marxist grow brain kno...negative
13from selling dreams 2014 selling tshirts 2019 ...[0.07457270473241806, -0.058670494705438614, -...negative0.641822from selling dreams 2014 selling tshirts 2019 ...positive
14very true sir thats why they are against modi ...[0.061704088002443314, -0.04553354158997536, -...negative0.651231very true sir thats why they are against modi ...positive
15they are giving jobs citizen india what you ar...[0.05342026799917221, -0.003889711806550622, -...negative0.706768they are giving jobs citizen india what you ar...negative
16congress has always attempted empower people g...[0.027197618037462234, -0.036435648798942566, ...negative0.607062congress has always attempted empower people g...negative
17have never said that modi succeed yet even als...[0.06601183861494064, -0.020045211538672447, -...negative0.628577have never said that modi succeed yet even als...positive
18\\r\\nthe foundation for new india 2022 has alre...[0.04694363474845886, -0.06800008565187454, -0...neutral0.547697the foundation for new india 2022 has already ...positive
19only rahul gandhis politics love can defeat th...[0.05615750327706337, -0.002462629694491625, -...negative0.632572only rahul gandhis politics love can defeat th...negative
20one step time navigating thru looteyns when ev...[0.030352214351296425, -0.06195472553372383, 0...negative0.635106one step time navigating thru looteyns when ev...negative
21why sir mam shabana azami hate much that have ...[0.07535804808139801, -0.05643236264586449, -0...negative0.738669why sir mam shabana azami hate much that have ...negative
22modi will remain for next 510 years and till t...[0.05986170098185539, -0.0674145296216011, -0....negative0.659078modi will remain for next 510 years and till t...negative
23pledge your first vote for modi[0.023959940299391747, -0.013972461223602295, ...neutral0.555447pledge your first vote for modipositive
24why need modi lead bjp government again 2019 j...[0.04451165348291397, -0.06473662704229355, -0...neutral0.578395why need modi lead bjp government again 2019 j...positive
25raghuram rajan sent list high profile bank fra...[0.06561190634965897, -0.0614917054772377, -0....negative0.706507raghuram rajan sent list high profile bank fra...negative
26modi govts slashing indias education budget cl...[0.05217093601822853, -0.05785880982875824, -0...negative0.607360modi govts slashing indias education budget cl...negative
27why are you hell bent manoj tiwari just her ph...[0.04579753428697586, -0.05176748335361481, -0...neutral0.588993why are you hell bent manoj tiwari just her ph...positive
28know going into dirty details nehru family its...[0.047987841069698334, -0.050984784960746765, ...negative0.753084know going into dirty details nehru family its...negative
29momota begum will let her state become total s...[0.04509664326906204, -0.05019481107592583, -0...negative0.615988momota begum will let her state become total s...negative
30thanks anu sharma will vote and make sure peop...[0.04315190762281418, -0.04578147828578949, -0...neutral0.555271thanks anu sharma will vote and make sure peop...positive
31those who themselves dont know how many father...[0.0144237345084548, -0.052222371101379395, -0...negative0.631877those who themselves dont know how many father...positive
32the star campaigner myth bjp lost more than as...[0.02492097206413746, -0.0531931146979332, -0....neutral0.586682the star campaigner myth bjp lost more than as...positive
33modi also live for few years only like you not...[0.040389616042375565, -0.06375984847545624, -...neutral0.587196modi also live for few years only like you not...negative
34narendra modi more brainy than all the drswamy...[0.06742898374795914, -0.060488566756248474, -...neutral0.533663narendra modi more brainy than all the drswamy...positive
35have started calling chowkidaar narendra modi ...[0.06360629200935364, -0.06786973774433136, -0...negative0.672972have started calling chowkidaar narendra modi ...negative
36this the difference confident leaders call upo...[0.024233123287558556, -0.05243394151329994, -...neutral0.510922this the difference confident leaders call upo...positive
37jawans killed the border\\r\\ncrimes against wom...[0.03928006440401077, -0.051466524600982666, -...negative0.701794jawans killed the border crimes against women ...negative
38tag this fast growing youtuber cared abt this ...[0.05051109194755554, -0.0660049319267273, 0.0...negative0.714883tag this fast growing youtuber cared abt this ...negative
39think hindus should back off and let them suff...[-0.010975896380841732, -0.059168506413698196,...neutral0.553189think hindus should back off and let them suff...positive
40yes cannot make any knee jerk moves drastic ac...[0.023108134046196938, -0.027600249275565147, ...positive0.671809yes cannot make any knee jerk moves drastic ac...positive
41why picked chairman the devious aadhaar isnt h...[0.043231260031461716, -0.07101075351238251, -...negative0.709371why picked chairman the devious aadhaar isnt h...negative
42due automation and artificial intelligence fur...[0.04160398617386818, -0.06572042405605316, -0...neutral0.553482due automation and artificial intelligence fur...positive
43weak state capacity exacerbated excessive acco...[-0.00038854932063259184, -0.04599419981241226...negative0.609747weak state capacity exacerbated excessive acco...positive
44our narendra modi ordered indian air force tak...[-0.02063656784594059, -0.07548005133867264, -...neutral0.513191our narendra modi ordered indian air force tak...positive
45why vote modi dynasty visionary 3no high level...[0.01779576763510704, -0.06789527833461761, -0...negative0.635148why vote modi dynasty visionary 3no high level...negative
46its modi chor corrupt maha thugbandhan janta w...[0.065566785633564, -0.04119298234581947, -0.0...negative0.687171its modi chor corrupt maha thugbandhan janta w...negative
47before modis arrival 2014 all supported him fo...[0.03988223522901535, -0.04965453967452049, -0...neutral0.557571before modis arrival 2014 all supported him fo...positive
48think you forgot dollar india handled exceptio...[0.01084248349070549, 0.013633836060762405, -0...negative0.615532think you forgot dollar india handled exceptio...positive
49tulsi gabbard rejected interviews with tyt but...[-0.01967957802116871, 0.05570048466324806, -0...positive0.604604tulsi gabbard rejected interviews with tyt but...positive
\n","
"],"text/plain":[" text ... y\n","origin_index ... \n","0 how narendra modi has almost killed the indian... ... negative\n","1 you think was modi behind that accident ... negative\n","2 kamal haasan takes chowkidar modi kamal haasan... ... negative\n","3 connected name with surname not bcz religion c... ... negative\n","4 anyone better than modi when nehruji expired s... ... positive\n","5 \\r\\nmodiji wont tired crying foul\\r\\nmain chow... ... negative\n","6 poor chap modi hasn’ given him anything can ... ... negative\n","7 green underwear missing ive been doubting isi ... ... negative\n","8 congress years wasnt able complete one rafale ... ... positive\n","9 asked learn from how treat minority well does ... ... negative\n","10 stop bull shitting worry about criminal vivek ... ... negative\n","11 drswamys timesnow last year debate nearly mill... ... positive\n","12 asshole bahujan radical marxist grow brain kno... ... negative\n","13 from selling dreams 2014 selling tshirts 2019 ... ... positive\n","14 very true sir thats why they are against modi ... ... positive\n","15 they are giving jobs citizen india what you ar... ... negative\n","16 congress has always attempted empower people g... ... negative\n","17 have never said that modi succeed yet even als... ... positive\n","18 \\r\\nthe foundation for new india 2022 has alre... ... positive\n","19 only rahul gandhis politics love can defeat th... ... negative\n","20 one step time navigating thru looteyns when ev... ... negative\n","21 why sir mam shabana azami hate much that have ... ... negative\n","22 modi will remain for next 510 years and till t... ... negative\n","23 pledge your first vote for modi ... positive\n","24 why need modi lead bjp government again 2019 j... ... positive\n","25 raghuram rajan sent list high profile bank fra... ... negative\n","26 modi govts slashing indias education budget cl... ... negative\n","27 why are you hell bent manoj tiwari just her ph... ... 
positive\n","28 know going into dirty details nehru family its... ... negative\n","29 momota begum will let her state become total s... ... negative\n","30 thanks anu sharma will vote and make sure peop... ... positive\n","31 those who themselves dont know how many father... ... positive\n","32 the star campaigner myth bjp lost more than as... ... positive\n","33 modi also live for few years only like you not... ... negative\n","34 narendra modi more brainy than all the drswamy... ... positive\n","35 have started calling chowkidaar narendra modi ... ... negative\n","36 this the difference confident leaders call upo... ... positive\n","37 jawans killed the border\\r\\ncrimes against wom... ... negative\n","38 tag this fast growing youtuber cared abt this ... ... negative\n","39 think hindus should back off and let them suff... ... positive\n","40 yes cannot make any knee jerk moves drastic ac... ... positive\n","41 why picked chairman the devious aadhaar isnt h... ... negative\n","42 due automation and artificial intelligence fur... ... positive\n","43 weak state capacity exacerbated excessive acco... ... positive\n","44 our narendra modi ordered indian air force tak... ... positive\n","45 why vote modi dynasty visionary 3no high level... ... negative\n","46 its modi chor corrupt maha thugbandhan janta w... ... negative\n","47 before modis arrival 2014 all supported him fo... ... positive\n","48 think you forgot dollar india handled exceptio... ... positive\n","49 tulsi gabbard rejected interviews with tyt but... ... 
positive\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nxWFzQOhjWC8","executionInfo":{"status":"ok","timestamp":1609553671091,"user_tz":-300,"elapsed":200991,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"2f071682-e615-4556-b813-a56f405ff9c3"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possible sentence embedding we could use. Let's use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model 
sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP 
model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model 
sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"IKK_Ii_gjJfF","executionInfo":{"status":"ok","timestamp":1609554113187,"user_tz":-300,"elapsed":140893,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"413eef4f-f423-439b-ad57-2ccfcf4bbe62"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train longer and use a smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(100) \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.81 0.66 0.73 300\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.85 0.59 0.69 300\n","\n"," accuracy 0.62 600\n"," macro avg 0.55 0.42 0.47 600\n","weighted avg 0.83 0.62 0.71 600\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. 
Let's save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","executionInfo":{"status":"ok","timestamp":1609554302650,"user_tz":-300,"elapsed":189472,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"858c8cf2-ba4d-48fc-b333-e4b2819dadb2"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Let's load the model from HDD.\n","This makes offline NLU usage possible! \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('the president of india just died')\n","preds"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"e0CVlkk9v6Qi","executionInfo":{"status":"ok","timestamp":1609554388428,"user_tz":-300,"elapsed":879,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"212c87f7-8200-4646-cfcd-5bae608b3848"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions 
| Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. 
| Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo.ipynb b/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo.ipynb index 600d3ee9..d259a24b 100644 --- a/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo.ipynb +++ b/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo.ipynb @@ -1 +1 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_class_text_classifier_demo.ipynb","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyM9f3LyT6TSckfAZm2wYkjU"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/collab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier with NLU \n","## ClassifierDL (Multi-class Text Classification)\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following 
features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu > /dev/null pyspark==2.4.7\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download news classification dataset"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1607912618662,"user_tz":-60,"elapsed":94251,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"4fe5c4cb-76ff-44a0-9936-dfbddfeb5140"},"source":["! wget https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_train.csv\n","! wget https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_test.csv"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2020-12-14 02:23:36-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_train.csv\n","Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.154.38\n","Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.154.38|:443... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 24032125 (23M) [text/csv]\n","Saving to: ‘news_category_train.csv’\n","\n","news_category_train 100%[===================>] 22.92M 21.7MB/s in 1.1s \n","\n","2020-12-14 02:23:37 (21.7 MB/s) - ‘news_category_train.csv’ saved [24032125/24032125]\n","\n","--2020-12-14 02:23:37-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_test.csv\n","Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.217.74.118\n","Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.217.74.118|:443... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 1504408 (1.4M) [text/csv]\n","Saving to: ‘news_category_test.csv’\n","\n","news_category_test. 100%[===================>] 1.43M 2.77MB/s in 0.5s \n","\n","2020-12-14 02:23:38 (2.77 MB/s) - ‘news_category_test.csv’ saved [1504408/1504408]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":419},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1607912619037,"user_tz":-60,"elapsed":94620,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"1cf7867f-21ab-4ba1-9ab3-c95a191b0286"},"source":["import pandas as pd\n","test_path = '/content/news_category_test.csv'\n","train_df = pd.read_csv(test_path)\n","train_df.columns=['y','text']\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytext
0BusinessUnions representing workers at Turner Newall...
1Sci/TechTORONTO, Canada A second team of rocketeer...
2Sci/TechA company founded by a chemistry researcher a...
3Sci/TechIt's barely dawn when Mike Fitzpatrick starts...
4Sci/TechSouthern California's smog fighting agency we...
.........
7595WorldUkrainian presidential candidate Viktor Yushch...
7596SportsWith the supply of attractive pitching options...
7597SportsLike Roger Clemens did almost exactly eight ye...
7598BusinessSINGAPORE : Doctors in the United States have ...
7599BusinessEBay plans to buy the apartment and home renta...
\n","

7600 rows × 2 columns

\n","
"],"text/plain":[" y text\n","0 Business Unions representing workers at Turner Newall...\n","1 Sci/Tech TORONTO, Canada A second team of rocketeer...\n","2 Sci/Tech A company founded by a chemistry researcher a...\n","3 Sci/Tech It's barely dawn when Mike Fitzpatrick starts...\n","4 Sci/Tech Southern California's smog fighting agency we...\n","... ... ...\n","7595 World Ukrainian presidential candidate Viktor Yushch...\n","7596 Sports With the supply of attractive pitching options...\n","7597 Sports Like Roger Clemens did almost exactly eight ye...\n","7598 Business SINGAPORE : Doctors in the United States have ...\n","7599 Business EBay plans to buy the apartment and home renta...\n","\n","[7600 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.classifier')\n","\n","By default, the Universal Sentence Encoder Embeddings (USE) are beeing downloaded to provide embeddings for the classifier. You can use any of the 50+ other sentence Emeddings in NLU tough!\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":501},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1607912857369,"user_tz":-60,"elapsed":332946,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"8bce881e-edb7-4d2b-cf61-b9f26a05ea4b"},"source":["# load a trainable pipeline by specifying the train. 
prefix and fit it on a datset with label and text columns\n","# Since there are no\n","fitted_pipe = nlu.load('train.classifier').fit(train_df)\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df)\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ydefault_name_embeddingstextsentencecategory_confidencecategory
origin_index
0Business[0.012997539713978767, 0.019844762980937958, -...Unions representing workers at Turner Newall...Unions representing workers at Turner Newall s...0.999985Business
1Sci/Tech[0.023022323846817017, -0.01595703884959221, -...TORONTO, Canada A second team of rocketeer...TORONTO, Canada A second team of rocketeers co...1.000000Sports
1Sci/Tech[-0.010587693192064762, 0.011531050316989422, ...TORONTO, Canada A second team of rocketeer...10 million Ansari X Prize, a contest for priva...1.000000Sports
2Sci/Tech[0.038641855120658875, 0.02322080172598362, -0...A company founded by a chemistry researcher a...A company founded by a chemistry researcher at...0.744563Business
3Sci/Tech[-0.006857294123619795, 0.01967567577958107, -...It's barely dawn when Mike Fitzpatrick starts...It's barely dawn when Mike Fitzpatrick starts ...0.999360Sci/Tech
.....................
7596Sports[0.005107458680868149, -0.011805553920567036, ...With the supply of attractive pitching options....1.000000Sports
7596Sports[0.005107458680868149, -0.011805553920567036, ...With the supply of attractive pitching options....2.000000Sports
7597Sports[0.044696468859910965, 0.0015660696662962437, ...Like Roger Clemens did almost exactly eight ye...Like Roger Clemens did almost exactly eight ye...1.000000Sports
7598Business[0.05564942583441734, -0.021285761147737503, -...SINGAPORE : Doctors in the United States have ...SINGAPORE : Doctors in the United States have ...0.999433Business
7599Business[0.08172684907913208, -0.013251541182398796, -...EBay plans to buy the apartment and home renta...EBay plans to buy the apartment and home renta...0.820492Business
\n","

14399 rows × 6 columns

\n","
"],"text/plain":[" y ... category\n","origin_index ... \n","0 Business ... Business\n","1 Sci/Tech ... Sports\n","1 Sci/Tech ... Sports\n","2 Sci/Tech ... Business\n","3 Sci/Tech ... Sci/Tech\n","... ... ... ...\n","7596 Sports ... Sports\n","7596 Sports ... Sports\n","7597 Sports ... Sports\n","7598 Business ... Business\n","7599 Business ... Business\n","\n","[14399 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"DL_5aY9b3jSd"},"source":["# 4. Evaluate the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"djtoZVKBw2WU","executionInfo":{"status":"ok","timestamp":1607912858793,"user_tz":-60,"elapsed":334365,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"c41b52d9-2a4b-47ee-92e8-758399ef45cc"},"source":["from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," Business 0.76 0.81 0.78 3671\n"," Sci/Tech 0.80 0.79 0.79 3983\n"," Sports 0.86 0.92 0.89 3687\n"," World 0.89 0.77 0.83 3058\n","\n"," accuracy 0.82 14399\n"," macro avg 0.83 0.82 0.82 14399\n","weighted avg 0.82 0.82 0.82 14399\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"mhFKVN93o1ZO"},"source":["# 5. 
Lets try different Sentence Emebddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"CzJd8omao0gt","executionInfo":{"status":"ok","timestamp":1607912858794,"user_tz":-60,"elapsed":334358,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"8021f8c3-d711-4d06-d184-88df1a29441e"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP 
model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns 
Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model 
sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ABHLgirmG1n9","executionInfo":{"status":"ok","timestamp":1607918642391,"user_tz":-60,"elapsed":6117950,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"fcc6f823-4332-471f-c2dc-201916ef1b97"},"source":["# Load pipe with bert embeds\n","# using large embeddings can take a few hours..\n","# fitted_pipe = nlu.load('en.embed_sentence.bert_large_uncased train.classifier').fit(train_df)\n","fitted_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.classifier').fit(train_df)\n","\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df)\n","from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," Business 0.00 0.00 0.00 1900\n"," Sci/Tech 0.25 1.00 0.40 1900\n"," Sports 0.00 0.00 0.00 1900\n"," World 0.00 0.00 0.00 1900\n","\n"," accuracy 0.25 7600\n"," macro avg 0.06 0.25 0.10 7600\n","weighted avg 0.06 0.25 0.10 7600\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nbpdZGoZPslz","executionInfo":{"status":"ok","timestamp":1607918778139,"user_tz":-60,"elapsed":6253693,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"1dcc8aa9-fd89-4b7a-d78d-c641c09f67d6"},"source":["# Load pipe 
with bert embeds\n","fitted_pipe = nlu.load('embed_sentence.bert train.classifier').fit(train_df)\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df)\n","from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L2_128 download started this may take some time.\n","Approximate size to download 16.1 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," Business 0.81 0.74 0.77 1900\n"," Sci/Tech 0.74 0.87 0.80 1900\n"," Sports 0.92 0.94 0.93 1900\n"," World 0.91 0.81 0.86 1900\n","\n"," accuracy 0.84 7600\n"," macro avg 0.85 0.84 0.84 7600\n","weighted avg 0.85 0.84 0.84 7600\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607918802363,"user_tz":-60,"elapsed":6277910,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"79c442f9-959e-4b14-ae85-6ef9f654f297"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! 
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":128},"executionInfo":{"status":"ok","timestamp":1607918809822,"user_tz":-60,"elapsed":6285365,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"f755aaa0-974c-4c6f-c079-0f3d681dbc82"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Tesla plans to invest 10M into the ML sector')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
classifier_confidencedocumentclassifierembed_sentence_bert_embeddings
origin_index
00.997592Tesla plans to invest 10M into the ML sectorBusiness[-0.07111635059118271, 0.9532930850982666, -1....
\n","
"],"text/plain":[" classifier_confidence ... embed_sentence_bert_embeddings\n","origin_index ... \n","0 0.997592 ... [-0.07111635059118271, 0.9532930850982666, -1....\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":10}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607918809824,"user_tz":-60,"elapsed":6285363,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"5e8b8c8a-5cd1-4d20-bde2-a4003d5687d0"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(128) | Info: Number of embedding dimensions | Currently set to : 128\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L2_128') | Info: unique reference name for identification | Currently set to : sent_small_bert_L2_128\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setClasses(['World', 'Sci/Tech', 'Sports', 'Business']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['World', 'Sci/Tech', 'Sports', 'Business']\n","pipe['classifier_dl'].setStorageRef('sent_small_bert_L2_128') | Info: unique reference name for identification | Currently set to : sent_small_bert_L2_128\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"M1LjAwJVJxun"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_class_text_classifier_demo.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier with NLU \n","## ClassifierDL (Multi-class Text 
Classification)\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu > /dev/null pyspark==2.4.7\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download news classification dataset"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1607912618662,"user_tz":-60,"elapsed":94251,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"4fe5c4cb-76ff-44a0-9936-dfbddfeb5140"},"source":["! wget https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_train.csv\n","! 
wget https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_test.csv"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2020-12-14 02:23:36-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_train.csv\n","Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.154.38\n","Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.154.38|:443... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 24032125 (23M) [text/csv]\n","Saving to: ‘news_category_train.csv’\n","\n","news_category_train 100%[===================>] 22.92M 21.7MB/s in 1.1s \n","\n","2020-12-14 02:23:37 (21.7 MB/s) - ‘news_category_train.csv’ saved [24032125/24032125]\n","\n","--2020-12-14 02:23:37-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_test.csv\n","Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.217.74.118\n","Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.217.74.118|:443... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 1504408 (1.4M) [text/csv]\n","Saving to: ‘news_category_test.csv’\n","\n","news_category_test. 
100%[===================>] 1.43M 2.77MB/s in 0.5s \n","\n","2020-12-14 02:23:38 (2.77 MB/s) - ‘news_category_test.csv’ saved [1504408/1504408]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":419},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1607912619037,"user_tz":-60,"elapsed":94620,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"1cf7867f-21ab-4ba1-9ab3-c95a191b0286"},"source":["import pandas as pd\n","test_path = '/content/news_category_test.csv'\n","train_df = pd.read_csv(test_path)\n","train_df.columns=['y','text']\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytext
0BusinessUnions representing workers at Turner Newall...
1Sci/TechTORONTO, Canada A second team of rocketeer...
2Sci/TechA company founded by a chemistry researcher a...
3Sci/TechIt's barely dawn when Mike Fitzpatrick starts...
4Sci/TechSouthern California's smog fighting agency we...
.........
7595WorldUkrainian presidential candidate Viktor Yushch...
7596SportsWith the supply of attractive pitching options...
7597SportsLike Roger Clemens did almost exactly eight ye...
7598BusinessSINGAPORE : Doctors in the United States have ...
7599BusinessEBay plans to buy the apartment and home renta...
\n","

7600 rows × 2 columns

\n","
"],"text/plain":[" y text\n","0 Business Unions representing workers at Turner Newall...\n","1 Sci/Tech TORONTO, Canada A second team of rocketeer...\n","2 Sci/Tech A company founded by a chemistry researcher a...\n","3 Sci/Tech It's barely dawn when Mike Fitzpatrick starts...\n","4 Sci/Tech Southern California's smog fighting agency we...\n","... ... ...\n","7595 World Ukrainian presidential candidate Viktor Yushch...\n","7596 Sports With the supply of attractive pitching options...\n","7597 Sports Like Roger Clemens did almost exactly eight ye...\n","7598 Business SINGAPORE : Doctors in the United States have ...\n","7599 Business EBay plans to buy the apartment and home renta...\n","\n","[7600 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.classifier')\n","\n","By default, the Universal Sentence Encoder Embeddings (USE) are being downloaded to provide embeddings for the classifier. You can use any of the 50+ other sentence Embeddings in NLU though!\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":501},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1607912857369,"user_tz":-60,"elapsed":332946,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"8bce881e-edb7-4d2b-cf61-b9f26a05ea4b"},"source":["# load a trainable pipeline by specifying the train. 
prefix and fit it on a dataset with label and text columns\n","# Since there are no\n","fitted_pipe = nlu.load('train.classifier').fit(train_df)\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df)\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ydefault_name_embeddingstextsentencecategory_confidencecategory
origin_index
0Business[0.012997539713978767, 0.019844762980937958, -...Unions representing workers at Turner Newall...Unions representing workers at Turner Newall s...0.999985Business
1Sci/Tech[0.023022323846817017, -0.01595703884959221, -...TORONTO, Canada A second team of rocketeer...TORONTO, Canada A second team of rocketeers co...1.000000Sports
1Sci/Tech[-0.010587693192064762, 0.011531050316989422, ...TORONTO, Canada A second team of rocketeer...10 million Ansari X Prize, a contest for priva...1.000000Sports
2Sci/Tech[0.038641855120658875, 0.02322080172598362, -0...A company founded by a chemistry researcher a...A company founded by a chemistry researcher at...0.744563Business
3Sci/Tech[-0.006857294123619795, 0.01967567577958107, -...It's barely dawn when Mike Fitzpatrick starts...It's barely dawn when Mike Fitzpatrick starts ...0.999360Sci/Tech
.....................
7596Sports[0.005107458680868149, -0.011805553920567036, ...With the supply of attractive pitching options....1.000000Sports
7596Sports[0.005107458680868149, -0.011805553920567036, ...With the supply of attractive pitching options....2.000000Sports
7597Sports[0.044696468859910965, 0.0015660696662962437, ...Like Roger Clemens did almost exactly eight ye...Like Roger Clemens did almost exactly eight ye...1.000000Sports
7598Business[0.05564942583441734, -0.021285761147737503, -...SINGAPORE : Doctors in the United States have ...SINGAPORE : Doctors in the United States have ...0.999433Business
7599Business[0.08172684907913208, -0.013251541182398796, -...EBay plans to buy the apartment and home renta...EBay plans to buy the apartment and home renta...0.820492Business
\n","

14399 rows × 6 columns

\n","
"],"text/plain":[" y ... category\n","origin_index ... \n","0 Business ... Business\n","1 Sci/Tech ... Sports\n","1 Sci/Tech ... Sports\n","2 Sci/Tech ... Business\n","3 Sci/Tech ... Sci/Tech\n","... ... ... ...\n","7596 Sports ... Sports\n","7596 Sports ... Sports\n","7597 Sports ... Sports\n","7598 Business ... Business\n","7599 Business ... Business\n","\n","[14399 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"DL_5aY9b3jSd"},"source":["# 4. Evaluate the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"djtoZVKBw2WU","executionInfo":{"status":"ok","timestamp":1607912858793,"user_tz":-60,"elapsed":334365,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"c41b52d9-2a4b-47ee-92e8-758399ef45cc"},"source":["from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," Business 0.76 0.81 0.78 3671\n"," Sci/Tech 0.80 0.79 0.79 3983\n"," Sports 0.86 0.92 0.89 3687\n"," World 0.89 0.77 0.83 3058\n","\n"," accuracy 0.82 14399\n"," macro avg 0.83 0.82 0.82 14399\n","weighted avg 0.82 0.82 0.82 14399\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"mhFKVN93o1ZO"},"source":["# 5. 
Let's try different Sentence Embeddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"CzJd8omao0gt","executionInfo":{"status":"ok","timestamp":1607912858794,"user_tz":-60,"elapsed":334358,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"8021f8c3-d711-4d06-d184-88df1a29441e"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possible sentence embedding we could use. Let's use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP 
model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns 
Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model 
sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ABHLgirmG1n9","executionInfo":{"status":"ok","timestamp":1607918642391,"user_tz":-60,"elapsed":6117950,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"fcc6f823-4332-471f-c2dc-201916ef1b97"},"source":["# Load pipe with bert embeds\n","# using large embeddings can take a few hours..\n","# fitted_pipe = nlu.load('en.embed_sentence.bert_large_uncased train.classifier').fit(train_df)\n","fitted_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.classifier').fit(train_df)\n","\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df)\n","from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," Business 0.00 0.00 0.00 1900\n"," Sci/Tech 0.25 1.00 0.40 1900\n"," Sports 0.00 0.00 0.00 1900\n"," World 0.00 0.00 0.00 1900\n","\n"," accuracy 0.25 7600\n"," macro avg 0.06 0.25 0.10 7600\n","weighted avg 0.06 0.25 0.10 7600\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nbpdZGoZPslz","executionInfo":{"status":"ok","timestamp":1607918778139,"user_tz":-60,"elapsed":6253693,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"1dcc8aa9-fd89-4b7a-d78d-c641c09f67d6"},"source":["# Load pipe 
with bert embeds\n","fitted_pipe = nlu.load('embed_sentence.bert train.classifier').fit(train_df)\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df)\n","from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L2_128 download started this may take some time.\n","Approximate size to download 16.1 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," Business 0.81 0.74 0.77 1900\n"," Sci/Tech 0.74 0.87 0.80 1900\n"," Sports 0.92 0.94 0.93 1900\n"," World 0.91 0.81 0.86 1900\n","\n"," accuracy 0.84 7600\n"," macro avg 0.85 0.84 0.84 7600\n","weighted avg 0.85 0.84 0.84 7600\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 6. Let's save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607918802363,"user_tz":-60,"elapsed":6277910,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"79c442f9-959e-4b14-ae85-6ef9f654f297"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 7. Let's load the model from HDD.\n","This makes Offline NLU usage possible! 
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":128},"executionInfo":{"status":"ok","timestamp":1607918809822,"user_tz":-60,"elapsed":6285365,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"f755aaa0-974c-4c6f-c079-0f3d681dbc82"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Tesla plans to invest 10M into the ML sector')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
classifier_confidencedocumentclassifierembed_sentence_bert_embeddings
origin_index
00.997592Tesla plans to invest 10M into the ML sectorBusiness[-0.07111635059118271, 0.9532930850982666, -1....
\n","
"],"text/plain":[" classifier_confidence ... embed_sentence_bert_embeddings\n","origin_index ... \n","0 0.997592 ... [-0.07111635059118271, 0.9532930850982666, -1....\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":10}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607918809824,"user_tz":-60,"elapsed":6285363,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"5e8b8c8a-5cd1-4d20-bde2-a4003d5687d0"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(128) | Info: Number of embedding dimensions | Currently set to : 128\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L2_128') | Info: unique reference name for identification | Currently set to : sent_small_bert_L2_128\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setClasses(['World', 'Sci/Tech', 'Sports', 'Business']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['World', 'Sci/Tech', 'Sports', 'Business']\n","pipe['classifier_dl'].setStorageRef('sent_small_bert_L2_128') | Info: unique reference name for identification | Currently set to : sent_small_bert_L2_128\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"M1LjAwJVJxun"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file diff --git a/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_amazon.ipynb b/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_amazon.ipynb index 03ca9530..8255e3ed 100644 --- a/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_amazon.ipynb +++ b/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_amazon.ipynb @@ -1 +1 @@ 
-{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_class_text_classifier_demo_amazon.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_amazon.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier with NLU \n","## ClassifierDL (Multi-class Text Classification)\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install pyspark==2.4.7 \n","! pip install nlu > /dev/null \n","\n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download Amazon Unlocked mobile phones dataset \n","https://www.kaggle.com/PromptCloudHQ/amazon-reviews-unlocked-mobile-phones\n","\n","dataset with unlocked mobile phone reviews in 5 review classes\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610787546042,"user_tz":-300,"elapsed":3459,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ca2d6419-7d62-400b-d3d7-9b16fa9bce2c"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/Amazon_Unlocked_Mobile.csv"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 08:58:27-- http://ckl-it.de/wp-content/uploads/2021/01/Amazon_Unlocked_Mobile.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 452621 (442K) [text/csv]\n","Saving to: ‘Amazon_Unlocked_Mobile.csv’\n","\n","Amazon_Unlocked_Mob 100%[===================>] 442.01K 308KB/s in 1.4s \n","\n","2021-01-16 08:58:29 (308 KB/s) - ‘Amazon_Unlocked_Mobile.csv’ saved [452621/452621]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610787551525,"user_tz":-300,"elapsed":1188,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"dfe55b6f-f33a-4bd2-a2ba-5b1a306e1ab4"},"source":["import pandas as pd\n","test_path = '/content/Amazon_Unlocked_Mobile.csv'\n","train_df = pd.read_csv(test_path,sep=\",\")\n","cols = [\"y\",\"text\"]\n","train_df = train_df[cols]\n","train_df\n","\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytext
0poorBought it, turned it on, did not work. Opened ...
1averageCurrently it is 2014, the 3gs is discontinued....
2good100% recomendado
3averageIt's a good phone but if you use it to browse ...
4averageIt's nice that this phone has LTE and it funct...
.........
1495poorNot happy with this phone. Not able to get but...
1496goodgreat phablet for all general uses
1497poorHate this phone had it for one day
1498goodGreat cheap phone.
1499goodVery good
\n","

1500 rows × 2 columns

\n","
"],"text/plain":[" y text\n","0 poor Bought it, turned it on, did not work. Opened ...\n","1 average Currently it is 2014, the 3gs is discontinued....\n","2 good 100% recomendado\n","3 average It's a good phone but if you use it to browse ...\n","4 average It's nice that this phone has LTE and it funct...\n","... ... ...\n","1495 poor Not happy with this phone. Not able to get but...\n","1496 good great phablet for all general uses\n","1497 poor Hate this phone had it for one day\n","1498 good Great cheap phone.\n","1499 good Very good\n","\n","[1500 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.classifier')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":501},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609621542716,"user_tz":-300,"elapsed":207913,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d0eb19cc-8849-43f7-9cdf-a88fd8f11676"},"source":["# load a trainable pipeline by specifying the train. prefix and fit it on a datset with label and text columns\n","# Since there are no\n","\n","trainable_pipe = nlu.load('train.classifier')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50] )\n","\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50] )\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytextcategorydefault_name_embeddingscategory_confidencesentence
origin_index
0poorBought it, turned it on, did not work. Opened ...average[0.020834514871239662, 0.03326118737459183, -0...0.763940Bought it, turned it on, did not work.
0poorBought it, turned it on, did not work. Opened ...average[0.030574046075344086, -0.009678893722593784, ...1.000000Opened up the back, made sure it was in right,...
0poorBought it, turned it on, did not work. Opened ...average[0.023421283811330795, 0.02294657751917839, -0...2.000000It was supposed to be new, but i it was used.
0poorBought it, turned it on, did not work. Opened ...average[0.06009713560342789, 0.046434734016656876, -0...3.000000Found scratches on cover.
1averageCurrently it is 2014, the 3gs is discontinued....average[0.04893391206860542, -0.010221654549241066, -...0.631228Currently it is 2014, the 3gs is discontinued.
.....................
47goodBought for my mom! She loves it!good[0.021471485495567322, -0.027823669835925102, ...0.656713Bought for my mom!
47goodBought for my mom! She loves it!good[0.0001737327256705612, -0.014630521647632122,...1.000000She loves it!
48goodGave the phone as a birthday gift. My friend s...good[0.03572574257850647, 0.013357092626392841, -0...0.701626Gave the phone as a birthday gift.
48goodGave the phone as a birthday gift. My friend s...good[0.08371475338935852, -0.01581401191651821, -0...1.000000My friend seems happy with it so far.
49goodGreat Productgood[0.03334435820579529, -0.05353177338838577, -0...0.593622Great Product
\n","

215 rows × 6 columns

\n","
"],"text/plain":[" y ... sentence\n","origin_index ... \n","0 poor ... Bought it, turned it on, did not work.\n","0 poor ... Opened up the back, made sure it was in right,...\n","0 poor ... It was supposed to be new, but i it was used.\n","0 poor ... Found scratches on cover.\n","1 average ... Currently it is 2014, the 3gs is discontinued.\n","... ... ... ...\n","47 good ... Bought for my mom!\n","47 good ... She loves it!\n","48 good ... Gave the phone as a birthday gift.\n","48 good ... My friend seems happy with it so far.\n","49 good ... Great Product\n","\n","[215 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609621546162,"user_tz":-300,"elapsed":211344,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"5dc268e6-e97f-4378-85d1-8319d3f7893f"},"source":["fitted_pipe.predict(\"It worked perfectly .\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydefault_name_embeddingscategory_confidencesentence
origin_index
0average[0.06468033790588379, -0.040837567299604416, -...0.460187Bitcoin is going to the moon!
\n","
"],"text/plain":[" category ... sentence\n","origin_index ... \n","0 average ... Bitcoin is going to the moon!\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609621546165,"user_tz":-300,"elapsed":211336,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c0633c00-9bfd-412b-ee55-0f6e5b150f39"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setMaxEpochs(3) | Info: Maximum number of epochs to train | Currently set to : 3\n","pipe['classifier_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['classifier_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['classifier_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['classifier_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":620},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609621557024,"user_tz":-300,"elapsed":222179,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"12b53152-fcdf-4180-91b8-cc150e5bb23a"},"source":["# Train longer!\n","trainable_pipe['classifier_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:100])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," average 0.00 0.00 0.00 29\n"," good 0.65 0.94 0.77 32\n"," poor 0.69 0.95 0.80 39\n","\n"," accuracy 0.67 100\n"," macro avg 0.45 0.63 0.52 100\n","weighted avg 0.48 0.67 0.56 100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytextdocumentcategorydefault_name_embeddingscategory_confidence
origin_index
0poorBought it, turned it on, did not work. Opened ...Bought it, turned it on, did not work. Opened ...poor[0.059367865324020386, 0.05043933913111687, -0...0.952295
1averageCurrently it is 2014, the 3gs is discontinued....Currently it is 2014, the 3gs is discontinued....good[0.0046275281347334385, 0.012452688068151474, ...0.396265
2good100% recomendado100% recomendadogood[0.008266163989901543, 0.00396152026951313, -0...0.773682
3averageIt's a good phone but if you use it to browse ...It's a good phone but if you use it to browse ...poor[0.05291805788874626, 0.002292224671691656, -0...0.506015
4averageIt's nice that this phone has LTE and it funct...It's nice that this phone has LTE and it funct...good[0.03426238149404526, -0.024366019293665886, -...0.648859
.....................
95poorHola, compramos dos teléfonos y vienieron tot...Hola, compramos dos teléfonos y vienieron tot...poor[0.06324272602796555, -0.06387951225042343, -0...0.790492
96goodExcelenteExcelentegood[0.03246314451098442, -0.01719777286052704, -0...0.813424
97poorthe product is good but the English language s...the product is good but the English language s...poor[0.056343767791986465, -0.016822000965476036, ...0.940151
98poorSupposed to be a brand new unlock phone. The p...Supposed to be a brand new unlock phone. The p...poor[0.03210984170436859, 0.018154876306653023, -0...0.984983
99averageMinor, very annoying glitch when texting. Not ...Minor, very annoying glitch when texting. Not ...poor[-0.026854539290070534, 0.03769969940185547, 0...0.969512
\n","

100 rows × 6 columns

\n","
"],"text/plain":[" y ... category_confidence\n","origin_index ... \n","0 poor ... 0.952295\n","1 average ... 0.396265\n","2 good ... 0.773682\n","3 average ... 0.506015\n","4 average ... 0.648859\n","... ... ... ...\n","95 poor ... 0.790492\n","96 good ... 0.813424\n","97 poor ... 0.940151\n","98 poor ... 0.984983\n","99 average ... 0.969512\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609621557034,"user_tz":-300,"elapsed":222174,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1a690639-c397-4ced-c222-981776472766"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') 
returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') 
returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model 
sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"IKK_Ii_gjJfF","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609622725629,"user_tz":-300,"elapsed":1390760,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"9e8f7464-0bca-4a03-9212-2ab8ccb8f319"},"source":["from sklearn.metrics import classification_report\n","trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.classifier')\n","# We need to train longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['classifier_dl'].setMaxEpochs(90) \n","trainable_pipe['classifier_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," average 0.72 0.67 0.69 500\n"," good 0.85 0.87 0.86 500\n"," poor 0.78 0.83 0.80 500\n","\n"," accuracy 0.79 1500\n"," macro avg 0.78 0.79 0.79 1500\n","weighted avg 0.78 0.79 0.79 1500\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609622897186,"user_tz":-300,"elapsed":1562308,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a3175762-9ea0-472e-a8bf-0a64fd1176c9"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! 
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":111},"executionInfo":{"status":"ok","timestamp":1609622933158,"user_tz":-300,"elapsed":1598267,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a40c74fc-c2f1-4a58-ba4e-5d1e21e39da3"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('It worked perfectly.')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
classifier_confidencedocumenten_embed_sentence_small_bert_L12_768_embeddingsclassifier
origin_index
00.950214It worked perfectly.[0.275971919298172, 0.4924655854701996, 0.2755...good
\n","
"],"text/plain":[" classifier_confidence ... classifier\n","origin_index ... \n","0 0.950214 ... good\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609622963569,"user_tz":-300,"elapsed":903,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"662a1dc1-b3fc-4137-b95a-8d7f38326fd5"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setClasses(['average', 'poor', 'good']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['average', 'poor', 'good']\n","pipe['classifier_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_class_text_classifier_demo_amazon.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_amazon.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier with NLU \n","## ClassifierDL (Multi-class Text Classification)\n","## 3 class Amazon Phone review classifier training\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. 
Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install pyspark==2.4.7 \n","! pip install nlu > /dev/null \n","\n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download Amazon Unlocked mobile phones dataset \n","https://www.kaggle.com/PromptCloudHQ/amazon-reviews-unlocked-mobile-phones\n","\n","dataset with unlocked mobile phone reviews in 5 review classes\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610787546042,"user_tz":-300,"elapsed":3459,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ca2d6419-7d62-400b-d3d7-9b16fa9bce2c"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/Amazon_Unlocked_Mobile.csv"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 08:58:27-- http://ckl-it.de/wp-content/uploads/2021/01/Amazon_Unlocked_Mobile.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 452621 (442K) [text/csv]\n","Saving to: ‘Amazon_Unlocked_Mobile.csv’\n","\n","Amazon_Unlocked_Mob 100%[===================>] 442.01K 308KB/s in 1.4s \n","\n","2021-01-16 08:58:29 (308 KB/s) - ‘Amazon_Unlocked_Mobile.csv’ saved [452621/452621]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610787551525,"user_tz":-300,"elapsed":1188,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"dfe55b6f-f33a-4bd2-a2ba-5b1a306e1ab4"},"source":["import pandas as pd\n","test_path = '/content/Amazon_Unlocked_Mobile.csv'\n","train_df = pd.read_csv(test_path,sep=\",\")\n","cols = [\"y\",\"text\"]\n","train_df = train_df[cols]\n","train_df\n","\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytext
0poorBought it, turned it on, did not work. Opened ...
1averageCurrently it is 2014, the 3gs is discontinued....
2good100% recomendado
3averageIt's a good phone but if you use it to browse ...
4averageIt's nice that this phone has LTE and it funct...
.........
1495poorNot happy with this phone. Not able to get but...
1496goodgreat phablet for all general uses
1497poorHate this phone had it for one day
1498goodGreat cheap phone.
1499goodVery good
\n","

1500 rows × 2 columns

\n","
"],"text/plain":[" y text\n","0 poor Bought it, turned it on, did not work. Opened ...\n","1 average Currently it is 2014, the 3gs is discontinued....\n","2 good 100% recomendado\n","3 average It's a good phone but if you use it to browse ...\n","4 average It's nice that this phone has LTE and it funct...\n","... ... ...\n","1495 poor Not happy with this phone. Not able to get but...\n","1496 good great phablet for all general uses\n","1497 poor Hate this phone had it for one day\n","1498 good Great cheap phone.\n","1499 good Very good\n","\n","[1500 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.classifier')\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":501},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609621542716,"user_tz":-300,"elapsed":207913,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d0eb19cc-8849-43f7-9cdf-a88fd8f11676"},"source":["# load a trainable pipeline by specifying the train. prefix and fit it on a dataset with label and text columns\n","# Since there are no\n","\n","trainable_pipe = nlu.load('train.classifier')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50] )\n","\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50] )\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytextcategorydefault_name_embeddingscategory_confidencesentence
origin_index
0poorBought it, turned it on, did not work. Opened ...average[0.020834514871239662, 0.03326118737459183, -0...0.763940Bought it, turned it on, did not work.
0poorBought it, turned it on, did not work. Opened ...average[0.030574046075344086, -0.009678893722593784, ...1.000000Opened up the back, made sure it was in right,...
0poorBought it, turned it on, did not work. Opened ...average[0.023421283811330795, 0.02294657751917839, -0...2.000000It was supposed to be new, but i it was used.
0poorBought it, turned it on, did not work. Opened ...average[0.06009713560342789, 0.046434734016656876, -0...3.000000Found scratches on cover.
1averageCurrently it is 2014, the 3gs is discontinued....average[0.04893391206860542, -0.010221654549241066, -...0.631228Currently it is 2014, the 3gs is discontinued.
.....................
47goodBought for my mom! She loves it!good[0.021471485495567322, -0.027823669835925102, ...0.656713Bought for my mom!
47goodBought for my mom! She loves it!good[0.0001737327256705612, -0.014630521647632122,...1.000000She loves it!
48goodGave the phone as a birthday gift. My friend s...good[0.03572574257850647, 0.013357092626392841, -0...0.701626Gave the phone as a birthday gift.
48goodGave the phone as a birthday gift. My friend s...good[0.08371475338935852, -0.01581401191651821, -0...1.000000My friend seems happy with it so far.
49goodGreat Productgood[0.03334435820579529, -0.05353177338838577, -0...0.593622Great Product
\n","

215 rows × 6 columns

\n","
"],"text/plain":[" y ... sentence\n","origin_index ... \n","0 poor ... Bought it, turned it on, did not work.\n","0 poor ... Opened up the back, made sure it was in right,...\n","0 poor ... It was supposed to be new, but i it was used.\n","0 poor ... Found scratches on cover.\n","1 average ... Currently it is 2014, the 3gs is discontinued.\n","... ... ... ...\n","47 good ... Bought for my mom!\n","47 good ... She loves it!\n","48 good ... Gave the phone as a birthday gift.\n","48 good ... My friend seems happy with it so far.\n","49 good ... Great Product\n","\n","[215 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609621546162,"user_tz":-300,"elapsed":211344,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"5dc268e6-e97f-4378-85d1-8319d3f7893f"},"source":["fitted_pipe.predict(\"It worked perfectly .\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydefault_name_embeddingscategory_confidencesentence
origin_index
0average[0.06468033790588379, -0.040837567299604416, -...0.460187Bitcoin is going to the moon!
\n","
"],"text/plain":[" category ... sentence\n","origin_index ... \n","0 average ... Bitcoin is going to the moon!\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609621546165,"user_tz":-300,"elapsed":211336,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c0633c00-9bfd-412b-ee55-0f6e5b150f39"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setMaxEpochs(3) | Info: Maximum number of epochs to train | Currently set to : 3\n","pipe['classifier_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['classifier_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['classifier_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['classifier_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":620},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609621557024,"user_tz":-300,"elapsed":222179,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"12b53152-fcdf-4180-91b8-cc150e5bb23a"},"source":["# Train longer!\n","trainable_pipe['classifier_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:100])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," average 0.00 0.00 0.00 29\n"," good 0.65 0.94 0.77 32\n"," poor 0.69 0.95 0.80 39\n","\n"," accuracy 0.67 100\n"," macro avg 0.45 0.63 0.52 100\n","weighted avg 0.48 0.67 0.56 100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytextdocumentcategorydefault_name_embeddingscategory_confidence
origin_index
0poorBought it, turned it on, did not work. Opened ...Bought it, turned it on, did not work. Opened ...poor[0.059367865324020386, 0.05043933913111687, -0...0.952295
1averageCurrently it is 2014, the 3gs is discontinued....Currently it is 2014, the 3gs is discontinued....good[0.0046275281347334385, 0.012452688068151474, ...0.396265
2good100% recomendado100% recomendadogood[0.008266163989901543, 0.00396152026951313, -0...0.773682
3averageIt's a good phone but if you use it to browse ...It's a good phone but if you use it to browse ...poor[0.05291805788874626, 0.002292224671691656, -0...0.506015
4averageIt's nice that this phone has LTE and it funct...It's nice that this phone has LTE and it funct...good[0.03426238149404526, -0.024366019293665886, -...0.648859
.....................
95poorHola, compramos dos teléfonos y vienieron tot...Hola, compramos dos teléfonos y vienieron tot...poor[0.06324272602796555, -0.06387951225042343, -0...0.790492
96goodExcelenteExcelentegood[0.03246314451098442, -0.01719777286052704, -0...0.813424
97poorthe product is good but the English language s...the product is good but the English language s...poor[0.056343767791986465, -0.016822000965476036, ...0.940151
98poorSupposed to be a brand new unlock phone. The p...Supposed to be a brand new unlock phone. The p...poor[0.03210984170436859, 0.018154876306653023, -0...0.984983
99averageMinor, very annoying glitch when texting. Not ...Minor, very annoying glitch when texting. Not ...poor[-0.026854539290070534, 0.03769969940185547, 0...0.969512
\n","

100 rows × 6 columns

\n","
"],"text/plain":[" y ... category_confidence\n","origin_index ... \n","0 poor ... 0.952295\n","1 average ... 0.396265\n","2 good ... 0.773682\n","3 average ... 0.506015\n","4 average ... 0.648859\n","... ... ... ...\n","95 poor ... 0.790492\n","96 good ... 0.813424\n","97 poor ... 0.940151\n","98 poor ... 0.984983\n","99 average ... 0.969512\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609621557034,"user_tz":-300,"elapsed":222174,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1a690639-c397-4ced-c222-981776472766"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') 
returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') 
returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model 
sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"IKK_Ii_gjJfF","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609622725629,"user_tz":-300,"elapsed":1390760,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"9e8f7464-0bca-4a03-9212-2ab8ccb8f319"},"source":["from sklearn.metrics import classification_report\n","trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.classifier')\n","# We need to train longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['classifier_dl'].setMaxEpochs(90) \n","trainable_pipe['classifier_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," average 0.72 0.67 0.69 500\n"," good 0.85 0.87 0.86 500\n"," poor 0.78 0.83 0.80 500\n","\n"," accuracy 0.79 1500\n"," macro avg 0.78 0.79 0.79 1500\n","weighted avg 0.78 0.79 0.79 1500\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Let's save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609622897186,"user_tz":-300,"elapsed":1562308,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a3175762-9ea0-472e-a8bf-0a64fd1176c9"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Let's load the model from HDD.\n","This makes offline NLU usage possible! 
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":111},"executionInfo":{"status":"ok","timestamp":1609622933158,"user_tz":-300,"elapsed":1598267,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a40c74fc-c2f1-4a58-ba4e-5d1e21e39da3"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('It worked perfectly.')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
classifier_confidencedocumenten_embed_sentence_small_bert_L12_768_embeddingsclassifier
origin_index
00.950214It worked perfectly.[0.275971919298172, 0.4924655854701996, 0.2755...good
\n","
"],"text/plain":[" classifier_confidence ... classifier\n","origin_index ... \n","0 0.950214 ... good\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609622963569,"user_tz":-300,"elapsed":903,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"662a1dc1-b3fc-4137-b95a-8d7f38326fd5"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setClasses(['average', 'poor', 'good']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['average', 'poor', 'good']\n","pipe['classifier_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_hotel_reviews.ipynb b/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_hotel_reviews.ipynb index a62e0ab3..35d328ee 100644 --- a/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_hotel_reviews.ipynb +++ b/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_hotel_reviews.ipynb @@ -1 +1 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_class_text_classifier_demo_hotel_reviews.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_hotel_reviews.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier with NLU \n","## ClassifierDL (Multi-class Text Classification)\n","With the 
[ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install pyspark==2.4.7 \n","! pip install nlu > /dev/null \n","\n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download hotel reviews dataset \n","https://www.kaggle.com/andrewmvd/trip-advisor-hotel-reviews\n","\n","Hotels play a crucial role in traveling and with the increased access to information new pathways of selecting the best ones emerged.\n","With this dataset, consisting of 20k reviews crawled from Tripadvisor, you can explore what makes a great hotel and maybe even use this model in your travels!\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610787917402,"user_tz":-300,"elapsed":5153,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"0d1c17b6-555c-4df6-cfb6-08af37c3f9ef"},"source":["! 
wget http://ckl-it.de/wp-content/uploads/2021/01/tripadvisor_hotel_reviews.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:04:37-- http://ckl-it.de/wp-content/uploads/2021/01/tripadvisor_hotel_reviews.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 5160790 (4.9M) [text/csv]\n","Saving to: ‘tripadvisor_hotel_reviews.csv’\n","\n","tripadvisor_hotel_r 100%[===================>] 4.92M 1.46MB/s in 3.4s \n","\n","2021-01-16 09:04:41 (1.46 MB/s) - ‘tripadvisor_hotel_reviews.csv’ saved [5160790/5160790]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610787919775,"user_tz":-300,"elapsed":1300,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"f579f26c-2a41-47ec-fe20-989c3ec16643"},"source":["import pandas as pd\n","test_path = '/content/tripadvisor_hotel_reviews.csv'\n","train_df = pd.read_csv(test_path,sep=\",\")\n","cols = [\"y\",\"text\"]\n","train_df = train_df[cols]\n","train_df\n","\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytext
0greatgreat stayed hotel 5 nights end august 2005. r...
1poorwatch bait-and-switch room rates, rooms accept...
2averagegood check liked hotel good location friendly ...
3greatbest location value properties waikiki head ho...
4poorbotel not recommended little disappointed hone...
.........
6547greatbig bang buck st. charles great new orleans st...
6548greatloved minute, reading reviews hotel bit worrie...
6549greatwonderful, let tell place, 3 friends stayed ap...
6550averagesmall bathroom clean hmmm ok let stay used tra...
6551poorvvvv bad went hotel valantine day weekend, hot...
\n","

6552 rows × 2 columns

\n","
"],"text/plain":[" y text\n","0 great great stayed hotel 5 nights end august 2005. r...\n","1 poor watch bait-and-switch room rates, rooms accept...\n","2 average good check liked hotel good location friendly ...\n","3 great best location value properties waikiki head ho...\n","4 poor botel not recommended little disappointed hone...\n","... ... ...\n","6547 great big bang buck st. charles great new orleans st...\n","6548 great loved minute, reading reviews hotel bit worrie...\n","6549 great wonderful, let tell place, 3 friends stayed ap...\n","6550 average small bathroom clean hmmm ok let stay used tra...\n","6551 poor vvvv bad went hotel valantine day weekend, hot...\n","\n","[6552 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.classifier')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":501},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609621945982,"user_tz":-300,"elapsed":194629,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ea741862-2923-441b-ed64-bb5da1eb5e3e"},"source":["# load a trainable pipeline by specifying the train. 
prefix and fit it on a datset with label and text columns\n","# Since there are no\n","\n","trainable_pipe = nlu.load('train.classifier')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50] )\n","\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50] )\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytextcategory_confidencetokencategorydefault_name_embeddings
origin_index
0greatgreat stayed hotel 5 nights end august 2005. r...0.496030greatgreat[[0.03609783574938774, 0.05106373876333237, 0....
0greatgreat stayed hotel 5 nights end august 2005. r...0.496030stayedgreat[[0.03609783574938774, 0.05106373876333237, 0....
0greatgreat stayed hotel 5 nights end august 2005. r...0.496030hotelgreat[[0.03609783574938774, 0.05106373876333237, 0....
0greatgreat stayed hotel 5 nights end august 2005. r...0.4960305great[[0.03609783574938774, 0.05106373876333237, 0....
0greatgreat stayed hotel 5 nights end august 2005. r...0.496030nightsgreat[[0.03609783574938774, 0.05106373876333237, 0....
.....................
49poorkidding, arrived riu palace macao punta cana w...0.476485recommendaverage[[-0.017401963472366333, 0.04562698304653168, ...
49poorkidding, arrived riu palace macao punta cana w...0.476485riuaverage[[-0.017401963472366333, 0.04562698304653168, ...
49poorkidding, arrived riu palace macao punta cana w...0.476485palaceaverage[[-0.017401963472366333, 0.04562698304653168, ...
49poorkidding, arrived riu palace macao punta cana w...0.476485macaoaverage[[-0.017401963472366333, 0.04562698304653168, ...
49poorkidding, arrived riu palace macao punta cana w...0.476485,average[[-0.017401963472366333, 0.04562698304653168, ...
\n","

7014 rows × 6 columns

\n","
"],"text/plain":[" y ... default_name_embeddings\n","origin_index ... \n","0 great ... [[0.03609783574938774, 0.05106373876333237, 0....\n","0 great ... [[0.03609783574938774, 0.05106373876333237, 0....\n","0 great ... [[0.03609783574938774, 0.05106373876333237, 0....\n","0 great ... [[0.03609783574938774, 0.05106373876333237, 0....\n","0 great ... [[0.03609783574938774, 0.05106373876333237, 0....\n","... ... ... ...\n","49 poor ... [[-0.017401963472366333, 0.04562698304653168, ...\n","49 poor ... [[-0.017401963472366333, 0.04562698304653168, ...\n","49 poor ... [[-0.017401963472366333, 0.04562698304653168, ...\n","49 poor ... [[-0.017401963472366333, 0.04562698304653168, ...\n","49 poor ... [[-0.017401963472366333, 0.04562698304653168, ...\n","\n","[7014 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":297},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609621948873,"user_tz":-300,"elapsed":197503,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"7c8077e8-b95b-40e5-839b-29e738884851"},"source":["fitted_pipe.predict(\"It was a good experince!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencetokencategorydefault_name_embeddings
origin_index
00.739900Bitcoinaverage[[0.06468033790588379, -0.040837567299604416, ...
00.739900isaverage[[0.06468033790588379, -0.040837567299604416, ...
00.739900goingaverage[[0.06468033790588379, -0.040837567299604416, ...
00.739900toaverage[[0.06468033790588379, -0.040837567299604416, ...
00.739900theaverage[[0.06468033790588379, -0.040837567299604416, ...
00.739900moonaverage[[0.06468033790588379, -0.040837567299604416, ...
00.739900!average[[0.06468033790588379, -0.040837567299604416, ...
\n","
"],"text/plain":[" category_confidence ... default_name_embeddings\n","origin_index ... \n","0 0.739900 ... [[0.06468033790588379, -0.040837567299604416, ...\n","0 0.739900 ... [[0.06468033790588379, -0.040837567299604416, ...\n","0 0.739900 ... [[0.06468033790588379, -0.040837567299604416, ...\n","0 0.739900 ... [[0.06468033790588379, -0.040837567299604416, ...\n","0 0.739900 ... [[0.06468033790588379, -0.040837567299604416, ...\n","0 0.739900 ... [[0.06468033790588379, -0.040837567299604416, ...\n","0 0.739900 ... [[0.06468033790588379, -0.040837567299604416, ...\n","\n","[7 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609621948879,"user_tz":-300,"elapsed":197499,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3b2e8fac-92a8-436c-93b7-d548f39f95a1"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setMaxEpochs(3) | Info: Maximum number of epochs to train | Currently set to : 3\n","pipe['classifier_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['classifier_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['classifier_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['classifier_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. 
| Currently set to : True\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":620},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609621958501,"user_tz":-300,"elapsed":207107,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e86e37d9-df7d-4c70-fafe-f1b297860fa9"},"source":["# Train longer!\n","trainable_pipe['classifier_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:100])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = 
fitted_pipe.predict(train_df.iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," average 0.48 0.76 0.59 33\n"," great 0.86 0.51 0.64 35\n"," poor 0.74 0.62 0.68 32\n","\n"," accuracy 0.63 100\n"," macro avg 0.69 0.63 0.64 100\n","weighted avg 0.70 0.63 0.64 100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytextdocumentcategory_confidencecategorydefault_name_embeddings
origin_index
0greatgreat stayed hotel 5 nights end august 2005. r...great stayed hotel 5 nights end august 2005. r...0.595822average[0.06212242692708969, 0.04104098677635193, 0.0...
1poorwatch bait-and-switch room rates, rooms accept...watch bait-and-switch room rates, rooms accept...0.498284poor[0.0546528585255146, 0.02160552889108658, -0.0...
2averagegood check liked hotel good location friendly ...good check liked hotel good location friendly ...0.557739average[0.008103911764919758, 0.02573486790060997, 0....
3greatbest location value properties waikiki head ho...best location value properties waikiki head ho...0.418274average[0.05095028877258301, -0.003614993067458272, 0...
4poorbotel not recommended little disappointed hone...botel not recommended little disappointed hone...0.491956average[0.03620055690407753, 0.010797196999192238, 0....
.....................
95greatgreat location spent 7 days castle inn beginni...great location spent 7 days castle inn beginni...0.402236average[0.03295842185616493, 0.04682551696896553, 0.0...
96averagegreat location hard beds really liked hotel si...great location hard beds really liked hotel si...0.598560average[0.02258184179663658, 0.0432007722556591, -0.0...
97greatgreat location location hotel perfect right mi...great location location hotel perfect right mi...0.552369average[0.06024744734168053, 0.05366133153438568, -0....
98greatjust starting lose lustre stayed chancellor co...just starting lose lustre stayed chancellor co...0.374642poor[0.0255410298705101, 0.0401645191013813, 0.003...
99poorbittersweet memories glorious past recent stay...bittersweet memories glorious past recent stay...0.415380poor[0.03259000554680824, 0.049256037920713425, 0....
\n","

100 rows × 6 columns

\n","
"],"text/plain":[" y ... default_name_embeddings\n","origin_index ... \n","0 great ... [0.06212242692708969, 0.04104098677635193, 0.0...\n","1 poor ... [0.0546528585255146, 0.02160552889108658, -0.0...\n","2 average ... [0.008103911764919758, 0.02573486790060997, 0....\n","3 great ... [0.05095028877258301, -0.003614993067458272, 0...\n","4 poor ... [0.03620055690407753, 0.010797196999192238, 0....\n","... ... ... ...\n","95 great ... [0.03295842185616493, 0.04682551696896553, 0.0...\n","96 average ... [0.02258184179663658, 0.0432007722556591, -0.0...\n","97 great ... [0.06024744734168053, 0.05366133153438568, -0....\n","98 great ... [0.0255410298705101, 0.0401645191013813, 0.003...\n","99 poor ... [0.03259000554680824, 0.049256037920713425, 0....\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609621958515,"user_tz":-300,"elapsed":207110,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"de28c144-5456-4998-ffad-2d5046d5efc4"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"IKK_Ii_gjJfF","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609631124604,"user_tz":-300,"elapsed":7463638,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"49f0d684-0253-441f-d322-2e286b89fa24"},"source":["from sklearn.metrics import classification_report\n","trainable_pipe = 
nlu.load('en.embed_sentence.small_bert_L12_768 train.classifier')\n","# We need to train longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['classifier_dl'].setMaxEpochs(90) \n","trainable_pipe['classifier_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," average 0.66 0.65 0.65 2184\n"," great 0.79 0.81 0.80 2184\n"," poor 0.77 0.78 0.78 2184\n","\n"," accuracy 0.74 6552\n"," macro avg 0.74 0.74 0.74 6552\n","weighted avg 0.74 0.74 0.74 6552\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609631288714,"user_tz":-300,"elapsed":164136,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"42ce7ad9-b16c-404f-a717-b6d98651af95"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. 
Lets load the model from HDD.\n","This makes Offlien NLU usage possible! \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":111},"executionInfo":{"status":"ok","timestamp":1609631304458,"user_tz":-300,"elapsed":15754,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a0f4d6e7-e607-41f2-91ae-7b170b03b40a"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('It was a good experince!')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
classifieren_embed_sentence_small_bert_L12_768_embeddingsdocumentclassifier_confidence
origin_index
0great[-0.07878006249666214, 0.1528550535440445, 0.1...It was one of the best wines i ever tasted .0.865597
\n","
"],"text/plain":[" classifier ... classifier_confidence\n","origin_index ... \n","0 great ... 0.865597\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609631312511,"user_tz":-300,"elapsed":2776,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3de0f51e-3fd0-4dae-ee05-81459d162c42"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setClasses(['average', 'great', 'poor']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['average', 'great', 'poor']\n","pipe['classifier_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_class_text_classifier_demo_hotel_reviews.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_hotel_reviews.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier with NLU \n","## ClassifierDL (Multi-class Text Classification)\n","## 3 class Tripadvisor Hotel review classifier training\n","With the 
[ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install pyspark==2.4.7 \n","! pip install nlu > /dev/null \n","\n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download hotel reviews dataset \n","https://www.kaggle.com/andrewmvd/trip-advisor-hotel-reviews\n","\n","Hotels play a crucial role in traveling and with the increased access to information new pathways of selecting the best ones emerged.\n","With this dataset, consisting of 20k reviews crawled from Tripadvisor, you can explore what makes a great hotel and maybe even use this model in your travels!\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610787917402,"user_tz":-300,"elapsed":5153,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"0d1c17b6-555c-4df6-cfb6-08af37c3f9ef"},"source":["! 
wget http://ckl-it.de/wp-content/uploads/2021/01/tripadvisor_hotel_reviews.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:04:37-- http://ckl-it.de/wp-content/uploads/2021/01/tripadvisor_hotel_reviews.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 5160790 (4.9M) [text/csv]\n","Saving to: ‘tripadvisor_hotel_reviews.csv’\n","\n","tripadvisor_hotel_r 100%[===================>] 4.92M 1.46MB/s in 3.4s \n","\n","2021-01-16 09:04:41 (1.46 MB/s) - ‘tripadvisor_hotel_reviews.csv’ saved [5160790/5160790]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610787919775,"user_tz":-300,"elapsed":1300,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"f579f26c-2a41-47ec-fe20-989c3ec16643"},"source":["import pandas as pd\n","test_path = '/content/tripadvisor_hotel_reviews.csv'\n","train_df = pd.read_csv(test_path,sep=\",\")\n","cols = [\"y\",\"text\"]\n","train_df = train_df[cols]\n","train_df\n","\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytext
0greatgreat stayed hotel 5 nights end august 2005. r...
1poorwatch bait-and-switch room rates, rooms accept...
2averagegood check liked hotel good location friendly ...
3greatbest location value properties waikiki head ho...
4poorbotel not recommended little disappointed hone...
.........
6547greatbig bang buck st. charles great new orleans st...
6548greatloved minute, reading reviews hotel bit worrie...
6549greatwonderful, let tell place, 3 friends stayed ap...
6550averagesmall bathroom clean hmmm ok let stay used tra...
6551poorvvvv bad went hotel valantine day weekend, hot...
\n","

6552 rows × 2 columns

\n","
"],"text/plain":[" y text\n","0 great great stayed hotel 5 nights end august 2005. r...\n","1 poor watch bait-and-switch room rates, rooms accept...\n","2 average good check liked hotel good location friendly ...\n","3 great best location value properties waikiki head ho...\n","4 poor botel not recommended little disappointed hone...\n","... ... ...\n","6547 great big bang buck st. charles great new orleans st...\n","6548 great loved minute, reading reviews hotel bit worrie...\n","6549 great wonderful, let tell place, 3 friends stayed ap...\n","6550 average small bathroom clean hmmm ok let stay used tra...\n","6551 poor vvvv bad went hotel valantine day weekend, hot...\n","\n","[6552 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.classifier')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":501},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609621945982,"user_tz":-300,"elapsed":194629,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ea741862-2923-441b-ed64-bb5da1eb5e3e"},"source":["# load a trainable pipeline by specifying the train. 
prefix and fit it on a dataset with label and text columns\n","# Since there are no\n","\n","trainable_pipe = nlu.load('train.classifier')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50] )\n","\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50] )\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytextcategory_confidencetokencategorydefault_name_embeddings
origin_index
0greatgreat stayed hotel 5 nights end august 2005. r...0.496030greatgreat[[0.03609783574938774, 0.05106373876333237, 0....
0greatgreat stayed hotel 5 nights end august 2005. r...0.496030stayedgreat[[0.03609783574938774, 0.05106373876333237, 0....
0greatgreat stayed hotel 5 nights end august 2005. r...0.496030hotelgreat[[0.03609783574938774, 0.05106373876333237, 0....
0greatgreat stayed hotel 5 nights end august 2005. r...0.4960305great[[0.03609783574938774, 0.05106373876333237, 0....
0greatgreat stayed hotel 5 nights end august 2005. r...0.496030nightsgreat[[0.03609783574938774, 0.05106373876333237, 0....
.....................
49poorkidding, arrived riu palace macao punta cana w...0.476485recommendaverage[[-0.017401963472366333, 0.04562698304653168, ...
49poorkidding, arrived riu palace macao punta cana w...0.476485riuaverage[[-0.017401963472366333, 0.04562698304653168, ...
49poorkidding, arrived riu palace macao punta cana w...0.476485palaceaverage[[-0.017401963472366333, 0.04562698304653168, ...
49poorkidding, arrived riu palace macao punta cana w...0.476485macaoaverage[[-0.017401963472366333, 0.04562698304653168, ...
49poorkidding, arrived riu palace macao punta cana w...0.476485,average[[-0.017401963472366333, 0.04562698304653168, ...
\n","

7014 rows × 6 columns

\n","
"],"text/plain":[" y ... default_name_embeddings\n","origin_index ... \n","0 great ... [[0.03609783574938774, 0.05106373876333237, 0....\n","0 great ... [[0.03609783574938774, 0.05106373876333237, 0....\n","0 great ... [[0.03609783574938774, 0.05106373876333237, 0....\n","0 great ... [[0.03609783574938774, 0.05106373876333237, 0....\n","0 great ... [[0.03609783574938774, 0.05106373876333237, 0....\n","... ... ... ...\n","49 poor ... [[-0.017401963472366333, 0.04562698304653168, ...\n","49 poor ... [[-0.017401963472366333, 0.04562698304653168, ...\n","49 poor ... [[-0.017401963472366333, 0.04562698304653168, ...\n","49 poor ... [[-0.017401963472366333, 0.04562698304653168, ...\n","49 poor ... [[-0.017401963472366333, 0.04562698304653168, ...\n","\n","[7014 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":297},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609621948873,"user_tz":-300,"elapsed":197503,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"7c8077e8-b95b-40e5-839b-29e738884851"},"source":["fitted_pipe.predict(\"It was a good experince!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencetokencategorydefault_name_embeddings
origin_index
00.739900Bitcoinaverage[[0.06468033790588379, -0.040837567299604416, ...
00.739900isaverage[[0.06468033790588379, -0.040837567299604416, ...
00.739900goingaverage[[0.06468033790588379, -0.040837567299604416, ...
00.739900toaverage[[0.06468033790588379, -0.040837567299604416, ...
00.739900theaverage[[0.06468033790588379, -0.040837567299604416, ...
00.739900moonaverage[[0.06468033790588379, -0.040837567299604416, ...
00.739900!average[[0.06468033790588379, -0.040837567299604416, ...
\n","
"],"text/plain":[" category_confidence ... default_name_embeddings\n","origin_index ... \n","0 0.739900 ... [[0.06468033790588379, -0.040837567299604416, ...\n","0 0.739900 ... [[0.06468033790588379, -0.040837567299604416, ...\n","0 0.739900 ... [[0.06468033790588379, -0.040837567299604416, ...\n","0 0.739900 ... [[0.06468033790588379, -0.040837567299604416, ...\n","0 0.739900 ... [[0.06468033790588379, -0.040837567299604416, ...\n","0 0.739900 ... [[0.06468033790588379, -0.040837567299604416, ...\n","0 0.739900 ... [[0.06468033790588379, -0.040837567299604416, ...\n","\n","[7 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609621948879,"user_tz":-300,"elapsed":197499,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3b2e8fac-92a8-436c-93b7-d548f39f95a1"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setMaxEpochs(3) | Info: Maximum number of epochs to train | Currently set to : 3\n","pipe['classifier_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['classifier_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['classifier_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['classifier_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. 
| Currently set to : True\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":620},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609621958501,"user_tz":-300,"elapsed":207107,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e86e37d9-df7d-4c70-fafe-f1b297860fa9"},"source":["# Train longer!\n","trainable_pipe['classifier_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:100])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = 
fitted_pipe.predict(train_df.iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," average 0.48 0.76 0.59 33\n"," great 0.86 0.51 0.64 35\n"," poor 0.74 0.62 0.68 32\n","\n"," accuracy 0.63 100\n"," macro avg 0.69 0.63 0.64 100\n","weighted avg 0.70 0.63 0.64 100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytextdocumentcategory_confidencecategorydefault_name_embeddings
origin_index
0greatgreat stayed hotel 5 nights end august 2005. r...great stayed hotel 5 nights end august 2005. r...0.595822average[0.06212242692708969, 0.04104098677635193, 0.0...
1poorwatch bait-and-switch room rates, rooms accept...watch bait-and-switch room rates, rooms accept...0.498284poor[0.0546528585255146, 0.02160552889108658, -0.0...
2averagegood check liked hotel good location friendly ...good check liked hotel good location friendly ...0.557739average[0.008103911764919758, 0.02573486790060997, 0....
3greatbest location value properties waikiki head ho...best location value properties waikiki head ho...0.418274average[0.05095028877258301, -0.003614993067458272, 0...
4poorbotel not recommended little disappointed hone...botel not recommended little disappointed hone...0.491956average[0.03620055690407753, 0.010797196999192238, 0....
.....................
95greatgreat location spent 7 days castle inn beginni...great location spent 7 days castle inn beginni...0.402236average[0.03295842185616493, 0.04682551696896553, 0.0...
96averagegreat location hard beds really liked hotel si...great location hard beds really liked hotel si...0.598560average[0.02258184179663658, 0.0432007722556591, -0.0...
97greatgreat location location hotel perfect right mi...great location location hotel perfect right mi...0.552369average[0.06024744734168053, 0.05366133153438568, -0....
98greatjust starting lose lustre stayed chancellor co...just starting lose lustre stayed chancellor co...0.374642poor[0.0255410298705101, 0.0401645191013813, 0.003...
99poorbittersweet memories glorious past recent stay...bittersweet memories glorious past recent stay...0.415380poor[0.03259000554680824, 0.049256037920713425, 0....
\n","

100 rows × 6 columns

\n","
"],"text/plain":[" y ... default_name_embeddings\n","origin_index ... \n","0 great ... [0.06212242692708969, 0.04104098677635193, 0.0...\n","1 poor ... [0.0546528585255146, 0.02160552889108658, -0.0...\n","2 average ... [0.008103911764919758, 0.02573486790060997, 0....\n","3 great ... [0.05095028877258301, -0.003614993067458272, 0...\n","4 poor ... [0.03620055690407753, 0.010797196999192238, 0....\n","... ... ... ...\n","95 great ... [0.03295842185616493, 0.04682551696896553, 0.0...\n","96 average ... [0.02258184179663658, 0.0432007722556591, -0.0...\n","97 great ... [0.06024744734168053, 0.05366133153438568, -0....\n","98 great ... [0.0255410298705101, 0.0401645191013813, 0.003...\n","99 poor ... [0.03259000554680824, 0.049256037920713425, 0....\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609621958515,"user_tz":-300,"elapsed":207110,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"de28c144-5456-4998-ffad-2d5046d5efc4"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"IKK_Ii_gjJfF","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609631124604,"user_tz":-300,"elapsed":7463638,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"49f0d684-0253-441f-d322-2e286b89fa24"},"source":["from sklearn.metrics import classification_report\n","trainable_pipe = 
nlu.load('en.embed_sentence.small_bert_L12_768 train.classifier')\n","# We need to train longer and use a smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['classifier_dl'].setMaxEpochs(90) \n","trainable_pipe['classifier_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," average 0.66 0.65 0.65 2184\n"," great 0.79 0.81 0.80 2184\n"," poor 0.77 0.78 0.78 2184\n","\n"," accuracy 0.74 6552\n"," macro avg 0.74 0.74 0.74 6552\n","weighted avg 0.74 0.74 0.74 6552\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Let's save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609631288714,"user_tz":-300,"elapsed":164136,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"42ce7ad9-b16c-404f-a717-b6d98651af95"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. 
Let's load the model from HDD.\n","This makes offline NLU usage possible! \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":111},"executionInfo":{"status":"ok","timestamp":1609631304458,"user_tz":-300,"elapsed":15754,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a0f4d6e7-e607-41f2-91ae-7b170b03b40a"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('It was a good experince!')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
classifieren_embed_sentence_small_bert_L12_768_embeddingsdocumentclassifier_confidence
origin_index
0great[-0.07878006249666214, 0.1528550535440445, 0.1...It was one of the best wines i ever tasted .0.865597
\n","
"],"text/plain":[" classifier ... classifier_confidence\n","origin_index ... \n","0 great ... 0.865597\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609631312511,"user_tz":-300,"elapsed":2776,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3de0f51e-3fd0-4dae-ee05-81459d162c42"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setClasses(['average', 'great', 'poor']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['average', 'great', 'poor']\n","pipe['classifier_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_musical_instruments.ipynb b/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_musical_instruments.ipynb index 3e474fa9..046560b8 100644 --- a/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_musical_instruments.ipynb +++ b/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_musical_instruments.ipynb @@ -1 +1 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_class_text_classifier_demo_musical_instruments.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 
3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_musical_instruments.ipynb)\n","\n","\n","\n","\n","# Training a Deep Learning Classifier with NLU \n","## ClassifierDL (Multi-class Text Classification)\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu > /dev/null\n","! pip install pyspark==2.4.7 > /dev/null\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download musical instruments classification dataset\r\n","\r\n","https://www.kaggle.com/eswarchandt/amazon-music-reviews\r\n","\r\n","dataset with products rated between 5 classes"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610787881309,"user_tz":-300,"elapsed":1350,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c7f2277f-e7a9-484a-cf3b-457bdc65e457"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/Musical_instruments_reviews.csv"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:04:04-- http://ckl-it.de/wp-content/uploads/2021/01/Musical_instruments_reviews.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 51708 (50K) [text/csv]\n","Saving to: ‘Musical_instruments_reviews.csv’\n","\n","Musical_instruments 100%[===================>] 50.50K 241KB/s in 0.2s \n","\n","2021-01-16 09:04:05 (241 KB/s) - ‘Musical_instruments_reviews.csv’ saved [51708/51708]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610787895917,"user_tz":-300,"elapsed":1017,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1c4ca0c9-9489-47b9-a85e-3a8a3cc092a6"},"source":["import pandas as pd\n","test_path = '/content/Musical_instruments_reviews.csv'\n","train_df = pd.read_csv(test_path,sep=\",\")\n","cols = [\"y\",\"text\"]\n","train_df = train_df[cols]\n","train_df\n","\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytext
0goodHosa products are a good bang for the buck. I ...
1averageI now use this cable to run from the output of...
2goodCheap and good texture rubber that does not ge...
3averageThese cables are a little thin compared to hos...
4averageIt is a decent cable. It does its job, but it ...
.........
115very poorIt just randomly pops off my bass, it's so sli...
116very goodThe primary job of this device is to block the...
117goodThe Hosa XLR cables are affordable and very he...
118averageIt's a cable, no frills, tangles pretty easy a...
119very poorIt hums, crackles, and I think I'm having prob...
\n","

120 rows × 2 columns

\n","
"],"text/plain":[" y text\n","0 good Hosa products are a good bang for the buck. I ...\n","1 average I now use this cable to run from the output of...\n","2 good Cheap and good texture rubber that does not ge...\n","3 average These cables are a little thin compared to hos...\n","4 average It is a decent cable. It does its job, but it ...\n",".. ... ...\n","115 very poor It just randomly pops off my bass, it's so sli...\n","116 very good The primary job of this device is to block the...\n","117 good The Hosa XLR cables are affordable and very he...\n","118 average It's a cable, no frills, tangles pretty easy a...\n","119 very poor It hums, crackles, and I think I'm having prob...\n","\n","[120 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":2}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.classifier')\n","\n","By default, the Universal Sentence Encoder Embeddings (USE) are beeing downloaded to provide embeddings for the classifier. You can use any of the 50+ other sentence Emeddings in NLU tough!\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":671},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609472199891,"user_tz":-300,"elapsed":191855,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"bd493b9c-fa33-44af-e941-1000f0aa137d"},"source":["# load a trainable pipeline by specifying the train. 
prefix and fit it on a datset with label and text columns\n","# Since there are no\n","\n","trainable_pipe = nlu.load('train.classifier')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50] )\n","\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50] )\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencetextycategorydefault_name_embeddingssentence
origin_index
00.304148Hosa products are a good bang for the buck. I ...goodaverage[0.07208353281021118, 0.028736615553498268, -0...Hosa products are a good bang for the buck.
01.000000Hosa products are a good bang for the buck. I ...goodaverage[0.056614313274621964, -0.04707420617341995, -...I haven't looked up the specifications, but I'...
10.956961I now use this cable to run from the output of...averageaverage[0.06778458505868912, -0.0052166287787258625, ...I now use this cable to run from the output of...
11.000000I now use this cable to run from the output of...averageaverage[0.06371542811393738, -0.022252758964896202, -...After I bought Monster Cable to hook up my ped...
12.000000I now use this cable to run from the output of...averageaverage[0.018308864906430244, 0.0024022769648581743, ...I had been using a high end Planet Waves cable...
.....................
470.841045Update: The right angle switched end started d...averageaverage[-0.013615701347589493, -0.04160430282354355, ...I like knowing that.
470.841045Update: The right angle switched end started d...averageaverage[0.02372647449374199, 0.04573449119925499, -0....** EDIT: AS STATED ABOVE, YOU WILL NOT BE ABLE...
480.997217Doe's not stay on to well, moves to much even ...averageaverage[0.08493339270353317, 0.047714825719594955, -0...Doe's not stay on to well, moves to much even ...
490.401975These are not the greatest but they're cheap a...goodvery poor[0.03083745203912258, 0.01701708696782589, -0....These are not the greatest but they're cheap a...
491.000000These are not the greatest but they're cheap a...goodvery poor[0.06084448844194412, 0.0020018713548779488, 0...I've only had one fail and I've bought many of...
\n","

297 rows × 6 columns

\n","
"],"text/plain":[" category_confidence ... sentence\n","origin_index ... \n","0 0.304148 ... Hosa products are a good bang for the buck.\n","0 1.000000 ... I haven't looked up the specifications, but I'...\n","1 0.956961 ... I now use this cable to run from the output of...\n","1 1.000000 ... After I bought Monster Cable to hook up my ped...\n","1 2.000000 ... I had been using a high end Planet Waves cable...\n","... ... ... ...\n","47 0.841045 ... I like knowing that.\n","47 0.841045 ... ** EDIT: AS STATED ABOVE, YOU WILL NOT BE ABLE...\n","48 0.997217 ... Doe's not stay on to well, moves to much even ...\n","49 0.401975 ... These are not the greatest but they're cheap a...\n","49 1.000000 ... I've only had one fail and I've bought many of...\n","\n","[297 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"DL_5aY9b3jSd"},"source":["# 4. Evaluate the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"djtoZVKBw2WU","executionInfo":{"status":"ok","timestamp":1609472199894,"user_tz":-300,"elapsed":191838,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c3c86659-f624-486c-bb48-f514ac1e8fc0"},"source":["from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," average 0.63 0.76 0.69 123\n"," good 0.00 0.00 0.00 51\n"," very good 0.00 0.00 0.00 39\n"," very poor 0.50 0.87 0.63 84\n","\n"," accuracy 0.56 297\n"," macro avg 0.28 0.41 0.33 297\n","weighted avg 0.40 0.56 0.46 297\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"mhFKVN93o1ZO"},"source":["# 5. 
Lets try different Sentence Emebddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"CzJd8omao0gt","executionInfo":{"status":"ok","timestamp":1609472199895,"user_tz":-300,"elapsed":191822,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"0257d8c7-ce4a-4ac4-837c-5513639da2d4"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark 
NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') 
returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model 
labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ABHLgirmG1n9","executionInfo":{"status":"ok","timestamp":1609472351316,"user_tz":-300,"elapsed":343219,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b4823a3d-fcf8-4e40-e6dd-00051347b3a8"},"source":["# Load pipe with bert embeds\n","# using large embeddings can take a few hours..\n","# fitted_pipe = nlu.load('en.embed_sentence.bert_large_uncased train.classifier').fit(train_df)\n","fitted_pipe = nlu.load('en.embed_sentence.bert train.classifier').fit(train_df.iloc[:100])\n","\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:100])\n","from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_bert_base_uncased download started this may take some time.\n","Approximate size to download 392.5 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," average 0.29 1.00 0.45 27\n"," good 0.00 0.00 0.00 25\n"," very good 0.00 0.00 0.00 25\n"," very poor 1.00 0.30 0.47 23\n","\n"," accuracy 0.34 100\n"," macro avg 0.32 0.33 0.23 100\n","weighted avg 0.31 0.34 0.23 100\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nbpdZGoZPslz","executionInfo":{"status":"ok","timestamp":1609472368869,"user_tz":-300,"elapsed":360758,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a9574559-a655-464a-f159-b85c2c64b5b0"},"source":["# Load pipe with bert embeds\n","fitted_pipe = nlu.load('embed_sentence.bert train.classifier').fit(train_df.iloc[:100])\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:100])\n","from sklearn.metrics import 
classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L2_128 download started this may take some time.\n","Approximate size to download 16.1 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," average 0.00 0.00 0.00 27\n"," good 0.00 0.00 0.00 25\n"," very good 0.25 1.00 0.40 25\n"," very poor 0.00 0.00 0.00 23\n","\n"," accuracy 0.25 100\n"," macro avg 0.06 0.25 0.10 100\n","weighted avg 0.06 0.25 0.10 100\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"wYV7ivdsQY8Z","executionInfo":{"status":"ok","timestamp":1609475397624,"user_tz":-300,"elapsed":155002,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"759ff4c2-dcf3-4f65-bf3b-95f1f32e0a39"},"source":["from sklearn.metrics import classification_report\r\n","trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.classifier')\r\n","# We need to train longer and user smaller LR for NON-USE based sentence embeddings usually\r\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\r\n","# Also longer training gives more accuracy\r\n","trainable_pipe['classifier_dl'].setMaxEpochs(90) \r\n","trainable_pipe['classifier_dl'].setLr(0.0005) \r\n","fitted_pipe = trainable_pipe.fit(train_df)\r\n","# predict with the trainable pipeline on dataset and get predictions\r\n","preds = fitted_pipe.predict(train_df,output_level='document')\r\n","\r\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\r\n","preds.dropna(inplace=True)\r\n","print(classification_report(preds['y'], preds['category']))\r\n","\r\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," average 0.89 0.53 0.67 30\n"," good 0.62 0.83 0.71 30\n"," very good 0.93 0.47 0.62 30\n"," very poor 0.62 0.97 0.75 30\n","\n"," accuracy 0.70 120\n"," macro avg 0.77 0.70 0.69 120\n","weighted avg 0.77 0.70 0.69 120\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","executionInfo":{"status":"ok","timestamp":1609472722793,"user_tz":-300,"elapsed":714653,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"4b052d0c-f581-4c96-a7d5-91885525e96e"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"qeuzjy2IJTif"},"source":[""]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! 
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":127},"id":"SO4uz45MoRgp","executionInfo":{"status":"ok","timestamp":1609472740229,"user_tz":-300,"elapsed":732057,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c57f3bd7-4590-4a82-9d01-99b7dd1e7a34"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('It was really good ')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
documenten_embed_sentence_small_bert_L12_768_embeddingsclassifier_confidenceclassifier
origin_index
0It was really good[-0.034663598984479904, 0.3307220935821533, 0....0.529977very good
\n","
"],"text/plain":[" document ... classifier\n","origin_index ... \n","0 It was really good ... very good\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"e0CVlkk9v6Qi","executionInfo":{"status":"ok","timestamp":1609472740233,"user_tz":-300,"elapsed":732044,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a85b252c-2f3a-401d-8579-de7c2c9acbc1"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setClasses(['very good', 'very poor', 'average', 'good']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['very good', 'very poor', 'average', 'good']\n","pipe['classifier_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_class_text_classifier_demo_musical_instruments.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_musical_instruments.ipynb)\n","\n","\n","\n","\n","# Training a Deep Learning Classifier with NLU \n","## ClassifierDL (Multi-class Text Classification)\n","## 4 class Amazon Musical Instruments review classifier training\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline 
mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu > /dev/null\n","! pip install pyspark==2.4.7 > /dev/null\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download musical instruments classification dataset\r\n","\r\n","https://www.kaggle.com/eswarchandt/amazon-music-reviews\r\n","\r\n","dataset with products rated between 5 classes"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610787881309,"user_tz":-300,"elapsed":1350,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c7f2277f-e7a9-484a-cf3b-457bdc65e457"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/Musical_instruments_reviews.csv"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:04:04-- http://ckl-it.de/wp-content/uploads/2021/01/Musical_instruments_reviews.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 51708 (50K) [text/csv]\n","Saving to: ‘Musical_instruments_reviews.csv’\n","\n","Musical_instruments 100%[===================>] 50.50K 241KB/s in 0.2s \n","\n","2021-01-16 09:04:05 (241 KB/s) - ‘Musical_instruments_reviews.csv’ saved [51708/51708]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610787895917,"user_tz":-300,"elapsed":1017,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1c4ca0c9-9489-47b9-a85e-3a8a3cc092a6"},"source":["import pandas as pd\n","test_path = '/content/Musical_instruments_reviews.csv'\n","train_df = pd.read_csv(test_path,sep=\",\")\n","cols = [\"y\",\"text\"]\n","train_df = train_df[cols]\n","train_df\n","\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytext
0goodHosa products are a good bang for the buck. I ...
1averageI now use this cable to run from the output of...
2goodCheap and good texture rubber that does not ge...
3averageThese cables are a little thin compared to hos...
4averageIt is a decent cable. It does its job, but it ...
.........
115very poorIt just randomly pops off my bass, it's so sli...
116very goodThe primary job of this device is to block the...
117goodThe Hosa XLR cables are affordable and very he...
118averageIt's a cable, no frills, tangles pretty easy a...
119very poorIt hums, crackles, and I think I'm having prob...
\n","

120 rows × 2 columns

\n","
"],"text/plain":[" y text\n","0 good Hosa products are a good bang for the buck. I ...\n","1 average I now use this cable to run from the output of...\n","2 good Cheap and good texture rubber that does not ge...\n","3 average These cables are a little thin compared to hos...\n","4 average It is a decent cable. It does its job, but it ...\n",".. ... ...\n","115 very poor It just randomly pops off my bass, it's so sli...\n","116 very good The primary job of this device is to block the...\n","117 good The Hosa XLR cables are affordable and very he...\n","118 average It's a cable, no frills, tangles pretty easy a...\n","119 very poor It hums, crackles, and I think I'm having prob...\n","\n","[120 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":2}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.classifier')\n","\n","By default, the Universal Sentence Encoder Embeddings (USE) are being downloaded to provide embeddings for the classifier. You can use any of the 50+ other sentence Embeddings in NLU though!\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":671},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609472199891,"user_tz":-300,"elapsed":191855,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"bd493b9c-fa33-44af-e941-1000f0aa137d"},"source":["# load a trainable pipeline by specifying the train. 
prefix and fit it on a dataset with label and text columns\n","# Since there are no\n","\n","trainable_pipe = nlu.load('train.classifier')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50] )\n","\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50] )\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencetextycategorydefault_name_embeddingssentence
origin_index
00.304148Hosa products are a good bang for the buck. I ...goodaverage[0.07208353281021118, 0.028736615553498268, -0...Hosa products are a good bang for the buck.
01.000000Hosa products are a good bang for the buck. I ...goodaverage[0.056614313274621964, -0.04707420617341995, -...I haven't looked up the specifications, but I'...
10.956961I now use this cable to run from the output of...averageaverage[0.06778458505868912, -0.0052166287787258625, ...I now use this cable to run from the output of...
11.000000I now use this cable to run from the output of...averageaverage[0.06371542811393738, -0.022252758964896202, -...After I bought Monster Cable to hook up my ped...
12.000000I now use this cable to run from the output of...averageaverage[0.018308864906430244, 0.0024022769648581743, ...I had been using a high end Planet Waves cable...
.....................
470.841045Update: The right angle switched end started d...averageaverage[-0.013615701347589493, -0.04160430282354355, ...I like knowing that.
470.841045Update: The right angle switched end started d...averageaverage[0.02372647449374199, 0.04573449119925499, -0....** EDIT: AS STATED ABOVE, YOU WILL NOT BE ABLE...
480.997217Doe's not stay on to well, moves to much even ...averageaverage[0.08493339270353317, 0.047714825719594955, -0...Doe's not stay on to well, moves to much even ...
490.401975These are not the greatest but they're cheap a...goodvery poor[0.03083745203912258, 0.01701708696782589, -0....These are not the greatest but they're cheap a...
491.000000These are not the greatest but they're cheap a...goodvery poor[0.06084448844194412, 0.0020018713548779488, 0...I've only had one fail and I've bought many of...
\n","

297 rows × 6 columns

\n","
"],"text/plain":[" category_confidence ... sentence\n","origin_index ... \n","0 0.304148 ... Hosa products are a good bang for the buck.\n","0 1.000000 ... I haven't looked up the specifications, but I'...\n","1 0.956961 ... I now use this cable to run from the output of...\n","1 1.000000 ... After I bought Monster Cable to hook up my ped...\n","1 2.000000 ... I had been using a high end Planet Waves cable...\n","... ... ... ...\n","47 0.841045 ... I like knowing that.\n","47 0.841045 ... ** EDIT: AS STATED ABOVE, YOU WILL NOT BE ABLE...\n","48 0.997217 ... Doe's not stay on to well, moves to much even ...\n","49 0.401975 ... These are not the greatest but they're cheap a...\n","49 1.000000 ... I've only had one fail and I've bought many of...\n","\n","[297 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"DL_5aY9b3jSd"},"source":["# 4. Evaluate the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"djtoZVKBw2WU","executionInfo":{"status":"ok","timestamp":1609472199894,"user_tz":-300,"elapsed":191838,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c3c86659-f624-486c-bb48-f514ac1e8fc0"},"source":["from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," average 0.63 0.76 0.69 123\n"," good 0.00 0.00 0.00 51\n"," very good 0.00 0.00 0.00 39\n"," very poor 0.50 0.87 0.63 84\n","\n"," accuracy 0.56 297\n"," macro avg 0.28 0.41 0.33 297\n","weighted avg 0.40 0.56 0.46 297\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"mhFKVN93o1ZO"},"source":["# 5. 
Let's try different Sentence Embeddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"CzJd8omao0gt","executionInfo":{"status":"ok","timestamp":1609472199895,"user_tz":-300,"elapsed":191822,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"0257d8c7-ce4a-4ac4-837c-5513639da2d4"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possible sentence embedding we could use. Let's use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark 
NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') 
returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model 
labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ABHLgirmG1n9","executionInfo":{"status":"ok","timestamp":1609472351316,"user_tz":-300,"elapsed":343219,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b4823a3d-fcf8-4e40-e6dd-00051347b3a8"},"source":["# Load pipe with bert embeds\n","# using large embeddings can take a few hours..\n","# fitted_pipe = nlu.load('en.embed_sentence.bert_large_uncased train.classifier').fit(train_df)\n","fitted_pipe = nlu.load('en.embed_sentence.bert train.classifier').fit(train_df.iloc[:100])\n","\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:100])\n","from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_bert_base_uncased download started this may take some time.\n","Approximate size to download 392.5 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," average 0.29 1.00 0.45 27\n"," good 0.00 0.00 0.00 25\n"," very good 0.00 0.00 0.00 25\n"," very poor 1.00 0.30 0.47 23\n","\n"," accuracy 0.34 100\n"," macro avg 0.32 0.33 0.23 100\n","weighted avg 0.31 0.34 0.23 100\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nbpdZGoZPslz","executionInfo":{"status":"ok","timestamp":1609472368869,"user_tz":-300,"elapsed":360758,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a9574559-a655-464a-f159-b85c2c64b5b0"},"source":["# Load pipe with bert embeds\n","fitted_pipe = nlu.load('embed_sentence.bert train.classifier').fit(train_df.iloc[:100])\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:100])\n","from sklearn.metrics import 
classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L2_128 download started this may take some time.\n","Approximate size to download 16.1 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," average 0.00 0.00 0.00 27\n"," good 0.00 0.00 0.00 25\n"," very good 0.25 1.00 0.40 25\n"," very poor 0.00 0.00 0.00 23\n","\n"," accuracy 0.25 100\n"," macro avg 0.06 0.25 0.10 100\n","weighted avg 0.06 0.25 0.10 100\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"wYV7ivdsQY8Z","executionInfo":{"status":"ok","timestamp":1609475397624,"user_tz":-300,"elapsed":155002,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"759ff4c2-dcf3-4f65-bf3b-95f1f32e0a39"},"source":["from sklearn.metrics import classification_report\r\n","trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.classifier')\r\n","# We need to train longer and user smaller LR for NON-USE based sentence embeddings usually\r\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\r\n","# Also longer training gives more accuracy\r\n","trainable_pipe['classifier_dl'].setMaxEpochs(90) \r\n","trainable_pipe['classifier_dl'].setLr(0.0005) \r\n","fitted_pipe = trainable_pipe.fit(train_df)\r\n","# predict with the trainable pipeline on dataset and get predictions\r\n","preds = fitted_pipe.predict(train_df,output_level='document')\r\n","\r\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\r\n","preds.dropna(inplace=True)\r\n","print(classification_report(preds['y'], preds['category']))\r\n","\r\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," average 0.89 0.53 0.67 30\n"," good 0.62 0.83 0.71 30\n"," very good 0.93 0.47 0.62 30\n"," very poor 0.62 0.97 0.75 30\n","\n"," accuracy 0.70 120\n"," macro avg 0.77 0.70 0.69 120\n","weighted avg 0.77 0.70 0.69 120\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 6. Let's save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","executionInfo":{"status":"ok","timestamp":1609472722793,"user_tz":-300,"elapsed":714653,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"4b052d0c-f581-4c96-a7d5-91885525e96e"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"qeuzjy2IJTif"},"source":[""]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 7. Let's load the model from HDD.\n","This makes offline NLU usage possible! 
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":127},"id":"SO4uz45MoRgp","executionInfo":{"status":"ok","timestamp":1609472740229,"user_tz":-300,"elapsed":732057,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c57f3bd7-4590-4a82-9d01-99b7dd1e7a34"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('It was really good ')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
documenten_embed_sentence_small_bert_L12_768_embeddingsclassifier_confidenceclassifier
origin_index
0It was really good[-0.034663598984479904, 0.3307220935821533, 0....0.529977very good
\n","
"],"text/plain":[" document ... classifier\n","origin_index ... \n","0 It was really good ... very good\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"e0CVlkk9v6Qi","executionInfo":{"status":"ok","timestamp":1609472740233,"user_tz":-300,"elapsed":732044,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a85b252c-2f3a-401d-8579-de7c2c9acbc1"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setClasses(['very good', 'very poor', 'average', 'good']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['very good', 'very poor', 'average', 'good']\n","pipe['classifier_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_wine.ipynb b/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_wine.ipynb index b0cd3dbf..6447d424 100644 --- a/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_wine.ipynb +++ b/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_wine.ipynb @@ -1 +1 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_class_text_classifier_demo_wine.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_wine.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier with NLU \n","## ClassifierDL (Multi-class Text Classification)\n","With the [ClassifierDL 
model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","\n","! pip install nlu pyspark==2.4.7 > /dev/null \n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download wine review dataset \n","https://www.kaggle.com/zynicide/wine-reviews\n","dataset with products between 5 review classes"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610787966036,"user_tz":-300,"elapsed":2003,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d4fe7b73-eebc-4c11-8e58-de7ebeb1a556"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/winemag-data_first150k.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:05:28-- http://ckl-it.de/wp-content/uploads/2021/01/winemag-data_first150k.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... 
connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 1447273 (1.4M) [text/csv]\n","Saving to: ‘winemag-data_first150k.csv’\n","\n","winemag-data_first1 100%[===================>] 1.38M 1.74MB/s in 0.8s \n","\n","2021-01-16 09:05:30 (1.74 MB/s) - ‘winemag-data_first150k.csv’ saved [1447273/1447273]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610787968921,"user_tz":-300,"elapsed":925,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"393355a3-3aea-4805-a0ef-87ad8be6bf8e"},"source":["import pandas as pd\n","test_path = '/content/winemag-data_first150k.csv'\n","train_df = pd.read_csv(test_path,sep=\",\")\n","cols = [\"y\",\"text\"]\n","train_df = train_df[cols]\n","train_df\n","\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytext
0acceptableThis wine is closed, tight and possibly still ...
1bestThis wine shows growing intensity the longer i...
2goodThis moderately aromatic wine conveys Red Hots...
3bestThis feels slightly softer in the mouth than t...
4bestA terrific Pinot, and one of the few that abso...
.........
5055very goodA classic Napa Valley Chardonnay, this is smoo...
5056very goodThe wine from this estate perched high above C...
5057very goodDistinct and delicious aromas of crème brûlÃ...
5058goodSmooth, deep aromas of licorice and blackberry...
5059very goodWonderfully aromatic fruit rises from the glas...
\n","

5060 rows × 2 columns

\n","
"],"text/plain":[" y text\n","0 acceptable This wine is closed, tight and possibly still ...\n","1 best This wine shows growing intensity the longer i...\n","2 good This moderately aromatic wine conveys Red Hots...\n","3 best This feels slightly softer in the mouth than t...\n","4 best A terrific Pinot, and one of the few that abso...\n","... ... ...\n","5055 very good A classic Napa Valley Chardonnay, this is smoo...\n","5056 very good The wine from this estate perched high above C...\n","5057 very good Distinct and delicious aromas of crème brûlÃ...\n","5058 good Smooth, deep aromas of licorice and blackberry...\n","5059 very good Wonderfully aromatic fruit rises from the glas...\n","\n","[5060 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":2}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.classifier')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":487},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609603533911,"user_tz":-300,"elapsed":208298,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"bbd738d4-b241-4994-979d-c5ca0989dc4b"},"source":["# load a trainable pipeline by specifying the train. 
prefix and fit it on a datset with label and text columns\n","# Since there are no\n","\n","trainable_pipe = nlu.load('train.classifier')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50] )\n","\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50] )\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textcategory_confidencedefault_name_embeddingsycategorysentence
origin_index
0This wine is closed, tight and possibly still ...0.386967[-0.00495561771094799, -0.07129219174385071, -...acceptablevery goodThis wine is closed, tight and possibly still ...
0This wine is closed, tight and possibly still ...1.000000[0.06035454571247101, 0.041439250111579895, -0...acceptablevery goodThere's also a cheesy character that is less a...
1This wine shows growing intensity the longer i...0.454979[0.0541062131524086, -0.0517219714820385, -0.0...bestbestThis wine shows growing intensity the longer i...
1This wine shows growing intensity the longer i...1.000000[-0.026120899245142937, -0.0751243457198143, -...bestbestAromas include red fruit, spice and rosemary: ...
2This moderately aromatic wine conveys Red Hots...0.433734[-0.0444738008081913, -0.05501846224069595, 0....goodvery goodThis moderately aromatic wine conveys Red Hots...
.....................
48Bright sparks of red currant, black cherry and...0.439928[-0.001167353126220405, -0.062205277383327484,...very goodvery goodBright sparks of red currant, black cherry and...
48Bright sparks of red currant, black cherry and...1.000000[0.001156042329967022, -0.041525647044181824, ...very goodvery goodBold tannins frame its dense layers of fruit, ...
49Based in the Jura, this producer blends grapes...0.730394[-0.012110762298107147, -0.06961353123188019, ...acceptablebestBased in the Jura, this producer blends grapes...
49Based in the Jura, this producer blends grapes...1.000000[0.05220193415880203, 0.04676426202058792, -0....acceptablebestIt's light, bright and just off dry, with attr...
49Based in the Jura, this producer blends grapes...2.000000[0.09586171805858612, 0.029351763427257538, -0...acceptablebestLike it's rosé partner, it is really for apé...
\n","

158 rows × 6 columns

\n","
"],"text/plain":[" text ... sentence\n","origin_index ... \n","0 This wine is closed, tight and possibly still ... ... This wine is closed, tight and possibly still ...\n","0 This wine is closed, tight and possibly still ... ... There's also a cheesy character that is less a...\n","1 This wine shows growing intensity the longer i... ... This wine shows growing intensity the longer i...\n","1 This wine shows growing intensity the longer i... ... Aromas include red fruit, spice and rosemary: ...\n","2 This moderately aromatic wine conveys Red Hots... ... This moderately aromatic wine conveys Red Hots...\n","... ... ... ...\n","48 Bright sparks of red currant, black cherry and... ... Bright sparks of red currant, black cherry and...\n","48 Bright sparks of red currant, black cherry and... ... Bold tannins frame its dense layers of fruit, ...\n","49 Based in the Jura, this producer blends grapes... ... Based in the Jura, this producer blends grapes...\n","49 Based in the Jura, this producer blends grapes... ... It's light, bright and just off dry, with attr...\n","49 Based in the Jura, this producer blends grapes... ... Like it's rosé partner, it is really for apé...\n","\n","[158 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":107},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609603536901,"user_tz":-300,"elapsed":211278,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d50adb08-b82b-4edc-e473-d273f153fa62"},"source":["fitted_pipe.predict('It was one of the best wines i ever tasted .')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencedefault_name_embeddingscategorysentence
origin_index
00.842125[0.06468033790588379, -0.040837567299604416, -...bestBitcoin is going to the moon!
\n","
"],"text/plain":[" category_confidence ... sentence\n","origin_index ... \n","0 0.842125 ... Bitcoin is going to the moon!\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609603536903,"user_tz":-300,"elapsed":211274,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e9ed8d20-ed9d-4522-bf9e-ed7414f7a686"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setMaxEpochs(3) | Info: Maximum number of epochs to train | Currently set to : 3\n","pipe['classifier_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['classifier_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['classifier_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['classifier_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":620},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609603618345,"user_tz":-300,"elapsed":11545,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"be93ca05-7e98-484b-9c24-a09976430afc"},"source":["# Train longer!\n","trainable_pipe['classifier_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:100])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," acceptable 0.00 0.00 0.00 22\n"," best 0.71 0.89 0.79 28\n"," good 0.42 0.96 0.58 28\n"," very good 0.00 0.00 0.00 22\n","\n"," accuracy 0.52 100\n"," macro avg 0.28 0.46 0.34 100\n","weighted avg 0.32 0.52 0.38 100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textdocumentdefault_name_embeddingscategory_confidenceycategory
origin_index
0This wine is closed, tight and possibly still ...This wine is closed, tight and possibly still ...[0.02915436401963234, -0.0378003790974617, -0....0.584848acceptablegood
1This wine shows growing intensity the longer i...This wine shows growing intensity the longer i...[0.019120197743177414, -0.06991834938526154, 0...0.875611bestbest
2This moderately aromatic wine conveys Red Hots...This moderately aromatic wine conveys Red Hots...[-0.025461390614509583, -0.02650509588420391, ...0.783311goodgood
3This feels slightly softer in the mouth than t...This feels slightly softer in the mouth than t...[0.011777156963944435, 0.008188367821276188, -...0.711578bestgood
4A terrific Pinot, and one of the few that abso...A terrific Pinot, and one of the few that abso...[0.014174058102071285, -0.057778846472501755, ...0.794139bestbest
.....................
95Radiator dust, lees and vanilla cookie aromas ...Radiator dust, lees and vanilla cookie aromas ...[-0.009873664006590843, 0.0033919725101441145,...0.792627acceptablegood
96You'll detect aromas reminiscent of wood shop ...You'll detect aromas reminiscent of wood shop ...[0.03787693753838539, -0.030119985342025757, -...0.573790acceptablegood
97The old vines on the steep slopes of the Heili...The old vines on the steep slopes of the Heili...[0.020556319504976273, -0.059675734490156174, ...0.919109bestbest
98This wine takes time to unravel and reveal its...This wine takes time to unravel and reveal its...[-0.00832163542509079, -0.029637429863214493, ...0.485587very goodbest
99Buttery oak aromas cover up any white-fruit ch...Buttery oak aromas cover up any white-fruit ch...[0.02920656092464924, -0.05507100373506546, -0...0.768109acceptablegood
\n","

100 rows × 6 columns

\n","
"],"text/plain":[" text ... category\n","origin_index ... \n","0 This wine is closed, tight and possibly still ... ... good\n","1 This wine shows growing intensity the longer i... ... best\n","2 This moderately aromatic wine conveys Red Hots... ... good\n","3 This feels slightly softer in the mouth than t... ... good\n","4 A terrific Pinot, and one of the few that abso... ... best\n","... ... ... ...\n","95 Radiator dust, lees and vanilla cookie aromas ... ... good\n","96 You'll detect aromas reminiscent of wood shop ... ... good\n","97 The old vines on the steep slopes of the Heili... ... best\n","98 This wine takes time to unravel and reveal its... ... best\n","99 Buttery oak aromas cover up any white-fruit ch... ... good\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609603620060,"user_tz":-300,"elapsed":1698,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"977caf3a-f20a-4f44-fd09-15069e2a6ef0"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"IKK_Ii_gjJfF","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609609274404,"user_tz":-300,"elapsed":92614,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"31f4bb74-2906-4d84-f353-2cb946407c63"},"source":["from sklearn.metrics import classification_report\n","trainable_pipe = 
nlu.load('en.embed_sentence.small_bert_L12_768 train.classifier')\n","# We need to train longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['classifier_dl'].setMaxEpochs(90) \n","trainable_pipe['classifier_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," acceptable 0.78 0.84 0.81 1265\n"," best 0.87 0.90 0.88 1265\n"," good 0.59 0.54 0.56 1265\n"," very good 0.62 0.60 0.61 1265\n","\n"," accuracy 0.72 5060\n"," macro avg 0.71 0.72 0.72 5060\n","weighted avg 0.71 0.72 0.72 5060\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. 
Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609609549407,"user_tz":-300,"elapsed":275012,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b307a3f0-a9eb-4332-eb86-c17cfb97aaf1"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":107},"executionInfo":{"status":"ok","timestamp":1609609567537,"user_tz":-300,"elapsed":18138,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"511b976f-b1cd-41a4-d425-555fe38c0e0a"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('It was one of the best wines i ever tasted .')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
classifierclassifier_confidencedocumenten_embed_sentence_small_bert_L12_768_embeddings
origin_index
0good0.515783Tesla plans to invest 10M into the ML sector[0.15737222135066986, 0.2598555386066437, 0.85...
\n","
"],"text/plain":[" classifier ... en_embed_sentence_small_bert_L12_768_embeddings\n","origin_index ... \n","0 good ... [0.15737222135066986, 0.2598555386066437, 0.85...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":12}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609609567540,"user_tz":-300,"elapsed":99,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c05f0c7f-b826-45eb-a038-b9d9f1b12f7b"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setClasses(['very good', 'acceptable', 'best', 'good']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['very good', 'acceptable', 'best', 'good']\n","pipe['classifier_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_class_text_classifier_demo_wine.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_wine.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier with NLU \n","## ClassifierDL (Multi-class Text Classification)\n","## 4 class WineEnthusiast Wine review classifier training\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline 
mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","\n","! pip install nlu pyspark==2.4.7 > /dev/null \n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download wine review dataset \n","https://www.kaggle.com/zynicide/wine-reviews\n","dataset with products between 4 review classes"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610787966036,"user_tz":-300,"elapsed":2003,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d4fe7b73-eebc-4c11-8e58-de7ebeb1a556"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/winemag-data_first150k.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:05:28-- http://ckl-it.de/wp-content/uploads/2021/01/winemag-data_first150k.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 1447273 (1.4M) [text/csv]\n","Saving to: ‘winemag-data_first150k.csv’\n","\n","winemag-data_first1 100%[===================>] 1.38M 1.74MB/s in 0.8s \n","\n","2021-01-16 09:05:30 (1.74 MB/s) - ‘winemag-data_first150k.csv’ saved [1447273/1447273]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610787968921,"user_tz":-300,"elapsed":925,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"393355a3-3aea-4805-a0ef-87ad8be6bf8e"},"source":["import pandas as pd\n","test_path = '/content/winemag-data_first150k.csv'\n","train_df = pd.read_csv(test_path,sep=\",\")\n","cols = [\"y\",\"text\"]\n","train_df = train_df[cols]\n","train_df\n","\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytext
0acceptableThis wine is closed, tight and possibly still ...
1bestThis wine shows growing intensity the longer i...
2goodThis moderately aromatic wine conveys Red Hots...
3bestThis feels slightly softer in the mouth than t...
4bestA terrific Pinot, and one of the few that abso...
.........
5055very goodA classic Napa Valley Chardonnay, this is smoo...
5056very goodThe wine from this estate perched high above C...
5057very goodDistinct and delicious aromas of crème brûlÃ...
5058goodSmooth, deep aromas of licorice and blackberry...
5059very goodWonderfully aromatic fruit rises from the glas...
\n","

5060 rows × 2 columns

\n","
"],"text/plain":[" y text\n","0 acceptable This wine is closed, tight and possibly still ...\n","1 best This wine shows growing intensity the longer i...\n","2 good This moderately aromatic wine conveys Red Hots...\n","3 best This feels slightly softer in the mouth than t...\n","4 best A terrific Pinot, and one of the few that abso...\n","... ... ...\n","5055 very good A classic Napa Valley Chardonnay, this is smoo...\n","5056 very good The wine from this estate perched high above C...\n","5057 very good Distinct and delicious aromas of crème brûlÃ...\n","5058 good Smooth, deep aromas of licorice and blackberry...\n","5059 very good Wonderfully aromatic fruit rises from the glas...\n","\n","[5060 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":2}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.classifier')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":487},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609603533911,"user_tz":-300,"elapsed":208298,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"bbd738d4-b241-4994-979d-c5ca0989dc4b"},"source":["# load a trainable pipeline by specifying the train. 
prefix and fit it on a dataset with label and text columns\n","# Since there is no separate test split in this demo, the pipe is fitted and evaluated on the same first 50 rows\n","\n","trainable_pipe = nlu.load('train.classifier')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50] )\n","\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50] )\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textcategory_confidencedefault_name_embeddingsycategorysentence
origin_index
0This wine is closed, tight and possibly still ...0.386967[-0.00495561771094799, -0.07129219174385071, -...acceptablevery goodThis wine is closed, tight and possibly still ...
0This wine is closed, tight and possibly still ...1.000000[0.06035454571247101, 0.041439250111579895, -0...acceptablevery goodThere's also a cheesy character that is less a...
1This wine shows growing intensity the longer i...0.454979[0.0541062131524086, -0.0517219714820385, -0.0...bestbestThis wine shows growing intensity the longer i...
1This wine shows growing intensity the longer i...1.000000[-0.026120899245142937, -0.0751243457198143, -...bestbestAromas include red fruit, spice and rosemary: ...
2This moderately aromatic wine conveys Red Hots...0.433734[-0.0444738008081913, -0.05501846224069595, 0....goodvery goodThis moderately aromatic wine conveys Red Hots...
.....................
48Bright sparks of red currant, black cherry and...0.439928[-0.001167353126220405, -0.062205277383327484,...very goodvery goodBright sparks of red currant, black cherry and...
48Bright sparks of red currant, black cherry and...1.000000[0.001156042329967022, -0.041525647044181824, ...very goodvery goodBold tannins frame its dense layers of fruit, ...
49Based in the Jura, this producer blends grapes...0.730394[-0.012110762298107147, -0.06961353123188019, ...acceptablebestBased in the Jura, this producer blends grapes...
49Based in the Jura, this producer blends grapes...1.000000[0.05220193415880203, 0.04676426202058792, -0....acceptablebestIt's light, bright and just off dry, with attr...
49Based in the Jura, this producer blends grapes...2.000000[0.09586171805858612, 0.029351763427257538, -0...acceptablebestLike it's rosé partner, it is really for apé...
\n","

158 rows × 6 columns

\n","
"],"text/plain":[" text ... sentence\n","origin_index ... \n","0 This wine is closed, tight and possibly still ... ... This wine is closed, tight and possibly still ...\n","0 This wine is closed, tight and possibly still ... ... There's also a cheesy character that is less a...\n","1 This wine shows growing intensity the longer i... ... This wine shows growing intensity the longer i...\n","1 This wine shows growing intensity the longer i... ... Aromas include red fruit, spice and rosemary: ...\n","2 This moderately aromatic wine conveys Red Hots... ... This moderately aromatic wine conveys Red Hots...\n","... ... ... ...\n","48 Bright sparks of red currant, black cherry and... ... Bright sparks of red currant, black cherry and...\n","48 Bright sparks of red currant, black cherry and... ... Bold tannins frame its dense layers of fruit, ...\n","49 Based in the Jura, this producer blends grapes... ... Based in the Jura, this producer blends grapes...\n","49 Based in the Jura, this producer blends grapes... ... It's light, bright and just off dry, with attr...\n","49 Based in the Jura, this producer blends grapes... ... Like it's rosé partner, it is really for apé...\n","\n","[158 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":107},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609603536901,"user_tz":-300,"elapsed":211278,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d50adb08-b82b-4edc-e473-d273f153fa62"},"source":["fitted_pipe.predict('It was one of the best wines i ever tasted .')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencedefault_name_embeddingscategorysentence
origin_index
00.842125[0.06468033790588379, -0.040837567299604416, -...bestBitcoin is going to the moon!
\n","
"],"text/plain":[" category_confidence ... sentence\n","origin_index ... \n","0 0.842125 ... Bitcoin is going to the moon!\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609603536903,"user_tz":-300,"elapsed":211274,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e9ed8d20-ed9d-4522-bf9e-ed7414f7a686"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setMaxEpochs(3) | Info: Maximum number of epochs to train | Currently set to : 3\n","pipe['classifier_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['classifier_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['classifier_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['classifier_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":620},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609603618345,"user_tz":-300,"elapsed":11545,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"be93ca05-7e98-484b-9c24-a09976430afc"},"source":["# Train longer!\n","trainable_pipe['classifier_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:100])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:100],output_level='document')\n","\n","# The sentence detector that is part of the pipe generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," acceptable 0.00 0.00 0.00 22\n"," best 0.71 0.89 0.79 28\n"," good 0.42 0.96 0.58 28\n"," very good 0.00 0.00 0.00 22\n","\n"," accuracy 0.52 100\n"," macro avg 0.28 0.46 0.34 100\n","weighted avg 0.32 0.52 0.38 100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textdocumentdefault_name_embeddingscategory_confidenceycategory
origin_index
0This wine is closed, tight and possibly still ...This wine is closed, tight and possibly still ...[0.02915436401963234, -0.0378003790974617, -0....0.584848acceptablegood
1This wine shows growing intensity the longer i...This wine shows growing intensity the longer i...[0.019120197743177414, -0.06991834938526154, 0...0.875611bestbest
2This moderately aromatic wine conveys Red Hots...This moderately aromatic wine conveys Red Hots...[-0.025461390614509583, -0.02650509588420391, ...0.783311goodgood
3This feels slightly softer in the mouth than t...This feels slightly softer in the mouth than t...[0.011777156963944435, 0.008188367821276188, -...0.711578bestgood
4A terrific Pinot, and one of the few that abso...A terrific Pinot, and one of the few that abso...[0.014174058102071285, -0.057778846472501755, ...0.794139bestbest
.....................
95Radiator dust, lees and vanilla cookie aromas ...Radiator dust, lees and vanilla cookie aromas ...[-0.009873664006590843, 0.0033919725101441145,...0.792627acceptablegood
96You'll detect aromas reminiscent of wood shop ...You'll detect aromas reminiscent of wood shop ...[0.03787693753838539, -0.030119985342025757, -...0.573790acceptablegood
97The old vines on the steep slopes of the Heili...The old vines on the steep slopes of the Heili...[0.020556319504976273, -0.059675734490156174, ...0.919109bestbest
98This wine takes time to unravel and reveal its...This wine takes time to unravel and reveal its...[-0.00832163542509079, -0.029637429863214493, ...0.485587very goodbest
99Buttery oak aromas cover up any white-fruit ch...Buttery oak aromas cover up any white-fruit ch...[0.02920656092464924, -0.05507100373506546, -0...0.768109acceptablegood
\n","

100 rows × 6 columns

\n","
"],"text/plain":[" text ... category\n","origin_index ... \n","0 This wine is closed, tight and possibly still ... ... good\n","1 This wine shows growing intensity the longer i... ... best\n","2 This moderately aromatic wine conveys Red Hots... ... good\n","3 This feels slightly softer in the mouth than t... ... good\n","4 A terrific Pinot, and one of the few that abso... ... best\n","... ... ... ...\n","95 Radiator dust, lees and vanilla cookie aromas ... ... good\n","96 You'll detect aromas reminiscent of wood shop ... ... good\n","97 The old vines on the steep slopes of the Heili... ... best\n","98 This wine takes time to unravel and reveal its... ... best\n","99 Buttery oak aromas cover up any white-fruit ch... ... good\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609603620060,"user_tz":-300,"elapsed":1698,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"977caf3a-f20a-4f44-fd09-15069e2a6ef0"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"IKK_Ii_gjJfF","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609609274404,"user_tz":-300,"elapsed":92614,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"31f4bb74-2906-4d84-f353-2cb946407c63"},"source":["from sklearn.metrics import classification_report\n","trainable_pipe = 
nlu.load('en.embed_sentence.small_bert_L12_768 train.classifier')\n","# We usually need to train longer and use a smaller LR for NON-USE based sentence embeddings\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['classifier_dl'].setMaxEpochs(90) \n","trainable_pipe['classifier_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","# The sentence detector that is part of the pipe generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," acceptable 0.78 0.84 0.81 1265\n"," best 0.87 0.90 0.88 1265\n"," good 0.59 0.54 0.56 1265\n"," very good 0.62 0.60 0.61 1265\n","\n"," accuracy 0.72 5060\n"," macro avg 0.71 0.72 0.72 5060\n","weighted avg 0.71 0.72 0.72 5060\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. 
Let's save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609609549407,"user_tz":-300,"elapsed":275012,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b307a3f0-a9eb-4332-eb86-c17cfb97aaf1"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Let's load the model from HDD.\n","This makes Offline NLU usage possible! \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":107},"executionInfo":{"status":"ok","timestamp":1609609567537,"user_tz":-300,"elapsed":18138,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"511b976f-b1cd-41a4-d425-555fe38c0e0a"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('It was one of the best wines i ever tasted .')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
classifierclassifier_confidencedocumenten_embed_sentence_small_bert_L12_768_embeddings
origin_index
0good0.515783Tesla plans to invest 10M into the ML sector[0.15737222135066986, 0.2598555386066437, 0.85...
\n","
"],"text/plain":[" classifier ... en_embed_sentence_small_bert_L12_768_embeddings\n","origin_index ... \n","0 good ... [0.15737222135066986, 0.2598555386066437, 0.85...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":12}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609609567540,"user_tz":-300,"elapsed":99,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c05f0c7f-b826-45eb-a038-b9d9f1b12f7b"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setClasses(['very good', 'acceptable', 'best', 'good']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['very good', 'acceptable', 'best', 'good']\n","pipe['classifier_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/examples/colab/Training/multi_label_text_classification/NLU_traing_multi_label_classifier_E2e.ipynb b/examples/colab/Training/multi_label_text_classification/NLU_traing_multi_label_classifier_E2e.ipynb index 8da274a9..4da08e1d 100644 --- a/examples/colab/Training/multi_label_text_classification/NLU_traing_multi_label_classifier_E2e.ipynb +++ b/examples/colab/Training/multi_label_text_classification/NLU_traing_multi_label_classifier_E2e.ipynb @@ -1 +1 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_traing_multi_label_classifier_E2e.ipynb","provenance":[],"collapsed_sections":[],"toc_visible":true,"authorship_tag":"ABX9TyPWs1vEzUhNrsIX3nR13R72"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/collab/Training/multi_label_text_classification/NLU_traing_multi_label_classifier_E2e.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier for multi label prediction\n","MultiClassifierDL is a Multi-label Text Classification. 
MultiClassifierDL uses a Bidirectional GRU with Convolution model that we have built inside TensorFlow and supports up to 100 classes. The input to MultiClassifierDL is Sentence Embeddings such as state-of-the-art UniversalSentenceEncoder, BertSentenceEmbeddings, or SentenceEmbeddings\n","\n","\n","\n","### Multi ClassifierDL (Multi-class Text Classification with multiple classes per sentence)\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#multiclassifierdl-multi-label-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu pyspark==2.4.7 > /dev/null \n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download E2E Challenge multi token label classification dataset\n","\n","http://www.macs.hw.ac.uk/InteractionLab/E2E/"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":586},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1609529840956,"user_tz":-60,"elapsed":160088,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"39519c61-f3a4-4369-f72a-1f0590d9bb2e"},"source":["import pandas as pd\n","!wget http://ckl-it.de/wp-content/uploads/2020/12/e2e.csv\n","test_path = '/content/e2e.csv'\n","train_df = pd.read_csv(test_path)\n","train_df = train_df.iloc[:3000]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-01 19:37:17-- http://ckl-it.de/wp-content/uploads/2020/12/e2e.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 1322591 (1.3M) [text/csv]\n","Saving to: ‘e2e.csv’\n","\n","e2e.csv 100%[===================>] 1.26M 715KB/s in 1.8s \n","\n","2021-01-01 19:37:20 (715 KB/s) - ‘e2e.csv’ saved [1322591/1322591]\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Unnamed: 0ytextorigin_index
00name[Blue Spice],eatType[coffee shop],area[cit...A coffee shop in the city centre area called B...0
11name[Blue Spice],eatType[coffee shop],area[cit...Blue Spice is a coffee shop in city centre.1
22name[Blue Spice],eatType[coffee shop],area[riv...There is a coffee shop Blue Spice in the river...2
33name[Blue Spice],eatType[coffee shop],area[riv...At the riverside, there is a coffee shop calle...3
44name[Blue Spice],eatType[coffee shop],customer...The coffee shop Blue Spice is based near Crown...4
...............
29952995name[The Punter],eatType[restaurant],food[Indi...Near Express by Holiday Inn, in the riverside ...2995
29962996name[The Punter],eatType[restaurant],food[Indi...In the riverside area, near Express by Holiday...2996
29972997name[The Punter],eatType[restaurant],food[Indi...The Punter is a restaurant with Indian food in...2997
29982998name[The Punter],eatType[restaurant],food[Indi...The Punter is a low rated restaurant that serv...2998
29992999name[The Punter],eatType[restaurant],food[Indi...The Punter is a restaurant providing Indian fo...2999
\n","

3000 rows × 4 columns

\n","
"],"text/plain":[" Unnamed: 0 ... origin_index\n","0 0 ... 0\n","1 1 ... 1\n","2 2 ... 2\n","3 3 ... 3\n","4 4 ... 4\n","... ... ... ...\n","2995 2995 ... 2995\n","2996 2996 ... 2996\n","2997 2997 ... 2997\n","2998 2998 ... 2998\n","2999 2999 ... 2999\n","\n","[3000 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":2}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.multi_classifier')\n","\n","By default, the Universal Sentence Encoder Embeddings (USE) are being downloaded to provide embeddings for the classifier. You can use any of the 50+ other sentence Embeddings in NLU though!\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":471},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609522208492,"user_tz":-60,"elapsed":410284,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"bda58bd4-d56e-471c-deea-37fe6e06af5e"},"source":["import nlu\n","# load a trainable pipeline by specifying the train prefix \n","unfitted_pipe = nlu.load('train.multi_classifier')\n","#configure epochs\n","unfitted_pipe['multi_classifier'].setMaxEpochs(25)\n","# fit it on a dataset with label='y' and text columns. Labels separated by ','\n","fitted_pipe = unfitted_pipe.fit(train_df[['y','text']], label_seperator=',')\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df[['y','text']])\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
multi_classifier_classesmulti_classifier_confidencesdefault_name_embeddingsysentencetext
origin_index
0[near[Café Rouge], name[Blue Spice], near[Rain...[0.8555223, 0.99276984, 0.87128675, 0.9852337,...[0.026563657447695732, -0.058662936091423035, ...name[Blue Spice],eatType[coffee shop],area[cit...A coffee shop in the city centre area called B...A coffee shop in the city centre area called B...
1[near[Café Rouge], name[Blue Spice], near[Rain...[0.8142674, 0.99920505, 0.93413615, 0.98056525...[0.040952689945697784, -0.04276810586452484, -...name[Blue Spice],eatType[coffee shop],area[cit...Blue Spice is a coffee shop in city centre.Blue Spice is a coffee shop in city centre.
2[name[Blue Spice], near[Rainbow Vegetarian Caf...[0.9966337, 0.9044244, 0.904881, 0.56231284, 0...[0.03141527622938156, -0.05154882371425629, 0....name[Blue Spice],eatType[coffee shop],area[riv...There is a coffee shop Blue Spice in the river...There is a coffee shop Blue Spice in the river...
3[near[Café Rouge], name[Blue Spice], near[Rain...[0.5227911, 0.99917483, 0.9394022, 0.8839797, ...[0.03584946319460869, -0.036898739635944366, -...name[Blue Spice],eatType[coffee shop],area[riv...At the riverside, there is a coffee shop calle...At the riverside, there is a coffee shop calle...
4[near[Café Rouge], name[Blue Spice], near[Crow...[0.5985904, 0.7892299, 0.8222753, 0.9378743, 0...[0.0405426099896431, -0.0243277158588171, 0.00...name[Blue Spice],eatType[coffee shop],customer...The coffee shop Blue Spice is based near Crown...The coffee shop Blue Spice is based near Crown...
.....................
2998[near[Express by Holiday Inn], priceRange[high...[0.9999982, 0.8146039, 0.99978125, 0.8511795, ...[0.05956212058663368, 0.019028551876544952, -0...name[The Punter],eatType[restaurant],food[Indi...The Punter has a price range of less than £20,...The Punter is a low rated restaurant that serv...
2999[near[Express by Holiday Inn], food[Indian], c...[0.99992794, 0.99981034, 0.5099642, 0.9994041,...[0.04296032711863518, -0.0015949805965647101, ...name[The Punter],eatType[restaurant],food[Indi...The Punter is a restaurant providing Indian fo...The Punter is a restaurant providing Indian fo...
2999[near[Express by Holiday Inn], food[Indian], c...[0.99992794, 0.99981034, 0.5099642, 0.9994041,...[0.023289771750569344, 0.056861914694309235, -...name[The Punter],eatType[restaurant],food[Indi...It is located in the riverside.The Punter is a restaurant providing Indian fo...
2999[near[Express by Holiday Inn], food[Indian], c...[0.99992794, 0.99981034, 0.5099642, 0.9994041,...[0.033101629465818405, 0.06402800232172012, 0....name[The Punter],eatType[restaurant],food[Indi...It is near Express by Holiday Inn.The Punter is a restaurant providing Indian fo...
2999[near[Express by Holiday Inn], food[Indian], c...[0.99992794, 0.99981034, 0.5099642, 0.9994041,...[0.01677701249718666, 0.04876527190208435, -0....name[The Punter],eatType[restaurant],food[Indi...Its customer rating is low.The Punter is a restaurant providing Indian fo...
\n","

5266 rows × 6 columns

\n","
"],"text/plain":[" multi_classifier_classes ... text\n","origin_index ... \n","0 [near[Café Rouge], name[Blue Spice], near[Rain... ... A coffee shop in the city centre area called B...\n","1 [near[Café Rouge], name[Blue Spice], near[Rain... ... Blue Spice is a coffee shop in city centre.\n","2 [name[Blue Spice], near[Rainbow Vegetarian Caf... ... There is a coffee shop Blue Spice in the river...\n","3 [near[Café Rouge], name[Blue Spice], near[Rain... ... At the riverside, there is a coffee shop calle...\n","4 [near[Café Rouge], name[Blue Spice], near[Crow... ... The coffee shop Blue Spice is based near Crown...\n","... ... ... ...\n","2998 [near[Express by Holiday Inn], priceRange[high... ... The Punter is a low rated restaurant that serv...\n","2999 [near[Express by Holiday Inn], food[Indian], c... ... The Punter is a restaurant providing Indian fo...\n","2999 [near[Express by Holiday Inn], food[Indian], c... ... The Punter is a restaurant providing Indian fo...\n","2999 [near[Express by Holiday Inn], food[Indian], c... ... The Punter is a restaurant providing Indian fo...\n","2999 [near[Express by Holiday Inn], food[Indian], c... ... The Punter is a restaurant providing Indian fo...\n","\n","[5266 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"DL_5aY9b3jSd"},"source":["# 4. 
Evaluate the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"0YDA2KunCeqQ","executionInfo":{"status":"ok","timestamp":1609522209572,"user_tz":-60,"elapsed":411343,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"37539c88-d18c-425d-a28d-4127dc9bbb99"},"source":["from sklearn.preprocessing import MultiLabelBinarizer\n","from sklearn.metrics import classification_report\n","from sklearn.metrics import f1_score\n","from sklearn.metrics import roc_auc_score\n","mlb = MultiLabelBinarizer()\n","mlb = mlb.fit(preds.y.str.split(','))\n","y_true = mlb.transform(preds['y'].str.split(','))\n","y_pred = mlb.transform(preds.multi_classifier_classes.str.join(',').str.split(','))\n","print(\"Classification report: \\n\", (classification_report(y_true, y_pred)))\n","print(\"F1 micro averaging:\",(f1_score(y_true, y_pred, average='micro')))\n","print(\"ROC: \",(roc_auc_score(y_true, y_pred, average=\"micro\")))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Classification report: \n"," precision recall f1-score support\n","\n"," 0 0.78 0.97 0.86 1700\n"," 1 0.95 0.83 0.89 2914\n"," 2 0.56 0.64 0.60 576\n"," 3 0.33 0.28 0.30 367\n"," 4 0.38 0.55 0.45 455\n"," 5 0.30 0.76 0.42 599\n"," 6 0.37 0.77 0.50 550\n"," 7 0.69 0.44 0.54 457\n"," 8 0.99 0.72 0.84 337\n"," 9 0.91 0.98 0.95 2211\n"," 10 0.89 0.99 0.94 2718\n"," 11 0.53 0.89 0.67 1914\n"," 12 0.88 0.79 0.84 3154\n"," 13 0.79 0.98 0.87 1087\n"," 14 0.69 0.97 0.81 1118\n"," 15 0.98 0.64 0.78 1077\n"," 16 0.82 0.96 0.88 671\n"," 17 0.71 1.00 0.83 323\n"," 18 0.57 0.65 0.61 130\n"," 19 0.96 0.80 0.87 186\n"," 20 0.77 0.99 0.87 366\n"," 21 0.57 0.20 0.30 40\n"," 22 0.36 0.10 0.15 42\n"," 23 0.00 0.00 0.00 4\n"," 24 0.97 0.97 0.97 322\n"," 25 0.99 0.83 0.91 338\n"," 26 0.00 0.00 0.00 6\n"," 27 0.00 0.00 0.00 34\n"," 28 0.94 0.99 
0.96 1273\n"," 29 0.96 1.00 0.98 987\n"," 30 0.90 0.99 0.95 1140\n"," 31 0.74 0.85 0.79 186\n"," 32 0.45 0.98 0.62 528\n"," 33 0.91 0.97 0.93 662\n"," 34 0.90 0.60 0.72 116\n"," 35 0.67 0.09 0.16 22\n"," 36 0.58 0.98 0.73 484\n"," 37 0.88 0.77 0.82 601\n"," 38 0.94 0.97 0.96 711\n"," 39 0.99 0.96 0.97 620\n"," 40 0.96 0.99 0.98 526\n"," 41 0.98 1.00 0.99 1410\n"," 42 1.00 0.28 0.43 72\n"," 43 0.00 0.00 0.00 8\n"," 44 0.00 0.00 0.00 8\n"," 45 0.00 0.00 0.00 4\n"," 46 0.35 0.42 0.38 595\n"," 47 0.34 0.66 0.45 849\n"," 48 0.57 0.44 0.50 627\n"," 49 0.69 0.53 0.60 767\n"," 50 0.31 0.32 0.32 347\n"," 51 0.25 0.53 0.34 453\n","\n"," micro avg 0.73 0.84 0.78 36692\n"," macro avg 0.64 0.65 0.62 36692\n","weighted avg 0.78 0.84 0.80 36692\n"," samples avg 0.76 0.84 0.79 36692\n","\n","F1 micro averaging: 0.7831856729396004\n","ROC: 0.8980818453315285\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"mhFKVN93o1ZO"},"source":["# 5. Let's try different Sentence Embeddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"CzJd8omao0gt","executionInfo":{"status":"ok","timestamp":1609522209573,"user_tz":-60,"elapsed":411328,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"ce35ce12-fbc8-4e0f-c9a1-6feaf68da7b0"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possible sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"0ofYHpu7sloS","executionInfo":{"status":"ok","timestamp":1609529895586,"user_tz":-60,"elapsed":54621,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"44154b28-c1db-4f58-bab1-7ac185fa40b8"},"source":["# You might need to 
restart your notebook to clear RAM, or you might run out of Memory when fitting\n","import nlu\n","pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.multi_classifier')\n","pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['en_embed_sentence_small_bert_L12_768'] has settable params:\n","pipe['en_embed_sentence_small_bert_L12_768'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['en_embed_sentence_small_bert_L12_768'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['en_embed_sentence_small_bert_L12_768'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['en_embed_sentence_small_bert_L12_768'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['en_embed_sentence_small_bert_L12_768'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['en_embed_sentence_small_bert_L12_768'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence 
detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['multi_classifier'] has settable params:\n","pipe['multi_classifier'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['multi_classifier'].setLr(0.001) | Info: Learning Rate | Currently set to : 
0.001\n","pipe['multi_classifier'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['multi_classifier'].setValidationSplit(0.0) | Info: Choose the proportion of training dataset to be validated against the model on each Epoch. The value should be between 0.0 and 1.0 and by default it is 0.0 and off. | Currently set to : 0.0\n","pipe['multi_classifier'].setThreshold(0.5) | Info: The minimum threshold for each label to be accepted. Default is 0.5 | Currently set to : 0.5\n","pipe['multi_classifier'].setRandomSeed(44) | Info: Random seed | Currently set to : 44\n","pipe['multi_classifier'].setShufflePerEpoch(False) | Info: whether to shuffle the training data on each Epoch | Currently set to : False\n","pipe['multi_classifier'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"ABHLgirmG1n9","colab":{"base_uri":"https://localhost:8080/","height":417},"executionInfo":{"status":"ok","timestamp":1609531977887,"user_tz":-60,"elapsed":2136903,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"d312277d-3826-46e2-c67e-4a10a7116c4f"},"source":["\n","# Load pipe with bert embeds and configure hyper parameters\n","# using large embeddings can take a few hours..\n","pipe['multi_classifier'].setMaxEpochs(100) \n","pipe['multi_classifier'].setLr(0.0005) \n","fitted_pipe = pipe.fit(train_df[['y','text']],label_seperator=',')\n","preds = fitted_pipe.predict(train_df)\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textmulti_classifier_classesUnnamed: 0documentymulti_classifier_confidencesen_embed_sentence_small_bert_L12_768_embeddings
origin_index
0A coffee shop in the city centre area called B...[name[Blue Spice], eatType[coffee shop], area[...0A coffee shop in the city centre area called B...name[Blue Spice],eatType[coffee shop],area[cit...[0.9740321, 0.99538183, 0.92562413][-0.1427491158246994, 0.5036071538925171, 0.07...
1Blue Spice is a coffee shop in city centre.[name[Blue Spice], eatType[coffee shop], area[...1Blue Spice is a coffee shop in city centre.name[Blue Spice],eatType[coffee shop],area[cit...[0.9950888, 0.9989519, 0.8684354][-0.20697341859340668, 0.5286431312561035, 0.2...
2There is a coffee shop Blue Spice in the river...[name[Blue Spice], eatType[coffee shop], area[...2There is a coffee shop Blue Spice in the river...name[Blue Spice],eatType[coffee shop],area[riv...[0.95310336, 0.9655487, 0.9785502][0.005826675333082676, 0.49930453300476074, -0...
3At the riverside, there is a coffee shop calle...[name[Blue Spice], eatType[coffee shop], area[...3At the riverside, there is a coffee shop calle...name[Blue Spice],eatType[coffee shop],area[riv...[0.8858954, 0.931189, 0.9990605][0.12191159278154373, 0.37966835498809814, 0.0...
4The coffee shop Blue Spice is based near Crown...[near[Crowne Plaza Hotel], customer rating[5 o...4The coffee shop Blue Spice is based near Crown...name[Blue Spice],eatType[coffee shop],customer...[0.99912286, 0.7930833, 0.9730882][-0.37350592017173767, 0.1885937601327896, 0.1...
........................
2995Near Express by Holiday Inn, in the riverside ...[near[Express by Holiday Inn], customer rating...2995Near Express by Holiday Inn, in the riverside ...name[The Punter],eatType[restaurant],food[Indi...[0.9476669, 0.9914391, 0.8395983, 0.98047745, ...[0.0485222227871418, 0.2381688505411148, 0.227...
2996In the riverside area, near Express by Holiday...[near[Express by Holiday Inn], food[Indian], c...2996In the riverside area, near Express by Holiday...name[The Punter],eatType[restaurant],food[Indi...[0.94435394, 0.6119035, 0.7891044, 0.9885667, ...[0.06879807263612747, 0.23580998182296753, 0.1...
2997The Punter is a restaurant with Indian food in...[near[Express by Holiday Inn], food[Indian], c...2997The Punter is a restaurant with Indian food in...name[The Punter],eatType[restaurant],food[Indi...[0.99509084, 0.9424925, 0.7625178, 0.9907007, ...[-0.12667560577392578, 0.22056235373020172, 0....
2998The Punter is a low rated restaurant that serv...[near[Express by Holiday Inn], food[Indian], c...2998The Punter is a low rated restaurant that serv...name[The Punter],eatType[restaurant],food[Indi...[0.99541605, 0.9715836, 0.87202764, 0.99880993...[-0.13057495653629303, 0.21937601268291473, 0....
2999The Punter is a restaurant providing Indian fo...[near[Express by Holiday Inn], food[Indian], c...2999The Punter is a restaurant providing Indian fo...name[The Punter],eatType[restaurant],food[Indi...[0.98941034, 0.99086845, 0.82358456, 0.985973,...[-0.10767646133899689, 0.2529870569705963, 0.2...
\n","

3000 rows × 7 columns

\n","
"],"text/plain":[" text ... en_embed_sentence_small_bert_L12_768_embeddings\n","origin_index ... \n","0 A coffee shop in the city centre area called B... ... [-0.1427491158246994, 0.5036071538925171, 0.07...\n","1 Blue Spice is a coffee shop in city centre. ... [-0.20697341859340668, 0.5286431312561035, 0.2...\n","2 There is a coffee shop Blue Spice in the river... ... [0.005826675333082676, 0.49930453300476074, -0...\n","3 At the riverside, there is a coffee shop calle... ... [0.12191159278154373, 0.37966835498809814, 0.0...\n","4 The coffee shop Blue Spice is based near Crown... ... [-0.37350592017173767, 0.1885937601327896, 0.1...\n","... ... ... ...\n","2995 Near Express by Holiday Inn, in the riverside ... ... [0.0485222227871418, 0.2381688505411148, 0.227...\n","2996 In the riverside area, near Express by Holiday... ... [0.06879807263612747, 0.23580998182296753, 0.1...\n","2997 The Punter is a restaurant with Indian food in... ... [-0.12667560577392578, 0.22056235373020172, 0....\n","2998 The Punter is a low rated restaurant that serv... ... [-0.13057495653629303, 0.21937601268291473, 0....\n","2999 The Punter is a restaurant providing Indian fo... ... 
[-0.10767646133899689, 0.2529870569705963, 0.2...\n","\n","[3000 rows x 7 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"code","metadata":{"id":"E7ah2LM6tIhG","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609531978935,"user_tz":-60,"elapsed":2137934,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"2636e995-5ef1-4457-895e-adcdf34f40c1"},"source":["from sklearn.preprocessing import MultiLabelBinarizer\n","from sklearn.metrics import classification_report\n","from sklearn.metrics import f1_score\n","from sklearn.metrics import roc_auc_score\n","mlb = MultiLabelBinarizer()\n","mlb = mlb.fit(preds.y.str.split(','))\n","y_true = mlb.transform(preds['y'].str.split(','))\n","y_pred = mlb.transform(preds.multi_classifier_classes.str.join(',').str.split(','))\n","print(\"Classification report: \\n\", (classification_report(y_true, y_pred)))\n","print(\"F1 micro averaging:\",(f1_score(y_true, y_pred, average='micro')))\n","print(\"ROC: \",(roc_auc_score(y_true, y_pred, average=\"micro\")))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Classification report: \n"," precision recall f1-score support\n","\n"," 0 0.97 0.98 0.97 846\n"," 1 0.99 0.98 0.98 1642\n"," 2 0.93 0.70 0.80 300\n"," 3 0.90 0.56 0.69 209\n"," 4 0.91 0.72 0.81 246\n"," 5 0.91 0.79 0.85 333\n"," 6 0.95 0.84 0.90 288\n"," 7 0.91 0.82 0.86 260\n"," 8 0.99 0.99 0.99 267\n"," 9 1.00 0.99 0.99 1275\n"," 10 0.99 0.99 0.99 1458\n"," 11 0.96 0.90 0.93 976\n"," 12 0.95 0.97 0.96 1844\n"," 13 1.00 0.99 0.99 492\n"," 14 0.99 0.98 0.99 613\n"," 15 0.97 0.98 0.98 632\n"," 16 0.99 0.97 0.98 365\n"," 17 1.00 0.97 0.99 145\n"," 18 1.00 0.93 0.96 83\n"," 19 1.00 0.98 0.99 136\n"," 20 1.00 0.99 0.99 228\n"," 21 1.00 0.69 0.82 36\n"," 22 1.00 0.95 0.97 38\n"," 23 1.00 0.50 0.67 4\n"," 24 1.00 1.00 
1.00 222\n"," 25 0.99 1.00 0.99 240\n"," 26 1.00 0.67 0.80 6\n"," 27 1.00 0.94 0.97 32\n"," 28 0.99 1.00 0.99 703\n"," 29 1.00 1.00 1.00 524\n"," 30 1.00 1.00 1.00 612\n"," 31 1.00 0.94 0.97 88\n"," 32 1.00 0.97 0.98 267\n"," 33 1.00 1.00 1.00 297\n"," 34 1.00 0.98 0.99 82\n"," 35 1.00 0.89 0.94 18\n"," 36 1.00 0.97 0.98 251\n"," 37 1.00 1.00 1.00 348\n"," 38 1.00 1.00 1.00 393\n"," 39 1.00 0.99 1.00 390\n"," 40 1.00 0.98 0.99 333\n"," 41 1.00 1.00 1.00 794\n"," 42 1.00 0.98 0.99 52\n"," 43 1.00 0.50 0.67 8\n"," 44 1.00 0.88 0.93 8\n"," 45 0.00 0.00 0.00 4\n"," 46 0.90 0.78 0.83 303\n"," 47 0.89 0.70 0.78 425\n"," 48 0.89 0.78 0.83 349\n"," 49 0.93 0.80 0.86 373\n"," 50 0.82 0.42 0.56 170\n"," 51 0.95 0.67 0.79 220\n","\n"," micro avg 0.98 0.94 0.95 20228\n"," macro avg 0.96 0.86 0.90 20228\n","weighted avg 0.97 0.94 0.95 20228\n"," samples avg 0.98 0.94 0.96 20228\n","\n","F1 micro averaging: 0.9549113112810033\n","ROC: 0.9659676982287029\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609535641300,"user_tz":-60,"elapsed":243837,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"458863e7-50f4-4cfe-dfdd-1b3edde4e8d8"},"source":["stored_model_path = './models/multi_classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/multi_classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! 
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":103},"executionInfo":{"status":"ok","timestamp":1609535674624,"user_tz":-60,"elapsed":274401,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"589912b1-32b5-4333-fe84-46cf40658451"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Tesla plans to invest 10M into the ML sector')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
multi_classifier_classesdocumentmulti_classifier_confidencesen_embed_sentence_small_bert_L12_768_embeddings
origin_index
0[customer rating[high], customer rating[low], ...Tesla plans to invest 10M into the ML sector[0.9597453, 0.6497742, 0.986845, 0.5315694, 0....[0.15737222135066986, 0.2598555386066437, 0.85...
\n","
"],"text/plain":[" multi_classifier_classes ... en_embed_sentence_small_bert_L12_768_embeddings\n","origin_index ... \n","0 [customer rating[high], customer rating[low], ... ... [0.15737222135066986, 0.2598555386066437, 0.85...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609535674627,"user_tz":-60,"elapsed":273679,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"926c0a81-339a-49b8-e9ea-7f3ce049ca01"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['multi_classifier'] has settable params:\n","pipe['multi_classifier'].setThreshold(0.5) | Info: The minimum threshold for each label to be accepted. Default is 0.5 | Currently set to : 0.5\n","pipe['multi_classifier'].setClasses(['name[Clowns]', 'name[Cotto]', 'near[Burger King]', 'near[Crowne Plaza Hotel]', 'customer rating[high]', 'near[Avalon]', 'near[The Bakers]', 'near[Ranch]', 'eatType[restaurant]', 'near[All Bar One]', 'customer rating[low]', 'near[Café Sicilia]', 'food[Indian]', 'eatType[pub]', 'name[Green Man]', 'name[Strada]', 'eatType[coffee shop]', 'name[Loch Fyne]', 'customer rating[5 out of 5]', 'near[Express by Holiday Inn]', 'food[French]', 'name[The Mill]', 'food[Japanese]', 'name[The Plough]', 'name[Cocum]', 'name[The Phoenix]', 'priceRange[cheap]', 'near[Rainbow Vegetarian Café]', 'near[The Rice Boat]', 'customer rating[3 out of 5]', 'customer rating[1 out of 5]', 'name[The Cricketers]', 'area[riverside]', 'name[Blue Spice]', 'priceRange[£20-25]', 'priceRange[less than £20]', 'priceRange[moderate]', 'priceRange[high]', 'name[Giraffe]', 'customer rating[average]', 'food[Fast food]', 'near[Café Rouge]', 'area[city centre]', 'familyFriendly[no]', 'food[Chinese]', 'food[Italian]', 'near[Raja Indian Cuisine]', 'priceRange[more than £30]', 'name[The Punter]', 
'food[English]', 'near[The Sorrento]', 'familyFriendly[yes]']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['name[Clowns]', 'name[Cotto]', 'near[Burger King]', 'near[Crowne Plaza Hotel]', 'customer rating[high]', 'near[Avalon]', 'near[The Bakers]', 'near[Ranch]', 'eatType[restaurant]', 'near[All Bar One]', 'customer rating[low]', 'near[Café Sicilia]', 'food[Indian]', 'eatType[pub]', 'name[Green Man]', 'name[Strada]', 'eatType[coffee shop]', 'name[Loch Fyne]', 'customer rating[5 out of 5]', 'near[Express by Holiday Inn]', 'food[French]', 'name[The Mill]', 'food[Japanese]', 'name[The Plough]', 'name[Cocum]', 'name[The Phoenix]', 'priceRange[cheap]', 'near[Rainbow Vegetarian Café]', 'near[The Rice Boat]', 'customer rating[3 out of 5]', 'customer rating[1 out of 5]', 'name[The Cricketers]', 'area[riverside]', 'name[Blue Spice]', 'priceRange[£20-25]', 'priceRange[less than £20]', 'priceRange[moderate]', 'priceRange[high]', 'name[Giraffe]', 'customer rating[average]', 'food[Fast food]', 'near[Café Rouge]', 'area[city centre]', 'familyFriendly[no]', 'food[Chinese]', 'food[Italian]', 'near[Raja Indian Cuisine]', 'priceRange[more than £30]', 'name[The Punter]', 'food[English]', 'near[The Sorrento]', 'familyFriendly[yes]']\n","pipe['multi_classifier'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"M1LjAwJVJxun"},"source":[" "],"execution_count":null,"outputs":[]}]} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_traing_multi_label_classifier_E2e.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_label_text_classification/NLU_traing_multi_label_classifier_E2e.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier for multi label prediction\n","MultiClassifierDL is a multi-label text classifier. MultiClassifierDL uses a Bidirectional GRU with Convolution model that we have built inside TensorFlow and supports up to 100 classes. The input to MultiClassifierDL is Sentence Embeddings such as state-of-the-art UniversalSentenceEncoder, BertSentenceEmbeddings, or SentenceEmbeddings\n","\n","\n","\n","### Multi ClassifierDL (Multi-class Text Classification with multiple classes per sentence)\n","With the [MultiClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#multiclassifierdl-multi-label-text-classification) from Spark NLP you can achieve State Of the Art results on any multi-label text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu pyspark==2.4.7 > /dev/null \n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download E2E Challenge multi token label classification dataset\n","\n","http://www.macs.hw.ac.uk/InteractionLab/E2E/"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":586},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1609529840956,"user_tz":-60,"elapsed":160088,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"39519c61-f3a4-4369-f72a-1f0590d9bb2e"},"source":["import pandas as pd\n","!wget http://ckl-it.de/wp-content/uploads/2020/12/e2e.csv\n","test_path = '/content/e2e.csv'\n","train_df = pd.read_csv(test_path)\n","train_df = train_df.iloc[:3000]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-01 19:37:17-- http://ckl-it.de/wp-content/uploads/2020/12/e2e.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 1322591 (1.3M) [text/csv]\n","Saving to: ‘e2e.csv’\n","\n","e2e.csv 100%[===================>] 1.26M 715KB/s in 1.8s \n","\n","2021-01-01 19:37:20 (715 KB/s) - ‘e2e.csv’ saved [1322591/1322591]\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Unnamed: 0ytextorigin_index
00name[Blue Spice],eatType[coffee shop],area[cit...A coffee shop in the city centre area called B...0
11name[Blue Spice],eatType[coffee shop],area[cit...Blue Spice is a coffee shop in city centre.1
22name[Blue Spice],eatType[coffee shop],area[riv...There is a coffee shop Blue Spice in the river...2
33name[Blue Spice],eatType[coffee shop],area[riv...At the riverside, there is a coffee shop calle...3
44name[Blue Spice],eatType[coffee shop],customer...The coffee shop Blue Spice is based near Crown...4
...............
29952995name[The Punter],eatType[restaurant],food[Indi...Near Express by Holiday Inn, in the riverside ...2995
29962996name[The Punter],eatType[restaurant],food[Indi...In the riverside area, near Express by Holiday...2996
29972997name[The Punter],eatType[restaurant],food[Indi...The Punter is a restaurant with Indian food in...2997
29982998name[The Punter],eatType[restaurant],food[Indi...The Punter is a low rated restaurant that serv...2998
29992999name[The Punter],eatType[restaurant],food[Indi...The Punter is a restaurant providing Indian fo...2999
\n","

3000 rows × 4 columns

\n","
"],"text/plain":[" Unnamed: 0 ... origin_index\n","0 0 ... 0\n","1 1 ... 1\n","2 2 ... 2\n","3 3 ... 3\n","4 4 ... 4\n","... ... ... ...\n","2995 2995 ... 2995\n","2996 2996 ... 2996\n","2997 2997 ... 2997\n","2998 2998 ... 2998\n","2999 2999 ... 2999\n","\n","[3000 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":2}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.multi_classifier')\n","\n","By default, the Universal Sentence Encoder Embeddings (USE) are being downloaded to provide embeddings for the classifier. You can use any of the 50+ other sentence embeddings in NLU, though!\n","\n","Your dataset's label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":471},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609522208492,"user_tz":-60,"elapsed":410284,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"bda58bd4-d56e-471c-deea-37fe6e06af5e"},"source":["import nlu\n","# load a trainable pipeline by specifying the train prefix \n","unfitted_pipe = nlu.load('train.multi_classifier')\n","#configure epochs\n","unfitted_pipe['multi_classifier'].setMaxEpochs(25)\n","# fit it on a dataset with label='y' and text columns. Labels separated by ','\n","fitted_pipe = unfitted_pipe.fit(train_df[['y','text']], label_seperator=',')\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df[['y','text']])\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
multi_classifier_classesmulti_classifier_confidencesdefault_name_embeddingsysentencetext
origin_index
0[near[Café Rouge], name[Blue Spice], near[Rain...[0.8555223, 0.99276984, 0.87128675, 0.9852337,...[0.026563657447695732, -0.058662936091423035, ...name[Blue Spice],eatType[coffee shop],area[cit...A coffee shop in the city centre area called B...A coffee shop in the city centre area called B...
1[near[Café Rouge], name[Blue Spice], near[Rain...[0.8142674, 0.99920505, 0.93413615, 0.98056525...[0.040952689945697784, -0.04276810586452484, -...name[Blue Spice],eatType[coffee shop],area[cit...Blue Spice is a coffee shop in city centre.Blue Spice is a coffee shop in city centre.
2[name[Blue Spice], near[Rainbow Vegetarian Caf...[0.9966337, 0.9044244, 0.904881, 0.56231284, 0...[0.03141527622938156, -0.05154882371425629, 0....name[Blue Spice],eatType[coffee shop],area[riv...There is a coffee shop Blue Spice in the river...There is a coffee shop Blue Spice in the river...
3[near[Café Rouge], name[Blue Spice], near[Rain...[0.5227911, 0.99917483, 0.9394022, 0.8839797, ...[0.03584946319460869, -0.036898739635944366, -...name[Blue Spice],eatType[coffee shop],area[riv...At the riverside, there is a coffee shop calle...At the riverside, there is a coffee shop calle...
4[near[Café Rouge], name[Blue Spice], near[Crow...[0.5985904, 0.7892299, 0.8222753, 0.9378743, 0...[0.0405426099896431, -0.0243277158588171, 0.00...name[Blue Spice],eatType[coffee shop],customer...The coffee shop Blue Spice is based near Crown...The coffee shop Blue Spice is based near Crown...
.....................
2998[near[Express by Holiday Inn], priceRange[high...[0.9999982, 0.8146039, 0.99978125, 0.8511795, ...[0.05956212058663368, 0.019028551876544952, -0...name[The Punter],eatType[restaurant],food[Indi...The Punter has a price range of less than £20,...The Punter is a low rated restaurant that serv...
2999[near[Express by Holiday Inn], food[Indian], c...[0.99992794, 0.99981034, 0.5099642, 0.9994041,...[0.04296032711863518, -0.0015949805965647101, ...name[The Punter],eatType[restaurant],food[Indi...The Punter is a restaurant providing Indian fo...The Punter is a restaurant providing Indian fo...
2999[near[Express by Holiday Inn], food[Indian], c...[0.99992794, 0.99981034, 0.5099642, 0.9994041,...[0.023289771750569344, 0.056861914694309235, -...name[The Punter],eatType[restaurant],food[Indi...It is located in the riverside.The Punter is a restaurant providing Indian fo...
2999[near[Express by Holiday Inn], food[Indian], c...[0.99992794, 0.99981034, 0.5099642, 0.9994041,...[0.033101629465818405, 0.06402800232172012, 0....name[The Punter],eatType[restaurant],food[Indi...It is near Express by Holiday Inn.The Punter is a restaurant providing Indian fo...
2999[near[Express by Holiday Inn], food[Indian], c...[0.99992794, 0.99981034, 0.5099642, 0.9994041,...[0.01677701249718666, 0.04876527190208435, -0....name[The Punter],eatType[restaurant],food[Indi...Its customer rating is low.The Punter is a restaurant providing Indian fo...
\n","

5266 rows × 6 columns

\n","
"],"text/plain":[" multi_classifier_classes ... text\n","origin_index ... \n","0 [near[Café Rouge], name[Blue Spice], near[Rain... ... A coffee shop in the city centre area called B...\n","1 [near[Café Rouge], name[Blue Spice], near[Rain... ... Blue Spice is a coffee shop in city centre.\n","2 [name[Blue Spice], near[Rainbow Vegetarian Caf... ... There is a coffee shop Blue Spice in the river...\n","3 [near[Café Rouge], name[Blue Spice], near[Rain... ... At the riverside, there is a coffee shop calle...\n","4 [near[Café Rouge], name[Blue Spice], near[Crow... ... The coffee shop Blue Spice is based near Crown...\n","... ... ... ...\n","2998 [near[Express by Holiday Inn], priceRange[high... ... The Punter is a low rated restaurant that serv...\n","2999 [near[Express by Holiday Inn], food[Indian], c... ... The Punter is a restaurant providing Indian fo...\n","2999 [near[Express by Holiday Inn], food[Indian], c... ... The Punter is a restaurant providing Indian fo...\n","2999 [near[Express by Holiday Inn], food[Indian], c... ... The Punter is a restaurant providing Indian fo...\n","2999 [near[Express by Holiday Inn], food[Indian], c... ... The Punter is a restaurant providing Indian fo...\n","\n","[5266 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"DL_5aY9b3jSd"},"source":["# 4. 
Evaluate the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"0YDA2KunCeqQ","executionInfo":{"status":"ok","timestamp":1609522209572,"user_tz":-60,"elapsed":411343,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"37539c88-d18c-425d-a28d-4127dc9bbb99"},"source":["from sklearn.preprocessing import MultiLabelBinarizer\n","from sklearn.metrics import classification_report\n","from sklearn.metrics import f1_score\n","from sklearn.metrics import roc_auc_score\n","mlb = MultiLabelBinarizer()\n","mlb = mlb.fit(preds.y.str.split(','))\n","y_true = mlb.transform(preds['y'].str.split(','))\n","y_pred = mlb.transform(preds.multi_classifier_classes.str.join(',').str.split(','))\n","print(\"Classification report: \\n\", (classification_report(y_true, y_pred)))\n","print(\"F1 micro averaging:\",(f1_score(y_true, y_pred, average='micro')))\n","print(\"ROC: \",(roc_auc_score(y_true, y_pred, average=\"micro\")))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Classification report: \n"," precision recall f1-score support\n","\n"," 0 0.78 0.97 0.86 1700\n"," 1 0.95 0.83 0.89 2914\n"," 2 0.56 0.64 0.60 576\n"," 3 0.33 0.28 0.30 367\n"," 4 0.38 0.55 0.45 455\n"," 5 0.30 0.76 0.42 599\n"," 6 0.37 0.77 0.50 550\n"," 7 0.69 0.44 0.54 457\n"," 8 0.99 0.72 0.84 337\n"," 9 0.91 0.98 0.95 2211\n"," 10 0.89 0.99 0.94 2718\n"," 11 0.53 0.89 0.67 1914\n"," 12 0.88 0.79 0.84 3154\n"," 13 0.79 0.98 0.87 1087\n"," 14 0.69 0.97 0.81 1118\n"," 15 0.98 0.64 0.78 1077\n"," 16 0.82 0.96 0.88 671\n"," 17 0.71 1.00 0.83 323\n"," 18 0.57 0.65 0.61 130\n"," 19 0.96 0.80 0.87 186\n"," 20 0.77 0.99 0.87 366\n"," 21 0.57 0.20 0.30 40\n"," 22 0.36 0.10 0.15 42\n"," 23 0.00 0.00 0.00 4\n"," 24 0.97 0.97 0.97 322\n"," 25 0.99 0.83 0.91 338\n"," 26 0.00 0.00 0.00 6\n"," 27 0.00 0.00 0.00 34\n"," 28 0.94 0.99 
0.96 1273\n"," 29 0.96 1.00 0.98 987\n"," 30 0.90 0.99 0.95 1140\n"," 31 0.74 0.85 0.79 186\n"," 32 0.45 0.98 0.62 528\n"," 33 0.91 0.97 0.93 662\n"," 34 0.90 0.60 0.72 116\n"," 35 0.67 0.09 0.16 22\n"," 36 0.58 0.98 0.73 484\n"," 37 0.88 0.77 0.82 601\n"," 38 0.94 0.97 0.96 711\n"," 39 0.99 0.96 0.97 620\n"," 40 0.96 0.99 0.98 526\n"," 41 0.98 1.00 0.99 1410\n"," 42 1.00 0.28 0.43 72\n"," 43 0.00 0.00 0.00 8\n"," 44 0.00 0.00 0.00 8\n"," 45 0.00 0.00 0.00 4\n"," 46 0.35 0.42 0.38 595\n"," 47 0.34 0.66 0.45 849\n"," 48 0.57 0.44 0.50 627\n"," 49 0.69 0.53 0.60 767\n"," 50 0.31 0.32 0.32 347\n"," 51 0.25 0.53 0.34 453\n","\n"," micro avg 0.73 0.84 0.78 36692\n"," macro avg 0.64 0.65 0.62 36692\n","weighted avg 0.78 0.84 0.80 36692\n"," samples avg 0.76 0.84 0.79 36692\n","\n","F1 micro averaging: 0.7831856729396004\n","ROC: 0.8980818453315285\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"mhFKVN93o1ZO"},"source":["# 5. Let's try different Sentence Embeddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"CzJd8omao0gt","executionInfo":{"status":"ok","timestamp":1609522209573,"user_tz":-60,"elapsed":411328,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"ce35ce12-fbc8-4e0f-c9a1-6feaf68da7b0"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possible sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"0ofYHpu7sloS","executionInfo":{"status":"ok","timestamp":1609529895586,"user_tz":-60,"elapsed":54621,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"44154b28-c1db-4f58-bab1-7ac185fa40b8"},"source":["# You might need to 
restart your notebook to clear RAM, or you might run out of Memory when fitting\n","import nlu\n","pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.multi_classifier')\n","pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['en_embed_sentence_small_bert_L12_768'] has settable params:\n","pipe['en_embed_sentence_small_bert_L12_768'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['en_embed_sentence_small_bert_L12_768'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['en_embed_sentence_small_bert_L12_768'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['en_embed_sentence_small_bert_L12_768'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['en_embed_sentence_small_bert_L12_768'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['en_embed_sentence_small_bert_L12_768'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence 
detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['multi_classifier'] has settable params:\n","pipe['multi_classifier'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['multi_classifier'].setLr(0.001) | Info: Learning Rate | Currently set to : 
0.001\n","pipe['multi_classifier'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['multi_classifier'].setValidationSplit(0.0) | Info: Choose the proportion of training dataset to be validated against the model on each Epoch. The value should be between 0.0 and 1.0 and by default it is 0.0 and off. | Currently set to : 0.0\n","pipe['multi_classifier'].setThreshold(0.5) | Info: The minimum threshold for each label to be accepted. Default is 0.5 | Currently set to : 0.5\n","pipe['multi_classifier'].setRandomSeed(44) | Info: Random seed | Currently set to : 44\n","pipe['multi_classifier'].setShufflePerEpoch(False) | Info: whether to shuffle the training data on each Epoch | Currently set to : False\n","pipe['multi_classifier'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"ABHLgirmG1n9","colab":{"base_uri":"https://localhost:8080/","height":417},"executionInfo":{"status":"ok","timestamp":1609531977887,"user_tz":-60,"elapsed":2136903,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"d312277d-3826-46e2-c67e-4a10a7116c4f"},"source":["\n","# Load pipe with bert embeds and configure hyper parameters\n","# using large embeddings can take a few hours..\n","pipe['multi_classifier'].setMaxEpochs(100) \n","pipe['multi_classifier'].setLr(0.0005) \n","fitted_pipe = pipe.fit(train_df[['y','text']],label_seperator=',')\n","preds = fitted_pipe.predict(train_df)\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textmulti_classifier_classesUnnamed: 0documentymulti_classifier_confidencesen_embed_sentence_small_bert_L12_768_embeddings
origin_index
0A coffee shop in the city centre area called B...[name[Blue Spice], eatType[coffee shop], area[...0A coffee shop in the city centre area called B...name[Blue Spice],eatType[coffee shop],area[cit...[0.9740321, 0.99538183, 0.92562413][-0.1427491158246994, 0.5036071538925171, 0.07...
1Blue Spice is a coffee shop in city centre.[name[Blue Spice], eatType[coffee shop], area[...1Blue Spice is a coffee shop in city centre.name[Blue Spice],eatType[coffee shop],area[cit...[0.9950888, 0.9989519, 0.8684354][-0.20697341859340668, 0.5286431312561035, 0.2...
2There is a coffee shop Blue Spice in the river...[name[Blue Spice], eatType[coffee shop], area[...2There is a coffee shop Blue Spice in the river...name[Blue Spice],eatType[coffee shop],area[riv...[0.95310336, 0.9655487, 0.9785502][0.005826675333082676, 0.49930453300476074, -0...
3At the riverside, there is a coffee shop calle...[name[Blue Spice], eatType[coffee shop], area[...3At the riverside, there is a coffee shop calle...name[Blue Spice],eatType[coffee shop],area[riv...[0.8858954, 0.931189, 0.9990605][0.12191159278154373, 0.37966835498809814, 0.0...
4The coffee shop Blue Spice is based near Crown...[near[Crowne Plaza Hotel], customer rating[5 o...4The coffee shop Blue Spice is based near Crown...name[Blue Spice],eatType[coffee shop],customer...[0.99912286, 0.7930833, 0.9730882][-0.37350592017173767, 0.1885937601327896, 0.1...
........................
2995Near Express by Holiday Inn, in the riverside ...[near[Express by Holiday Inn], customer rating...2995Near Express by Holiday Inn, in the riverside ...name[The Punter],eatType[restaurant],food[Indi...[0.9476669, 0.9914391, 0.8395983, 0.98047745, ...[0.0485222227871418, 0.2381688505411148, 0.227...
2996In the riverside area, near Express by Holiday...[near[Express by Holiday Inn], food[Indian], c...2996In the riverside area, near Express by Holiday...name[The Punter],eatType[restaurant],food[Indi...[0.94435394, 0.6119035, 0.7891044, 0.9885667, ...[0.06879807263612747, 0.23580998182296753, 0.1...
2997The Punter is a restaurant with Indian food in...[near[Express by Holiday Inn], food[Indian], c...2997The Punter is a restaurant with Indian food in...name[The Punter],eatType[restaurant],food[Indi...[0.99509084, 0.9424925, 0.7625178, 0.9907007, ...[-0.12667560577392578, 0.22056235373020172, 0....
2998The Punter is a low rated restaurant that serv...[near[Express by Holiday Inn], food[Indian], c...2998The Punter is a low rated restaurant that serv...name[The Punter],eatType[restaurant],food[Indi...[0.99541605, 0.9715836, 0.87202764, 0.99880993...[-0.13057495653629303, 0.21937601268291473, 0....
2999The Punter is a restaurant providing Indian fo...[near[Express by Holiday Inn], food[Indian], c...2999The Punter is a restaurant providing Indian fo...name[The Punter],eatType[restaurant],food[Indi...[0.98941034, 0.99086845, 0.82358456, 0.985973,...[-0.10767646133899689, 0.2529870569705963, 0.2...
\n","

3000 rows × 7 columns

\n","
"],"text/plain":[" text ... en_embed_sentence_small_bert_L12_768_embeddings\n","origin_index ... \n","0 A coffee shop in the city centre area called B... ... [-0.1427491158246994, 0.5036071538925171, 0.07...\n","1 Blue Spice is a coffee shop in city centre. ... [-0.20697341859340668, 0.5286431312561035, 0.2...\n","2 There is a coffee shop Blue Spice in the river... ... [0.005826675333082676, 0.49930453300476074, -0...\n","3 At the riverside, there is a coffee shop calle... ... [0.12191159278154373, 0.37966835498809814, 0.0...\n","4 The coffee shop Blue Spice is based near Crown... ... [-0.37350592017173767, 0.1885937601327896, 0.1...\n","... ... ... ...\n","2995 Near Express by Holiday Inn, in the riverside ... ... [0.0485222227871418, 0.2381688505411148, 0.227...\n","2996 In the riverside area, near Express by Holiday... ... [0.06879807263612747, 0.23580998182296753, 0.1...\n","2997 The Punter is a restaurant with Indian food in... ... [-0.12667560577392578, 0.22056235373020172, 0....\n","2998 The Punter is a low rated restaurant that serv... ... [-0.13057495653629303, 0.21937601268291473, 0....\n","2999 The Punter is a restaurant providing Indian fo... ... 
[-0.10767646133899689, 0.2529870569705963, 0.2...\n","\n","[3000 rows x 7 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"code","metadata":{"id":"E7ah2LM6tIhG","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609531978935,"user_tz":-60,"elapsed":2137934,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"2636e995-5ef1-4457-895e-adcdf34f40c1"},"source":["from sklearn.preprocessing import MultiLabelBinarizer\n","from sklearn.metrics import classification_report\n","from sklearn.metrics import f1_score\n","from sklearn.metrics import roc_auc_score\n","mlb = MultiLabelBinarizer()\n","mlb = mlb.fit(preds.y.str.split(','))\n","y_true = mlb.transform(preds['y'].str.split(','))\n","y_pred = mlb.transform(preds.multi_classifier_classes.str.join(',').str.split(','))\n","print(\"Classification report: \\n\", (classification_report(y_true, y_pred)))\n","print(\"F1 micro averaging:\",(f1_score(y_true, y_pred, average='micro')))\n","print(\"ROC: \",(roc_auc_score(y_true, y_pred, average=\"micro\")))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Classification report: \n"," precision recall f1-score support\n","\n"," 0 0.97 0.98 0.97 846\n"," 1 0.99 0.98 0.98 1642\n"," 2 0.93 0.70 0.80 300\n"," 3 0.90 0.56 0.69 209\n"," 4 0.91 0.72 0.81 246\n"," 5 0.91 0.79 0.85 333\n"," 6 0.95 0.84 0.90 288\n"," 7 0.91 0.82 0.86 260\n"," 8 0.99 0.99 0.99 267\n"," 9 1.00 0.99 0.99 1275\n"," 10 0.99 0.99 0.99 1458\n"," 11 0.96 0.90 0.93 976\n"," 12 0.95 0.97 0.96 1844\n"," 13 1.00 0.99 0.99 492\n"," 14 0.99 0.98 0.99 613\n"," 15 0.97 0.98 0.98 632\n"," 16 0.99 0.97 0.98 365\n"," 17 1.00 0.97 0.99 145\n"," 18 1.00 0.93 0.96 83\n"," 19 1.00 0.98 0.99 136\n"," 20 1.00 0.99 0.99 228\n"," 21 1.00 0.69 0.82 36\n"," 22 1.00 0.95 0.97 38\n"," 23 1.00 0.50 0.67 4\n"," 24 1.00 1.00 
1.00 222\n"," 25 0.99 1.00 0.99 240\n"," 26 1.00 0.67 0.80 6\n"," 27 1.00 0.94 0.97 32\n"," 28 0.99 1.00 0.99 703\n"," 29 1.00 1.00 1.00 524\n"," 30 1.00 1.00 1.00 612\n"," 31 1.00 0.94 0.97 88\n"," 32 1.00 0.97 0.98 267\n"," 33 1.00 1.00 1.00 297\n"," 34 1.00 0.98 0.99 82\n"," 35 1.00 0.89 0.94 18\n"," 36 1.00 0.97 0.98 251\n"," 37 1.00 1.00 1.00 348\n"," 38 1.00 1.00 1.00 393\n"," 39 1.00 0.99 1.00 390\n"," 40 1.00 0.98 0.99 333\n"," 41 1.00 1.00 1.00 794\n"," 42 1.00 0.98 0.99 52\n"," 43 1.00 0.50 0.67 8\n"," 44 1.00 0.88 0.93 8\n"," 45 0.00 0.00 0.00 4\n"," 46 0.90 0.78 0.83 303\n"," 47 0.89 0.70 0.78 425\n"," 48 0.89 0.78 0.83 349\n"," 49 0.93 0.80 0.86 373\n"," 50 0.82 0.42 0.56 170\n"," 51 0.95 0.67 0.79 220\n","\n"," micro avg 0.98 0.94 0.95 20228\n"," macro avg 0.96 0.86 0.90 20228\n","weighted avg 0.97 0.94 0.95 20228\n"," samples avg 0.98 0.94 0.96 20228\n","\n","F1 micro averaging: 0.9549113112810033\n","ROC: 0.9659676982287029\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609535641300,"user_tz":-60,"elapsed":243837,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"458863e7-50f4-4cfe-dfdd-1b3edde4e8d8"},"source":["stored_model_path = './models/multi_classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/multi_classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! 
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":103},"executionInfo":{"status":"ok","timestamp":1609535674624,"user_tz":-60,"elapsed":274401,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"589912b1-32b5-4333-fe84-46cf40658451"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Tesla plans to invest 10M into the ML sector')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
multi_classifier_classesdocumentmulti_classifier_confidencesen_embed_sentence_small_bert_L12_768_embeddings
origin_index
0[customer rating[high], customer rating[low], ...Tesla plans to invest 10M into the ML sector[0.9597453, 0.6497742, 0.986845, 0.5315694, 0....[0.15737222135066986, 0.2598555386066437, 0.85...
\n","
"],"text/plain":[" multi_classifier_classes ... en_embed_sentence_small_bert_L12_768_embeddings\n","origin_index ... \n","0 [customer rating[high], customer rating[low], ... ... [0.15737222135066986, 0.2598555386066437, 0.85...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609535674627,"user_tz":-60,"elapsed":273679,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"926c0a81-339a-49b8-e9ea-7f3ce049ca01"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['multi_classifier'] has settable params:\n","pipe['multi_classifier'].setThreshold(0.5) | Info: The minimum threshold for each label to be accepted. Default is 0.5 | Currently set to : 0.5\n","pipe['multi_classifier'].setClasses(['name[Clowns]', 'name[Cotto]', 'near[Burger King]', 'near[Crowne Plaza Hotel]', 'customer rating[high]', 'near[Avalon]', 'near[The Bakers]', 'near[Ranch]', 'eatType[restaurant]', 'near[All Bar One]', 'customer rating[low]', 'near[Café Sicilia]', 'food[Indian]', 'eatType[pub]', 'name[Green Man]', 'name[Strada]', 'eatType[coffee shop]', 'name[Loch Fyne]', 'customer rating[5 out of 5]', 'near[Express by Holiday Inn]', 'food[French]', 'name[The Mill]', 'food[Japanese]', 'name[The Plough]', 'name[Cocum]', 'name[The Phoenix]', 'priceRange[cheap]', 'near[Rainbow Vegetarian Café]', 'near[The Rice Boat]', 'customer rating[3 out of 5]', 'customer rating[1 out of 5]', 'name[The Cricketers]', 'area[riverside]', 'name[Blue Spice]', 'priceRange[£20-25]', 'priceRange[less than £20]', 'priceRange[moderate]', 'priceRange[high]', 'name[Giraffe]', 'customer rating[average]', 'food[Fast food]', 'near[Café Rouge]', 'area[city centre]', 'familyFriendly[no]', 'food[Chinese]', 'food[Italian]', 'near[Raja Indian Cuisine]', 'priceRange[more than £30]', 'name[The Punter]', 
'food[English]', 'near[The Sorrento]', 'familyFriendly[yes]']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['name[Clowns]', 'name[Cotto]', 'near[Burger King]', 'near[Crowne Plaza Hotel]', 'customer rating[high]', 'near[Avalon]', 'near[The Bakers]', 'near[Ranch]', 'eatType[restaurant]', 'near[All Bar One]', 'customer rating[low]', 'near[Café Sicilia]', 'food[Indian]', 'eatType[pub]', 'name[Green Man]', 'name[Strada]', 'eatType[coffee shop]', 'name[Loch Fyne]', 'customer rating[5 out of 5]', 'near[Express by Holiday Inn]', 'food[French]', 'name[The Mill]', 'food[Japanese]', 'name[The Plough]', 'name[Cocum]', 'name[The Phoenix]', 'priceRange[cheap]', 'near[Rainbow Vegetarian Café]', 'near[The Rice Boat]', 'customer rating[3 out of 5]', 'customer rating[1 out of 5]', 'name[The Cricketers]', 'area[riverside]', 'name[Blue Spice]', 'priceRange[£20-25]', 'priceRange[less than £20]', 'priceRange[moderate]', 'priceRange[high]', 'name[Giraffe]', 'customer rating[average]', 'food[Fast food]', 'near[Café Rouge]', 'area[city centre]', 'familyFriendly[no]', 'food[Chinese]', 'food[Italian]', 'near[Raja Indian Cuisine]', 'priceRange[more than £30]', 'name[The Punter]', 'food[English]', 'near[The Sorrento]', 'familyFriendly[yes]']\n","pipe['multi_classifier'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"M1LjAwJVJxun"},"source":[" "],"execution_count":null,"outputs":[]}]} \ No newline at end of file diff --git a/examples/colab/Training/multi_label_text_classification/NLU_training_multi_token_label_text_classifier_stackoverflow_tags.ipynb b/examples/colab/Training/multi_label_text_classification/NLU_training_multi_token_label_text_classifier_stackoverflow_tags.ipynb index cd31b65d..0505a4cf 100644 --- 
a/examples/colab/Training/multi_label_text_classification/NLU_training_multi_token_label_text_classifier_stackoverflow_tags.ipynb +++ b/examples/colab/Training/multi_label_text_classification/NLU_training_multi_token_label_text_classifier_stackoverflow_tags.ipynb @@ -1 +1 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_token_label_text_classifier_stackoverflow_tags.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/collab/Training/NLU_training/multi_label_text_classification/NLU_training_multi_token_label_text_classifier_stackoverflow_tags.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier for sentences with multiple classes at the same time \n","MultiClassifierDL is a Multi-label Text Classification. MultiClassifierDL uses a Bidirectional GRU with Convolution model that we have built inside TensorFlow and supports up to 100 classes. 
The input to MultiClassifierDL is Sentence Embeddings such as state-of-the-art UniversalSentenceEncoder, BertSentenceEmbeddings, or SentenceEmbeddings\n","\n","\n","\n","### Multi ClassifierDL (Multi-class Text Classification with multiple classes per sentence)\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#multiclassifierdl-multi-label-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu > /dev/null pyspark==2.4.7\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2 Download sample dataset 60k Stack Overflow Questions with Quality Rating\n","\n","\n","https://www.kaggle.com/imoore/60k-stack-overflow-questions-with-quality-rate"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"y4xSRWIhwT28","outputId":"f7ac934c-b18f-4ffd-d773-842c81b2a80a"},"source":["import pandas as pd\n","! 
wget -N https://ckl-it.de/wp-content/uploads/2020/11/60kstackoverflow.csv -P /tmp\n","test_path = '/tmp/60kstackoverflow.csv'\n","train_df = pd.read_csv(test_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-02 11:20:29-- https://ckl-it.de/wp-content/uploads/2020/11/60kstackoverflow.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:443... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 50356825 (48M) [text/csv]\n","Saving to: ‘/tmp/60kstackoverflow.csv’\n","\n","60kstackoverflow.cs 100%[===================>] 48.02M 2.57MB/s in 21s \n","\n","2021-01-02 11:20:51 (2.32 MB/s) - ‘/tmp/60kstackoverflow.csv’ saved [50356825/50356825]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"gBxgVIB787wd"},"source":["# Split labels and clean them.\n","import pandas as pd\n","\n","train_df = pd.read_csv(test_path)\n","\n","f = lambda x : x.replace('<','').replace('>','')\n","g = lambda l : list(map(f,l))\n","train_df['y'] = train_df.Tags.str.split('><').map(g).str.join(',')\n","train_df['text'] = train_df['Title']\n","\n"," \n","# train_df = train_df.iloc[:50]"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":430},"id":"OfMCrNk-L_pq","outputId":"6ce7798d-ff2f-4b02-a066-67497ba0bdfa"},"source":["counts = train_df.explode('y').y.value_counts()\n","counts.iloc[0:100].plot.bar(figsize=(40,8), title='Distribution of Label Tags in 
Dataset')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":4},{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAACOAAAAJhCAYAAADinV3wAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdf9SnZV0n8PcnJiAVQWEiGdCxZPuxdSQbSbfaSsrCsWDPUbPcJKKlHx77YZ6cytR+7C62FavbZkuyhZo/kPJA4boaarW7qQ1qWmk14iAgP0YEFNQS/ewf32vyYXyGeR6uZ3geptfrnO957vu6rvu6P/f3e/8zc97nuqq7AwAAAAAAAAAA3DNfsN4FAAAAAAAAAADAfZkADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAABgzVTVb1fVL6zRXA+tqtur6rBx/taq+qG1mHvM97+q6qy1mm8V9/2VqvpIVd2whnNuraquqk335rUH076/PwAAAMBGJoADAAAArEhV7a6qT1bVx6vq1qr6f1X1I1X1z/+/0N0/0t2/vMK5vu3uxnT3h7r7Ad39mTWo/QVV9Yp95j+9uy+anXuVdTw0yU8n+aru/pJl+r+lqq69N2u6OyMAs/fz2fH77z1/2sG898zvP77Hzy6p9dqquriqHr2KOT7vnTkY7q37AAAAAAeXAA4AAACwGt/V3UcleViS85I8J8mFa32TjbYayxp6aJKbu/um9S5kJUYA5gHd/YAkH8ri99/b9vvrXd8BfHjUfVSSxyR5f5I/r6rT1rcsAAAA4FAkgAMAAACsWnff1t2XJfmeJGdV1VcnSVX9XlX9yjg+rqr+eKyW89Gq+vOq+oKqenkWQZQ/GquT/MySbZDOqaoPJXnzfrZG+rKqekdVfayqLq2qB497fd7KMXtX2amq70zyc0m+Z9zvr0b/P29pNep6blVdXVU3VdXLquro0be3jrOq6kNj+6if3993U1VHj+v3jPmeO+b/tiRvSnLCqOP3VvOdV9X2qnrXePZrquoFywz7war6cFVdX1XPXnLtF1TVjqr6QFXdPFaDefBq7r9PLadW1V+M3/b6qvrNqjp8Sf/jq+rvquq2qvqtqvrTJd/1I8b5beO7fM1+7nGX33/8Xr9cVf93rML0xqo67kC19sK13f28JC9N8sIl93jR+C4/VlVXVtU3jfb9vTNnV9X7xv2vqqofXjLXsu/76Duhqv5gvBMfrKofv7v7AAAAAPc9AjgAAADAPdbd70hybZJvWqb7p0ff5iTHZxE06O7+/tx1NZVfXXLNNyf5yiTfsZ9bPj3JDyZ5SJI7k7x4BTW+Icl/SvKacb9HLjPsB8bnW5N8aZIHJPnNfcZ8Y5IvT3JakudV1Vfu55b/LcnRY55vHjWf3d1/kuT0jJVZuvsHDlT7Pu4Ycx2TZHuSH62qM/cZ861JTk7y+CTPqc9t8/XMJGeOek5IckuS/77K+y/1mSQ/leS4JI/N4jv5sWQRRElySZKfTXJskr9L8m+WXPvLSd6Y5EFJTszi+1qp70tydpIvTnJ4kmff/fDP84dJHlVV9x/nf5nklCQPTvLKJK+tqiPv5p25KckTkzxw1HF+VT1q9C37vo8Qzh8l+askW7L4rn6yqr5jhe8mAAAAcB8ggAMAAADM+nAWAYZ9fTqLoMzDuvvT3f3n3d0HmOsF3X1Hd39yP/0v7+6/7u47kvxCkqdU1WH3vPR/9rQk
v9HdV3X37VmER566z+o7v9jdn+zuv8oiTPF5YYlRy1OT/Gx3f7y7dyf59STfP1tgd7+1u9/b3Z/t7vckeVUWgZqlfnF8f+9N8rtJvne0/0iSnx8rwfxjkhckeVLdw62+uvvK7n5bd985nvF/LKnlCUn+prv/sLv3hqRuWHL5p7PYwuyE7v5Ud/+fVdz6d7v778f7cXEW4ZnV+HCSyiLElO5+RXffPJ7j15MckUXIalndfXl3f2CsqvOnWQSJ9obP9ve+PzrJ5u7+pe7+p+6+KsnvZPGeAAAAAIcIARwAAABg1pYkH12m/b8k2ZXkjWO7nh0rmOuaVfRfneQLs1iFZdYJY76lc2/KYiWTvZaGSD6RxSo5+zpu1LTvXFtmC6yqr6+qt4xtjG7LIlSz77Pv+/2cMI4fluR1Y3ukW5O8L4tVbI7PPVBV/2pst3RDVX0si1Vc9tZywtI6Rghl6fZgP5NFCOYdVfU3VfWDq7j1Sn6Du7MlSSe5dTzHs8eWUreN7+Xo3M37VFWnV9XbxhZTt2YRNto7fn/v+8Oy2Hbs1iXf/8/lHn73AAAAwMYkgAMAAADcY1X16CxCDZ+3islYAeanu/tLk3x3kmdV1Wl7u/cz5YFWyDlpyfFDs1h15CNZbM90vyV1HZbFVkArnffDWQQlls59Z5IbD3Ddvj6Sz63wsnSu61Y5z3JemeSyJCd199FJfjuLIMtS+34/Hx7H1yQ5vbuPWfI5srvvaV0vSfL+JCd39wOzCJTsreX6LLaWSpJUVS097+4buvs/dPcJSX44yW9V1SPuYR2r9e+SvLO776iqb8oiDPSUJA/q7mOS3LbkOe7yzlTVEUn+IMmvJTl+jH/93vF3875fk+SD+3z3R3X3E5a7DwAAAHDfJIADAAAArFpVPbCqnpjk1UleMbY82nfME6vqESOAcVsWK658dnTfmORL78Gt/31VfVVV3S/JLyW5pLs/k+TvkxxZVdur6guTPDeL7YT2ujHJ1qra3/+FvCrJT1XVw6vqAVms6PKasYXSio1aLk7yH6vqqKp6WJJnJXnFauapqiP3+VSSo5J8tLs/VVWnJvm+ZS79haq6X1X96yRnJ3nNaP/tUdPDxvybq+qM1dS0j6OSfCzJ7VX1FUl+dEnf5Um+pqrOHFtcPSPJlyx5tidX1d5Azi1ZBFA+m4OkFrZU1fOT/FAWYaG9z3Bnkj1JNlXV85I8cMml+74zh2fxTu1JcmdVnZ7k8Uvus7/3/R1JPl5Vz6mqL6qqw6rqq0d4bbn7AAAAAPdB/mEPAAAArMYfVdXHs1jV4+eT/EYWQY/lnJzkT5LcnuQvkvxWd79l9P3nJM8dW/I8exX3f3mS38tiK6Ijk/x4knT3bUl+LMlLs1ht5o7cdduj146/N1fVO5eZ93+Ouf8syQeTfCrJM1dR11LPHPe/KouVgV455l+pLUk+uc/ny7J4vl8a3//zsgj67OtPs9gG6Yokv9bdbxztL8pi9Zw3juvfluTrV/dYd/HsLAJAH0/yO/lc0Cfd/ZEkT07yq0luTvJVSXYm+ccx5NFJ3l5Vt4+afqK7r5qoZX9OGPe4PclfJvmaJN+y5Dv530nekEV46+osfvOlW3jd5Z3p7o9n8b5dnEVw6PtG/Xst+76PUNYTk5ySxbv1kSze06OXu89aPDgAAABw76vFNtwAAAAAsPbGyi7XJnnakgAWAAAAwCHFCjgAAAAArKmq+o6qOqaqjshiy6fKYtUdAAAAgEOSAA4AAAAAa+2xST6QxXZL35XkzO7+5PqWBAAAAHDw2IIKAAAAAAAAAAAmWAEHAAAAAAAAAAAmbFrvApLkuOOO661bt653GQAAAAAAAAAAsKwrr7zyI929ebm+DRHA2bp1a3bu3LneZQAAAAAAAAAAwLKq6ur99dmCCgAAAAAAAAAAJgjgAAAAAAAAAADABAEcAAAAAAAAAACYIIADAAAAAAAAAAATBHAAAAAAAAAAAGCCAA4AAAAAAAAA
AEwQwAEAAAAAAAAAgAkCOAAAAAAAAAAAMEEABwAAAAAAAAAAJgjgAAAAAAAAAADABAEcAAAAAAAAAACYIIADAAAAAAAAAAATBHAAAAAAAAAAAGCCAA4AAAAAAAAAAEwQwAEAAAAAAAAAgAkrCuBU1U9V1d9U1V9X1auq6siqenhVvb2qdlXVa6rq8DH2iHG+a/RvPZgPAAAAAAAAAAAA6+mAAZyq2pLkx5Ns6+6vTnJYkqcmeWGS87v7EUluSXLOuOScJLeM9vPHOAAAAAAAAAAAOCStdAuqTUm+qKo2JblfkuuTPC7JJaP/oiRnjuMzxnlG/2lVVWtTLgAAAAAAAAAAbCwHDOB093VJfi3Jh7II3tyW5Mokt3b3nWPYtUm2jOMtSa4Z1945xh+777xVdW5V7ayqnXv27Jl9DgAAAAAAAAAAWBebDjSgqh6Uxao2D09ya5LXJvnO2Rt39wVJLkiSbdu29XJjtu64fMXz7T5v+2xJAAAAAAAAAACwaivZgurbknywu/d096eT/GGSb0hyzNiSKklOTHLdOL4uyUlJMvqPTnLzmlYNAAAAAAAAAAAbxEoCOB9K8piqul9VVZLTkvxtkrckedIYc1aSS8fxZeM8o//N3b3sCjcAAAAAAAAAAHBfd8AATne/PcklSd6Z5L3jmguSPCfJs6pqV5Jjk1w4LrkwybGj/VlJdhyEugEAAAAAAAAAYEPYdOAhSXc/P8nz92m+Ksmpy4z9VJInz5cGAAAAAAAAAAAb30q2oAIAAAAAAAAAAPZDAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgwqb1LmA9bN1x+YrH7j5v+0GsBAAAAAAAAACA+zor4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgwgEDOFX15VX17iWfj1XVT1bVg6vqTVX1D+Pvg8b4qqoXV9WuqnpPVT3q4D8GAAAAAAAAAACsjwMGcLr777r7lO4+JcnXJflEktcl2ZHkiu4+OckV4zxJTk9y8vicm+QlB6NwAAAAAAAAAADYCFa7BdVpST7Q3VcnOSPJRaP9oiRnjuMzkrysF96W5JiqesiaVAsAAAAAAAAAABvMagM4T03yqnF8fHdfP45vSHL8ON6S5Jol11w72u6iqs6tqp1VtXPPnj2rLAMAAAAAAAAAADaGFQdwqurwJN+d5LX79nV3J+nV3Li7L+jubd29bfPmzau5FAAAAAAAAAAANozVrIBzepJ3dveN4/zGvVtLjb83jfbrkpy05LoTRxsAAAAAAAAAABxyVhPA+d58bvupJLksyVnj+Kwkly5pf3otPCbJbUu2qgIAAAAAAAAAgEPKppUMqqr7J/n2JD+8pPm8JBdX1TlJrk7ylNH++iRPSLIrySeSnL1m1QIAAAAAAAAAwAazogBOd9+R5Nh92m5OctoyYzvJM9ak
OgAAAAAAAAAA2OBWswUVAAAAAAAAAACwDwEcAAAAAAAAAACYIIADAAAAAAAAAAATBHAAAAAAAAAAAGCCAA4AAAAAAAAAAEwQwAEAAAAAAAAAgAkCOAAAAAAAAAAAMEEABwAAAAAAAAAAJgjgAAAAAAAAAADABAEcAAAAAAAAAACYIIADAAAAAAAAAAATBHAAAAAAAAAAAGCCAA4AAAAAAAAAAEwQwAEAAAAAAAAAgAkCOAAAAAAAAAAAMGHTehdwKNm64/IVj9193vaDWAkAAAAAAAAAAPcWK+AAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABNWFMCpqmOq6pKqen9Vva+qHltVD66qN1XVP4y/Dxpjq6peXFW7quo9VfWog/sIAAAAAAAAAACwfla6As6Lkryhu78iySOTvC/JjiRXdPfJSa4Y50lyepKTx+fcJC9Z04oBAAAAAAAAAGADOWAAp6qOTvJvk1yYJN39T919a5Izklw0hl2U5MxxfEaSl/XC25IcU1UPWfPKAQAAAAAAAABgA1jJCjgPT7Inye9W1buq6qVVdf8kx3f39WPMDUmOH8dbklyz5PprR9tdVNW5VbWzqnbu2bPnnj8BAAAAAAAAAACso5UEcDYleVSSl3T31ya5I5/bbipJ0t2dpFdz4+6+oLu3dfe2zZs3r+ZSAAAAAAAAAADYMFYSwLk2ybXd/fZxfkkWgZwb924tNf7eNPqvS3LSkutPHG0AAAAAAAAAAHDIOWAAp7tvSHJNVX35aDotyd8muSzJWaPtrCSXjuPLkjy9Fh6T5LYlW1UBAAAAAAAAAMAhZdMKxz0zye9X1eFJrkpydhbhnYur6pwkVyd5yhj7+iRPSLIrySfGWAAAAAAAAAAAOCStKIDT3e9Osm2ZrtOWGdtJnjFZFwAAAAAAAAAA3CcccAsqAAAAAAAAAABg/wRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACZsWu8CWJmtOy5f8djd520/iJUAAAAAAAAAALCUFXAAAAAAAAAAAGCCAA4AAAAAAAAAAEwQwAEAAAAAAAAAgAkCOAAAAAAAAAAAMGHTehfA+tq64/IVj9193vaDWAkAAAAAAAAAwH2TFXAAAAAAAAAAAGCCAA4AAAAAAAAAAEwQwAEAAAAAAAAAgAkCOAAAAAAAAAAAMEEABwAAAAAAAAAAJgjgAAAA
AAAAAADABAEcAAAAAAAAAACYsKIATlXtrqr3VtW7q2rnaHtwVb2pqv5h/H3QaK+qenFV7aqq91TVow7mAwAAAAAAAAAAwHpazQo439rdp3T3tnG+I8kV3X1ykivGeZKcnuTk8Tk3yUvWqlgAAAAAAAAAANhoZragOiPJReP4oiRnLml/WS+8LckxVfWQifsAAAAAAAAAAMCGtdIATid5Y1VdWVXnjrbju/v6cXxDkuPH8ZYk1yy59trRdhdVdW5V7ayqnXv27LkHpQMAAAAAAAAAwPrbtMJx39jd11XVFyd5U1W9f2lnd3dV9Wpu3N0XJLkgSbZt27aqawEAAAAAAAAAYKNY0Qo43X3d+HtTktclOTXJjXu3lhp/bxrDr0ty0pLLTxxtAAAAAAAAAABwyDlgAKeq7l9VR+09TvL4JH+d5LIkZ41hZyW5dBxfluTptfCYJLct2aoKAAAAAAAAAAAOKSvZgur4JK+rqr3jX9ndb6iqv0xycVWdk+TqJE8Z41+f5AlJdiX5RJKz17xqAAAAAAAAAADYIA4YwOnuq5I8cpn2m5Octkx7J3nGmlQHAAAAAAAAAAAb3AG3oAIAAAAAAAAAAPZPAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwIRN610Ah6atOy5f8djd520/iJUAAAAAAAAAABxcVsABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEzatdwGwWlt3XL7isbvP234QKwEAAAAAAAAAsAIOAAAAAAAAAABMEcABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAE1YcwKmqw6rqXVX1x+P84VX19qraVVWvqarDR/sR43zX6N96cEoHAAAAAAAAAID1t5oVcH4iyfuWnL8wyfnd/YgktyQ5Z7Sfk+SW0X7+GAcAAAAAAAAAAIekFQVwqurEJNuTvHScV5LHJblkDLkoyZnj+IxxntF/2hgPAAAAAAAAAACHnJWugPNfk/xMks+O82OT3Nrdd47za5NsGcdbklyTJKP/tjEeAAAAAAAAAAAOOQcM4FTVE5Pc1N1XruWNq+rcqtpZVTv37NmzllMDAAAAAAAAAMC9ZiUr4HxDku+uqt1JXp3F1lMvSnJMVW0aY05Mct04vi7JSUky+o9OcvO+k3b3Bd29rbu3bd68eeohAAAAAAAAAABgvRwwgNPdP9vdJ3b31iRPTfLm7n5akrckedIYdlaSS8fxZeM8o//N3d1rWjUAAAAAAAAAAGwQK1kBZ3+ek+RZVbUrybFJLhztFyY5drQ/K8mOuRIBAAAAAAAAAGDj2nTgIZ/T3W9N8tZxfFWSU5cZ86kkT16D2uBetXXH5Sseu/u87QexEgAAAAAAAADgvmRmBRwAAAAAAAAAAPgXTwAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAA
AAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMCETQcaUFVHJvmzJEeM8Zd09/Or6uFJXp3k2CRXJvn+7v6nqjoiycuSfF2Sm5N8T3fvPkj1w4a3dcflKx67+7ztB7ESAAAAAAAAAOBgWMkKOP+Y5HHd/cgkpyT5zqp6TJIXJjm/ux+R5JYk54zx5yS5ZbSfP8YBAAAAAAAAAMAh6YABnF64fZx+4fh0kscluWS0X5TkzHF8xjjP6D+tqmrNKgYAAAAAAAAAgA1kJSvgpKoOq6p3J7kpyZuSfCDJrd195xhybZIt43hLkmuSZPTflsU2VfvOeW5V7ayqnXv27Jl7CgAAAAAAAAAAWCcrCuB092e6+5QkJyY5NclXzN64uy/o7m3dvW3z5s2z0wEAAAAAAAAAwLpYUQBnr+6+Nclbkjw2yTFVtWl0nZjkunF8XZKTkmT0H53k5jWpFgAAAAAAAAAANpgDBnCqanNVHTOOvyjJtyd5XxZBnCeNYWcluXQcXzbOM/rf3N29lkUDAAAAAAAAAMBGsenAQ/KQJBdV1WFZBHYu7u4/rqq/TfLqqvqVJO9KcuEYf2GSl1fVriQfTfLUg1A3AAAAAAAAAABsCAcM4HT3e5J87TLtVyU5dZn2TyV58ppUBwAAAAAAAAAAG9wBt6ACAAAAAAAAAAD2TwAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACZvWuwDgntu64/IVj9193vaDWAkAAAAAAAAA/MslgAN8HsEeAP3xZUAAACAASURBVAAAAAAAAFg5W1ABAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYMKm9S4A+Jdj647LVzx293nbD2IlAAAAAAAAALB2rIADAAAAAAAAAAATBHAAAAAAAAAAAGCCAA4AAAAAAAAAAEwQwAEAAAAAAAAAgAkCOAAAAAAAAAAAMEEABwAAAAAAAAAAJgjgAAAAAAAAAADABAEcAAAAAAAAAACYIIADAPD/2bv3eNuref/j73fl0pWiEyqVFDqIlBNC5FYhpDpJOp3cb4UfyjW3UzouR7lVonShKEcqQnQTqp3aKVLouCsOFQcpn98fnzH3mnvtudbea44x9t6z/Xo+Huux95xrrc/87rm/8/sdl8/4DAAAAAAAAAAAAKACCTgAAAAAAAAAAAAAAABABRJwAAAAAAAAAAAAAAAAgAok4AAAAAAAAAAAAAAAAAAVSMABAAAAAAAAAAAAAAAAKpCAAwAAAAAAAAAAAAAAAFQgAQcAAAAAAAAAAAAAAACoQAIOAAAAAAAAAAAAAAAAUIEEHAAAAAAAAAAAAAAAAKACCTgAAAAAAAAAAAAAAABAhVWW9QEAQK2NDzxzTj9//aE7dzoSAAAAAAAAAAAAAMCKiAo4AAAAAAAAAAAAAAAAQAUScAAAAAAA
AAAAAAAAAIAKJOAAAAAAAAAAAAAAAAAAFUjAAQAAAAAAAAAAAAAAACqQgAMAAAAAAAAAAAAAAABUIAEHAAAAAAAAAAAAAAAAqEACDgAAAAAAAAAAAAAAAFCBBBwAAAAAAAAAAAAAAACgAgk4AAAAAAAAAAAAAAAAQIXFJuDY3tD2N21fbfsq2/uX59ex/TXb15Y/1y7P2/bhtq+zPd/2Vr3/EQAAAAAAAAAAAAAAAMCysiQVcG6T9LqI2ELStpJeYXsLSQdKOiciNpN0TnksSTtK2qx8vVjSx5ofNQAAAAAAAAAAAAAAALCcWGwCTkT8OiIuK3+/RdIPJK0vaRdJx5UfO07Ss8rfd5H06UjfkXR32/dufuQAAAAAAAAAAAAAAADAcmBJKuAsYHtjSQ+X9F1J60XEr8u3fiNpvfL39SX9fOjXflGemx7rxbYvtX3pjTfeOMfDBgAAAAAAAAAAAAAAAJYPqyzpD9peQ9Kpkg6IiJttL/heRITtmMsLR8RRko6SpK233npOvwsAS8vGB565xD97/aE7dzwSAAAAAAAAAAAAAMDyaokq4Ni+kzL55sSIOK08/dvB1lLlzxvK87+UtOHQr29QngMAAAAAAAAAAAAAAADucBabgOMsdXOMpB9ExAeGvnW6pH3K3/eR9MWh51/gtK2km4a2qgIAAAAAAAAAAAAAAADuUJZkC6rHSNpb0pW2Ly/PvUnSoZJOsb2fpP+RtHv53lmSdpJ0naT/k7Rv0yMGAAAAAAAAAAAAAAAAliOLTcCJiAsleYZv7zDi50PSKyqPCwDu0DY+8Mwl/tnrD92545EAAAAAAAAAAAAAAGotdgsqAAAAAAAAAAAAAAAAADMjAQcAAAAAAAAAAAAAAACoQAIOAAAAAAAAAAAAAAAAUIEEHAAAAAAAAAAAAAAAAKACCTgAAAAAAAAAAAAAAABAhVWW9QEAANrZ+MAz5/Tz1x+6c6cjAQAAAAAAAAAAAIAVBxVwAAAAAAAAAAAAAAAAgAok4AAAAAAAAAAAAAAAAAAV2IIKALBE5rK9FVtbAQAAAAAAAAAAAFiRUAEHAAAAAAAAAAAAAAAAqEACDgAAAAAAAAAAAAAAAFCBBBwAAAAAAAAAAAAAAACgAgk4AAAAAAAAAAAAAAAAQAUScAAAAAAAAAAAAAAAAIAKJOAAAAAAAAAAAAAAAAAAFUjAAQAAAAAAAAAAAAAAACqQgAMAAAAAAAAAAAAAAABUIAEHAAAAAAAAAAAAAAAAqEACDgAAAAAAAAAAAAAAAFCBBBwAAAAAAAAAAAAAAACgAgk4AAAAAAAAAAAAAAAAQAUScAAAAAAAAAAAAAAAAIAKJOAAAAAAAAAAAAAAAAAAFUjAAQAAAAAAAAAAAAAAACqQgAMAAAAAAAAAAAAAAABUIAEHAAAAAAAAAAAAAAAAqEACDgAAAAAAAAAAAAAAAFCBBBwAAAAAAAAAAAAAAACgAgk4AAAAAAAAAAAAAAAAQAUScAAAAAAAAAAAAAAAAIAKJOAAAAAAAAAAAAAAAAAAFUjAAQAAAAAAAAAAAAAAACqQgAMAAAAAAAAAAAAAAABUWGVZHwAAYMW28YFnLvHPXn/ozh2PBAAAAAAAAAAAAADGQwUcAAAAAAAAAAAAAAAAoAIJOAAAAAAAAAAAAAAAAEAFEnAAAAAAAAAAAAAAAACACiTgAAAAAAAAAAAAAAAAABVWWdYHAABADxsfeOacfv76Q3fudCQAAAAAAAAAAAAA7uiogAMAAAAAAAAAAAAAAABUIAEHAAAAAAAAAAAAAAAAqEACDgAAAAAAAAAAAAAAAFBhlWV9AAAATJqNDzxziX/2+kN37ngkAAAAAAAAAAAAAJYHi62AY/uTtm+w/f2h59ax/TXb15Y/1y7P2/bhtq+zPd/2Vj0PHgAAAAAAAAAAAAAAAFjWlmQLqmMlPW3acwdKOiciNpN0TnksSTtK2qx8vVjSx9ocJgAAAAAAAAAAAAAAALB8
WuwWVBFxvu2Npz29i6Tty9+Pk3SupDeW5z8dESHpO7bvbvveEfHrVgcMAMAdFVtbAQAAAAAAAAAAAJNpSSrgjLLeUFLNbyStV/6+vqSfD/3cL8pzi7D9YtuX2r70xhtvHPMwAAAAAAAAAAAAAAAAgGVr3AScBUq1mxjj946KiK0jYut111239jAAAAAAAAAAAAAAAACAZWKxW1DN4LeDraVs31vSDeX5X0racOjnNijPAQCAZWQuW1tJbG8FAAAAAAAAAAAAzNW4FXBOl7RP+fs+kr449PwLnLaVdNPQVlUAAAAAAAAAAAAAAADAHc5iK+DY/oyk7SXd0/YvJL1d0qGSTrG9n6T/kbR7+fGzJO0k6TpJ/ydp3w7HDAAAlhNzqa4zl8o6veICAAAAAAAAAAAAPSw2ASci9pzhWzuM+NmQ9IragwIAAOiBxB4AAAAAAAAAAAD0sNgEHAAAACweyT0AAAAAAAAAAAArLhJwAAAAlmOTuM0XyUgAAAAAAAAAAGBFQwIOAAAAJgLJSP3jAgAAAAAAAACA8ay0rA8AAAAAAAAAAAAAAAAAmGRUwAEAAAAgico6AAAAAAAAAACMiwQcAAAAAN2xHRcAAAAAAAAA4I6MBBwAAAAAmIbEHgAAAAAAAADAXKy0rA8AAAAAAAAAAAAAAAAAmGRUwAEAAACApWQulXUkqusAAAAAAAAAwKQgAQcAAAAA7gB6bZu1PMSda2wAAAAAAAAAWNpIwAEAAAAA3KH0ShoCAAAAAAAAgJmQgAMAAAAAwBIgsQcAAAAAAADATEjAAQAAAABgGeq5HdfysIUYyUgAAAAAAABYEZCAAwAAAAAAlgs9k5EAAAAAAACAnkjAAQAAAAAAd3hU7QEAAAAAAEBPJOAAAAAAAACMiW2+AAAAAAAAIJGAAwAAAAAAsEIhaQgAAAAAAKC9lZb1AQAAAAAAAAAAAAAAAACTjAo4AAAAAAAAWG71rKxD1R4AAAAAANAKCTgAAAAAAABAQyT2AAAAAACw4iEBBwAAAAAAAJgAk1gNiGQkAAAAAMCKggQcAAAAAAAAABNlEpORAAAAAAB3bCTgAAAAAAAAAEBnJPYAAAAAwB0bCTgAAAAAAAAAMKGWh+3DesYmGQkAAADApCABBwAAAAAAAAAw8UjsAQAAALAskYADAAAAAAAAAMAMJrEa0PIQt2fs5SEuAAAAMB0JOAAAAAAAAAAAAHNAMlL/uL1jAwAAtEYCDgAAAAAAAAAAAFYYJCMBAIAeSMABAAAAAAAAAAAAVkAkIwEA0A4JOAAAAAAAAAAAAABWaCQjrRhxe8aetLg9Y5P8hhUVCTgAAAAAAAAAAAAAAGCpIxlp+YrbM/byELc3EnAAAAAAAAAAAAAAAABwh9Y7sWelOf8GAAAAAAAAAAAAAAAAgAVIwAEAAAAAAAAAAAAAAAAqkIADAAAAAAAAAAAAAAAAVCABBwAAAAAAAAAAAAAAAKhAAg4AAAAAAAAAAAAAAABQgQQcAAAAAAAAAAAAAAAAoAIJOAAAAAAAAAAAAAAAAEAFEnAAAAAAAAAAAAAAAACACiTgAAAAAAAAAAAAAAAAABVIwAEAAAAAAAAAAAAAAAAqkIADAAAAAAAAAAAAAAAAVOiSgGP7abavsX2d7QN7vAYAAAAAAAAAAAAAAACwPGiegGN7ZUkfkbSjpC0k7Wl7i9avAwAAAAAAAAAAAAAAACwPelTAeaSk6yLiJxFxq6TPStqlw+sAAAAAAAAAAAAAAAAAy5wjom1A+7mSnhYRLyyP95b0LxHxymk/92JJLy4PHyDpmiV8iXtK+l2jw11asSctbs/YxO0fe9Li9ow9aXF7xp60uD1jT1rcnrEnLW7P2JMWt2fsSYvbMzZx+8eetLg9Y09a3J6xJy1uz9iTFrdn7EmL2zP2pMXtGXvS4vaMTdz+sSctbs/Ykxa3Z+xJi9sz9qTF7Rl70uL2jD1pcXvGnrS4PWMTt3/sSYvbM/akxe0Z
e9Li9ow9aXF7xp60uD1jzyXuRhGx7qhvrNLueOYmIo6SdNRcf8/2pRGxdYdD6hZ70uL2jE3c/rEnLW7P2JMWt2fsSYvbM/akxe0Ze9Li9ow9aXF7xp60uD1jE7d/7EmL2zP2pMXtGXvS4vaMPWlxe8aetLg9Y09a3J6xJy1uz9jE7R970uL2jD1pcXvGnrS4PWNPWtyesSctbs/Ykxa3Z+xJi9szNnH7x560uD1jT1rcnrEnLW7P2JMWt2fsSYvbM3aruD22oPqlpA2HHm9QngMAAAAAAAAAAAAAAADucHok4FwiaTPbm9i+s6R/lXR6h9cBAAAAAAAAAAAAAAAAlrnmW1BFxG22XynpbEkrS/pkRFzV8CXmvG3VchB70uL2jE3c/rEnLW7P2JMWt2fsSYvbM/akxe0Ze9Li9ow9aXF7xp60uD1jE7d/7EmL2zP2pMXtGXvS4vaMPWlxe8aetLg9Y09a3J6xJy1uz9jE7R970uL2jD1pcXvGnrS4PWNPWtyesSctbs/Ykxa3Z+xJi9szNnH7x560uD1jT1rcnrEnLW7P2JMWt2fsSYvbM3aTuI6IFnEAAAAAAAAAAAAAAACAFVKPLagAAAAAAAAAAAAAAACAFQYJOAAAAAAAAAAAAAAAAEAFEnAA3GE4bbisjwMAAAAAAAAAAAAAsGJZ4RNwbP+T7fsOvhrFfO+SPAe0YPsZtifis2x7q9m+auNHREg6q8GhTjzbK9nefVLiYumwvfrgemF7c9vPtH2nZX1cd1S217b90GV9HHdk5Zq01rI+DqBWy3PZ9rtsrzL0eC3bn2oRG1iaJqmfA+COyfY6I75W2P6T7XuVPuQzbN9rWR8P7rhs72Z7zfL3t9g+rcW4IXBHYft423cberyR7XMaxb7LkjyHFYvtlZf1MWD5UObS71r+btv72j7C9suGx6KAHmyvYXuNZX0cS2KFHcwqHcZrJf1U0nmSrpf05UbhnzziuR1bBLZ9zxZxZon/z43jHWf77kOP17b9yYbxm3X+ZxhYWfDV6Hh7TH7vIela24fZfmD9UU4Z3EinPVdzDr6/fH1E0nclHSXp6PL3j1TEHXaZ7W1aBLJ9pe35I76utD2/Qfxu51xE/EPSG2qPcWnEtf0l26fP9NXytVrrmXBpe/8yaWrbx9i+zPZTKsOeL+mutteX9FVJe0s6tvI4u147be+2JM+NGfshLeJMi3lu+X9bR9Jlko62/YEGcVe2fWL9EY6M+5rWcUvs45fkuTHinlTe49UlfV/S1bZfXxu3xD6sxL6T7XNs32j7+S1iTxrbd7P9QduXlq/3Dw/wLY9s7zft8cq2376sjmdxOp7Lq0j6ru2H2n6ypEskzWsQV1L7BJ/y//TNNkc3Mv6mLgPGtre3/erh/snyyPZ65d7/5fJ4i+nn95hx91+S55aXuOrYz2mt9OsW6Ts1fo1e58UikzWjnhsj7l1sP8/2m2y/bfBVG7fE/qcRzz2gQdxu7c6heMt98nDn9uFjluS55chlkm6U9CNJ15a/X1/6Zo+oCexOiyNa36eH4rxQ0sWSniPpuZK+Y/vfa+OW2E3v1T37qLZfO9tXTexZXnPfit/tPa71Y9sn2n6p244pvzUibrG9naQnSTpG0scaxu/C9ibD7QHbq9reuDJm8z6qOy+QHHqdLW2/snxt2TDuyrbv44aLqjteOx9oewdPmyy0/bTK0Bcq+3w72X6RpK9J+q/KmAPfXsLnxlbe3zVbxuyhx7265+fP9rql7X2U7U8OvmpiDrnW9n/a3qJRvG5jndNeYyPbTyp/X7XFeee+8wGr2X6r7aPL481sP71F7IbO0lRuwaGSdlbO722jnO9bIfVoA/TSs63ci+2H2P6epKuU46fzbD+4YfxX2V67VTxJchaMWL45O+MHS9pIOaBsZbGL+1XEvELSEyV9PSIebvsJkp4fEWMPYNl+maSXS7qfpB8PfWtNSd+KiLEbx7ZXioh/2L4sIrYqz+0fER8a
N+YMr7MgfqN434uIhy/uuTFjv1DS2yR9Q3lOPF7SOyNirEaF7Z9KihLrvpL+UP5+d0k/i4hNGhzzPEmPlbS2pG8pJ0RujYi9KuOuJWlPSfsq/w2fkvSZiLilMu6Vkl4UEd8pj3eVdEhEbF4Z9zRJb4+IK8vjB0s6OCKeWxO3xPqhpPtL+h9Jf9bU9WLOVShsbzTb9yPif8Y6yKn4w+fciPDjX+NK/EMl/U7Sycr3YhD4f5enuLYfP9v3I+K8ceIOxV9X0hslbSFpQSMoIp5YE7fEXuSaaXv+OOfbiNhXRMSWtp8q6SWS3irp+Jpr9OB4bb9K0qoRcZjtyyPiYRUxe5/Ho97jJvcq2xdIuosyCenEiLipQczvlXbFCyVtGBFvb3hOXCjpiRFxa22saXEvjohHtoxZ4i70/+RcLXNlRFR11gfnrO29JG0l6UBJ8xq9x4PYz5b0dEmvlXR+RIw9WGj7S8rPyEgR8cwx4x6xmLivHifuUPxTlUkhx5Wn9pa0ZUQ8pyLmlRp9zGPfq6fFP0nZbttP0jrKz/Z5EfH/xozX5f9uKH7Pc3kHSWco27OPi4jramMOxT5E0lOU7c71JH1Y0hER8eGKmOdIek6L6/CI2JdL2lrSxsqBoi9K+ueI2KkiZu9z+cvK9vybS1tgFUnfi4iqxNEZ7qnVfbNecUucXv2cwyS9W9JfJH1F0kMlvSYiThgz3l+U7eIvS/qMpLMj4vaaYxzxGk3PizI4uJqkb0raXlNtubUkfSUiqpKebH9F0k3KBMAF70VEvL8mbol9jXJy9pTy+HWS9mvQxujS7iz3p5cq34dLlO/xhyLiPyvjbiDpCEnbKT8fF0jaPyJ+URO3xF4q7cOZnhsj7ubKyfn1IuLBziqUz4yId1fGPVrS5yPi7PL4KZJ2VX4WPxQR/1IRu9f4UPP7dIl7jaRHR8Tvy+N7SLooIlokvzW9V/cc3/Nikrsj4h3jxp7lNX8WEWMlGCyFca27SPoX5bn8GEkPkDQ/Ip5dGXfQrz5E2Yc8qWHb4vDZvl/Tj7J9qfJzcmt5fGflnMDYCwU79VFnS36PRuNl+0t6kaTTylPPlnRURBxRGfdVkt4u6beS/lGebtH+7tHHebWkV0j6gaSHKe/RXyzfa3Hv207ZjvudpIdHxG8q491L0vqSTpD0PC3cNvx4bduwvMY2kj6pnC+zpD9K+veIGGvBiO1bNHu/rCrpuce9uufnz/ZFyvbg9Pb3qePGHIq9pqR/VX5GVlL+P342Im6ujNtlrLPEfpGkF0taJyI2tb2Z8lzeoTJuz/mAk5X/fy8obdrVlO2tscbubV8YEduN+KyM/RmxffWg31U+I9tELtpeMJ8xzrFOe42m/fVpsddV3p82VuYbSJIioiqxvEcboMRpfry95nN6jqOW69ubI+Kb5fH2kv4jIh49bsxp8d+tvMZdpry+nR1Rl0AzKeWgjpH0Gk27cVT6e0T83rn6aKWI+Kbt2izhk5QDbocoB9AHbonKCW9J59n+s6R7OTOkr5S0j6SmCTga/YGrsZLttSPiD5LkzJ5rdd69Xtm4XKjzr/xwzNmgA14GWL4QEWeVxztKelaTI5YcEf/nXKn40cHkd23QiLjZ9uclrSrpAGWn5vW2D6/s2DxP0idtnyvpPpLuoUxcq/WAKMk3khQR37f9oAZxJempjeIsMhBRJgCaXTdrBn2W0B7lz1cMv6wySXC5iRuVCTZL4ERlstDOyoHvfZQrF8fmqYTLTb3wqrE1lR2yFgbX452UiTdX2a69Rtv2oyTtpZyglqSqEqK9zuNy7d1J0vrTBsjWknRbi9eIiMeWzte/S5pn+2JJn4qIr1WEXcX2vSXtLunNLY5zyE8kfctZGWo4+a22ws63bH9YiybVXTZOMNsHSXqTpFVt36ypc/lWtVkJcSfniqNnSfpwRPzddquM8sE1fmdJn4uIm+o/dnpfbYAZXNop7sCmEbHr0ON3NGizdF21ExHPs72Hsp38
Z0nPi4iaa3Kv/7uBLuey7cdJOlzSOyU9RNIRtveLiF/VxpakiDjI9teVK5xaJfj8SdKVtr+mha9DVYlkxT8i4rYyaXFERBzhXDVTo/cKtHtGxCnleqpy/GP3g23vqWzXb+KFqwuuJWnsfuoscdesiTusYz/nKRHxhnJeXK+s6HC+csJhHD9U9pOeK+l1kj5l+wvKZKFWbd2m54UywfsAZT9v+J5/s3LSqdYGEVG7unsm20s6ylmdZj3lxNbYySJLod25RTmX91KOGR2oHN+qSsBRJoGcJGlQpef55blRVZnnqnX78FGSHi1pXS9cJWQtVfZHiqOVY0RHSlJEzHcmPlUl4EjaNiJeNHgQEV+1/b6IeInrt+PoNT7U4z4tSb+XNJz8eEt5roWm9+qe43sR8Q7nwoJXR8QHa2IN88zVaKy8zo2l97iWcpz+7+XPf0i6oXzV+qXtI5XXs/eWz1urSv53VS7QOrk83k3S1WpT4WOV4UnkiLi1TMBVxSx/NuujRsQTKo9pSewn6V8i4s+S5KwO8W1l4miN/ZXjyq2uP5K6XTtfJOkREfEnZxWEz9veOHJBddV/ou29lQv1XqCcmD7L9r4RcUVF2KdK+jdJG0gaHme6WTnG08Ixkl4eERdIC5KIPqX8N8xZRPSuotP8Xt3587daRLyxR+DIBRBHKyt9P17ZBv1g6au9q+Lz0musU8q5i0cqP9eKiGs9opLmkhqaD7hfx/mATSNij9LXVjn/xr5eRMR25c+Wn5Wf235iRHxD2ZfeUNL/lPnZVlr314d9UZmo9nW1yzeQ+rQBpA7H23Fesuc46uqD5BtJiohznRXFm4iIt9h+q6aScT9s+xRJx0TEj2f/7dEmJQHnpohotT3UwB+dpf/Ol3Si7Rs0dIEfR+SKzZsk7eksFzdYhfQtVQ5AlknCuysHabaR9EJJm9v+rHJl79hlOJ2rNwbZbut5qCx0RLyz5riV2w192/bnyuPdJL2nMuZAr87/9AGWLzszLltoPvltexdl4/j+kj4t6ZERcYMzO/ZqVXRsIuJK2++RdLzy/X1cNFhNJ2m+7U9o6oa5l6Tq0rdS/eqdUWy/RNI7JP1VUxmcLRJZBvFnXfEw7iBnrxtpx4SLp0t6lxatdlZbov0eEXGMs2rYecqExksqY86X9AxlmcXhjk2LhMuBeba/KmkTSQc5Vxr8YzG/szgHSDpIOQh5le37KVfLVOtwHv+vMsHgmVp465RblEm5TZTO11vKax0u6eGlY/OmiDht9t8e6Z2SzpZ0YURcUt7jaxsd7o/L10rKzl0rg1UUw/f80JgJlxFxiKRDbB8SEQfVHtwIRyo7X1dIOt+5qrNqxc2QM5yV1P4i6WXOVQZ/rQnYK8kwIo5b/E9V+Yvt7SLiQklyVqT8S03A3hMAJaFuf0mnSnqQpL2dK2X/b5x4w/93tleVdN+IuKbJwaZe5/L7JO0WEVdLku3nKCtGNtnCp1OCz2maWh3b2t/L4NU+ynu3JFWVDV8Kk1l/LoNXUeJvq+xnjusiSb+WdE9l/2zgFtW1wXvFldS3n6P2k1kRuehkMCh9L2Uy7qG2N4iIDWuCF03PizL58yHbr6pMZprJRbYfMrzwopWI+LWzws5ByvbxgRHxp4qQv1Lfdmev5OF1I2J4e4xjbR/QIK7UuH0o6c6S1lB+9obbsTcrE9dqrRYRF0/7HLdInvq17TdK+mx5vIek35YEjNq+WfPxoRK0VyLudcrtTr6oPBd2UY7tvFaqnihrfq8uuozvRcTt5XibJeAok2yeqpz4H2bl/bZKx3Gtm5XJ7x+QdHTDpIjdJT1N0vsi4o/ORS5Nth1WTvRvFxG3SZLtj0u6ICJe2iD2jbafGRGnl9i7KKuT1GjeRx0obarXKvs5Ly79qQdExBktwmvhicLb1Wbh789V1y4eqdO1c6VB+yQirneu1v986ffVvhe7Ks/jGyR9xpn4fZym7t9zVsYYjrO9azSomDKD2wfJN+U1
L7Rdfb/2YrZMqRir7TGXM2tV4THHIgfOsL3TIPG0pdL22Vk5Mb2xsv93orJC0FmSxt0xoddYpyT9rSRBSJKcFURr2uA9CzAM3FrGnwZ9vk0l/W3cYJ0+Gy+U9GnbByuvx5eXxLS7K+8pLfRYIDnQK1GtRxtA6nC8Hecjey60/0lJkDm+PH6+MoGvmYgI27+R9BtlX3Jt5X37axHxhrnGm5QtqA5V3thO09DFZtyToMRcXdlwXUl5A72bcvuJ6o5CXW5GxgAAIABJREFUOQl219Qg8rOUF4mxV944V4NepFxh+MiI+INzNcguyqSIsTP/bO8z9PCdym2dJLWZ3HHuCzkYpPnGYDKgQdxPKxvEC3X+y9fYnX/bZyszCoeTQx4XEdWVVZzZwa9Tlh57b5mYPSDqypseK+mTEXH+iO/tEBHnVMQ+RtKmyobV5sqKS0dExEfGjVni3lXSyyQ9rjx1vqSPRUSTDmRrtq+V9KiIaHHDHBX/O8qtJ+YrO2APVQ4A/1UVpSd7dqSd24ZN39Lp05Uxr1NmM18ZDW9Otr8TEduWz/bhygH2z0fEphUx50XEI9x4275pr7GSsuP8kzLgdA9J60dEk2S1En+NqCwVOhSv6Xnsqe2yToqI57U4xhGv8VDl9W1n5X7Zx0TEZbbvI+nbETFrue5lxfZq4yYULE0lkenZGtoWISL+u9NrrTIYQG0Qax1l8vftpb24ZlSWci5xu2xZ435bXG2pnPC+W3nqD5L2aXENmmkCIOq3rPuhpFdGxNfL+fdaZTnrf66M+wxlUsudI2IT2w9TbntauwXVyjG0RU055pVrz+Xpcctz92g1IeKsFvZv0xJ8/iPqt6vpkeQ06Iu8VHld/4ztTSTtHhHVe7R3PJcfoWyzPFi5Fdy6kp5b+/kb9IEjtzbeXJmU9eWI+HtN3BJ7I0mblc/fqsrVX7XbRB2rfv2cQ5X99L8oV0XeXdIZMeZ2Mp5lWwzbG01P2hrzNbZSJh01OS9cVi3ONAlQOfgv21crk6d+qhzHabJFW4n9dWWb/tXKFZfHKLfjGGvLwaG4d1IO9ja9Fjm3oHijMuFyZ+WWOCdExGMr456jsi1beWpPSftGZVn9ngafB2fiYtReJ4biflnSK5Vjb1vZfq5yW7IdK+PeU7nVyXblqW8pr/s3Kc+Tsasj9BgfKnF73ae7bb3U617deXzvg8okoVZVoo5RVmO9cMT3qvvEvca1yuTSdsp76a3Ksevzx71H214rsmLYqAnDkHTz9HbuGK9xjfK9+N/yeG1J34k226ltqpyQvo/yvvdzSXvHmCunh+IO91FXk7RWoz5q061OpsV+rTKp7gvlqWdJOjYixtqBwFPV0/5ZudXZmVp4nqiqWkaPa6ftb0h6bURcPvTcKsrq/XtFRIukywXjQ7bvHA228XEmkr9H0n0iYsdyjX5URBzTIPZ/KStbfkb5md5D2Zc6Qaq6hl6vbBMutOVg+fbY/TNnYtb/U9u5nE/N8u2Ium1lbpG0uvJ6POjjRdQvbpXtnygXcR4TERdN+97hDdovzcc6nUm3f1RWinqVsnrN1RFRVbHc9shtISPiZ6Oen2PsJ0t6i3Le5avK7R3/LSLOHTNel62GSuwHKecMV5H0C0mXRNmKqlbr/vq02O9W3uuaJqrZvr/yWnaf8tQv1KYN0Px43Xkryh5j4KW99g5N9csukHRwlN13ajm3znyBMmnqE5L+O3LhzEqSrh1nLnFSEnBGnQxVJ0FptJ0cEb8c/8hmjH2NpC2jJBOUAcjLaxrypQH8KOUH+FLlyoj7K6tFXBARTbYh6DmZ3Fqvzn/p1LxdCyeHvCPaZbEOXqd68tuZefz16FS60Ll67kNRLhS27ybpAxGx3+y/ecfiXF35nNaNwKH4p0l6e5TVoc7kloMjomolYK+OdPnsba9sCJ4laUdltY/a4/2mpB1aNdSG4j5deUPeUDlpsZbyM336rL84e8zvKBNNdtFU2eIFajsdQ6+z
tqTNtHCi0yKTUHOId5JyYPN25b7Fayk/47Xl75ufx7a/L+k/lPe5RVa61U4Kldc4Tzlh87mI+Mu07+0dEceP/s1ZY/bco/ZR5XjXiIj7lgSJl0TEyyvjrqd8r5sOrtj+qLKtMpgU2kPSjyPiFTP/1qzxnh8RJ3jhbQsGQlk16fSahrdz7+JjlFuGNGnAD8UerLYdnFeDPcM/Jo1fwc32hyTdS1MTC3tK+q2k/y5xx1p9MPQ+r1H+/JNysmne8ADimLF7TQCsNb1dZXvziPhRZdx5ymTyc6NMsNu+MiIeUhn3J5I+r5xs+UFNrGlxN1eeV+uV+/9DJT0zKhYDTIvfPMHHnZKcRrzO2pI2jHbJrN2StMvg/AOUAxTXRJskmXnKFYprKyeRL5F0a0TsNesvLj7uiyS9WNI6EbGpM+n741GRBNC7n1Neo1nCpe3txx0UnePrNDsvbL8jIt5eJgEGg7IL/qwZ/C/xRyYyj3u/mxb7WTGU1Fvel4Mi4l2VcZfKtai8VnXycHmPj1COFYVy4vvVjQb/e7UPt1YmDQ1WON+kTJadN/NvLVHc+ym3On20cvLtp5KeHxHX18SdRD3u00uDOyTi9hzf6zFW3dNSGNd6oHJs6ABJ/xQRq44Z54yIePqICcPB39dQVtoZezsc2/tKOlg5kWzl+XFwNKww6qy6r6iozubZE2UH/d8Lp3/e5/gal0bE1h5KJLZ9RURsOW7MafEHOwRIOX8x9tZyveYBhuL36ONsIOm2Ue1L24+Jiu2Se40PldhfVt6r3xwRW5Z21vdq+78ldpdJX8+w5WBEvGSceENxu1RznES216i5ps0St+e5vJKyctFTlNf7syV9YjDXVRF3kFxg5Zj9Jsq+WdXir6H495C0bYn/nR7jDa3Zfnq0qZ42HLNX8mmXRLXBfaRFG2Ba3MHx/k15vK12jeim1xh4T86KTp8adWy2HzTOuO1EJOD0UBptuysbqycrJ99+2yj2NyU9OyL+WB7fXdJpLTph0xrEVyozvh4fEa+qjT09PtrrMfntXPX2nMgt0JZrtk+JiN09QwZkNFgN2YPthys7Ht/VwqsrWiVZXDW9gTbquTHidulIl/+/LZWdry3L4OwJEfHkyrjbKJMtzlPDVSw9OFdCPknSezVUNWygxcCN7Rcqt1LZQNLlyob3t2vuJbYvj4iH2d5LWa3mQOVkeouVyE3PY+c+0Hsp79XTk6WqJ4WGXufOygoAoewsVa0WGnqPny3p6coKHOe3GMCy/V1lif7Thz7T34+IB1fG7TK44qxG8qBBx7Z0fK+KiAeNGe8lEXHkLANv91BWCdx2vCOWnKsV9lUmC12qfF++Wts5L7EXaWO5QeLz4Fq/uOfGiHuSpK2Vnz8rz+f5ylLDn4uIscv395oAGJosXD8intZwsnBQSW34fjq/9trp3FrwX5Xn3ErKlZCfjcrKZM7kwtdLOrLltWIofvMEH49OcmpyzLbPVW4rs4oyMfkG5QrG6tLIHc/l+crtTk6OytVS0+IOqsu9StKqEXHY4L5VGfdy5aq070bbJLVu/Rx3TLhsbYaJtwWivlLN67ToJOfYCZeevXpBzRYA3c1wLRr7XPZSSB7upWP7cL6kV0TZfqK0+T/aaizAmUy3UrSrrLO5cuX7xhraarDR+N7Wkt40InZt+6JLIm7n92KpJb9NohaTnr3GtWyfqhwf+rFy4dMFyvZAlwrXzgTd74/bpxyKcy9Jg1X0320xoTfiNc6IiKdX/P5wouwo91C258Yei7N9kaQdlG3jrZwVfD4TEY8cN+ZQ7FHtgFuiQVL50Gs0q+zc69pZYh8fEXsv7rk5xuwyPlTiXBIR20zr/1b3GXoa1V5r1B+5QNJdJB2r3D2jWb/E/ZKdn6mpxNNzo1FShDPZ+UPKxO9/SPq2csFh1RYwPc/laa+zjqQNotFinGmxt5L08oh4YWWMGUXFbjBLKX6Twg5eCsmnvdj+mXIx7snKnWCW++QLd9pB
o+UYuO3/iogDPEMF+Bb9hdK+vCoqK4ZO13Kf+m6cVTeGVyycp+yI1ex3/g5J7yiNqT0knWf7FxHxpOoDzsGqq5zbRoWkJ0u62Pbh5bVrOje7Dv39woj4vHLlbCvLbani6Vp3/pfGB1nSFmUwci/lfpEHKicCaqpP/EnSleV8Gy572yQ5ZDrbB0fEwWP++v7lz7E7oMvIkZK+odzXuml1lmK+7U9o4bLILRqDTfcMHTLYuuA2Z+nwG5TVZWq9R3k+31XSnRvEk9SngxCZef5Z2z+IiCvaHOki9pe0jTLT/QnOFWX/URnzTs7S+s+S9OHIMnqtGoNNz+PI0tsXlkSC6jK3o9jeSfn5/rFy0mmTkuTx5YqwPfeoVUT8fFq8Fh2Oe0bEKbYPKq9xm+0Wca9TbrEwyBzfsDw3log4svz16zFtpZjL6jHb7xw3fnmN6yS92bmd6NOVCRG3l0HPD1VOGtpDq9xsP1qZdFFrddv3G1zTyjVv9QZxN5C01WCw35n4dKayPT5P0tgJOJIOknRRGWhpmdh6rMpkYXn8I2XHt/YacpXt50lauXRGX62sMlClTA4eLelo5zYUJ0n6oO3PS3pXjL+txWoRcfG0a0WTLdqKo1USfCQpIuY7E7ZqBqf/PuJ62arNdbfS/n6hpE+XiYxWg269zuVnKPuop9j+h/I8PiXqq1rYucJwL+VKQCm3eq71t4i4dfD/VybqW7QvevZz9lAmv11iu2nC5XS2j4qIF1eEeMYs3wtNbX09rkdodMLlS22Pk3B5UokxT4uWOw9JVVu0zaTB+yyNvhbVnBOD+/GaM3x/E+XWzHNKHrb9hpJAd8So42v0GenVPrx9kHxT4l5oe+x7lEcnN2nwfxj1izk+J+njyjLkrQf6T1TeT1uPM/S4T0t934uDlYmc50pSRFxe2rRjKckVb1e+r29TbjvxHEk/lLR/RPy68ni7TZzO4Gplv6pGr3GtQ5TJeUtlIqy8TlXyTbGypBuV/ffNnVUzx642PIP1a365tFlXUm4Xesqon3FuXVbjYOVk4Ya2T1TZ6qQy5sBlWnRLoN/Y/q2kF8WYlc88YnGr7RaVnXtdO6XcNmuB0lZ+RG3QTuNDkvRnZwWOwXjytsr5riZs76x8T4arfVeN5Uj6le23aOExyV9VxlREPLbMQe0raZ5zq7JjI+KrtbHVYfzCuW3PNso2hiTtX8ahDqo4zoGTJH1EudW8lIuJPqOpZMax9TqXPWIxju2LIuI1LeIPRMRltmvfh/fP9hLKRQI1PqpcjDtfeU1+qHLh4WA77dr4rQa/H69sr8zUB76Hcouu2oXgPRLVHqjsB79C0jG2z1Ausltke9ElPMYHRsQPZ0qeqk2aKj6l/Gw8ujz+pbLNX/t+tBwDH1TReV/lMc0osnLRNbbv22DcbYGJSMBRTn58X7kKXpL2Vp4Ys64EW0I3SPqNpN9L+qcG8aTc2/QLQ4/PbRRXkt5ue/+I+GNEvMxZRv390agSQOXE0tLWuvPf/YOsPpPfp6l+0HUuxi4NPRjkiNzzfT1lg1CSLo6IG1ocXCd3igarpGexr3LgdZCgdL5KObZKB6tPR/pSZ2Wvo5Xnw5+USS217tM6u71o3kEYDHpLeuGoz3CjQe+/RsRfbcv2XUqDq3ZP8iMlXS/pCknnO8vWV68UKpqexy4Z75L+4BFZ79FgCypJH5D0hMEkd0lSO1OZIDmuM5yVX/4i6WW211V2Zlr4eWmwRrmX7C+pxbY1vQZX1pT0gzJAEcpB9Uttny5VJbYeoew0LvJcRCxSkWqunMnZ+0raSdKpyoGL7ZQdwJoVX/tJ+qQzsdzKgcgW7bcDJJ3r3M5IysTk2glIKdvFw0mbf1euBvyL7dpkzl4TAL0mC1+lHBT7m/KecrYaDMQ6V1jsrDzfNlYOuJyo3CLoLOVe2uP4XbmeDT7Tz5VUPdE0pEeCT5ckp2IV2/dW9iWr9nofocu5HFny9jBJh5X3463Kqnu1yTIH
KJOGvhARV5XJzdnKwC+p82y/SdKqzj3rXy7pSw3iduvndE64nO7Ixf/IzCJi31YHMoOmCZdRVvtHxCaNj3Nxqt7noum1qGPy8KD912Qr8hn0ah+eZ/tIZX8slMlw5w4GlscYSB4kNz1AOb4wqJz5DEkX1x+ubouIFn3zUW6Mim2RZ9ErEbfne9E6EfdY5XVsdeV97kRlm+tZynHEXSpiD79Gs4nTmZLJpAXbLtXqNa51haRX2B5eNPvxaFjlZMDtVte/V3ntuUpT51koxzBaGnu7pYGy+O0NkkYm4ETEfqOen0P8rzqrvw22Otk/2m118jVJn4+IsyXJ9lOUC4s/pZwIHndMrsfiVqnDtbP0Swdt5Js1NUF9q3LbxBq9xoekrIZwuqRNbX9L0rrKCiXVbH9c0mqSnqCc03mu2tyv91QmXg7m5M4rz1WLiB+V5J5LJR0u6eHOE+VNlWOTPcYvdpL0sIj4hyTZPk55LWqRgLNaRBw/9PgE269vELfnudxlMc60e/ZKyjHKqoSv6Lj1cvErZfLjlZJk+8HK7RfH/myX+YrB+OBLRjw3Z0sj+bRXolpkZeRTlAup1lYuCD9P44/jvE7SizQ6OatF0pQkbRoRe9jeU8p/g91kJXGzMfBBwm5EnNfguGaztnI84GItvABs7MIcE7EFlUeUuBv13Bxjvlw5CLuuMpHjlIi4uu5I+/Po0k3Nto2yfWpE7Lr4n1z2bM+LiOps8Rli31lTkx7XtOo42n61pDcqO6g7K1ewnBARj62M23yv7J5s767sGJ2rvAA/VtLrIys6LXds/4cyaeFLWnh1c/OENTcuh+jOe4ba3li5/2aLxuthysHpFqsIhuMusk2IK7fisv2MiPiS7X1GfT/abEH1BeWk7AHKBtUflINmO9XGHnoNS1o5IlpWRmhyHnv2csvRIvHUpazu0GMrEwK3meXXliTu8B61q0taM9rsUXtPZeP9ScrP9FeVA2Rj70le4m6lTGB5sDLheV1Ju0VldSdnRY8ZzbXh7Kza8GjlZ+KDQ99aS7n1Z4ttvuZJ+qNy4PzU4U6j7dMiojr5u3Q+FI1KF9veTZkQsolyZc+jldtF1JaPfasycfGL5alnKAfh3i/pqIjYa6bfXYLYXbY8da5u2lXS1yJLqG8r6b0RMeu5uKyUpKlvSjomIi6a9r3Dx03mLEkVRynPhT9I+qmk50fE9XVHvCD+lyW9Ulnla6uS4LNfROxYEXM15QTWU8pTZyurAFVX7ivH9zZl9dCXl/fnP1v0eXqdyyX2RsrJoT2Uiw1OjojZVsUtM2WAbD/l/5+V/3+fiAaDDT37OdMSLs/WVMLl3jVjDb2UPslhMbXV9dqSXhcRb6mM+0NJDxn0eW3fRdIVEfHAcc5xdy5v3tO0a9HgXH5XVG6lMmrSuNVEci8ztA+fW9vvc27ZPpOI8Ssany9p5yhbTzm3eTwzIh43+28uNu7ByoV7X1DjsQDbOygnB8+ZFrt2W7mm92lPbSPzavV7L45Rvg8HKttyr1b2fV86ZrzhbVN+FhH3Hfpek21U3HiLFtt/VY6TjeqbvyYi7l5xuN3GtZxVcO8kaTAGsrey0tXY23D0ZvsaSQ9t0c6cIX7TtkuZKPydMsFreEKoxWfvS8oFBqdHxJ8X9/NzjD1qS6D5EfHQys/KVcqFMScpF7eeVzvGV+I27+MMxT6kdmJ3RMwu40ND8VdRJrdabedGBufA4M81lBPtVXMj015jZUmrR5utyQb9hZ2VSWXHRFY7uY+kb0fERhWxz1Xj8YuSXLL94PpQ7uHnTh8XHzP2e5XjC5/VVBL12irJb+Nek3qey7avVLbrj1OOk10yap5gjLhvH3p4m/L+empNn8Gzb73Uon14VURMr8a1yHNzjNmtj+OsiL91bZwZYs/XwolqKyur+bX4nDxe+dl4mjJp7+SIOLU2bi/uuBVliV89Bl4+xzOOLbX4fyuvM/LaO9f5i4ViNhgT6872t5WT8xeW
x4+R9L6IeFRFzEOUJ/+c9zSfJeYpEbH7TCdEow/wFcqb6B/K43UknTe9QVsRv9sgcivTOv83KldFtuw8bq+8KV+vvOlvKGmfaF+KtMnktzvslV0GHl+pPI+PUFYMGZTrfWfU7z19haQnR6l646wQ8fUWk6c92P7piKcjIpqUT/eIcoiSqsshtu5I9x5Mt32LcoXa35SVFpxhY60x4w2uFW/UiA5Ci86v7U0j4se1cZbgdR4v6W6SvhIRt1bE6TJxU2Kdqw7ncQ9DnZknS9pImZ0eknaT9LOIeHlF7Fco94Uefo/3jIiP1h11Xisj4sbaOCPi3kU5ubtgcEXSSr0GJMdVPgfbK8tNf3zoW7dI+lJEXNvgNRZs5dSa7f2Vq/0G2w5tJenAqEw6HBq42k7Su5RtgrdFRHUZYNtbK6unSdkZa7LSvuMEQK/Jwq8pk9KGP9efjYinVsZdo7ZNtZj4qys/y7c0jts8wcf2bhHxucU9N2bs4yQdMNR/alZBtOO5/F3lZNbnlH3WquuSl85Wu8316OcMxW6acFkG7V6orCbzlRiqdmL7LRHRompWs33Up8VomnDZK8GixB61rcyuylWyTbaVacmdkodn+iwPNPqMbCLp51q4ffiwiLikNnYP0yfUS/t2fkRUVRDtORZg+wRlqfqFqnDU3p9a36fLezB9O7mBVu9F00Tc4cl42+8e7u+2mHgrcc5Vw4nTMgHyqhixLY/tn0dE1bbfvc7lUYkPLZIhSpyNJG0WEV93JrWs0qJdWxItduvRDu80RtvzOjSYKNxZuZ3TZyWdUTOJPBT7q8rEus+Wp/ZQjsE8TdIl47Zf3G9xa+9FDD22O+nGWZFkYw3toBERn24Q9+KIeKTt7yjnGn4v6aqIuH9l3EW2JlNWtayqjGT7PGWlns9HxF+mfW/vWLgizFxjNx+/cFaxOFS50MfKc+7AiDh53JhDsUddiwbGvib1GusssXdTVpP9VuRuIs0W45T4a0hSi/uJ7YMj4mB3Wohq+zPKJM7hbdrWiIg5V4oqfbL1S6znaaqNuJayCt4Da461vEbP5NMuiWq2r1dWnDpFDebjPEMy1kA02B3AWcH4LZK2UCa/PUbSv0XEuZVxm42Bl/aglFt7SVM72Txf+dk4sOZYe5qUBJwtJX1aOfloSf+rPAmqVmaX2P+khfebHHt/L9v3johfD50QC4ksJV7F9guUpQsHg9G7SXpP5c1+sBLEyhKtO5a/V70fvYzo/C90EjfoPM6T9LwoKxWc+3x+JhpU2+kx+V2O94nKm8Rg1c33o2IrH9unKAfbVlUOuP1AebN7pqR7RcTe48Yu8RdaBeFcNXtFNEoka832Xad3Pkc9VxH/exHxcGc5xA2jlENscNNv2pHuOZjew1IaKDxPOdFyiaQLJJ0fpZRjRcx1Zvt+TUOz18TNcOwO5/GPJX1H+f5eEBFXNTjWUZ2ZBaJiqwePrtrXJLnV9o+UE70nKycL/1gbs8RtumLB9oURsZ0zqW74Hl2VVDcUf6MWbapZ4vfYj3zBQLTtpyoHht4i6fgGE6eDz94hkq6MiJNanXO9dB5Ebr5Sb4ZrZ/V7bPuuyqoh08+32oGVUdsL3CRpXrRdfNAswaf1dWhanG4VRDtOZj0gGlZ8sf2IiJjnDit6SvzHKLc+3Ug5SD+43rfolzXt5wzFbppw6awAsJqyhP7eykUyry3fa3Uuz5e0TUwlGKwq6dKoWLU4FLt5wmWPPpTtr2hqW5nnKasWnaTcVuZJEVG1rUzp+/8/LTrhNG5Vli7Jw0Of5edIupemBtL3lPTbaJAAXz5/z4yIX5bHj5P0kXH77LafHxEnzHCPUkR8YPyjlWy/WVnl+gvKa9AuygTGQ2ri9mT7mqhMEFpM/C6JuD24cSKuc2u3w6ZPiNm+v6RDo2K7haFYg4nTf1YmUVVNnDq3m/59jKhabHu9iPht5fF2GdeyfZkymeXH5fH9lBPVtX2cFym31V0nIjZ1bgv4
8YjYoSZuiX2qpC21aPWp6q3EZ2i7LFIJZnnjTCR+onKri6fV9tlLzHsqk2a3K099S9I7lP2S+0bZCrwF26tEo8rOPa6dpa/+SE1td7KnMgnpTRUxDx/x9E3KtuEXR3xvLrGPl7SppMuVCS1Stu1bfEbeqrx27iDpI8oxo6OjcjvxwVicc2uyrVS2Jqsdk+yt0/jFvZXb60hZ5bu6IndPvcY6e3Ju33S8pMEY/u+UC/i/XxFz/4j4kO3tohSiaKmMP71MU4mA50v62DjtAOeOAP8maWstvCXuLZKOjTaJIT3HDbskqtleKxpU3hqKN9v8RdSOHZbXWEf5HizYQUNZyX+2hLslidt8DLznvFaJta3y/vQgSXdWbh3255o20SqL/5FlLzLRZkvba5XHLcrHPUPSByTdR7lKfyNlksHYg1eRyTcrKy8yXfbsi4hP275UU/u7PSfqt846TlOT1BuVx1a7feSairKXfBlwfLmyIR/KydmPz/KrS+pOwwPekft83qlBXEnacbhxHRF/sL2T8gI0rtZ7ZUvS5pHVnCzp18rBzLB9oXKFQa2v2D5bue+7lEkiZzWI28tFysb74p4b1yqlcby7pvYPr1YmU86b1pH+pDIbeZx4XfcitX3O9MGUUc8tqcG1oqeIeLxzy7ptlIPrZzorGsyaRLMY8zR1Tb6vcuWNJd1d0s8k1fy7VvbQXqzlOnqXinjDupzHygzsf1FuVfefZWByfkQ8e9yAUZFgswRWtu2IzHAun787twgcEZvbfqSyKtmbbV+trMJxwmJ+dSRPrVhY1fbDpYVWLKxWcZzblT/XHDfGYnyiDMg3rUZSYvXaj1yaen93Uu49fZXdZF/dX9o+Urma8L3OFd8rNYjb04NGTQDUBvWiq0M2t32TMjHphorQ/7B93yiJ6c5k+xarGI5XVhd8qqR3KlcgtdjrfOvy9aXy+OmS5kt6qe3PRcRhNcGnT56W03isBB/bOyo/E+tPG0ReS6O3YBjHSrbXjoUriLbqBzc9lwcT1JJ2diYDLmTcCerov2f2MZJeo2zD3L6Yn52LHv0cSVJE/MRtEy4fORjgt/1hSR+1fZpygqXFtV7KSZtzhgbi9tXUlh9VSsJNkypnQ3r0odaLiCMkyfbLI+K95fkjbO9XEXfgc8oxhU+owbk81B87NhomDw8+y7bfHwuXZv9SGS8wY8J4AAAgAElEQVRq4aWS/ruMnW0l6RDl9Xpcq5c/u7QPI+I9zqoWj1Xeo/eNiO+NG8+dtwEoLrK9RYMxvYW0vE9Pi3NXjRiDq03gKA7S1ELD2Z5bIoOJXNsrR8TtQ89fp2zft3C1MuHr/5QTTv8t6UfjBotZEm9rk2+KXuNar5f0TefWqoNx5Rb97VcoExa+K0kRca1zEW0Lp5evHka1XVpsyflg5djIcJuluhpJib2qsvrdHsrzoVXb4nfKSnWjVCXfjGrDKftTNTG7XDuLnbXwdifHKaskjJ2Ao/y3P1BT18ldlVV7trT9hIg4oCL21pK2GIxrteJcgHtOGcs51fYZku4abbbnvlOZv3mWcmuyv7cYbinJf4do0c9fk6r4yuvcxsr+6Va2qz7bzoURl0fE6bafL+kNtj/Uoh1axjd31qKJ6lVJ1K3HOofZ3kA5oT5YaHCBsmrmLypDHyXptRHxzfI622uqgta49lVuxXW42s05LVDaax+U9MEyJrLBuG24iDhO0nG2d41O2yv1nNeJiM84KxkOEtXe2ChR7R7O7Ws31sKfkbGq4A3mL2xvMj0hxlmttIUvKeeszyxxH6S8r9QueuoxBm7bj4lSbdhZqa3l+PeHldehzynvgy+QtHlNwIlIwCkTCbuqnLiD/6eKwTFJercyq+vrkauGn6AsWVQlIm63/Q/bd2vUeBj1GlcrO3qt4i2YVC9ZZMtd0s0MjpN0s/KmJOUKuOOUk7815jlXMA6XY2s1iNVj8vsq288rsTdTbs11UWVMSZlGafusQYO7PK5qfJcL
7eHKG9xgFcRREfGFuqNtr9fk9AjvVJZYvjByL9L7SareSkXq05F2VuJaxLgdhDKQt5qke5aJ9OH3ef2xDnLh+POVyV7VWziMiL2dcpD3scoEmTOUjfmxDSUZHi3pCxFxVnm8o7IjWaPbxI36nce3K7cku1056XZD+apW3odRW3HUZJB/RdLJJSFCkl5SnmsiIi6WdLGzotoHlP9/43ZKn6pcsbCBcpuJwWfvZtUNBA065ldFg7KjI9wzhlbElGTWVoOxj46p/cjfYfv9kr7cKPY8ZxnuTSQdZHtNtZlI3l1Zxvt9EfHHkgj3+gZxe+o1AbCfpEcpV7FImRg5T9Imtt8Z41eMfLOkC51Vz6y85r+k8lgl6f4RsZvtXSLiOGfZ7Kp7SLGBpK2irPp27k9+pnJVzzxJVQk4apvg8ytlO/uZ5dgGblEmdLTwfknftr1QBdFGsVufy7NNUI/dBnf/PbNviohW18ph3fo5bp9wuSDZNnIF9ottv03SNyStURF3gYh4b2nXDpLT3xURZ7eI3VLnPtTwANv0vkeLwbfbIuJjDeJM1yt5eHUPVXMqA7GrL+Z3lkhpz79aWYr8r/r/7J13mGRV9bXfNQTJQUSSgoIEkShZFEExK4KkH0lEREBEEMGEKIKKCHwKiIDkZAARFCQjOWeQZA6IiCLKIFnX98c+d+pWdXX3TJ1zerpl1vPM09Strn0v1feesPfaa0VTzsD2ALaPTevDJ2x/c9QPDIZmv2Dy11hvJp7f9/V5z4QVei7WBu5UdPg+C1PUw3LH5FpE3FOJ+fnI9Hprgky8+aAXqvpE3F8p1E5OKk10Ir6PJ4CvpdfZ30cbkj7nAgpOtfNati9Pc3Sj5vSgy1gZP2v7uaYGoFCJKEIGSIXDWii+dklr+fUJAsAFhHL9tQydBweJfSZBALiIKDxd1ZBECsQuqirXiluraaZqEwORM2wUrefNjAWwErBuQzKUdDSxl3wjkKXMTdghLUw05RaD7f9KOgpYNb1+lpYKVSaOJVRU7gKuVjTNlKjNnUQoOX2TuOd2oFDBV8MoDZH3bB9NkLBWBvYiGiVOJdY1uTiPWBPeQ6GmiAaFc51tnESoZTZz87bp2Nsy487ZkG8AbF+pUM/Kwf2SfgUsmvZ8DYqsDxPhZCNiPL4NeFTS9c5Tzrwyrd8acva1hO3iYznX2qAW+bQiUe1c4pk7j7LPyNkMzTH9CMh2bCHWsecpRCKWI8aLabKgHgY1cuA7AidKapySHgeyVYDasP1rdQj8J0m6g2gKGAgTgoBDeJH/ixgYSk3Kz9t+TNIkSZNsXyHpW4ViPwncI+lSuv3psmX6ZqALK9hevvX6isSQzcUuRKdF8/e6BvhOgbhQp/i9O1EYepYgGVwMHJgZ81aFiseT7UK0pKWIhMvAaJF6VqRM0qom2sXpNpt7MpnF6R783C1p5ZRAzfYirbiRXqP137MRRYDbGXyDsDOwJ6FIdhvdJIBvDxizjYaAdJak/xKSlme6jMXelcQ1HwRcYPu5AjEbrG17p+aF7QslZW34U+HmLmDDdKhk4abKfUzcB/cQz+BxpRbxCW3P7dmATYhicA4+Q0hl75peX0okhbKhUALchGBjL0V0XK45aLxWx8Kne5NJuUz6REh+UC3VkIKopUYCkUwAeErSooQf+SKFYu8IrAL81vZTkhagQHeo7adozae2/0LhRFkpjAGxdWZCkeSv6XwLEXPTWoS87kAEHNsXKWwG1k6H9nQfe4AB0MhL/zMlFh4BSpDJXk73nul5QjXiaUkl9lLFCD4OpdO7FNYKXWtihWf04bkX6woKoj33cjsRkqsg1pA3L2u6elrnXLfPR6YW78347NTgCkmHEGNR28rh9sy4NfY5DUoTLm+V9E7bU0i3tg+Q9DCRCC+CRHSqQXYqiZp7qJ+09qlTVGQVtjIDK0+0cJ6kjxFrrPa9PLAFbEIt8vAnicR3W3kiiyAq6Ty611VzEPm4ExRd2QN1cMKU
9eFWRBGrKNKcsRORoBZwuqTvOikmTSscdrqTgAttn1nwUoEpjUk7AzVsVWsRcWvk4GoTcVcm9k7Hp7/niQT5rYQ9QK2cZIPNiTxDLqqMyRpGHQp4TRorcvN9V0n6PLHWehuhvnTeKJ8ZEZLOdKh99yUmFyC/Qffa5XvE2uUrmTE3I+7lO2zvkPY4JQrTEIXCrdxSiiqIoqpyLdRqmqnZxHAQcIekLruTvMtlfoLk3ZBM5iQs2/5TYM/3MuA+STfTvR4aeA3QwuWSNgV+bBdV2DnP9hQip6Q/UqYwO3siGioV5/dXWM1lWWYl1FAaeiHVXt5PWIeeoDIqkRCKKcUtvUrnOnuwoO22jc/JknLUoRr8VmGn1uSYtgWymn5tb5XyDBcT66LSmNf2E5I+QiiSfKmH6DMIfkDk2pr8/zZE3WXDYT8xlahJPqUeUe2Z9jiUC0nLEWpv8/asu+ahWwFuYNj+mUI97FKiEWwT2yX21cVz4A5l55UTAQeXF0B5SuF0cWeqwf2FTMKlyo7vdaBCPu89MS8jlAS+DixAdNOvYTtHJqyJvX2/45UZ9kUg6UO2T57e1zE1kHQ6ISl4Y3q9FrCb7b7qHFMZs2bHfnOOd9KZhC4tWPweE6QFZ64KzinE3+6WQpdVFaoop5fi/4pgu59EJPhKyNNOIjZzB1faSLfPNR+RxHpnZpzdB02QTsM5lgb2A7axPVOBePMRMpbrEcSk/wI32N6vQOyLCQJgW41rPRew2amBGvdxivt+gkm/JvAc0T12te3LS8TvOdckQsEney1QA4ru2HMJAtkNBeMO8UuVdJvtLCa9pKuJzqab6SYkZ20k0zz6XaCtRvLREvOpKvmRD3OuRYB/uEyH6ISAKvtES7qvXQhJxa17bS+vPl7B0xD3QGB/d7oL5wEOd6adXUp8nA2sCJxMJFD3a5EwBo27H5HA+kk69D5CZv8wQnUwq5tF0gPAik7+9Aq10rtsLzfo9zzMODTw36w2eu7l9nq2uZezlB2H+T6KeVuXRiom9MIex8qqkm62vaakG4EPEITLe22/Zjpf2rBQBU/ymqi9h6qBtNbqhZ1pM5AKNZv0kIfPKfFMpzG4yV88kLuukDRi4tmZVnaSvgnMQiTn2+vDLMJeKiCsY/vf6fWcxL4st1v4VnfbfBWDpHscjUml4xafp1Oc4jm4VuxZCCL14h7BiinzHG8mCBHzEV3DBzosqQaNV+37SPGKzvulx2R1Ggv7wc5TlW325jsCbyf2fBcDx+fkGSQtYvsvaQweApexaHl97njWJ2azZrmNUOCYDNxfKnetegoD2TmFYeLeZHut0mu4WmNnK/4idBoab3am3UkiVXyBaAxsSD1fI0jr+9seWBV3uLVA7hogxZ5MkIVeIBqgGnWPrPVsxbzW9URO8keEMt6fga/bXnbED05d7LOATziaqIpAodp7EVHoXo+oed5VYr0h6WDCQuyS3Fg9cavkOlPsy4k89ffToa0Im9K3Dv+pqYo7P/Blum05v+xke10KJecUBfn07YQYwL4Otcu7c9bK/er2pda36Xob8unKSuRT27nqRVPGC4Vi7Z8TUS173aVQwFuaUBDNbkxKNZGNCUJW2zpzMlGLG1hhT9KRdJOR3wr8hlASKy4mUiIHnv5eQ+A8p6R2/CWAvxK5lk8SanXfydkvTBQFnOslrWg7Vz6vjY2ISX4PgqE4DzFolsCPCLZbk6SfiXyboapIhIg9GvJNmkQOy90wVcZqxL3RdNYvDjyYBmcPMnm4bsd+c46LKGhHkhLe/To3iia8Fd1jHy1UVF8L2EbSH4ikWymp5Vo4P02gr6JbNrXI4E54CW5IMPOPUCjXnOwMtqlD1nNz218b/bez8W8g23fS9pEK78ZX0f09l9j4L0Go4GxJdN58OjcmgMPu5bfAK4lumTcQCeUS2IqQOW2KeFenY8WgIKM+T3RFnD/a74+C4vcxgO2fEN3OyxFs9z2Jv9/smdfbD0tTRn1i
CtL8+hTxHf8iI85MRIfQpwpeW20mfTYRrR9cSY1Edf3I++E0YClJZ9veu9I5xhVc3yf6yvR3a9S4NkvH5gT+OfzHRsXMhBzyDsBChDpbFmE03W9PpATN1UApD3lsHyjpQjo+57vYbghPJaRkzwBuktQm+Hwvfc/T1PmtUELYGni1pHZCYW46Eu3jDq17eVtiHf4qOmuXFenM3dMESesQa4kFJe3VemsegmxRFKXWAW5ZGpdE5X3OeQoi9SGEkqOB4wrEnYJm/1QwZHFP8sq4XNL/I5L/EMTZA0rPq5LOt11E5cnJCrYC+lkZZt8bGmoNvLJCeWLg/VNTXFOoIf7F9jPp9ezEHJiLVdLP9l7adFTKBoXoVlj4TzqWi8sk7c1QwlCJOep2SWu4fGNSsXm6B8VzcC28EziUSHi/WtIqxHiRS9yfCXgPUYh8FUFGPoN4Bi8gbwwt/n2kIqSJe3cRddStsomAlM9r3Wn7cElvtH1t5rUNgUO9+TgKzs1NkbsE0WYEHKZQMPgRYYM+cA6ghVvTmuU4QonlSaBIkVp1FQZqqcqdn76Pb9BRziqhOlx87FS3UibAQ+nnopIWzSmsp4LxBXSUQj5vu1F0zrKkLkG0GSF2P6vdgTEGea09CDXATxBKnBsAfZvvpxbqqA3OTXmloS2J/fWOth+RtDix3ymBG4FzUi7jeQqQp2rkOnvwYSJ306gvXkcBJWpCDWgs3E6OJ9+qvcEBBJn12kS+WRL4VWbMSyT9H9CoRW6WzlECT6f61guKJrhHifpLCUyW9DmCF7BeuqdL1HNWBLYj9jWNC8XA+5xWTWSd0uQ0upsioVuJsgZK5MD/3frv2Qil5/uzryyhtT58hkJckXGtgKOOJOTMRFHst2T6Iku61vYbE9u2+Z9vNuX/JRK9h9ge2HJIwcDe0B3JwrmASzxOO+qhf4dpv2PjCRqmY6HBoBsqVerYH+Zc2UlvSW0m92yE5NsLtosQDFrnKdZ9M9zfrvImeGBIuoiODd6UpJ7twyqcawNC8WROwrf2s4NOsJIOJTblRWU91S1LPonYqJ9pO0s+VcN43+YuaCXdRCyizkzXmSUJ2RP7t8ADBNP9GqKLpaQNVVUoLHYWIeyujioYt+R9fDbBeP8NUaRuvudnRvzg6HGbJP2TrcOPAJ8rSQyQtAaRjF3T9mcyY91ge50yV1aXSZ/i9y3c2P59TtwUa35ibdju0ru6QNwxXfuk+3B52/eO1TnHAxTdhJtSmNiavs8PEF1IEMmVs0vMgZLeStjWPU6okQ3cAdGKWbOr/o3A0rZPkrQgMJftfsoOg8ZfnQ7B57oWwWda4yxBkHgPoluGfTJwt+0Xsi60MhRqdY8TBI7sNaKi23R9whL3mNZbkwlZ9dwEWe/5qqwDUuzsbr1a+5yUYFu7mefSmFSccFly/5Ti3Wp7dbU6Fcfznj2t4X5Bx3J5O2Bl28PZlgx6nqrfgaSFndmlnuK8jA55+MZC5OE2GXSKNbDtzQrEvpWw+XguvZ6VGO/XGPmT0weJtLg9HQLkxkQzQJbVvCqpIqXYDwCvIWyoijYmlZqne2JWycGl2LcRxYkrm+dZBTqo0379CuCE3r2NpCNycg01v48Uv+jYVjqvJelO26tUmOv62kM1yHk+emoBXW9RQIGjdZ6FgS2IQvg8BBEn14aqif0qYB7bubYhTbyaCgO1VOVmJyy/30RHeeLo3BxRil107FR/lcgGdgapPO19twGWdNieLg4sbPvmQWOOcr4ixHJJl7tHfaTfsWmIV1MhYiZCWb5ow5Qqqw3WQnqm3w/cU7jOUDTXORaQdA0huHAS8L3S+8jWeYqtBSQtYPuxErFaMRtFq4b4PolOPTWXnPUdwirz/4BPEfn7O52pRJ1iL0wQ1W6xfU0aP9d3ZhO4pF8TOd6itSFJryCIZM38dA0hpvHQ8J8a6DzzA68stcboE79oDjzlcS62vX6heOsC+xO2zu1c9cDrlvFOwKm6mRnm
nAsA1ztDRq7ZhIx2bDxB0l3EIPN4ev1S4Krcze5ExHCLoBqLn4rF75ttl/LKbGJe5HyLoZeO9L7LdJEVhyrY4PXEX4Bg3G5HyJydQGwaVgHO8oCdmD2LoKcplFToeUZeAP5QYsKXdD/lvW+RtKzryVhPcnRm1Yi9DLA3QwvU49LOoXUff5AgspS6jz8NHOPwqd2PIEgeaPuOAtdc9dkuDUlHA4sRne9tgmiubU8NJn21wo3CtmcPQnXqTqKYdUOJZ6MGcXGizn01UZPYmvYPS9u+TNIcwEy2J2fGXI/wiD6d6GiZn+goe3jED44e9+vA3yncVa/oZF0dWNb2Mmm9eZbtdUf56LScoyrBZ6Kg1jwiaYkae92xhKTjbO9UIW6Rfc5YEFdK7J964l1NqA0eT6y1/gJ8yPbKpc5REmOVF5F0oisq90r6me33FIhThTzcc44i1sApVr+/312595ukrwHfcCgONt/Lp2x/ISduivV6OiTcawrtF2brLer2OzZg7Jo2OBNqnpZ0o+2122OzMm0RUowh6iyS1rV9XU7csUAFAk7RNYuk7xPrzcWANjE9i0jWei52Sz9PSz+3TXGzGr/GEpJWJNR7t7Q96wCfH5HY5AK2JKpsb1UDCrXlyXTs2rcG5rW9RYHYE2bsTPmh/wJvsf3aNJ9ekptvGeF8q9keWCFB0myEkswVRMNB0xA/D3BR7j1XMa91o+21R//NgWLPSUflYxnCUvRCJxu0aYzVT3SggSkjOnA1UT8smgevletMsZcEDifyhibyfZ90gebc9DfbAdicaOQ/yfaluXF7zrGx7XMLxfoVkUM9ibjPxi8xoAelyae1IOlc4KO2Hy0c91LCRrW9JtrGZciyVxIExpmJHO2jRO5+r5E+N0K8McuBp3nvFheyEVc0R3ySobnqgYlr49qCqtl0SjrN9nbt9xRKCdv1/WDeOR+TtH5mmH+r1fGn6Nx7Ovvi6uIw4AaF9yTExPHV6Xg90w01iDYjnOth4GEyJL56BrVJhAzuvJmXNgSFkse30ZHTXZzoGBbhwf1HyLcxqoQaNnht3EBMoBv3EFlulXTMMJ8ZFS4s69mKW+sZ+QWwMFFMKIZe8o2iO+IR2zcVCP91SV8hxviLgJWIhfzpI39sqnAW0f1+PN1y6tMMDd9FVtL+rbmPN7L959bxrPsY2Nb2N1Ii5C2ENPkxhJVdLm5TIdl3dStDDYHLqKjNRvibt4kmBnI3pR+VNKRIWqCYNbNbrH/bzyUSTi72ILzTb7S9gUJyuJTd3s7AXsALkkr5kbfnvl6YgtZDEwivKFmUbpDu448CLyUU1RYjxossf29i3Nnc9n3pPB8g/N9zE9Nbpp+7tY6VuCc2IciKt0OsNyUVWxO0CT5E8mYWIgE+MMGnJ1E4a4r571zS8Big1hrxKUmHEHLq7WJ9FtFQQzt6StlaDEEJ8k3lfc7lkjalsFJkGxXGue2I7+HjRGLolYQq0HjF0+3id7r/iudFapJvUvwS5Ju+5GHybZd6UcQaOOFvkjay/VOYsofKVu0B3mX7880L249LejeQRcCRtDZwbysPN4+ktQrs+a5nqPR/v2PTDNt/kLQyoeIAQRq6KzdujXl6mPM0ku9H2f52Zrh7FfZIM0lamrD6yFLjTDiCoX+rI/scG48oTRIqumaxvZWig/xionBTBK1awNt6CEifkXQ73YqJ4w6SXkus8Tcl9u4/JDr3B8FIzQklrPugor0VgKQVCNXs9no2195qBdvLt15fISnHXg+oO3Ym8snHCJJoo9pzTCaZcy3br5d0B0yZT0vkW6ZAYfti25NzyDcJOxOW8osS91qTH3mCsFnNQi/5RmF/9hihiJujqnqHwiq5ODmEUPl+U0OeAm4hxo9pto22/cb0s+++X0l0ABiYgEO4k1ypsLtuW2b9v4yYUC/XCUFaOIrIkUCoqXyfAjll27+U9AXCyucIYFVJIuzgchslFyP27P9QNISVIO4vQzRzfBg4
IpEZT7b9ywGubznbDwxHFM0hiI5EPlWmyu4YENXmAx6QdAvlbOUAXm77pNbrkyXtmRmzwbyOpuePAKfa/pKkHKJTtRx4T31rJmBBwhqwFP5l+8KC8cY3AaeF17VfKOTfVhvmd7Ph5AmbgT2BsyQ9TNxoC9NJsI9L2D5V0aneTHQfaIoMLxYMM/BOQU4BoHLxuz2ovQD8DtgxI94UJCbvPgyV3RrUt/DVKe5xwDm2L0iv30XIRY4rqNsGbweFfHGWDd4wWHa4xL/tgwcNmhZ92wCvtn2gpFcCizhTjnSYZ+VfxILzU9PKIldd79t+WAtYUdLMtt+VGevttj8taRPg94T1ydV0OnFy8ILtowvEgfDErI1tCGnITSS1x4uVcu5jOuSj9wDH2f5ZIj2VwFrANpJKyL4fWuiahoULyGwOg7YF4mzExjRL2SOhVuHmGdvPSELSS9Kmb2DlwgYKS5J3unBHrAdUf/ofRy3Swm7AmsBNALZ/JenlBeKuY7vd/fBjSVlk1HS/fdb2D7Ovbiies21JTueas3D84gSfdqIwrV/eT8eqZdxhDNaIZxAFm/cSdlTbA3/LjAmhTjeko6cEEjGtKSpca/ucUT4yNai2z6EO4bL4/qkVdybga7a3oaAneWXsDpwgqSFNPQ4UsfkcSzJZIVQhD6u/NfBZw39imrALcIakRqn3T5Rpgpsprd+eBVBYibykQNyj6SZVPNnn2FQjEQsWA2aXtCrdnfpzZFxn+xx7ADvRKTCdrrD4OHKEj00NqhJxGzgUFxagzHy9O7AvMZd+nyB1DJxQl7QO8AZgQYU9WYN5iIT9uIftj5eIU3nN8jfgF66j2ie11IokvYEY52qcqCSZ7ERiDfcOZ6pl2t4g81pGRFpzH+RQJDtGoVJa0t7qS4TSyfLABcC7gGuBXALO7ZLWtn1jOs9aRD4yFzXHzlMJ1Z5mfN+aaGDbPCPm82l92Oz5FiQUcbKhsFI/kcjTStI/gQ/nkHBsHw4cLmn3AvPc1EDE3mQb8kiCNckhsv2UpB2B7ziaD7OJuP3gMqIDv0v/Zk3/iqBirhNgDtuntV6fLmmf3KCSViLUb94DXAq8z/btCvXhG8i4PyQdTNSS76OzZzdRbxgYqfZ0KXCppA2I2sXH0j332V4S2yjYi2h+60cUzSWIViOfjgFR7UsDfm40/F3StsQaGWArYlwqgZklLUJYZ+6bG6xyDrxd33oB+GsmwbIXVyga4X5Md21yYNLXuCbgSPocUcybXdITzWHgOeC70+3CRoHtW1JCpSkGPegBpOPGGolw86Ii3bTRDLySDiQUOE4j7rdtCJuoHFQrflce1BoFjuMom6Rf262uWNsXSvpGwfilMBakBYDVJO3L0ARybvHmOyQ5UiJ59STB+s6VI/0W8BDBIhfBHl+K2KSeSGywpwXViQttuNVxWQCzpJ/vIew9/hU5jCI4L3VsnEP3pD/NUn29CbHUxVJ6DXA6YZn1Cwpt+hP+LOlY4G3AwQp/z1KJt3cUijMm6mmpqHc0sJDtFdKGbyNn+snb7iqGKeTErx3m16cFTeHm28RY8SfCoiwXDym69M4lNo6PA9lJX4fs77eJpFtxjNTFkc6fLSE+3jEGpIVnHUpLzflmZgRy9TRgKYUsctezBwz87KX7bR8iQV8aZ6Zxcz6FKtCHibVcKVQl+KTE0LkpaT9eu5xrrxEXsH2CpD3S/HKVoosqF8U7egAU/uyvoZMQ2lnShrZ3G+Fjo6LWPqcW4TKhyv7J9n8kLSFpVhf2lK+I7xLy2M0c/R6iWakEkboamayNgkXZKuRhuvdRxayBAWz/Blhb0lzp9ZMl4hIEw8slNV2cOwCnFIirdlNLmmdz9jvvAD5EqBYdRoeAM5nIU5bAjoSCwb9hSsHlBjoF2kFRZZ6WdLDtz/Qc/nSfY9MM208RSf/sxH/CrMBcxJqzXWR5Atis0DmqQtLrbN9bIFTNfOR/JC1eaW7aETgxkThFkDirKJ6V
JJPZXqfAJXVBUt89tDOVZNI4cQFhsYvt3+fE64PNgJWBO2zvIGkhMprVWnvJWYiGjj+m10sADxS43pp7nBqqPUcQ+cKXS/oq8X1n2zkmnAB8zPY1QGPNdRKh+p2LRyTNbXuyQg4jWOsAACAASURBVD3k9cBXcvIhaW2/me0zm2O2jxrhI1MbdybgMdt758Ya/hRah6g9NU0GRYmGJQmGtqs0ANTKdSZcKOmzwA+I8WJL4AIlpdVBcuwJRxKK9Z+3PUXhMxH3cp/DjYlm7WdH/c1pQJrntiUI9X8lyM8/BVYh9rBTvfe2/dH03H2h9J66Nvl0lHNnEdXatQFJ77V9/ki/Pw34MHHPfZO4j68n9iklcABBfL828RqWBH6VG7RSDvwr7uOU1HssA40y1uqtY1mkL3kCWL1JOsj256b3dUwtJG1OeFcWW0jMwNhBfXzN+x3LPEdX8Ttjsm+6TYeFMyTvJN1mu7jalKSLCbnNZuO1DbCe7WLF8IkESQ8SnbL30CIu5HYRSbrdSY7UHR/17Ht5mGfkTturlH5WclDz2Wid4+vEwvhpQnVhPuB829lSlpL6+UzbGZ29knYmOqafoVOUzorZin1twyQvCUlzAO8E7nGoWSwCrGj7ktLnKgFJ7yUIb72EumwbFYXixj7Asa1n+he2V8iN3XOeZYGfuZyHaunCTTv2mwk7kotKJH0lHUrqVmkXcUpA0o3EmvBu4r5YiejUe4a4R0rbUIw7SFpipPcLzHvfAP5JEL12J2S+77OdVcyp9eylOeTvBAmnLWedsy4UUSxcDng7ca9d7EJe5Cn+foQqwNuAg4hkwPec0cnYM2dPIja8b65RxJgIkHSj7bXTmvkIQpXsR7aXyoz7daLzv1hHT4r7APDaZtxMybh7bb82M27Nfc6U9XFJ1No/pdinAq8lEqXtMSNX9r0KUvLuLKLLez0i2fs+2/8qEPumEuvtqTzXAkQDyc8yYpxDEE32JBJ4jwOz2H535rUNIUQMQ5IYJPa8RBfneunQVcABhf5+7yQk8AEutX1xgZg/Bq4kCjgQa4ANbGcp7Ura1D1k9VJIxeQ1nCxIFBYlt9heMTPu3sDSFJynU9zbbb++59jdLqAMnIpvewOvoqx62BK568vphX7f93hE7bkpjUWUGHtaMV9mu4Qyazvmmba30FAF9OxGA0ntZ3c2wl73dtvZZDJJpwDfdgFb7j6xb7a9pqTbgA0IAuP9tgey8R2DvWSVsTPFPp34ntuqPbvZHqhBKa211ybsUt5K3GeX275/xA9Offwh6+RSY1IzbyRSz1eAQ4Av5q7rJN1qe/XRf3Oa495Qa0+qsBbaG7jO9sFp7byn7U8UPk/2WjbFuYI+DU4F5upquc5hcusNiuTDS0Nh8bV56RyqpF8SggMnuYewL+kzHkC9vtaeOsWuQj4d5lwllfCamMXWcZLe5Z5GKkm72D6mRPwaqJED7/1OFc0Wd7ub4DquMK4VcFpYVuEJfZHtkl31tbCf7bPSQuKtRFfS0RTwFpyBMcG/JW1Dhxm7Fa1NZA6GK36T4X1HMKTfAPw8vd6AYEH+jXxJxGIKHD3YikjmNdL0V6djEwIVJuW/OVm0FEYtOdKnJG0B/Ci93oy4p6HPQnxQSLoMeJ74ngdhDL9vhPeKyIXa/mwq+P7L0f31FGGbkQ3X6frem+i8KZpsSviSpOOBy+keL7K+Z0c35I9br/9CqJSNV3yLsCK7pzSBg5BOvVndKkvZUovq2Mop/XwEKFG42YPokpoMHJfY758tQZ5Ka6ylbZ+UxrbFCCncXFSxJEl4GNjJyXZJ0grA/iUSpxMFY1AA+QzwEYLQujMhdX58gbhVnj06FrVtlZCsdaEdnaypeFeEdNMn/ubEc/IEofj5xQIEn/ac/QJh61hkPp2g+EoqOH2K6HSah1D7yEXxjp6EXwOL01E6eWU6loua+5zLJW1KecJlrf0TwG/Sv0l0KzqMS9j+raStCMW6PxJ2HE+P
8rGpRXF5aOhflLX9GJBVsLC9SfrP/VPxYl7gopyYCW9j6JrtXX2ODYITCWXLLdLr7Yh13YjEuNGgUBS4xPZFifS9rKRZnK8avQtBWPwCMT5cTkjj5+IVigaqyYSyVbH1LPF93pQIWhCNHSfkBExE2R8SRNwi87SkXQlC05KS2tY0cwOlup4b9bDjKaBqJelbtvcEvq2kZtGGy9tc10Axad2+wcvltarMTb0kwFSgzSIBSpqU6gqXkOzpFGqDhxe45D3Sz+KKQ7Z3b79WqMH+oFD4krbcvbg1XetxhGLdk0Szy0CouZesMXb2YDU6qj0Q6+YHG8LWtH7fDpW3o1Lhu4T6Ty+uUiiqfp+OasiVKaeTu95q28x/1+Vs5i9LJKpizS0Jd0r6KTFPteNm5TpTvn6j9nxk+7dAFvlmGCJ2EbU6IqfcYDZgU8rkRWrlW2oqqi5NEPWWJ76L5nw5TbNHEs/bU8R915tfzyVmLTvcvncQ8k1CrT01dDs4TCGfkm9jOAQua6vaoOQ6bj9Jz9r+OYBCTfstxNo5C6kBYEfgdXTfy7mqg8Vy4Bojp6QaDSgTRQFnQ6JbaG1isjvJ9oPT96qGR8P8k3QQUYD7Xk024AyUhaRXAYcD6xKT3nUE+/j3BWL/ClinZPFb0iXA9qkojUId4mQXUJMZhiU8LtnBY41S7PEU660EAakocSERybYkEgunkORIbZ+VGXdJ4hlZh3hGbiSKQn8GVrNdwroGhW/qIsT3nC0dOhaQtDrwsDO9vlvxVmDoYn7ghabC0/sDidRSFKmjZzngXjpELxdYsE0opKLKW2sQhlMXxMcJu7PXS9oM2NH2u0qfqwSUFLEkvYMoinwBOC23A0BhS7M6sXlcJo0VZ9leN/+q60HSvbZfN9qxFyNKFABSAuteD9hROUrsifbsVetkHYv4MzBxIOk8Yi04L5Eguzm9tSZws+31M+PX3OdMBuYkkrvFCJdjsX9SRWW5EtDQ7v+XA/8i7XNKFPXSeqsX9oAduE1Rtt1VV7Ao25yjlzw8l+2ByMMtQsRSdJPd5ia6qLctcL132l5ltGMDxL0NeBMwP2F5eith+7FNTtxaqLGelfTq5m+fipmNiug1tu8ocM33OFNFpyfevMTf6yC6rSEnFyIXosLqYZJWs32bQi1zCDwG9sGDIO1zmsaInWkVVmwfUOF8JfNaRecmSWcTJMDGom47YGXbA5MAJV1DFNBXItRN7gHOy92f9pyjmjJZK94swC9sZ1sZahhVmdJkl5Rrn8f23aP86nRD6bGzJ3Zx9R7VVe/tt85qMPB6K8U+n8gfv43IVz9N7Bty1dqrrMHVsczsjZud61RSPc2N0xOzmlrdMOe72faamTGq5VtSnug9DFXYy1Jpk3QtUaj/JtFMtAMwyfYXM2JuP9L7trMsW1PNYl+GKrbnKLRV2VMPc675gB/YfmeheAsTeQsTY9BfC8R8iZN1mKQ1E7FsyrGMuC8DzieUot5J1F+2chkl+LMIIufWhB3VNoRa3R4jfnD0uMVz4KrslFRl7TkRCDgN0qZvK2Kg+BPBoD7d+Z0yRVFrITEDEx81it+S7ndL3l2FJN9rQpXkhWuh9ga6JnFB0nJUkCOtAUnvI2xvihIXFD7TXwMWtf0uScsTRLis7sJhznUKkcz5pe0tR/v9UWJ9CVifIOBcQHSzXusMtQxJq5I6LSnLokfSgyUSQBMdktYgLKiuovs7zpbgTuS37xJqAI8Tii/bDkoQVR0/1nb8Rlr4cOBK2+eUICRLuhNYlZDdbuRpiyUUJM1PSE+3iW9XF4j7fSLh27ZfnMv2hFGAq4kSBQBJPwF2t/3HUX952uIWffZ6YhclWqaYDwCvIdRISney9osPDFZUl3TESO+XmJ8mIlJxfieGrpWz1oYq3NHTKmyuTqindUlZ5xY4J+I+pybSeHEa8NJ06O/AB23fO/2uaihqFJlqo3ZRtjR5eIwIETcA+zg1V0haFzjUmTYM6tgk
7w7MbvsbJYg9/c5RKFbx9WxDNpF0ue23lrjOnvg1LWUawpAJslfWfqEVd3/gUeqoh00Y9BTfDgCmFPJyC28pfo1iU5W5qSIJcD5CjeVkYGWikHU+cJXto0f46NTGL178VofwDKE0tDxwpu3PDv+paT7Hy+nejwy8n5K0nO0Hhsk3GPjHOF0L1Bw7X9rn8OScmtZYFr5LQhPMZr4mJB1NqDlnq+uopVZHqJI1KEnObt/HkwhlpyNyc8GV8y0XEM/HPbRcAWx/OTNus5abQtwrRSZWqEU+Y/s/6fVMwEty64mSHiQIHL3fxbgbj/uhMPn0I8Qa6+fE2PlmIi9yYmbcfmuAUvZ9LwcuI9YwHy5FvFRHTKTZ88xCNAVkkQNr5MDTfvRO2/+WtC3BvTi81D1cY+05USyomqT8dsC2wB3AGcSmb3uiQDmesAWxkDjU9j/TQmKf6XxNMzCVqJXwTvgcITlZsvh9uaSLCVlICMWTyzLiTUEacHelk6S/kvDjzCW9FZUXHgPUlPaG8H0vTlyQtBTwO9tHSVofeJukv9j+Z2bcWs/IlsC3Etv0RNulZFRPJkgn+6bXvyQkSYsTcGxvDyCphPTyZkRC6A7bOyQi0emjfGY0HEssLrsW24VwvaTlbd9XOO5Ew1cJaeXZgFlLBnbI0W6YNmOTbE/ODHnYSKcj35LkNoV6wauBz6XnosR995xtK8nJp++jCNImbA/gFcCdhPriDeR/FxAdMbvSkSe/mrAofVFimAJAbvft/MC9km6mO4GVZTNQ4dkDhidaki+pm60OMobxP0DMz/MTybYZCPwEuIZY05dcKxe1lGkINmmd+XngH8Qa66wSRT2G7nP+j0L7HKhDuKy4f4JITO9l+4p0rvWJpqQ3FIhdDGORxC1NJrP9plZRdg3CznAZST+gTFF2ExJ5OJ3v4Zz9Qvr//JekLwCP2H423Q8rSTo1d7+XsAtwavquIcboETtzpxKStA6RhN0xHZupQNyucxSMVWM9O0nS54l7bK/eNwsQ96tYykjaj5g/msLgSZLOsl3COqS5t9q501y79iZRvz9Du73Hpapzm2SjUOHKJt204vUWm46UlF1sot7c9LSkN/aQALNsDCVdSlhZ/pcgWjwu6Q7g03Tmk0FjT1Em01CrtutzYgOHtv77BeAPth8a7penBZI2IvICixIkuCWA+wkrikHxKSJnOFy+YQGFuth2GeeogZp2XLcTFq2Pp7jzAY9I+ith0XHbtAa0Xc2OVD124pS1Xzy2/be3/RdJ3yCs4QZGIvbsBSxu+6MKi6BlbZ+fGbeWPQsp3mN055sGtdn9HnAhFcnZxDq5UWl7gSDK7DjiJ6YCtfItCa8o9Az34llFc8ivJH2cEGOYq1Dsy4ENidwywOzE85E7r/7N9k8zY3RB0ibAz5s9WNpPrW/73AKx+5JPc+Mm7AOs6rAabrgH1xP5kmlGym8uRlgkrUpnLzIPMMegF5mIlm2izazE2ngzSaUIl02e4p+JVP0IoWCbixo58KOBlSWtTKw1jifyp30VLwdA+bVnIaJUVSj8kJclGPUn2X6k9d6ttlcf9sPTESXZ4zMwdpB0PZHwvo1Wwtv22QVi30wUVnqZprkSch8gZJwBrrZ9zki/Pw1xjwdmoVt26z+2P5IZt6i8cC2MBXs8neck4JDSxAWFSsTqBFHmZ8BPgdfZfndm3JrPyDyE0tkOxALjJOD7OYtvSbfYXkOtTsVc9mordjXmrZKMp0KmfQNi43u/M+xVVNEOUWEhsxSxAXuWwooLEwWSfmF7hUqxiyZBJG1h+0xJS6YNb1GkzegqwG8TIXkBYDFnyk4rfL2XJsiRBxGd6t+zfWSBa76HKLzdaHsVhYrY15whNznMeV5KJALGrQR3TVTsNhkTmwFJ7ycKnjdlxrmHDtFy5YZoafttJa5zIkDSfURy6UKCjNRVNC2YLJxQKLVOmZq4Jc8laSWCUL0p8JDtDQvE3ITufU52Mi/F7Uu4
dKYiZ639U4p9l3tUdfsdezFAheWhW0XZrYE1W0XZ9wPr2c4iwbfW9Y36y5zE/ZZLiGjv9y4gyHtZ+70eQoiI7nqIYqRzySFprv4UsZ8+WNH1vGdmU1LvOb5i+wuFYhVfz0paFtgY2JOWxVAD53dlV7GUUXROr2z7mfR6dmIvnNv5PgnY3PYPc+IME/sBwi67N3fxWOlzlUbpvXv6+72ht9hU4O9XZW5KxZVTCZtLESTfD9m+KyPmHISV+umE/d1ChKrjgUS3960Zsasrk9WApLuI4v9ljg74DQjlieyi+ijnvcT222ueY1pRa+xMsY8DfmT74vT67cR6+SQih7hWgXMsQqgLZVmdpFhV7MRT7C41CIW6xz22l8+M+0NirP+g7RXS83597j5HlexZakKV1OpqoSbhS9LBhCNAUYUlhfr5/QSZ7kBirvqG7RsLxK6lAPdWouZyOd3CAIMQvoa9rlLrl578Xmny6fUEUei59HpWQulyIJKTQsHwQ8S+rL2WeAI4Jec7ro2UFzkbWJFoYp8L2M/2sQXPUSQH3tpLfxH4s+0TeueUzPirEPmFYmvPiaKA812CZbousLrCY+9o28+MR/KNhrLHFycm6hz2+AyMHeZwQW/eHsxie0iHUy7SIF5jIF+jZ9P887Q5y8V5kj7G+JcXHgv2OETC/06FX21J4sJ/bb+QCFrftn1kSiTnotozYvsJST8i2N17Et2i+0g6IqO4/u+UXGrUMtYGBuqO7YOazNtbFczx44hN5JOEEkcOLpT0UeA8yj97RTxY/wdwgaS3l97cJXzY9uEpCdIoA57G4J1CnyW6B35EbHBLw0SHwnuJRMWctIjJg0CSCHWF5YiNzLLAF21fmnepU/CM7WckofDpfSAVSbIh6UpgI2L9fRvwqKTrbX+yRPwJhqLdJg1KE21GwFrAipJmdp4v+dO2/yvphURAfZTojCyKRJAEOMr2t0vHz8QxRAJoSeK5aCAKdL5PYJwv6d22Lygct3hHTw8eJTqmHiOja0rStbbfqE7HV0PM2knSf4lEyCG2v5NxrXvQIVxu0BAuM+I1qLV/AvitQoHitPR6W6A4gXaCYCnbm7ZefzmRUQbF+4mi7EcJ1ZeFCBWATYnGg4GR1i7nSzoWmE/STgR5+LicuAnt/d6RhfZ7TUf9ssQz8hPiGdwWuDkzdjNXX5UKY03XcxHyTUqeLwP8QNIsLqA85bBHbheuZgUezIz5IHCwQur9wpxYw8SvpUL1MLGWfya9fgnR8Z2FtBbah1jjl8a/anzHY4TS9mSPEcXNBpPTsVxUmZtSsWPltEbG9hMFYj5FqOs9Yvt9MIUQ/ydChWlgAo47ymSHEySIySn+PJLWyiHua2gHPERO61bgU85rpnne9mOSJkmaZPsKSd/KiNc0iQ4L2z8eb+QbqK7gt7btnVrnukTSobZ3lvSSQuc4jVBgOtv23pmxmrX3u4FTbd+b1jODB5Q+Ryhmzi7pidY5niNqf7lYyvaWkraCeN5zrznhNbY3l/R+26dI+h6Za8MGkpYh8soLJdLQSsBGzlCWU121OiS9gaGK+LkKvqVznW3cCJyTyL7PQxmrNnes6p4kmohL4t+SXt8QpyStRpk9+w5ELnUWOsIAgyouNZjU51gRzkHl/N6vgZsUNvYm9oJ3KzUieBobDhziCqdI2tQFGtRHQ0nCJfGsbUo8101zy0K5QSvlwCenuWRbYL30XM+Se60NbN9J4bXnRCHg7EAUWY5Ir7cmbozNp9sVjYwDiYJ6F3t8Ol/TDEw9aiW8oWDxu09iuhePkZ+Y/o+kpWz/Jp1zScrI4FeRFy6NZgMNbNXLHicS/6VQi7jwfNp4fBB4XzpWYlKq8owoFAU+RHQgnUp0nz6akrP3AYMScPYi1H+WknQdsCBh71QCL9h2uvZvJ+ZtkU4h2x9L/3mMpIuAeXKZwgTTHcIOb8qpKPDsVU5WTCTsCuwt6VkKbu4SSidBHlOS1Jc0RIbUmbY9
wHeITd1bCALOZIJVv8agAdPzdoHDY7kU6aaNhxLx7VzgUkmPA6Xu7XkTyfAjxN/vS+qWJ38xoVYBoAuSLiOew6OcKT3dhu3PFwpVg2g5BLZfm0hOWR7ONWD7COAISUfb3nV6X884wh7A59Nc8kI6VmIuaVvKTOnoyYxJItZvQayxziKk9AdWdrT9xvSzr7R+i7SXs8+pRbistX+CIG18mUiSmkj+l5C/n4goSiarXJS1pM2JPUlp8nDx/Z6T+oqkq4HXt4rI+5NvFYnCfuoEortycUUjw86tvc+gcdcnksa/J8a3V0ra3pm2cn1QssD5c0lbM7SQdUBm3C4UJOL+i7D6vJQYg94G3CzpCMi2Vr9MoXL5Q7ptRHPzLldIOoQYN9s5uHGrBiDp1cBfmv93hdLQQrZ/nxm6aLGphfbcBGExkD03JVJCUxCaudn2Fno+2gTOa23/iGhKKYGj6W5uebLPsWnFt4CHiCZBEZacSxHkwBMJFclB8U9JcxF/tzMkPUrrGRwQzXz0csIy5efp9QbE+m3cKgG0UXDsBPiLpM8AP0ivtwT+qlB/KWIPb3vDlB/KUpJJKG6/aPsg4CBJB9n+3KgfmHY8l8bLpvlyKVrjfgZq2bNA5AH2AY4FsH13IvjkkGW2pVut7uuE2mc2AUfSacTYcyed/Y3Jt9AuTvhq4f8RJPt77HwrGHXbIg1BgTwqREPyWZIeJr6bhYlxPxdrOFP1rg9ulfT/gKPS64/T3Vg1MCqTT39Dt9vFT9LPXGu/6ySdACxq+12SlgfWsX1CZtxelNyP/IT4Xm+jzJjZoEYOfEuCG7Kj7UckLQ4cknuhkra1fbp6rIFba8+BFWAnigXVfe6Roet3bLxAyRZL0em2aurmeFFKQ09EpMF9TmLAKVo8VSic9MKu4D+tAnKyCmm6k4juFRFdgDs4eTu/WNCHPb4xUIw9Xgtpkt+FkDf/fkrkbGH74My4VZ4RSScDJ/ZLkkp6q+3LM2LPTCS7BTzoAt2QKe5VwEVEgulNROf3XYkcUCL+B+gQv651pr2cpNmaTdhIx2ZgfEJhV7cYkQRZGZiJkMgcyNJP0SX8emLhPsQaI7fbQB1pyLb9Wwk58lMIwtsto/5y3nneTMhOXuQkS5oZ7x7g7URxaF/btyg6n19UNm0Akk4l5E27CgDpX9bmpuc8iwKLEB2HR432+6PEWoFIZrbtZXMTTe34r6IM0RJJB7tHqa7fsRl48UIFO3pSvIOAH6aOoTGBpEVs/yXj8+cQjT57EkTRxwm10lyr1hn7pzGAKliTpLhTbDlLEgNrrV1q7fdS7AeBlZw6K1Mx/O7cxL2km4hmiJ+21ofZFq4K296tHeoyTVf59wddJ49yLgHL2743M85FdBLebXukw/KusO+5FiDWQwOTqBTy+sPCGdbqtXJlkvqNvXam3WBNSLqVsIpq2yJcZ3vgJoYU50sjve9M67N0jpmAOUusL2o+H2lM3sP2P9Pr+YHDbJcgDvWz48ja8/XbQzfnyd1fKywRnyHm0m2IefUMF7BpSwSO7Zv1mqJj/2Tb78iNPVYoMXamOC8DvkTk9yAaO79M3OOL2/51RuyX071H/WPGpTYxq9iJt+JvBKyXXl7pAs0yCluvfYk9+yWEk8aHbF+ZGbeaPYukW2yv0ZMzy7IaSvPeJq3xbT7gxyXmvURKW74EkaUnbtFcZ0/sqwmroSJENw1je94gN4+azvESgvDWrLkfBCY5U+0kfc+HOKNRpk/MOYH9CFtxiEbJr9jOJXIi6UCGJ5/uanv93HOUhqQLiVzAvg4bv5kJy/kidaKec5Xaj2Tvw4aJWzUHLum9JeaOFGtn28cOs1a2M8jfE4WAczqRrLgxvV4L2M32B6fvlfWHott2Y8K25mVEQXYND+ghNwNjD4Uv3dJ0L2BLTKBjWvzOTUynGC+hNeHnTvatuFULWSWhSl7nI5yvuFWEWtKFheIVfUZSouYy2xsUuLze2LsRCYR2cmUr
56lDNbEXJpi3t9i+JjFv1y9xL0v6DqEG9P10aEvgN7Z3y4g5xBez37EZGBySLrf91tGODRi7ShJE0oK2/5Z7fX3i3kR0vN2SiDgLApc40wtY0gPEs/EHojuvlG1fVSi63/cjyHQfU6giHOJuC40XBcaiAFAS6XrXJ9YtFwDvIv6OpdTUmvMUkZEdZqx/UZK9/hcgaX/b+xeK1dVN3hzPSSj8L6AC4bLK/qnPeb5GFG6OL1Eom4ioQCarUpTts3YBoOS4XGG/ty/RhNI0AGxMEO0Oyox7k+21VJ6gPWSeK5zkrVHgrJXwfpntv5eOOwNjg2EIHOO2qVOh1rALQZK5BZgHONx2VkdyrecjxZ4y/ox0bMDYPwauJFRvAD4GbGB744yYNwDfpKPSsxmwl+21c4v1NSHpftuvbb2eBNzbPjbekHJ8axJNIjfb/ut0vqRhkUgshwGLErWnJYD7bb+uUPz5GZr3zVaVUxD31wTOSIe2InJG2Sqz6qi+irCZHddzYSrWf5xo8n29pM0IZYeBba4lnUuoTnep1RFEhiy1OklnAZ/IrTX1iVuN8KVo9l0SuJBuJbwiTV/pHKXX4FVy96netBTwO+K7KJpLLUnCTfFqkk+XAfZmaF4ki6hWmlSX6m/DwvlKkUj6LmFlfE9urJ64VXPgNepZkta1fd1ox6YFE8WCajXgeknNBndx4MHEohqPBZf3E/LHn6TDHn9RJzUnEhKzeQ/gFYSk3tqERGYJ/+XrGSo72u/YwKhA3liNzmS0iqRsosxwhSzyJQtroYrX+XBwHauI4yl0n9V4Rmz/R9J/Jc3rsP4qiZ3cUj6w/bikncizLWhiPSLpDGANSe8lNuil7uO3AK91YsqmgsBArOaURFiM8FpeFabIe84DzFHgWl/0kDQb8V2+LCUq2t/xYiXO4ejYaDyAm4JsdsGtl3xTsKB3BFG0WUjSV4lE4Rcy4jUYs465wnPqz22f1bxwdNi/6Mg3UI9go7Ag2Z9IPs5MJ6GQqzS4GdGJdYftHSQtBJyeGbMfsmRkJe1KJPmXVLe069xEl+UMTExsRNzXJVBLXqXGmwAAIABJREFUXnhCo0SjRQ+K75+Gwc1EEvWbhAXRiwa9ZDKVsyZZqSHfpHiPp7VzLsZi7VJsvwdg+6upMPSmdGgH23cUCP0nSW8ALGkWYl95/yifmRrcKul4OvPzNmRYhzUYrsAJlChwXi9pxVIJb0mT0n7hEtK9IGkP24dnxj3T9hZNDrb3/RI5WYXt9F6EEsRHJS0NLOvMzlZJX+x3fJwTT/8maSPbPwVQ2F1nF5FrFZuI7usnJG1DFDg/S6wzci0Bij4fPZgkaX7bj8OUAlepGskuxD74C8Tzcjnw0cyY2wCHEzksAzcC2yqaAz8+SED1t/YAilpoXy7pYrqbyi4rELcKUq7zi4RlloAjJR1g+8QCsfvZ1jRWKsd6sMbcA4l87GW2V5W0AWE/lI1h8r43EHnKXLwHWCXNV02u8w4gi4CTvuPvEQp72eobI5zn/cAjtm8qEG434LvAcpL+TBAjtsmMeQ4d8jQEIbAUXgbcJ+lmusksWbZLtXKdCb9L/2ZN/2qgyBp8DHL37ywQowv9SLiSskm4CU9J2oJu8mkzVuaqipwFHEP87UrZRQP8O9X0mlrO2sRYPyhuS7FEcCIeT/89H/BH4NWDBm6t62cGdpD0W8oSs2rnwEvZ1LVxJEOf5X7HphoThYBTfHCojJ2JzqA/ExJLMzCxsAfBFL7R9gaSlgO+lhOwZwJtP7DFi98lyRuq5+05VoWsUqjpdQ4M22WRJXHae4qCsYo/IwlPAvek77ndGZr7/c4kSS0iy0wUWnSnheAhxIam2aDv4/AQz8WvicXVH9LrV6Zjg+AdwIeIzXOb5T+ZzE3uDEzBzoSNxaLEArl55p4AiihZ9aBkQbYXRQp6ts9Q2AE05LyNbWcXWGz/YfTfKoPChMgbJd1JyJFe2IxJL0ZULACcQBDgu6Tq
C+Bph6XsCwq1hUeJMbkobG+oqCIParP7PaLwcRBR/GgwuURnzAxMN5Rcw73C9kTbW48JShEuK+6fhsD2uaVjTiDUIpNVKcqO0dqleBIydfMW6+hN2IUoJC9GNLpcTBSgcrFritPsHa+hQMMFFQuchBXJhxT2SyUS3ldJ+jewsKR3AvcA2xPfdw72SD/fmxlnJJxEPM+NcvifieJIrrR8uwg7G/H/UILwVRO7AGdIauaihyhDsqxVbJolkek2JtTrn5dUYp9T+vlo4zDgBoWaA8DmwFcLxMX2o4RNRjGkotX7hnn72gFjzj34FU31OT4uaRM6VkPfdaatemXsA6zaNCGlPMD1QDYBh7AmXZBuMtJkYBngOGC7AWI+b/sxSZMSAfMKSd8qcK1QL+/bYD7CPhSicb0EDiW+169LugX4AXD+gOSmkbAWsKKkmZ2hVANTnu0NFTY+k2xPzr04Z9hBTgX2rxi7QdFcZ9P8JWmu9PrJUrFbKLUGr5q7r7QfqUXChQrk0xZesH306L82zdgL+CnRWHcdMe4PrJxt+9UAko4DzrF9QXr9LmLdlYOa63qonwPfuVQgSesQe5AFJe3VemsewhJvYEwIAs5YFloKYW7gEkn/AH5IyMiNW8nCGRiCZ2w/IwlJL7H9gKRcq6H2BHpo6/hk4HM5gSUdbPszPYc/3efYIFidCt6ejFEhqyBqsserdlm0UFJtoMYzAvDj9K80LgJ+KKnx5905HSuBfQmLwUcBFBY7l9FhZ+dgbuD+1FlggqB1q6SfwrR1GKQN2CmSNrV9doFrm4EepM7SwyXtbvvIMThlsSKLeuQUbZ+bjh1WIPwcxGLVwOwF4g1BqaJpilVzTl2G8EX+MHCEpDOBk23/skDsiYZaBYB/2b6wYLwGtyq8048jkglPEh2ARaChthYDqZ05FOT+BWyVCN9vJJ696+gkOGdgAqBnXF6tz7FBUbObfEKjIOGy+P5J0pGM0OVXohlgAqIWmaxaUXYMMK7sG4eDww4it7u7X9xniWJFMUuBhJoFzqzCXS9svymtV24jCqcfAZaR9APgqkELDbb/kppYTnYFy+iEpWxvKWmrdM6nEik5C737GUmHEqSvcQvbvwHWrlAsrFVsOhb4PXAXcLWkJYgmlFwUfT7asH2qpFvpqHl8wPZ9JWKnRoOjgYVsryBpJWAj21/JiLkgsBNDmxey7BHHAolwM55JN208RuTpG0ymnArHG2yv0Xp9njpWJQPt/YB/pnHiaoK09yjdpMMc1Mr7QjSL3CHpCiKvtR7dzSMDwaFoeVWar95CPDMnEgXUYnABq6wGkn5DkAquSf8GvRfGRK3O5VVD+6EooVzSCoTS8EvT678DHxw05zIMiqzBxzp3XyiXWouEW4V8qo6l03mSdiPqUG01p6y8me3bFfbWyxL38oO2n8+JmbC27Z1a57lQ0jdyAo4B56JYDlzSB4Y5/goA27n1xFmBuYg1Vpug/AQZBCoAvYibb6sjLbK3JKSVHrK94XS+pBmYCkg6B9iBUDJ4CyHtNYvtdxeIvS2xCHoVnU2TnSF/q/6+kEW8zlXP2/M7BHP3/4BPEYWsO23vUPI8EwWSHiQ2Y11dFrYH2tyoW2VpCJzpS1r5GZmdkJ1+MDdWK+YkQvK3GYMvJax1sou+ku6xvWLPue5qH8uI/eaR3h9k46Meuf5WrPEswT3hoJDVfxXd33HRznd1JOZLxKrlL/xFonB1NrHx2JggJQ+cfBzhXAsQG5Is5bCac2pPzA0I5bc5iWT1Z20XI3SMd0i6zfZqFeJ+nSB89W6iS/pxvwqYx2X8yPvaWtjOsrWQtB+wBR1Sa7VnbwbqoPS4rG554aWJLtzivu8TDeqvQJkbs/j+SdL2I71fudN1XEKVvOpT7OXpFGV/XqooWxK193s1IWlJopt1beLZuwH4ZEqyDxKvasFJ0mXEPHoQYb3wKNGA8YYRPzhyzHkc3cIv7ff+oMl/hZLs9cDWwJoOC7U7
gPcD69nOUh2WdDlBVChtGY2kxtL6Otuvl7QU8H3baxY+z/zALbZfUzJuTSjT7qR1n32CuH/PoWCxaZhzzmz7hdJxJwIkXUUoqRxre9V07Be2V8iIeT1RnO9S+axRoC3Z3NIn9mXA8yl2rrpVcUg6FViRUNkzMXbenf5he2CCZ/pe32H7j+n14sDFiQB+R3OvTGPMOYGngUkEsXVe4Azn2Yg3savlfVP8RQiiKMQa/JFCcWcnCvZbEpYh59vevVDs1xUmbTQ52rUIu891iaL93bY3GSDWIokwu0S/92sU22s80yVznSne9cC+tq9Ir9cHvjboOm6s1uCS3kPYnbYbtYrn7nNzqZI+AXyGyGu+h1DyP932m0b84NTFLk4+VajqNZZO0LN3cL59fZWagMLO8Rq6rXbXsz0WNsfZyM2BSzop/efLCaWan6fXGxA11CJqPpKWKD1WziDgVERK6G1OkAzmfrEmNicyUgF8XuAi288ViHcxsWi9ne5N0zSrDEjaFfgYsCTwm9ZbcxNJi4FlkdXxpp0bWIWwIynm7dlzrldRqJBVGmPBHk/nuR5Yv7nHJM0KXJmxGLxihLftfIuP9rmKPSOS3kcoRM1q+9WSVgEOyL3fJC3fmziXtL7tK3PipjiHACvRLSN7t8uoZRSHpIvoyPVnjUEz0B8axnrCGd3pqtT5ro7E4p6E3VSDeYBNbK88SNxW/AeBlZ0kf1My5M5ByYWtuENUavodm8aY1ebU1jkWICwLPgg8Qtgl/ZSYZ89ykhb9X0ZPAeBvFO42GWb+KzLvpY6LRlHmWheQT5d0F5HQ7LK1sL1jZtwqz94M1EetcXm4RGyDGgnZ8Q4NVaB8M7HuHEiBciz3Ty9mzCCTBVrz3WyE6tJdxHewEnCr7XWm17WNBkk3AkfR2T/9H7C77bUGjFe14FSjwCnpfNvv7VMESJc8WPJf0hzAOkSC+1ZgIeA1hI3WNbZvHfSaU/yfAKsSDS0lLaOR9DbgC4QN5yVEIfJDuXv2nlzOTIQVwAGliQU1IelrBClgILuTYe6zBgPfb634Y9LkU5MYUhLqqJpMIVVIutP2Khkxsz4/wPmKNLf0ibsosEiKfVTJ2CUg6Usjve9kZTNg7HcTCrC/IZ7FVxP5hyuBnWxPs7Kawibjh7b/POh1TeV5itZGUsyN6FiTXWX7vAIxzySI9RcRjhRXFSZyZDep9Yk5M0FEejORa1iAyCkPZK2iUP+5zPXU6nrPl/VM18p19pzjrt49dL9j0xCv+hpc0jGEmvgGhGr0ZgRRLStPlGIXb0Dpc44iJNya5NOUI/sYnRzfNcAxtp/OjFu8JpDivhT4Ep1x82rgyzVI1KXQyoFvB/yVAjlwSZcA2zs1OyUy58mliEipmWFz2/9Mr+cHfpATf0JYUE00SPoY0XW6ICGxv1Nv8XcGJgZcXlpvsYLMxO8R3ooH0S3VOLnA4Hvo6L9SBrZ/L2kRhaTls6N/YkwxFl7nAL8GbkoJrSldFmkzNc1dFmO10E7nKvmM7E8sAq9Mse9UdEjm4szUyXIIsUD+BrFIzl4U295H0qZEghAqeloX6iyoJdc/Ax3UsO7LSpaPgGoSiwkPE89c47n9EqBEcuhtRIdFG+/qc2xaUHNObXADIX27UU+S7Na0wX4x4Da6CwCf6nk/a8yvNf8plPteQ6dYuLOkDW3vlhm6lq1FrWdvBuqjyrjcFKAlnWZ7u/Z7KUm0Xd8P/m9jH2BV9yhQElL1g6D6/qlF8umLFwnJp/a+bEKgme8k/Rh4vZMSkEJmf//peGlTgzlsn9Z6fbqkfQYN1iRgKxIJd6ZT4CyiNuXUpVmafG37KeBySY/Yfh9MIaD8Cdie/D1FLctobF8q6XZCGUnAHg67sly0x4wXgL+WKAqNJZxpdzIGJP+f0GnyqZbTczmryNr4u0LByQCSNgNyVfHOl/Ru2xdkX10fDFOQLUq+AbD9MLFPua107BLIIdhMRewLJC0NLJcOPdg0
SwCD7v/mBi6R9A+CcHJWjWJ66dqIQrV2DeCMdOgTktbJGesUauR3Alu5gNr5cKepEPMJ4B7CPvO4HHIvgO3/SPqvpHldQa2uz/lyn+lauc42fqtQCG7WntsSBP6BMEZr8DfYXkmhxv1lSYcR+cos9GlAOVLSwA0orbhD1HqAEiTcOVyvwfkU4vk7Ir3eOh3bIjNujZpA06y4x6i/OL7Q5MA3tv1Q63hODvyV7lYa/iuhulQKL2vINwAONdGX5wScoYBTAZIOIjbod07va5mB8QVVkspWyN81jM3rXE7urrjKwDDnuYxgh55te++SsXMxFuzxWl0WGsYfsRW3SvJsUEi60fbaPZ1C2dYvqWvxYGA1YoN6BnBwyU6IsUCJbqFaY9AMdKBK1n21kMa4M21vWiH2uURi5VJifnoboQjwEEx7N8tYqNSk89SaU9cg7BeXoLs79EXRrd9GxW6TeenuCrmK6HLOSj5JegB4bbOJTgm+e22/NjNucVuLFLfoszcDYw9VkL5Ncbs6N9MccI/t5Uufa7xDhRUoW3Gr7Z8kHQ4sTEd6eisi6XQuVGkeGbcYjkzWe+x/HZLudY9tYb9j4wmSDiZUgX9AzFFbAvMTzRLTrIYnaTIjE9PmGfhimbJX3wIoXuBMBMirCXWaB0rETHGXdLL0knS07V0LxZ0TeKYpbqY55CWJ+FMifnG1wYkMFbY7kTQb/dffz4z4wdHjZtkrjRB3TPKRpZGayL5LKBo+DvyOULj8fUbMyYRtw7NEU1aj+pY1vqXYRRUBe2KvSxSkm/1vc90lGu2KQ9IywN4MVXMqpiLeOtfqwMOJwJAbayViLt0UeMj2hrkx+5yjmAKVpLuBVZqcbJpL7iiQ+x3IymuUmF+i00C0M6FiBJRR+VJYDL6RIMA9RzQDXG378oyYNdXqJtQzDVMULL5M99z3ZduPZ8attgaXdJPttRSqkR8AHiPyT1n2mQql5Df0NqA4QylZddV6vpKurzj5VNJ9vTmQfscGiFu0JiDpW7b3HK4Rx+O4AUeSShORJH2bUMFtO1H82uWsBm8jlKcbu8glgHOcoX42g4BTEYkd1fbp++N0vJwZmI5QRansxOLdgk4n0sZEUugrWRfN0CR9OpZNiBjmXCIYokX9VEtAFb3Oa0LSz+jji0jYftgZnpk1IOkE4HJCfWJTwqJkFtu7ZMadFfgqUYCcC/iC7R9kxrzW9hv7JHyLJUJKouYYNAMBjYH1xHAL7txzSLrBFawKJG0/0vu2p6mLOJEr5qeiSk3lOfVBIqH3C2AKAbBGkX28QyEP/QSdrretgXltZ3WbSDqb+H6be2s7woppRELqVMQ9H9jNHRWRJYBvO3WXZ8QtbmuR4hZ99mZg7KHC0reSPkcQAGcHmkKpiETvd21/Lv+qJxYU6ogrEp37UxQo079pVqBsxa22f5J0q+3VRzv2YsAMMllA0veJ4kpDytoGmMv2VtPvqkaGwg5nOAxcxJF0IKEycRoxvm0DLGL7i4PE6xO/eIFTYT35pvRvKeAOovB2eGbcUwgFmfYcclju/j8Vgja0/WR6PRdwSS5xMcXqVRvcEviN89UGe88zIWyMoP98khnvTGAynfFia2A+25tnxq3VaDhm+cgaSOv8SbYnT+9rGQk1CrKt2A8An2SofUjWXqcWFPbAxzD0erMUe1K++xW2/9Q6dgphWfNL21tmxl8Y2JywdJy71jOiQtZkiYCzfpPDUVirXFmAgHMoobjw41JF35599QEEWQ0ou6eWtByh6rwn8HLbs2fE6psLKHG9tZ7pWrnOmqi5Bk95ySMJu/KmAfd42/tlxi3egNLMy62fcwEX2n5TzrWm2DXJp6cTOb0b0+u1iJzfBzPjXkHBmoCk1WzfprACHAKP4wacRDTdl6GEvdyx/gPE3gli31SMsC/pnQSJ+iriet8EfNT2xQPHnEHAKQ9J7yOk4xYlOlmXAO4vwYCcgYkJDeNH3iCn+JY2Sys7da0ousrv
zGSvNioDSxH2SA1KqwxMCJJaTfZ4il+ly0KVfRFLQ+FZvy/wdmKSuxg40PkdWXcRxZUDCGvAY4DnchNNtVGys6DmGDQDgeEWww1KLIpVqfNd0tHAYoRtZnuMy1LJSuuhn7mC2pTqqdQUn1Nbsa+1/cbcOP8LqNhtcqftVUY7Ng3xmkTQvISizM3prTWJrp71My4XhdVkY2sxAzMwBerTwdnv2ABxD3oxkm36QYUVKMdi/5QKx+9xR9liSWKezVLjmkiYQSbrhkLRYlc6ym9XA0fn7p8mIiTdZXvl0Y5lxK9S4EzksTWIZpldgKdtLzfyp0aNWWsOKbrO6olTRW1wmHMVKSLXRom/WU+8Wuvv+wjy1O8o0OSjMVI9LQ1J29o+Pa3ve2FCReunHlB1IRHplqY7h3r1QBfbHbeKImCKdZPttXLjjBUk3WZ7tUqx77G9Yp/jcw9K0pL0MaJ5aEEil3Om7fvyrnRK7Je5jA1gv9hbAV8HriDGi/WAz9r+YWbcplj/H6LJpWiDZGlSZIp5NrAyMdZdk/7dlLOOU0W1ulrPdK1cZ4pdtLGlFbfaGjzlIXcliv+Nak92bFVoQFEltZ5W/JcydO4rkV+/H1gWaGqRiwMPEnalOWuYakSZND8vk14+aPv53Jg1kfLr+xA2exOmCVXSy+jYnd6YOxfOPPqvzMAA+ArxR7rM9qqKrpZxuUGYgbFB5YHlYWISaibhlwC5hZzvEd6SVVQGJG0EHEYPSY3wixyPqOZ1nnAWQQo5nhaDvABq+yIWRdoM7Jv+lcROxKLq87YPkLQ7kMVoHiOcQJ/OgkEw3hc3/wsYI9b5uu7ucj9P0fn+ycy4sxGbpDbpz+SPe1sC30pJhRNdSFpfQ1VqTpJURKWGOnNqgy9JOp5Q+mp3QowrO8Axwu2S1u7pNinh//20pDfavjbFXZdIvg2KQ9PP1YkuiIdG+N1BMDdwiaQithaSzrS9hTqqZ10oVSycgTHB/2fvvcMsq6r0/8/byABKg4og+JOkoygKSEMTRlAQGDEBBkCCOhjG9EVQxzSmUVFUxATKDAYyCq0EBUVschxyEIFRcRwRAUVAQFHC+/tj79t1q6iqbu7Z+557qtbneeqpOud2rbu6qs45O6z1vg9LWsPjpW9LdM6sI+mlwGk1iiO7xKMtsFkCqs6fMvsCZ0u6KR+vBfxrodidwPb+wP5RTJbIC/Ffyh+dQEna+1vAsb3NkELcJ2kPxqytdqOvsHxQJtngfEvBDc4zSJuFF5E2V+bbvr1A6DmSntDb6M+bFyXWf++TNK9X+C5pI5qNs/r5JWmtojdvXZ3xxYwDMdkmslOn/kgW32i83cmTJfWrLTS1O6k1/n5JgRj9DON5WoPH5c9zp3h9bdKG6mZTvD4lSjZR+wBPBa7KMS5i/Px9UH4J/HduPFy0IdsrJBpkQ7aPsyQdQJq3989/izTPlCLfIyGtr7yTR+Zb4u/uCknzbV/af3LQ4pvM6sC+tq9qltoYkubkOcLpwLx8bh83VGbrx/Z3JJ1NKj418AHbtxaIO9W1VwpViLk/yX6r5F7AGcC2wL35eDnS77NxUR31rulaa50AT+ofb9q+MzeEN6LyGPwIkmLdV/Px7sCRpPFoE37F+MLWk/PnJtfOKZIeT7KSvYJ0TX+zQbxFTPHsuxDYpkD47QvEeAS19gYkbUX6u/hf0r1odUlvKFGIW5E/2P5BiUCq7EQh6Vm2b8jNvpD2BgDWyOtyA9/jQgGnAvkBsbGS6sKGth8u2XkTBP1IOok0aP0p6Qa0HalD+2ZoptIi6YskibsiC0x9ca8mTRTHFam5gD9kDWpWj+d4Vbos9EhfxNcCv3AhX8TSKMn0TbZh2FQJ6BBSpe2LbD87V7ufbnt+k7i1GUa3kDokwd1VJC0kSWV+zfYpDWN1rvNd0gqkDZC9SNf3YcB3miw0qa5KTc1n6tHAs4DrGKv+t0fMDnAYVOw22YC0MLEiaSL2J+BfbF/dMN+P
kxY7ihTKTBK/iK2FpNVs/15TqJ5FMWZ3UAXp2xx3W9L9eDPSRvJhtm9smG4nUT0Fyirzpxx7Z5JC5NrADqSF9A+P2kbWMJB0Amlxd1YWk3W54FLSP5LuQ7uSNv8PI83NGi1OSloL+ArwfLJCImlj8n8bxt2fpFZXbIOzL/aXgI1Im1gXkLqnL7LdqKhF0utJSlEL8qmdgU/bPqph3PmkAqdbSM+mVYFd3cCeRZOrDRrYlAZqg71N5H7FgtKbyDVQRbuTWuPvHHsL4Bm2D5O0MsmGYzq7uSWNW0X1tC0kfdID2OLle/18Uif285Tsaj7jhja7OXZRRcAJsc+aPGSzsVZplKwRe4VvMOG56gGtESe8xw0kpajfkIpDi9nBq6C6vKTzcn7rA28kKRf80OWVX17F2LV9vgtYiEjq2U+ubftTklYnWVFesphvXdL4TyxdBChpacarqJwD/KcbKFuorlpdlWt6krXOtYEflVjrzIXfr5zQ2HLioH/TwxiDq5JiXW0kLQMsa/vuQvGqPfuGiaRDbTdqmsl/x7v31m7yWsZ3auwnlkLSNqT9gJFvQpX0DdtvqXGPiwKcCuTNtp1IcnorkRQ+5ruAfGMQTERTeHv2aDJJz5Wme5EWpXubpo0fol0rUlMlr/O+Lot3AX+gQpeFpFfSJ4dYYlJTi9xB12NZ0kbkg7bf3zDuFbbnqU/CudTkoyaSPgssReVuIXVEgrurSHoKsBrpZ/y1xf37xcR6MfANYFznu+3TG8ZdFngTSYWsf+GmSGFI/ht7Halz/3rSwtNXbR80YLyzSBPonoTs40k+340X8io/U28sUSQ0E5iqQKRH00KRXPiF7T83iTNJ3CKFMpPELWZrkYuEF9reukRuQXuosPTthNgrkhZDPgz8lvRsObrJYm/XyPOQ/2SC0mCTjeQct8r8Kce+xvb6eaPzUySVro/VLtgeRWZ7MdlMKLhUshh6OXAI6Ro8DPhK6c2tUpTc4Jwk9lzgX0hFgavaXqZAzHUZU8c4s1RRYN4s7I1nG8vfq5KN77A2kWuiwnYntcbfuYBjY2Ad28/M898Ftp8/SLy+uBNVT3fKcUuonlYjN8l8hfR8Mkmp5t29jeUBY15qe76kq4BNbf9N0nW2R1VFvJPkxp53MFYYch6pGKKx0leN57WS5fcXmaAu3/TvIq+vXA4cTrJH2h44BTjH9iFNYuf4XyetCfWaRXcFfmX7nQ3jdq75UkkheWmSsgWkdbOHbL+5QcwLgL09Xq3uYNubN823Fn3NJzeRitPWpMBa54TYRRpbhjEGV2reO9jjFeveabuRmn+NBpS8/vSySWI2UU/rxZ4Rzz5JGxVYY7hm4jrhZOdGCVVqQlVqvNwyH55r+5om8WoTFlR12IFkXbAPyXpqBaC0xHUQAM27YBYT+5vANyWtQ1rgvCYP5L5he7KKwCXlrlzEci5wjKTbKSAPXZFle8U3ALbvlfTYAnEvZ3yXxXsnvN6oy0JJuecHtk/Mv8N1JC09qpsrkwxGLpBUolPhgTwg7PnJr8wkVeojSG8zpV+G0zSQGZb0OdsfmHD6/ZOcCwYkL4Kc6tyNbfsWUpdoo8F2ZgXguYzvfC+xIXsUcAPwYlKn5R6kQplGSNqRtJnwjyRVkk1s357vnz8HBirAAe4GrlPycl6kUiPpq9BMpabmMxW4UNK6pTZAukytjcHccfNq8uQ/NcEVkezvcTtwK8myrbFssSrYWth+SNLDklYstekftMZDpL+5ZYF1JeECEsN9RZF7AlcCx5A2Gt4AbNU0fod4sMQmwkQqzp9grFDoZTneqZJGeiOyFrYXAgv7iskWSpo1xWTONsO2fyPpyaTuUEiKISUsjKqSC1r3Al4KfJ+x+9CZwEBNEpIOY/JO5KaLvJNucFLAPlvS/yMtIG9EkpT/NmnDtzF5PFFayXhnkurUzyR9BJgnab8mDSKDFtgsQdwt+zaR5wNvBp4p6bsU2kQeAkXtTioW5r0S2JBk
PYHtW3JRWVMA5V0yAAAgAElEQVT2ZLzq6WdJFhSj/tw7Fvga6ecCqcD+O4yt7wzCzfnv+STgp5LuZMyurRE1NmT7Yq8IfJzx6h6fHOE5yhHAnxlv+3IEzW1fal1/+5EKvcapyzcJmNdYLiRtlh7sZNdzJfB+xn6PTXkR8GzbvTXaI0gbtE3ZtNd8CYushv6hQFwAJH3f9qtLxcvMn9CMfGZuEmjCvsACSePU6hrGBOpd07ZPk/QM0mY9wA22/zbd9zzK2PMYa2zZt0ljS80xuMZUdZYmrR/+Xz5ek7Rm25QFpAaUb9LXgNKQH5L2wa9lrMiiFNWefbXJTYG2fU/T4pvM5blg7+h8vAdlbERrMr90E6qkfYC3MFacfYySwtCg+wu9uNOqKrmBak8U4BRE2YsMuI2xiX9vwrSfpD8BB9j+eisJBjMKDUl2OhcuPCt//BG4GniPpLfafu2AYXckeYW/m/TAWJG06TuqVPE6t712jjdpl0XT+KQCpy1z1f9ppAfzrqSf+cihMUUggDmkhcgVC4T+KnAisIqkTwOvAT5SIG5VKqkXbAdMLLZ5ySTngsHZFfiypO8D37ZdYpLU46O2F+QFzReROt8PodliHsA/2t5Z0o62j5B0LGUW/3cHvtS/YdwrApPUxHLwxPzR4+wGsYChPVM3A65Skrj+G5STnQ4WcTKpQOty+pTDmlKjUCazOmkBqLStxb3AtXkBdVGBc5PitGC4aHK/84toUISb455IUi44Cni57VvzS8dJGvUFnCL0jTd/KOmd1FGgrDF/AvidpP8ijec+l4sO5zTNt6tEMRlI2gU4gDQWEnCQpPfZ/l6riU2DkoT6XcC3gA/2bbD8t6Qmihn9Vq/Lkja/b2kQr0fxDc4+liUV91xu+8FCMWvSm4tsAWxDubkIku5hbAz+D6TNp/tsrzBgvGFsItdmm5rBVc6O+u+2Lam3mf645tkB6fpdlrSxB7AM8LtCsWvyWI+3ezta0vuaBLTdK+b5DyU12BVJa3wlqLEh2+PbwM8YK2B5HUntbFTtQ57r8RYvZ0ka5eaZB2zfIWmOku3eWZK+3DDmjsDmwL8CR+YCgzVJTS5FCkSBX5Is8Hob6avnc02ZrPmyZEFAYyuySXhI0tNt/woWKWg1ug5tX6pk1VNMra6PKtd0X4Hv1bnA9+NNC3z7yQU3p0j6D9unLPYbloBKY/CXl8htGmo0oDy11ppm5WdfFZTsWr8NzE2Hugt4Y4EinLcB7yS5aEC6H496jUGNJtQ3kYot74O0x0BaJ2tUgAO8In9ehdTsfGY+3po0nxi4ACcsqIZIXiC6sHTlVzA70XAk775EugGdAXzLfb6pamClIek9JB/1Lkyeew/Pol7nE+IfT+qyOCaf2h1Y0XajLguNWS/tDSxn+/MaYesljfddfhD4NamS/vwCsZ9FWsAScIbtxuoetSnZWSDp7aQir6cBv+p7aS7JS73UInLAokr33UidvWbMguKehnGvzAv/+wPX2j5WfdZqDeJeYnsTSeeS/k5uJXVvNFXheoRsukZUInNIz9TO2kR0BUk/s/3cCnH3J41bShfK9OIXtbXQFFZqrqvwFBRElfzOJb2EpNrwfNKC9PnAIb3u8tnAhPEmTCi6LPDsqzJ/yt//WJINwLW2fyFpNWA9F5Bn7xoTiskO6ysmQ9nuuLXkhkjukt6u13GbN5sWekStnSFtLnmCFYuktW3/uvD7zAHOd3Pb6E7ZZ9ek1lxkkvcRaRN4M9sfHDDGY0mbyEeTGpGeTFLl/BRwnu1ZUXS6OFTAjlrSvwHPIBWH7k+y/Dq2QDfySaSx0DjVU+BmGL3C8r4C3w8Ad5LWEE1q0HmC7Q+1ldt0SLrc9kaL/5cDxX7E+uOIr0lWsX2phaSFJGu2zwIrkVTa5jd97uXYi+7teV7yCeCFtvduEPOHpGtiRdK13Rsjb0Jaf9qqYc57kK63eSTlotcAH7G9oEHMNXpfAqeSGhgFZawoJW1D
Wi/st5nfyw1UM/uKWe7JxSzzgCLFLLWuaQ3JZneyNcoGsTozBu97Pr0L+AMFG1ByAcQZs3E+OhmSriE9N87Lx1sAX2+yDp4LC6+z/azF/uMRIhd6P520v1ekCbW3TuYxdcRlgUttr1cgZSSdDrzBWekqr7ccbvvFg8YMBZwhkquSt2o7j2BmkDcKlyLdBGooZQBcQxqsTmYPtUmDuHOB05VUoY4j+Tjf1iBeVSpXj0O9LgtJ2pykeNNTm1iqQNwqOCsCVYp9A2XkGodJyc6CY4EfkxbE+hcx72ky0A4mx/afJX0PWI4k//pK4H2SvtpwIbJW5/uhSkpZHwF+ACwPfHTQYP0FX3ny0WMucEGDuNVUaobxTI1Cm6FwoaT1bF9bMmitBXPVs7X4HnC/7Yfy+yxF6hoOusP9tu+XhKRlbN+gZGnUlL14pLT+UcDOBWJ3AtdXoKw1f8L2X+jrvsoLQ79vErPDHMpYMdnGkhYVk82W4pvMHI+Xu7+D0VdF+h5pM2jiudIbwM+ggF0kY/bZ51HZPlvlFElqMRQVLqdO0ZMkfZzxc9dHE+MvwBmSbrX9Cli0aP9bkkrWyBfgqLDdierZUa9Muob/TFoz+xiwbcOYUEH1tDITLebf2veagWLziRL3Cg1BERD4q6Qtek11SipnjdXEK7IRY7YvkFRabuytP4xgI9EOJIWofUjKbCuQCmVK0H/vOd9J1aOput4XGn7/tNg+Rkllr9d8uVOB5ssjGLuu18zHyuca27SR1sf+i5TzXcBPSGoOTaimVke9a7qKzW4u6H2q7d/2TjWN2UeXxuATn0/vnfB6kwaUi4ETc+H7A4wVWQykYDgDeKhXfANg+3xJjVQunWzmb5S0RonCvyGyfYWYh5FUU3vjw51IqqqlWL1XfJO5jTQWGJhQwAmCjiPpDOBVruChmwcqr2RsYfp82ydO/12PKv76pOr0VwM32y4xSS9OzerxHL9Kl4WkF5C8nC+w/TklKct9R61TqIcq+i12kYqdBfMYu6YvKPV3HCQk7Qj8C6nD8kjgCNu35y7Mn9teq0Hsop3vSmpkjzidP9v2FweMuyLwBAoXfGk4KjXVnqlBPfqKsh5D2nC7iQ7YfOWuqRcxwdbCdhOLNiRdDGxr+958vDxweoluyGA45AWFvUhFnC8idVIvbfulDeP+fELR96TnZgOqp0BZdf4UJKb4/T3e9qwpJgOQdACwPvCdfGpX4JoCG+rFyQ0tzwE+D/TbsawAvM92o+JTjbcwgqTo+CHb328Y97GkDU4xtsF5TK0mhhKKJLUoPReZELt/LWAOsDFJbWHzhnEXKS5JOsT225vEGyYqrC40Wee/CqiT1oobTE/Te4UqKwLm99iAtCayYn6fPwH/YvvqprFrMNU6Q49RaaiRdL7tLSY893q/x4dJP+cDbA9sTyLpCGAf23fl4ycAB9p+Y4PUJ3ufl7ucJdDTSXsLf8sN8OsDR/b+DwXi11B8Kz6eVUW1ulrXtKRTSBaD25H2XP5KUkVqrCYj6VpndQwlq7YitmRdGoP3mKoBxfbARVT5WbIj6W9t1hcaKNkALkf6u+ip4N1PUmRk0D0YJcX6DUnKYf028zs0TLkz5CKvzUg/zy3y6fNsX1nwPQ4mrSn3X9e/dBP1t7gugqDbSDqZdAP+KeNvwI2LLCR9nbSJ3H/T+ZXtdzaNneOvSuq4fS0wd1Qn6KoshZi7V9YBxnVZkGyYRnbjsDSSTmVyn8U/kH4ORSd6o46ki0iL0f2dBV9osggp6aMkRZ1eMdNOJAWqxp0FQULS4cC3bZ87yWvb2D5j+FlNTu4qhXT/mU9Sv4FknXGJR9CaLCt5LHQllZqaz9SgHl1ZLJ2IKtla1CrgDNpB0gvJfue2/94wVqek9WtSqxip9vwpSEQx2VhXL2kM178IOZIFX7lIfSdSx/4P+l66B/iu7QtbSWwKhrTB+STbf2yY6tBRYevMHPMU0rwf0jrI/wL/Z/uohnGHsolc
ClWwO1ElO+qKcaupng4DSUsDb2fMSvxs4L/cQEVbk6gXTXZuwNjFN2QneY8VICkFl4oZTE0uzrrQzWxPH1GwUakApaQl0FWk4s21SPfPHwDPadrA0Be/xv+/+Hi2ZjFL33sUvaYrF/geQZr/Xto0Vl/MTo3Be9RoQMmFIVuVKmzqOpKms4+z7YGUs/Ka0GQBzxkkXhuojHpf8fvwJO/xSsbGcOc2va6jACcIOo6kN0x23vYRBWLfADy7V8GaKw2vs/3shnHfQSoCWBlYABxvu4TlUhVqVo/n+EPZOJT0GeBu4Ju27ygRsySq4LPYZWp0Fki6EdjAY16ZywFXNZmYB2PULg6pRZ4wvcz2Pfl4LnCq7RdM/53toLrKb9WeqUF9JB1l+3WLOzcqSFpI2oz8LLASyYZqvhsq1Ui6ANi7110jaSPSolOjLvJguOTC72fYPkzJU355279uGDOKvjO1ipFqzZ+C8UQxWaK/q7crSNrcdlN7hcninmF7m8WdK/h+A29w9jqw+zceJe1j+yvFEy2IpB2AAxmzzlwDuMEN1Yty7CtIawHX5uPdSOq9jZqehrWJXIq8cdNTJdkYuDR/3WTTppY6aWdVT2si6ZvA0iSrGkhW4g/ZfnODmNVUhmpsyPbFXoakeL4WSakUANufbBo7mJ7eddTg+68mbarfmY+fCJxTesxReG39CtvzJL0f+KvtgwrH/xfbh5eI1Rez+Hi2cjFL567pPDf7R+A3pCa7IirJHR2D1yj4OpxUiPtjxtsYDqSoHsxs1Fy97wskm74Teusto85jFv9PgiAYcb4H3G/7IVi0AbxModi/JC2q9Ca4q+dzTVmdtJhyVYFYw6Cq1/kQFxAuAZ4OfAkYxcXp4j6LXSYX2mxQuLPgFlK34v35eBlSZ0RQACdf1oclrVijOKQiTwb6VRX+ns+NKvcC10qqoVJT85ka1GfcBlD+/W3UUi5Lwg6k+/E+jNlafKJA3H2BBZJuIS0wrUpS4Qg6gpJC2cakYpnDSBs5RwPPbxi6hg93V9kIuFDSuGKkXsd9g0XZWvOnYDy1fn9d4wpJ80t29dZmYvFNbs65A/i+7QcfbTxJywKPBZ6U1U16KjUrAP9fw3SnxPYdShYXg3COpPuAVSVtD1wLvAEY6QIckhrwZkywziwU+zWkscvupK7T1wP/XCDuHElPmLCJPLJr4f2NHHnjeKCimwkx7yY1Yu2mCXbUpCafxnGb5jgh7u/zGP7wrjW2ZOZPUJo4MxczPGrUpzIk6Zq+l+aSfn8leO6EzdezJJVqkDyZ9DdyOX0bs0F9mhTfZA4ELpK0IB/vDHy6YczJmtbe2jRmHw/k4s3Xk1SdIc2hGqExJbXD83FJJbXi41nbfwFOkLSKxlTVbiiQKwzpmi6hlNFHrabezo3BSTlvNqHg67KGMX+dP/4hf8xqJO1DWr+5B/gGSYXqg4MWwOmRFrvjsL3CIHGHhZL7ySak/8Mltm8jKZQNyluB9wAPSupZBLvWzyE3TT5AuhcNZJc4spOOIAiWmDOAbUkbkpB8Bk8nWfk0ZS5wvaRLSDfKTYDLJP0ABvcZtP0hoIp0cSV2IW1YfMH2Xbl6/H2L+Z6Rw/ZJbeewGM6Q9BPGS/YvbDGfVpnYWSClteSGnQV3A9flwgWTisoukfTVHDtsdppTszikFkeS/g56soo7AYe3l85iOYExG7XS1HymBpWQ9CHg34HlJPWKFUUqJju0tcSmQNnWglRoOtHWYj9JjWwtbF8q6Vmk4g2AG91A9j5ohVeS7PCuALB9S1Yna8Sod40PmVrFSFXmT8EjiGKyxKbAHpKKdvUOGZEKAvYgFaY+Wt5KKjx9CmlDqPc8/TPQdNNmWgbd4LS9paTHk/KdD7wZeKak75IUBg4pmGZJHsiFR3Oyis9Zkr5cIrDtm/LG6UkklbZ/dhkLnCqbyF1Ej7SjPkzSSNpRd7ixBeAhSU+3/SsASU8DHhow1rEkVYGiKkMTqLEh2+Op
tuN53UFsHynpMqBXBPgqF1Cun3ht276kacw+9gLeBnza9q8lrQ00sjHMrO9sYwhg+05JpVTUil8fU6nVMaFZaUCGck3bfnZPKaNArN9oEmXZxkl2cwxeo+CrRAPZTOKNtr8i6cUklevXke5DAxXg2J4LIOlTwO9zLJHmTasVybgSkt4MfAw4k5TzQZI+afvbg8bs/TyGyOtJP+eB70VhQRUEHUfSVbaft7hzA8ae1F+whwf0GZT0CuCLjA0G1wSudwHp4pp0pWBI0kFMXx07soUAkl4FbJkPG/ssdhlJpzHWWbBowcb2gQ1iTmqv0xc7bHYaMtXPeNR/trkbsv/au7LNfKZD0uOYRKUmd/o0jV3tmRrUR9L+vSLfLqMGthb5+3cGTrN9j6SPkLpu9nO2pApGH0mX2N5EY1LqjwMuGvEFvYB686cgmAx1zKJFyZLtNbaPrxB7b9sHlY5bg1yofyHJ6mWTvJl3JbAj8ALbR7ea4BRozDpzf+BJFLDO7G369J1ahTQH/htAieeepHUZ20Q+s8Qm8jBQYbsTdcyOWtLJpGLkLjW2IGkbUvf7TaQNpzWBvWyf1TDuOPWiUuN6VbQnlXQocJCztVwQwHCubUnzCl4jQ7HjKkXO90VMUKuz/aYCsatc05KeZPuPJWP2xV6kLGv7mZKeAiyw3UhZtmtjcJg65x6D5J7HtDv3itSyQtR3bddSHhpplO0hJX0FONv2iSpghSfp6gnqepOeGyXyuPOfbN+Rj5vY986b7vVRXusMBZwg6D739Q8sJW0ElOgUqrlAvB/1pIuLU7l6vAalulWGju2ayhZdo3hnwagXgcwEbB+RFzPXsH1j2/ksKfkZMrID1gnUVKmp9kwNhsI6kl5KKj55uO1kBsXNbC0APmp7Qe702gb4AnAIqUsrGHGUJO9OUbI/fbyktwBvJEkYByNOFNgEwyBv/kCSN+8Mth+W9H6geAEOcKukuR0pPt0R2Bz4V+BISU8mbdK/GjivzcQWw46kcfG7SZ23KwJN1FkBXt40qcWRC246UXQDVe1OumZH3cm1IdtnSHoG45UoG1m1VFYvqqHA0Susewywl6SbSEV1XVCICOozjGv7m6RxQAm6pqRWXK2u1jWd83uYtJ43L5/bx3ZJS86iyrJdHYNDteKglSdRiFqlwvt0hcslnQ6sDXwo/62VWJu8T9IewHdJ1+Ju9BUwjih3MP46uSefG4ReM/qypIK6q0n3n/VJ+6CbDxh3HJKeD/wHaV72GMbucU8bNGYU4ARB99mX5JV9C+mmsCrJuqc4KuB7l6kmXVyJml7nxelakYWyFYem9rW8gwZWHB3mQknrlegskHS87V0m6TAEynQWBoms8PUFkvft2pKeB3zSYTlRkmVt94pvsH2vpMcWij20Z2pQha+TpKcPygtkh3WpEK4fD2hrkemppr0M+IbtUyWNnL1AMDm2nVWM3kOyUFkH+Jjtn7abWTAIBedPQdDP5aQxvUjNIXfmrx9PUjFYu73UFstCSf8GHMf4zvemdir9xafbAgcwosWnWbXxDEm32n4FLNrc+i3wBka3oeatwHG2fwcUWXMY5U7xFqlld9I1O+rvMYnqabspLRm54OYaAEkbS7rF9i0NQu7JePWizwJXkZobm+Za4xqsXlgXdJchNa1p8f9kyXAlO66K3CVpeeBc4BhJt9N8o77WNX2OpPuAVSVtD1xLGgeVLMD5e55fGxYpajehy2PwGjwkaQ1nl4issjObLXfeBDwPuMn2X7Lqy14F4u5Oui6+QlbCy+dGmV8C/51Vz0wq5L9G0nsAbH9xSQPZ3hpA0gnAvN5emaTnkgpmSvEtUqPBODeKJkQBThB0HNuXSnoW47srHqj0do197zK9weB5lBsM1qRrBUMASPoh01tRjURBgO0t8udJK9B7EnWkjdUZT6XOgn3y51gIqc9/AJsAZwPYvkrJ9z0oR03lt2E+U4PC2F5I2thbkdQRslDSb0nKIUfPot/l
77J6ynbA5yQtA8xpOafg0XEFcJft97WdSNCYUvOnIFiE7bUBJH0DONH2j/LxS0gWQaNMr7D5nX3nDDQdL/cXnx7akeLTV/d9fb7t75GKDkaVucDpkv5EKqBaYPu2lnOaicyR9IQJdicl1u9PzB89zi4QsyY1VU+Hyd7A+pL+x/agjR2dUi/qFfVIOsr26/pfk3QU8LpJvzGYFQypae0TBWN1TUmtuFpdrWva9paSHk/a7J4PvBl4pqTvkmy+DmmSd+b4SZRlvzlosI6PwWvwYeB8SeeQ9i22JCk8zkqy2uevSX/HyxaM+7+ka7tL/Cp/9Dg5fx5YgYpkJbeoUd32zyQ9u0G8idxt+8cF4yF7NhekBUH3yR2yp3VEahmArFRwP+nBvCewAnBMga63KqiC1/kwUPKbXBXoecjvBtwGnATdksiXtFpDNYDOoAqerDnuUiQVp60HSixYIiRdbHsz9Xm8KnvAtp3bTEHSfJLs5jiVGtuXF4jduWdqMJ5ctPk60vjiFuAYYAtgPdtbtZja0MjjrO2Ba23/QtJqpP//6S2nFiwhkm4A/hH4DeMVIuJZEgTBIiRda3u9xZ2bDUg6hbQhvR1p/PZX4BLbG7Sa2DRozGrornxcymqoKpLWJxVSvRq42fa2Lac0o5D0euDfgXF2J7aPai+r4SPpKtvPW9y5rqBskTfg955E2pwep14E3AwjqV4EgKQrbM/rO16KND9Zt8W0gpaRdDlJTebsvjWzn9l+7oDxprWamm1rOVldoqdWVzp20Ws6K7JdSFLy2CQrvl1JKjR4ge2jpw2wZO8hUjHnP5PWD38CnOvm1oAxBs9IehJjzSYX2/5jm/m0iaQ3k5qgn0pSqtsMuMj2i6b9xsXHXRl4C7AWfUXZoz5nKI2k75DWx3r3hj2A5W3vVij+Z4GlSDaJi+4RTZ4joYATBN2nX2p5G1IVeRGpZRX2vVO2GiIVgfSq/3qykPvlTqpRtBqq4XU+DJ5ve+O+4x9Kusz2u1vL6FEg6fr85ddsH9xqMkOkYmfBQ5IelrSi7bsLpBpMznWSdgeWUvJ+fxdpQhkUorJKTbVnalAfSSeS/i6OAl5u+9b80nFZNnpWkK0tTpC0iqQ18ukb2swpeNS8uO0EgsEoPX8KgsVwSy4Y7l+EbGJzMhSyXPi6JEUHIFk8NAy7C6n49Au278rFp6OuIlbLaqg2twO3kqyiV2k5lxlHabsTddeOuprqaU3yOOAq2/dJ2pNUEPiVhlZPnVIvkvQhUhHZcpL+3DsN/B04tLXEglHhAdt3p7qIRTzcIN6B07xmxu6ls4XianUVr+kdgc1JiilHSnoyaQ71apJrQgm+lYsUfgqQHRl+RFrva0Inx+ClyQVO2wNPs/1JSWtI2sT2JW3n1hL7kApmL7a9dV67/kyBuCeTromFFLJGqo2kZwL/xiOLhprck/cC3s6Y08O5pDX7UvTW/vv3Uxs9R0IBJwg6Tk9lQdL+pKrjY/uVFxrGvoFJfO9s39E09hTvtxJwoe11FvuPh0jN6vGa5AKWl9m+KR8/DTjVdklptqrkv4nNbJ/adi7Dpka3kJLv5oakiUd/R/1Idkx1kaw88WHGd1d8ytmvPWhOTZWams/UoD5Z9vc5wPNJi3jnA4fMtutP0g6khcinkDbJ1gBusP2cVhMLglnAsOdPwexGyZrm48AL8qlzgU+MqrIsgKSPA1uRCnB+BLyEZL/0moLv8a+2R36TV9LVwFYTrIbOGdXuaUnvIBU6rUxSZzm+SWFIMBx6asJTKe02LAypRk3V05pIugbYAFgfOJxkdbKL7Re2mVcbSNrf9ofaziMYLSR9i2Qx90FSocW7gKVtv63VxGYYNdTqal3TExTEryVZiL3Q9t4FYn8KWMn2O7LS4KnAN2wf1jBu58bgNZB0CGnt7UW2n51/xqfbnt9yaq0g6VLb8yVdBWxq+2+Srmu6FtdFBcA8z/lPHrkuMtLjuNKEAk4QdJ/fKXlZbgd8
TtIywJxCsYv73k2H7TskbTWs93sUdNXrfF/gbEk35eO1GGEfTkmfs/2BCaffP8m5GU3lbqET8kdQiaw88eH8EdShpkpNzWdqUJ+9gD8DX83Hu5PUcHZuLaN2+BRJ6nZhLijbmmTJFQRBfYY6fwpmN3mRf5/F/sPR4jWkzekrbe+Vu50bWwxM4G10Q2XhQOAiSeOshlrMZ3GsDuxr+6q2EwmWnFx8sxRwuDtkR11Z9bQmD9q2pB2Bg21/S9KbBgnUYfWiHutIeimpeaaJwkkws9ibtF72N+BYctNa06CSXjXd67Zn21poDbW6Wtf0q/u+Pt/294DvlQhs+6OSPi/pP4GNgM/a/n6BuF0cg9dgU9vzlKzDemqO/9B2Ui1ys6THAycBP5V0J8lWvCmnSHqp7R8ViDUsHrRdUp1mMrVhAEqpDUtakfGFdecAn2ziJBEKOEHQcbLawvakTv1fZKnl9WyfXiB2cd+7LlOjerwmWSXiJ8DawA7APwEfHtXf30TFl3zumg4sKlShRmeBpMcB99t+KB8vBSyTi0aCAkg6i8kXx2ab7G01Kiu/VXumBvWR9POJKmGTnZvpKNlNbpw7Tja0/bCkq21v0HZuQTDTiflTMEwqSXtXRdIltjeRdDmwNXAPcL3tZxV8j86oF0palzFZ8zO7oCgjaRXG24f9X4vpBEuIpDNIdladsKOuqXpaE0nnAKcBbwS2JG2CXz2IslVX1Yt6SNqW1CCxGUk16zDbN7abVdA2kna2vWBx5waIeypp3fvMfGprkh38H0h2sG9sEr8r1FSrq3VNSzoC2MfZljOrqBzY5Hc2oSBLwEeBS0j358YFWV0cg9dA0n+TrrtLcyHOyiQFnE6Mw2si6YXAiqSxzN8bxroHeBxpfeEBxmyuV2icaGGyOhQkdbM/8Mh1kYFVomqrDUv6PvAz4Ih86nXABranLYz/mFYAACAASURBVPCcjlDACYKOkzfOT5C0iqQ18ukbCoUv7nvXcbrmdd5TiZhL+p2VVIkohqS3A+8AnpblenvMBS5oJ6uRoEZnwRnAtsC9+Xg54HTSYDkow7/1fb0sqWDvwZZymalUU6mp/EwN6nOFpM1sXwwgaVPgspZzaoO7lLzNzwWOkXQ7fbaDQRBUJeZPwTBZQJL2/iZ9i5AjzmW5M/QbpMXTe4GLCr/HKwrHq0beFBv5ohsASa8AvsiYxeWawPUk+89g9LkXuFZSV+yoa6qe1mRXkgrnG23fmueUBwwSqKvqRT1sLwQW5o7y3fLXvyXd/4/uiKJRUJ4PkcYvizv3aFkaWNf27yEVsJGunb0axu0a1dTqKl7T6/eKb/L73CmpaQHHxLHglaS/kVeQ5mZNFZG6OAavwVeBE4FVJH2apHT5kXZTGg1sn1Mw1txc2PIM+orgR5TLSdeY8vF7J7zeRK2mttrw0233K3J9ItuJDUwo4ARBx5G0A0m6uLcIsgZwQ1NvwWCMmtXjNampElGSPHB/ArA/yQO4xz2zzTu1nxqdBZN5hnbRR7Rr9Dp9285jplBZ+S2eqR1G0vUkmfpeJ/YawI2kIjjPFkW1rHb2V1Jh2h6krptjSnWFBEEQBKOBpMttb9R2HoMiaS1gBdvXLOafThfjPdO9bvuLg8YOxpOV9V7EBItL2wPZ6wTDRdIbJjtv+4jJzrdNV9azJiNb683Ph5fYvr1hvE6pF/UjaSVSB/mewC3AMcAWpPn7Vi2mFgwZSS8BXkpaXz+u76UVSIUzjdbMJF1v+9l9x3OA6/rPzSZqqdXVuKbz+GIr23fm4ycC5wyiHDYsuj4GL4mSXeQ2pIKLM2xf33JKI0NeowT4mu2DG8R5M8ny7KnAVaS9ogttb9M8yzpIWo7UcL8FqSDnPOA/bf+1QcyqasOSLgLeZ/v8fPx84Au2Nx80ZijgBEH3+RTppjtuEaRE4Bq+dx2lq17n1VQiSpL/nu4GdpM0j7EH8wXArC3AqdRZcJ+keb2BiaSNSJu0QSH6pBYh
XW8bkTa/g0JUVqmp9kwNhsL2bScwIrwVOM727xiTTg2CYAjE/CkYMj/MzSInUkjauxZ5njflaw0WTucO+H3Bo+cB23dImiNpju2zJH257aSCJeZ7TGJH3W5K09KJ9ayJSNqFpHhzNmkz8iBJ77P9vQZhu6ZeBICkE0nNEUcBL7d9a37pOEmzUaV0tnMLSZ12B5JCQo97SJYiTTlD0k+A7+Tj1wILC8TtFDXV6ipe0wcCF0nqqSDtDHy6QbxF1LC3ynRmDF4b2zcQyuGTYvvZuWhts4ah9iEV9l5se+tc9PSZxgnW5QjgzySVJEjqgEeQijAHpbba8NuAI/Oajkj7kv/SJGAo4ARBx5F0me2Nc7XwhrYflnS17Q0KxC7ue9dlalWP16KmSkQNJH2U9BDuyUDuBCywvV97WbVL6c4CSfOB7+ZYAlYFdrV9+bTfGCwxkn7NmNTig8CvSRtv57ea2AyipkpNzWdqEAwLSR8nPU//ROouXGD7tnazCoLZQcyfgmGSx50Tse0m0t5VkHTWNC/bdti0jTiSFpLm6J8FViKNw+fbDjvjDiDpYmBb2/fm4+WB00f199e19aweeR65XU/1RtLKpOaOgeeTXVMv6pEVT54DPB94GDgfOMT2/a0mFrSKpPfb/vyEc/vY/kqB2K9krAj+XNsnNo3ZNWqq1dW8piWty9gm+pmlnAcmU04roabWpTH4MCml+NJVJD3J9h8rxL3U9vxsh7Sp7b9Jum6U1dol/dz2uos7N4pIWgHA9p8bx4oCnCDoNn2LIPsDT6LgIkjY1SSmqh4f5YdcF5F0I2mD4v58vBxwle112s2sHSZ0FhzW11mwqEhgwLhL57gANw6opBMErVF5QaHaMzUIho2k9YFdgVcDN9vetuWUgmDGE/OnIGgHScsCbyJtDPU3zTTtcA4yuSDiflKjwZ4k25BjZmO3dxfp6vOpg41w1/bbpmQbnKubWKko2cs+Qr0oK8OOLJKOJ3W/H5NP7Q483vbO7WUVtI2kK2zPm3CuREHEoutE0jqkdc8fz7Y1z8qN2p27ptVBe6uu01N8sX1q27kMi6wM+XD//a1UYWGOdSKwF7AvaT38TmBp2y8tEb8Gko4GDrZ9cT7eFHin7dc3jPsyHjnf+2STmH2xlyGtn65Fn3tUk/hhQRUE3WdHkoXMu4E9SFYnRW46wF8lbTHB92422tXsR1iSDINbSA/PXuX8MsDv2kundQ5lrLNgY0mLOgsaFN/sDJxm+2eSPgLMk7RfA8n3YAKSpu1wt33CdK8HS0RN+fuaz9QgGDa3A7cCdwCrtJxLEMwWYv4UDBVJzwXWZfwi5JHtZTQ9kiZddC2Q81Ek+fsXk8Zue5AsF4KGSDrf9hbAbSSlT0hFOAD7SfoTcIDtr7eSYLCkdMqOeirVUwrYqFTmtAk2OLsCP2oY8wxgW5IVFcBywOnAqDeJPHdCp/tZkoqoWgTdQ9JupIKNtSX9oO+luSTl1qacC2yZLYZOI9ld7UoaD8wm7soKZ+cBx0i6nT7ruoZ08Zrut7cS8BrK2Vt1agxeA0mfs/2BCaffP8m5mc45ku4DVpW0PXAt8AagSAGO7VfmL/8jq4quSLrPjTIbARdK6hVOrwHcKOlaklrU+o82oKT/BB4LbA18k3Q9X1IoX4CTgbtJNol/W8y/XSJCAScIOo6k9wDH2S5eqCBpA+BI0k19ke+d7atLv9coE5Ykw0HSSSQ/y5+SFva2Iz1Eb4bR97cuTY3OAknX2F5f0hbAp4AvAB+zvelivjVYQiSdSloIOzOf2hq4EPgDaYAZXbgNqaz8Vu2ZGgTDQsmLfBdgZWABcHwpCecgCKYn5k/BMFGyHNyKtPj/I+AlwPm2X9NmXtMh6aC+w2WBbYArmubc657vm+8sDZxne7MmcYPFkzudL5ytyrVdQR2zo66pelobSa8mNVJBug81ssHpsHpRle73oJtIWhNYm7SO88G+l+4BrrH9YMP4V9ieJ2lvYDnb
n+/CdVKammp1Xb2mNWZvZeCsEmsjXRyD12AKRatrBimu6DqSHk8q3Dgc2IBko3kKSXHpkBZTa4V8z58S278ZIGZvntf7vDxJ6WzLgRMdH/9ntp9bIlaPUMAJgu4zFzg9dx0dByywfVuJwHmheAMV9L3rKDWrx4MxTswfPc5uKY9RoUZnwUP588uAb9g+VdJ+DWMG41kaWNf27wGUvOoPt71Xu2nNKGqq1FR7pgbBEFkd2Nf2VW0nEgSzjZg/BUPmNaQF3itt7yXpycDRLec0Lbb37j/Oi9XfLRC6ZzFxV+5IvpVQfxsKWZlyq7bzCKbH9qWSnkV37Khrqp5Wxfb3ge8XDNkp9aI+ine/B90lb7b+Bti80ltI0uakNaJeod5Sld5r5BiSWl1Xr+mlGftZLF0oZufG4CWR9HbgHcDTJF3T99Jc4IJ2smoPST8lNd8+TCpSu1PSlcD7gRe0mlxLDFJgswT0xj5/kfQUktr3agXjXyhpPdvXlgoYBThB0HFsfwL4hKT1SdKK50i62fa2TWNP9L2T1HvP2WbHsQOpenwfxqrHP9FqRjMQ20e0ncOIcYWkzSZ0FlzWMObvJP0XSV3oc/kan9MwZjCe1XvFN5nbSJPSoBxvZUylpuh9o+YzNQiGhe0PAUhahfFyyP835TcFQVCEmD8FQ+avWZ31wVz0dTupCLNL3EfqiG/Kodl24qPAD4DlgY8ViBssARPmP8EIou7ZUfca4c6lA41wvc1vSfcwtvkNadPXtldoEH5fYIGkcepFDeINi+3bTiAYHSpfI5DW7D8EnGj7OklPA85qGLMz5OIbbM+d7PWeWh3QpACnc9e0pH2At5CKIgUcLelQ2wdN/52LZSaMwZtwLPBjJlG0KqG21EF2JBUX/itwZC7IWpO0LnBem4nNME7JzRsHAFeQniXfaBq0V0RIqpfZS9JNJAuq3vNp4OLCsKAKghmCpFWBnYHXAnNLVB1LOo0x37uecga2D2wauwtMMTnoVUw/TJKUD6/zhkg63vYufQ+7cYxwBX1VJF1P6k4b11kAPMjgXpmPJU2YrrX9i6zOsp7t0wulPeuRdDDwDMZ7vv9yYrdvMDhZ6nUX0j24ikpNjWdqEAwLSa8Avgg8hbQQtCZwve3ntJpYEMwCZvv8KRgukr4O/DtpvPJe4F7gqlFWXpT0Q8bmfHNI0v3H2/7g1N8VBEFT1DE7akmPI3U6z2FM9fQY23e0mlhLZFu9rqgXBUEwgkhabbYVzGZ1ls1t35ePHwdc1HSNr4tj8FpImgdsQRrfXzDChb3VUbbEzV9fS2rgf2HsCZQnNz4ta/vuArGK22Utih0FOEHQbSS9g7QRuTKwgLR41djLMscu7ns3k1B4nRehNwGY6mFXSbJu5Kn68A9VhKpIehXQ8x89t6nnezA5fSo1rwZKKb9Ve6YGwbCQdDXJ43yh7Q0lbQ3saftNi/nWIAgaEvOnoC0krQWsYPuaxfzTVpH0wr7DB4Hf2L65QNxJ1W5CfSoIEr1NIUn7kxpyju3fKBo1JL2HMdXTWU2fetE9PfUiYJTVi4JgSiR9CjiHVARRTdVK0mdIRfHfnK2Fe8GiIoj5tu/Px8sCl9per+B7rEUHxuA1kPRR0hrqCfnUTqQmyf3ay6o9JD3N9k3560Nsv73tnGYykjYGbrF9S6F4R9l+3eLOPRrCgioIus/qwL62r6oQu7jv3UwivM7LkItvlgIOt7112/mMCjUKjyTtABzImCrCGsANQKgiFMT2CYxNPoJ63A7cSvJ8XaVQzJrP1CAYFg/kMcocSXNsnyXpy20nFQSzhJg/Ba1g+38lrSZpGdt/azufqbB9TqXQ/Zt4ywIvB66v9F5B0EW6Zkc9FzhdUjXV0w7xUdsLsnrRNiT1okOAkVQvCoLFcBOwO3BQVpw/j9S4dnLh97kEeDrwJeD1hWMH3eEw4L8l9RojdwK+VfINujIGr8SewAZ9BU6fBa4CZmUBDvBxSfvYvsv227M9
7oG239h2YjOUvYH1Jf2P7RLWnOP2x/J+5UZNAoYCThDMEEoqWkzwvXsGaXBcxPcuCKZC0hnAq0pIxwWTE6oI9ZjGz7rHHYRlXRGGoVITKlFBl5G0kLSw9FlgJVKx2nzb/9RqYkEwg4n5UzAK5Pv/04Hv2/63tvOZjCnGyncDlwHv7XWNFnifZYCf2N6qRLwg6DpdtaOuoXraNbqmXhQES0K2/d4F+DfgCbbntpxSMEPps0gCOM/2lRXeY+TH4DWQdBbwStt35ePHAyfYflG7mbXDZM/meF7XR9Jc2/c0+P4PkSzllgP+0jsN/B041PaHBo4dBThB0G0kvQL4ImOKFmsC19seWNGipvVNEEyFpJOBDYGf0tfBaPtdrSU1w5B0me2NcyHOhrYflnS17Q3azm2mE5Z15cgLj8fVUKmp8UwNgmGTN1juJ00Y9wRWAI6x/adWEwuCGUzMn4JRQZKAdW1f13Yuk5GtJ24GjiU9p15L2rC4Anh7qYKZ3HF6qe1/LBEvCGYKXWs0yJv0O5PuFXNnY0GrpFOA35HUi+YBfwUuiXWcoItI+iawLnAbSf3mfOAK2w8OGO8gJm+CA2JNebYi6YnTvV5jbWTUx+A1kHQSMJ+0l2PSc+oS0lh/1l1/eb9lK9t35uMnAueUtDybjeQiuikpZckpaf8mxTaTERZUQdB99gM2Y4KiRZOAvQXiqXzvgIF974JgGsK2pz53SVoeOBc4RtLtjJdrDyoRlnXl6A2GKy0eF3+mBsGw6ClxkRYze4uQyp/3yxL+ocQVBBWI+VMwTJZgU2GUF/53mLBpfKikq2x/QNK/Dxq0T4UKYCmSUuInG+QZBDOKrtlRT6J6+pbSqqcdYheSetEXbN+V1Yve13JOQTAoK5Ge03cBfwL+OGjxTeayIlkFM43LSeNCkZ53d+avHw/8H7D2IEE7PgavwYn5o8fZLeUxKhwIXCRpQT7eGfh0i/nMFA7Mn5cFNgauJl3P65OeAZsXep91JL0UOM32wyUChgJOEHScmooWkq6wPa/veCmS3Om6TWMHwUQkPQ643/ZD+XgpYBnbf5n+O4MlJf+M/0ryet8DWJGkinBHq4nNYCRdn7/8mu2DW01mhlBTpSZUooKZTChxBUF9Yv4UDANJv2aaTQXbA20qDANJFwFfAr6XT70GeI/tzXIhzvMGjNuvQvUgcFvDDb0gmFF0zY66puppV+maelEQTIekZwMvBt4NLGX7qS2nFMxAJH0DONH2j/LxS4CdbL91wHidHYMHw0HSuqTxFsCZs7h4uDiSTgA+bvvafPxc4D9sv6ZQ/G2BvUiNuQuAw2zf2CRmKOAEQffpKVqcRyFFi37fO0l/7p0m+941iR0E03AGsC1wbz5eDjgd+KfWMpp5vJW0iPU74Ii2k5kN2H523vTerO1cZhA1VWqKP1ODYFQIJa4gqEfMn4Jh0lvcn2pToc3cloA9gK8AXydtYFwM7ClpOeD/NYj7GOBm23/Lz7pXSzrS9l1NEw6CGcIDeSw4R9Ic22dJ+nLbSU1FZdXTTtE19aIgmA5JLwe2BF5AKlo4k7T+0jTuD5neimqHpu8RdJLNbL+ld2D7x5I+P2iwjo/BiyHpeNu7TFCgXMRstIvskQtuouimDuv0im8AbP8sF3MWwfZCYKGkFYHd8te/Bb4BHG37gUcbMxRwgqDjSHoscD9pgXdPYAWSokVjL8savndBMBWTdTw26YIMHomkj5Pki/8EHAcssH1bu1nNLCR9zvYHFncuGJzKym/VnqlBEATBzCfmT8EwkXSt7fUWd242IOkqkiT5WsCPgJOB59h+aZt5BcGoIGkhaXNwf+BJpEKO+bZHsuGppupp1+iaelEQTIekg0kFN+fZvqVg3K8AqwJH51O7kayZTwKwfU6p9wq6g6SfkP7een8XewAvsP3ihnFn9Rhc0mq2fz9BgXIRPXvmICiJpO+QmmT7r+flbe9W8D1WItmH7wncAhwDbAGsZ3urRxtv
TqnEgiAYLpLOz1/eRvJNvRM4GPgM8GtJv86eyU1YR9JLJcW9IhgG90nql+zfiGSXFBTC9ifygtU7gdWAc/JCXFCO7SY595KhZzGzmahS8xWaK78N45kaBEEQzHxi/hQMk1skfUTSWvnjw6SFwpFF0sqS/l3SoZK+3fsoEPrhbDn1KuAg2+8jzXeCIEjsCPyFZPdyGvAr4BWtZjQ9PdXT/8mKA9uQFLNmIw9k2/BF6kWkgsMg6CL32T6uv/hG0ucKxH2+7V1t/zB/7A5safucKL6Z1ewGrAycCJyQvy6xWd+5MXhJcvHNUsDhtn8z8aPt/IIZy17AdcA++ePn+VwRJJ1I2mtYDni57R3y82pvYPmBYoYCThDMTHK13oW212kQo7jvXRBMhaT5wHdJA1aROhd2tX15q4nNQCStCuwMvBaYO5ulIUsh6e3AO4CnkRYze8wFLrBdyiJp1tOGSk2JZ2oQBEEw84n5UzBMJD0R+DjJxgHgXOATo6zcJ+lC0sLm5cBDvfO2v98w7n8DXwY+DLzC9q8l/cz2c5vEDYKZgqT3MGZHPfLUVD3tGl1TLwqC6ZB0he15E85d03RdUtL1wMts35SPnwacaruYPUkQ9OjiGLwGks4AXmX77rZzCYKmZCu55wDPBx4GzgcOsX3/wDGjACcIZi49ObgCcXq+dx8GGvneBcF0SFoa6G1w3xh/Y2XJCh67kCr+FwDHZ2/SoCH5PvkE0qLYB/teume2TcBqIel821tIuocxj2Hlzw+TrNUOsP31Su9f5JkaBEEQzHxi/hQEk1PLYljSusDbgItsf0fS2sAutkt01QdB5+maHXVf0clngZWYxUUnkh5HUoeeQ7JbWJHUgHJHq4kFwaOgdtOapBeTxts35VNrAf9q+/QmcYOZhaTPAHcD34x7aHMknQxsCPyUPmVy2+9qLalgxiHpeNu7SLqWsf2ARZRqLJd0PPBnku0UwO7A423vPHDMKMAJgmA6SvveBcFUSNoZOM32PZI+AswD9rN9RcupzRgk7U/qeruq7VxmMtlKbQvSoPCC+BseDqFSEwRBEIwCMX8KaiPpy7b3lfRDJl+E3KGFtJYISfuRxms/ajuXIJiNSFof2BV4NXCz7W1bTmlS2lA9HVW6pl4UBJNRu2ktryn/BFgb2AH4J+DDsR4X9CNpJ+DpwAa2Xz/A93d2DF4DSW+Y7LztI4adSzBz6TXESlpzstdL2Z5J+rntdRd37lHFjAKcIAimIvverQMcRZJPv7Xvtctsh+dwUIye5KikLYBPAV8APmZ705ZTm3FIWgVYtnds+/9aTGdGIemjpM7CE/KpnUjdhfu1l9XsIVRqgiAIgjaJ+VMwDCRtZPtySS+c7HXb5ww7pyUlKxk+Dvgb8ABpc922Vxgw3lQdkb24YbUbBH2Muh1126qno0jX1IuCoA1iTTmYiKSlgHfZ/lLBmJ0dg9cgK7Tdb/uhfLwUsIztv7SbWTBTkfRkYH4+vMT27QVjHw0cbPvifLwp8M5BivUWxYwCnCAIpqKG710QTIWkK21vmFVarrV9bO9c27nNFCS9Avgi8BSShPOawPW2n9NqYjMISTeSOinuz8fLAVeFKksQBEEQzHxi/hQMG0n/ADwzH3bCwlfSE4FnML4hYKANi76OyPcCFwM3979eqiMyCLrOTLGjns2qp11RLwqCR4Ok6/OXX7N9cIM4saYcPAJJl9jepFLszo3BSyPpYmBb2/fm4+WB02ejXWRQH0m7AAcAZ5OKs7cE3mf7e4XiX09qpuo1qq8B3Ag8yICNHY8pkVgQBDOWvUi+d1/Nx7uTujkH9r0Lgmn4naT/ArYDPidpGZLHdVCO/YDNgIV5Yro1Sc45KMctpM2E3kbbMkBIRQdBEATB7CDmT8HQkLQVcATwv6RFyNUlvcH2uW3mNR2S3gzsAzwVuIo0N7kQ2GaQeH3Kh8sDhxIKEUEwFasD+3bdjtr2HfneNxu5HbgVuANYpeVcgqAItp+d
C+s2axgq1pSDybhA0sGkseF9vZNNrcm6OAavxLK94hsA2/dmC8kgqMGHgfk91RtJKwMLgSIFOMD2heIsIhRwgiCYkhq+d0EwFXmAtj2pU+EXklYD1rN9esupzRh61geSrgY2tP2wpKttb9B2bjMFSSeRpBB/SpLM3g64hNyNa/td7WUXBEEQBEFNYv4UDBNJlwO7274xHz8T+I7tjdrNbGqyVdR84GLbz5P0LOAztl9VKH4oRATBNIQddfeYKepFQQCLLGv+mtcjnwk8C/hxU/WQWFMOJkPSWZOctu0XNYzbuTF4DSRdAOzdK2iStBHJwmfzdjMLZiKSrrW9Xt/xHODq/nOjRijgBEEwHVdI2myC791lLecUzFCyP+gJklaRtEY+fUObOc1A7spykOcBx0i6nb4OgKAIJ+aPHme3lEcQBEEQBMMn5k/BMFm6t/APYPt/JC3dZkJLwP2275eEpGVs3yCppJVMKEQEwSRMZUdNsk0MRpsZoV4UBJlzgS0lPQE4HbiUVDi7R5OgvTXlvuPfA7+f+juC2YDtrSuF7uIYvAb7Agsk3UJSAlqVdD0HQQ1Ok/QT4Dv5eFfgRy3ms1hCAScIgimp4XsXBFMhaQfgQMYWhNYAbrAdC0KFyB0h95MGxXsCKwDH2P5Tq4kFQRAEQRDMAGL+FAwTSYcBDwFH51N7AEvZfmN7WU2PpBNJVm37Ai8C7iRtYry0YdxQiAiCacgquC9igh217Te1nFqwhIR6UTATkHSF7XmS9gaWs/15SVfZfl7buQUzD0lPBj4DPMX2SyStC2xu+1sN43ZuDF6LXHjUK6a/samaVRBMhiSRLIznA1vk0+fZPnHq72qfKMAJgmBKJK053eu2fzOsXIKZTywI1UPS+ba3kHQPyRYJUhEOwMPAn4ADbH+9lQRnAJKOt71LltV/xOAqNtyCIAiCYOYT86dgmEhaBngnfYuQwNdt/629rJYcSS8EVgROs/33hrH2B44LhYggmJywo+4uU6kXRbNa0EUkXQm8A/gS8Cbb1020FQmCUkj6MXAY8GHbG0h6DHBl07+3ro/BSyFpZ9I4/h5JHwHmAfv1LKmCoCRdfFZEAU4QBEEwEsSCUHtIWgm40HZJ+fdZhaTVbP9+qo232HALgiAIgiAISiFpKeA6289qO5cgCEYfSQuBnYDPAiuRCjnm2/6nVhMLFks0qwUziVx8+17gAtufk/Q0ksXau1pOLZiBSLrU9nxJV9reMJ9rpLgUY/AxJF1je31JWwCfAr4AfMz2pi2nFsxAJB0BHGz70rZzWVIe03YCQRAEQZC5S9LyJD/gYyTdDtzXck6zAtt3SNqq7Ty6TC6+WQo4vKLHcBAEQRAEQRBg+yFJN0paI2xIgiBYAnYg2VHvw5gd9SdazShYUh7IazZzJM2xfZakL7edVBAMgu1zgHMAJM0B/hjFN0FF7stNpwaQtBlwd5OAMQYfx0P588uAb9g+VdJ+bSYUzGg2BfaQ9BvSnqEYcZvvKMAJgiAIRoUdgb8C7yZ5p64IfLLVjGYRtn/fdg5dJ0/CHpa0ou1GE7ogCIIgCIIgWAxPAK6TdAl9jQu2d2gvpSAIRomeHTVwG4+0o95PUthRjz69ZrXziGa1oONIOhZ4G2nj/lJgBUlfsX1Au5kFM5T3AD8Ani7pAmBl4DUF4sYYPPE7Sf8FbAd8LltzzWk5p2Dm8uK2E3i0hAVVEARBMBJIeg9wnO3ftZ1LEAyKpJOBDYGfMn4SFh09QRAEQRAEQTGyjcMjyN3lQRAEiyXsqEcfSY8lqReJMfWiY2z/qdXEgmAAevY/kvYA5gEfBC4fZQWDoLtIWhv4LbAO6R56I/C8phY2MQZP5OfT9sC1tn8haTVgPdunt5xaMIOQAX1QOQAADeFJREFU9MTpXh/l8VAU4ARBEAQjgaSPA7sAfwKOAxbYvq3drILg0SHpDZOdt33EsHMJgiAIgiAIgiAIgumQtFoo4o4ePfUiSffwSPWi
h///9u4+xtKyPuP49xppRV4WW8IKtAjYWFJRcCmkVJGK0LQWocY3NKyx1jYa/lBjTUxbW6IQTbUamxSssYmtuo2wMdTXIiUlvEitRQVRtJoURSiyugsNriygXP3jnCm76zLLMrN7z5z9fpKTc+77nHnmymQmc5/n/J7fzeTcmd2LtKIk+TrwTOCfgL9te3WSm9oePziaZlCSLwFnz1/sm+RU4KK2zxibbLYkWQ3sOz92ay4tpSS3MlkHBXgycPf08ROB29oePTDegizAkSQtK0mOA84BXgzc3vaMwZGkRy3J/sCWtj+djh8HPL7tj8cmkyRJ0izY7sPYn9F21R6MI0kawO5FWomSvB54C3ATcCaTD1M/2vY5Q4NpJiU5CbgYOItJx6V3Ai9o+73HeDzX4FtJcjbwHuBwYAOTv+dvtj12aDDNpCQfBC5r+9np+PnAC9u+dmyyR2YBjiRpWUlyKPBS4OXAgbYh1UqS5AvAGW1/NB0fAFzR9lljk0mSJGmWJLkAuBP4CJOrAM8FDmv7l0ODSZL2CLsXaaVLEuBxbX8yOotmU5LfBD7AZCu/M9v+YAmO6RocSHIT8DzgyrZrkpwGrG37msHRNIOS3Lx996odzS0nFuBIkpaFJOcx2YLqEGA9cGnbW8amknbN/H7WO5uTJEmSFmNHWza4jYMkSVqukrwDeFfbe6bjXwD+pO1bxybTLEnyKbbtVPM0JgUzdwO0PXuRx3cNDiS5oe2J00KcNW0f2ht/DtozknwOuBb46HTqXODUtr8zLtXC9hkdQJKkqSOAN7a9cXQQaRE2Jzmh7ZcBkvw6cN/gTJIkSZo9m5OcC3yMyYcMrwA2j40kSZL0iJ7f9s/mB23vTvJ7gAU4Wkp/vZuP7xp84p5p5/drgHVJNrB3/hy0Z7wCOB+4bDq+Zjq3bNkBR5K0rCRZDew7P25728A40i6Z7i/8MeB/mLQhPRQ4p+2XhgaTJEnSTElyFPA3wLOZnPz/PJMLGr4zLpUkSdKOJfkqcFLb+6fjJwA3tD12bDLNoiRHA3e23TIdPwF40mLXyq7BJ5Lsz+Si0zkm3UgOAta13Tg0mLRMWIAjSVoWkpwFvBc4HNgAHAl8wzdhWmmS/BxwzHT4X20fHJlHkiRJkiRJGinJW4CzgA9Np14NfLLtu8al0qxKcgPwrLYPTMc/D3y+7Uljk82GJG8CLml7x+gsmn1JfhV4M3AUW+3u1PZ5ozLtjFtQSZKWiwuBk4Er265JchqwdnAmaZckeSlweduvJXkrcEKSC+e3pJIkSZKWQpJDgD/mZ09C/uGoTJIkSY+k7V8luQk4Yzp1QdvPjcykmbbPfPENQNsHpkU4i+Ia/P8dCFyRZBNwCbC+7V2DM2l2rQf+Dvh74KeDszwqFuBIkpaLB9tuTDKXZK7tVUneNzqUtIv+ou36JKcApzPZd/j9wG+MjSVJkqQZ8wngWuBKVshJSEmStHdrezlw+egc2iv8IMnZbT8JkOT3gR8uwXFdgwNt3wa8LclxwDnA1Ulub3vGTr5Ueix+0vb9o0PsCgtwJEnLxT1JDmCygF2XZAOweXAmaVfNv/E6E/hg288kuXBkIEmSJM2k/dq+ZXQISZKkxyLJlcCDwEVtPz06j2bO65h8xnDRdPw94JVLcFzX4NvaAHwf2AisHpxFs+tTSc4DLgPun59su2lcpIWl7egMkiSRZD9gCxAmW0+tAtYt53+i0vaSfBq4A/ht4ATgPuCLbY8fGkySJEkzZVrkfX3bz47OIkmStKuSHA4cBpzc9qKdvV56LKYX/NL2R0t0PNfgwLQY4mXAIUy2B7q07S1jU2lWJbl1B9Nt+5Q9HuZRsgBHkjRUkuvanpLkXmD+n1Km9w8Bm4B3t714SEBpF0wLyX4XuLntt5McBjyj7RWDo0mSJGmGTN8/7c/kCsAHmbyHattVQ4NJkiRJgyU5CDgfOHU6dTXw9rb/u8jjugYHkrwTuKTtjaOzSMuRBTiSpGUt
ycFMqsqPGZ1FerSSrAb2nR+3vW1gHEmSJM2gJL8IPJVt151Xj0skSZK0rSQ38/BFl9s8xaRw4bg9HEl7gSQfB74G/ON06pXA8W1ftATHdg0+5Tlw7SlJng48jW1/3z48LtHCLMCRJC17SQ5re+foHNLOJDkbeA9wOJM9cJ8MfLPtsUODSZIkaaYk+SPgDcAvAzcCJzO5cOH0ocEkSZK2kuTIhZ5v+909lUV7jyQ3tn3mzuYew3FdgwNJzgLey8PnwI8EvuE5cO0OSc4HnsukAOezwPOB69q+ZGSuhcyNDiBJ0s5YfKMV5AImb7y+1fZo4AzgC2MjSZIkaQa9ATgJ+G7b04A1wKJa6kuSJC21tt/d+gbcDdy71U3aHe5Lcsr8IMmzgfuW4LiuwScuZNtz4KfjOXDtPi9h8jv2/bavBo4HDhobaWH7jA4gSZI0Qx5suzHJXJK5tlcled/oUJIkSZo5W9puSUKSx7f9ZhK37ZUkSctSktcCbwO28PCWVAWeMiyUZtnrgA8nmf+Q/m7gVUtwXNfgE54D1550X9uHkvwkySomXZeOGB1qIRbgSJIkLZ17khwAXAOsS7IB2Dw4kyRJkmbP7UmeCPwz8K9J7gbcwkGSJC1Xbwae3vaHo4NodiV501bDDwP7Tx9vZtKp/KuL/BauwSfmz4Ffi+fAtfvdMP27+yDwJeBHwL+PjbSwtN35qyRJkrRTSfZn0s50DjiXSSvEdW03Dg0mSZKkmZXkt5isOy9v+8DoPJIkSdtLcjnworY/Hp1FsyvJ+dOHxzDZKuoTQICzgC+2XbuE32uvXYMn2Y9JN6sAa4FVTM6BbxoaTDMvyVHAqraLLabbrSzAkSRJWiLTqywuaXvH6CySJEmSJEnScpBkDfAh4D+A++fn275+WCjNrCTXAGe2vXc6PhD4TNtTxyZb2ZJc1/aUJPfy8FZymd4/BGwC3t324iEBtVdIchiwqe39O33xIG5BJUmStHQOBK5Isgm4BFjf9q7BmSRJkiRJkqSRPgD8G3Azkw/qpd3pScDWXWkemM5pEdqeMr0/cEfPJzkYuB6wAEe700eAX0ny8bZvHh1mR+yAI0mStMSSHAecA7wYuL3tGYMjSZIkSZIkSUMk+UrbNaNzaO+Q5M+BlwGXTadeyKRr+TvHpdo7JDms7Z2jc2i2JQnwtLZfH51lRyzAkSRJWmJJDgVeCrwcOLDtcYMjSZIkSZIkSUMkeQfwHeBTbLsF1aZRmTTbkpwAPGc6vKbtV0bmkbQ4SVYD+86P2942MM6CLMCRJElaIknOY3J1xSHAeuDStreMTSVJkiRJkiSNk+TWHUy37VP2eBhJ0oqR5GzgPcDhwAbgSOAbbY8dGmwB+4wOIEmSNEOOAN7Y9sbRQSRJkiRJkqRl4tfabtl6Ism+j/RiSZKmLgBOBq5suybJacDawZkWNDc6gCRJ0qxo+6dtb0yyOsmT52+jc0mSJEmSJEkDXf8o5yRJ2tqDbTcCc0nm2l4FnDg61ELsgCNJkrREkpwFvJft2iECy7YdoiRJkiRJkrQ7JDkU+CXgCUlO2OqpVcB+Y1JJklaQe5IcAFwDrEuyAdg8ONOC0nZ0BkmSpJmQ5CbgeWzXDrHtawZHkyRJkiRJkvaoJK8C/oBJt4L/3Oqpe4F/aHvZiFySpJUhyf7AfUx2djoXOAhYN+2KsyxZgCNJkrREktzQ9sRpIc6atg8luant8aOzSZIkSZIkSSMkWQsUOIqHd+do27cPCyVJWvaSvAm4pO0do7M8Wm5BJUmStHTm2yFeywpphyhJkiRJkiTtZq8E7ga+DGwZnEWStHIcCFyRZBNwCbC+7V2DMy3IDjiSJElLJMl+TE4iBFjLZD/rdW03DQ0mSZIkSZIkDZLka22fPjqHJGllSnIccA7wYuD2tmcMjvSI5kYHkCRJWumSXDd9eBdwD5Mrev4WeAdwa5Jbk5w3Kp8kSZIkSZI00PVJnjE6hCRpxdoAfB/YCKwenGVB
dsCRJEnazZIcDFzf9pjRWSRJkiRJkqQ9IcnNQIF9gKcC/w3cz6R7dNseNzCeJGmZm17Y/DLgEGA9cGnbW8amWtg+owNIkiTNurYbkzx3dA5JkiRJkiRpD3rB6ACSpBXtCOCNbW8cHeTRsgOOJEmSJEmSJEmSJEmSlp0kq4F958dtbxsYZ0FzowNIkiRJkiRJkiRJkiRJ85KcleTbwK3A1cB3gH8ZGmonLMCRJEmSJEmSJEmSJEnScnIhcDLwrbZHA6cDXxgbaWEW4EiSJEmSJEmSJEmSJGk5ebDtRmAuyVzbq4ATR4dayD6jA0iSJEmSJEmSJEmSJElbuSfJAcC1wLokG4DNgzMtKG1HZ5AkSZIkSZIkSZIkSZIASLIfsAUIsBZYBaxru2losAVYgCNJkiRJkiRJkiRJkqThklzX9pQk9wLzBS2Z3j8EbALe3fbiIQEXYAGOJEmSJEmSJEmSJEmSlr0kBwPXtz1mdJbtWYAjSZIkSZIkSZIkSZKkFSHJYW3vHJ1jexbgSJIkSZIkSZIkSZIkSYswNzqAJEmSJEmSJEmSJEmStJJZgCNJkiRJkiRJkiRJkiQtggU4kiRJkiRJkiRJkiRJ0iJYgCNJkiRJkiRJkiRJkiQtwv8B8fLlZPf8gywAAAAASUVORK5CYII=\n","text/plain":["
"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":573},"id":"73UChGrePhr1","outputId":"af8b97e5-cec0-469e-c55d-433364ee31a5"},"source":["exp = train_df.y.str.split(',').explode().value_counts()\n","top_100_tags = list(exp[0:25].index)\n","# z = lambda r : True if r.split(',') in top_100_tags else False\n","z = lambda r : True if all(x in top_100_tags for x in r.split(',') ) else False\n","top_100_idx = train_df.y.map(z)\n","train_df = train_df[top_100_idx]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
IdTitleBodyTagsCreationDateYytext
1334556906output FILE ,is this a fault?\\r\\nmy code here\\r\\n\\r\\n #include <stdi...<c++>2016-01-01 14:20:01LQ_EDITc++output FILE ,is this a fault?
2434560768Can I throw from class init() in Swift with co...<p>I'd like my class <em>init()</em> in Swift ...<swift>2016-01-01 22:42:24HQswiftCan I throw from class init() in Swift with co...
2534560942C# - Count a specific word in richTextBox1 and...<p>I'm not sure, if this question is unique, b...<c#>2016-01-01 23:06:53LQ_CLOSEc#C# - Count a specific word in richTextBox1 and...
3034562551c++ vector type function implemetationclass City\\r\\n {\\r\\n private:\\r\\n...<c++>2016-01-02 04:17:27LQ_EDITc++c++ vector type function implemetation
4834566364japanese and portuguese language cannot supportMy site Japanese supported. But Portuguese la...<php>2016-01-02 13:20:49LQ_EDITphpjapanese and portuguese language cannot support
...........................
4499260458575MySQL how to query five tables in one SELECT<p>I have 5 tables as follows:</p>\\n\\n<ul>\\n<l...<mysql>2020-02-28 20:07:09LQ_CLOSEmysqlMySQL how to query five tables in one SELECT
4499360460748Copy value of list not reference<p>I have a list that i want to compare to aft...<python>2020-02-28 23:54:33LQ_CLOSEpythonCopy value of list not reference
4499460461193Weird question, but how do I make a python scr...<p>Before you get confused, I am going to comp...<python><python-3.x>2020-02-29 01:25:40LQ_CLOSEpython,python-3.xWeird question, but how do I make a python scr...
4499660461754Does Python execute code from the top or botto...<p>I am working on learning Python and was won...<python>2020-02-29 03:33:59LQ_CLOSEpythonDoes Python execute code from the top or botto...
4499860465318how to implement fill in the blank in Swift<p>\"I _____ any questions.\"</p>\\n\\n<p>I want t...<ios><swift>2020-02-29 12:50:43LQ_CLOSEios,swifthow to implement fill in the blank in Swift
\n","

9968 rows × 8 columns

\n","
"],"text/plain":[" Id ... text\n","13 34556906 ... output FILE ,is this a fault?\n","24 34560768 ... Can I throw from class init() in Swift with co...\n","25 34560942 ... C# - Count a specific word in richTextBox1 and...\n","30 34562551 ... c++ vector type function implemetation\n","48 34566364 ... japanese and portuguese language cannot support\n","... ... ... ...\n","44992 60458575 ... MySQL how to query five tables in one SELECT\n","44993 60460748 ... Copy value of list not reference\n","44994 60461193 ... Weird question, but how do I make a python scr...\n","44996 60461754 ... Does Python execute code from the top or botto...\n","44998 60465318 ... how to implement fill in the blank in Swift\n","\n","[9968 rows x 8 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":653},"id":"e_z1IU-XT0a0","outputId":"dc80c79e-11a0-4e63-bd40-8d933dbbb6aa"},"source":[" import nlu\n","# load a trainable pipeline by specifying the train prefix \n","\n","unfitted_pipe = nlu.load('train.multi_classifier')\n","#configure epochs\n","unfitted_pipe['multi_classifier'].setMaxEpochs(100)\n","unfitted_pipe['multi_classifier'].setLr(0.005) \n","# fit it on a dataset with label='y' and text columns. Labels separated by ','\n","fitted_pipe = unfitted_pipe.fit(train_df[['y','text']], label_seperator=',')\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df[['y','text']])\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytextmulti_classifier_confidencessentencedefault_name_embeddingsmulti_classifier_classes
origin_index
13c++output FILE ,is this a fault?[]output FILE ,is this a fault?[0.04620636999607086, -0.04046135023236275, -0...[]
24swiftCan I throw from class init() in Swift with co...[0.86285734, 0.98327714]Can I throw from class init() in Swift with co...[0.053270746022462845, -0.00784565694630146, -...[swift, c]
25c#C# - Count a specific word in richTextBox1 and...[0.64955217]C# - Count a specific word in richTextBox1 and...[-0.005682709161192179, -0.023547030985355377,...[regex]
30c++c++ vector type function implemetation[0.9755105, 0.77180904, 0.9789763]c++ vector type function implemetation[0.024628309532999992, -0.015623562969267368, ...[c++, python-3.x, python]
48phpjapanese and portuguese language cannot support[0.55255216]japanese and portuguese language cannot support[0.038325726985931396, -0.005848723463714123, ...[php]
.....................
44992mysqlMySQL how to query five tables in one SELECT[0.6404308, 0.99544823]MySQL how to query five tables in one SELECT[0.006962132174521685, -0.03580842167139053, -...[sql, mysql]
44993pythonCopy value of list not reference[0.591653]Copy value of list not reference[0.025995030999183655, 0.001833591377362609, -...[javascript]
44994python,python-3.xWeird question, but how do I make a python scr...[0.7427199, 0.99999976, 0.70473063, 0.72811186...Weird question, but how do I make a python scr...[0.018493961542844772, -0.04660267382860184, -...[html, python, javascript, node.js, php]
44996pythonDoes Python execute code from the top or botto...[0.9977689, 0.794142]Does Python execute code from the top or botto...[0.01413149293512106, -0.02844131551682949, -0...[python, php]
44998ios,swifthow to implement fill in the blank in Swift[0.9999993]how to implement fill in the blank in Swift[0.019475314766168594, -0.022571099922060966, ...[swift]
\n","

10944 rows × 6 columns

\n","
"],"text/plain":[" y ... multi_classifier_classes\n","origin_index ... \n","13 c++ ... []\n","24 swift ... [swift, c]\n","25 c# ... [regex]\n","30 c++ ... [c++, python-3.x, python]\n","48 php ... [php]\n","... ... ... ...\n","44992 mysql ... [sql, mysql]\n","44993 python ... [javascript]\n","44994 python,python-3.x ... [html, python, javascript, node.js, php]\n","44996 python ... [python, php]\n","44998 ios,swift ... [swift]\n","\n","[10944 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"markdown","metadata":{"id":"DL_5aY9b3jSd"},"source":["# 4. Evaluate the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"0YDA2KunCeqQ","outputId":"8f72b51d-8e4c-49e8-884e-af5b0fdfa1ac"},"source":["from sklearn.preprocessing import MultiLabelBinarizer\n","from sklearn.metrics import classification_report\n","from sklearn.metrics import f1_score\n","from sklearn.metrics import roc_auc_score\n","mlb = MultiLabelBinarizer()\n","mlb = mlb.fit(preds.y.str.split(','))\n","y_true = mlb.transform(preds['y'].str.split(','))\n","y_pred = mlb.transform(preds.multi_classifier_classes.str.join(',').str.split(','))\n","print(\"Classification report: \\n\", (classification_report(y_true, y_pred)))\n","print(\"F1 micro averaging:\",(f1_score(y_true, y_pred, average='micro')))\n","print(\"ROC: \",(roc_auc_score(y_true, y_pred, average=\"micro\")))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Classification report: \n"," precision recall f1-score support\n","\n"," 0 0.67 0.80 0.73 840\n"," 1 0.22 0.62 0.32 237\n"," 2 0.37 0.47 0.41 467\n"," 3 0.38 0.67 0.49 561\n"," 4 0.48 0.54 0.51 831\n"," 5 0.54 0.58 0.56 697\n"," 6 0.49 0.73 0.59 792\n"," 7 0.58 0.39 0.47 1352\n"," 8 0.20 0.18 0.19 158\n"," 9 0.49 0.77 0.60 1431\n"," 10 0.57 0.75 0.65 2343\n"," 11 0.36 0.56 0.43 833\n"," 12 0.34 0.24 0.28 300\n"," 13 0.51 0.74 0.60 539\n"," 14 0.19 0.28 0.23 106\n"," 15 0.63 0.67 0.65 1283\n"," 16 0.61 0.74 0.67 
1402\n"," 17 0.21 0.25 0.23 411\n"," 18 0.38 0.47 0.42 261\n"," 19 0.90 0.10 0.19 183\n"," 20 0.56 0.75 0.64 451\n"," 21 0.56 0.73 0.63 485\n"," 22 0.45 0.60 0.51 340\n"," 23 0.34 0.13 0.19 220\n"," 24 0.53 0.73 0.61 268\n","\n"," micro avg 0.50 0.63 0.56 16791\n"," macro avg 0.46 0.54 0.47 16791\n","weighted avg 0.51 0.63 0.55 16791\n"," samples avg 0.54 0.65 0.55 16791\n","\n","F1 micro averaging: 0.5556585043017869\n","ROC: 0.7920968190895907\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"mhFKVN93o1ZO"},"source":["# 5. Let's try different Sentence Embeddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"CzJd8omao0gt","outputId":"c3903ffc-ee61-47c1-87cf-bb1876436e25"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possible sentence embedding we could use. Let's use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model 
sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP 
model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following 
Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"0ofYHpu7sloS","outputId":"ea715585-daa2-433d-d281-02b9e61222a4"},"source":["pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.multi_classifier')\n","pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['en_embed_sentence_small_bert_L12_768'] has settable params:\n","pipe['en_embed_sentence_small_bert_L12_768'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['en_embed_sentence_small_bert_L12_768'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['en_embed_sentence_small_bert_L12_768'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['en_embed_sentence_small_bert_L12_768'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['en_embed_sentence_small_bert_L12_768'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['en_embed_sentence_small_bert_L12_768'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : 
False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['multi_classifier'] has settable params:\n","pipe['multi_classifier'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['multi_classifier'].setLr(0.001) | Info: Learning Rate | Currently set to : 0.001\n","pipe['multi_classifier'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['multi_classifier'].setValidationSplit(0.0) | Info: Choose the proportion of training dataset to be validated against the model on each Epoch. The value should be between 0.0 and 1.0 and by default it is 0.0 and off. | Currently set to : 0.0\n","pipe['multi_classifier'].setThreshold(0.5) | Info: The minimum threshold for each label to be accepted. Default is 0.5 | Currently set to : 0.5\n","pipe['multi_classifier'].setRandomSeed(44) | Info: Random seed | Currently set to : 44\n","pipe['multi_classifier'].setShufflePerEpoch(False) | Info: whether to shuffle the training data on each Epoch | Currently set to : False\n","pipe['multi_classifier'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. 
| Currently set to : True\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":570},"id":"ABHLgirmG1n9","outputId":"60e9995e-080c-4213-cf03-c7baba89bd6a"},"source":["# Load pipe with bert embeds\n","# using large embeddings can take a few hours..\n","pipe['multi_classifier'].setMaxEpochs(120) \n","pipe['multi_classifier'].setLr(0.0005) \n","fitted_pipe = pipe.fit(train_df[['y','text']],label_seperator=',')\n","preds = fitted_pipe.predict(train_df[['y','text']])\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytextmulti_classifier_confidencesen_embed_sentence_small_bert_L12_768_embeddingsdocumentmulti_classifier_classes
origin_index
13c++output FILE ,is this a fault?[][-0.0598912313580513, 0.429191917181015, -0.25...output FILE ,is this a fault?[]
24swiftCan I throw from class init() in Swift with co...[0.61310124][-0.45358699560165405, 0.1986018270254135, -0....Can I throw from class init() in Swift with co...[java]
25c#C# - Count a specific word in richTextBox1 and...[0.8172003][-0.592096209526062, 0.0025841565802693367, -0...C# - Count a specific word in richTextBox1 and...[c#]
30c++c++ vector type function implemetation[0.98100495][-0.6645137071609497, 0.34700289368629456, 0.1...c++ vector type function implemetation[c++]
48phpjapanese and portuguese language cannot support[][-0.30820634961128235, 0.5732622742652893, 0.5...japanese and portuguese language cannot support[]
.....................
44992mysqlMySQL how to query five tables in one SELECT[0.94582915][-0.6759300231933594, 0.1323285549879074, 0.56...MySQL how to query five tables in one SELECT[mysql]
44993pythonCopy value of list not reference[0.71518165][-0.7307966947555542, 0.3146328032016754, -0.5...Copy value of list not reference[python]
44994python,python-3.xWeird question, but how do I make a python scr...[0.9938545][-0.478365957736969, -0.015336859039962292, 0....Weird question, but how do I make a python scr...[python]
44996pythonDoes Python execute code from the top or botto...[0.998447][-0.7976136803627014, -0.17537403106689453, 0....Does Python execute code from the top or botto...[python]
44998ios,swifthow to implement fill in the blank in Swift[0.6266076, 0.9772264][-0.4111633598804474, 0.04349775239825249, 0.2...how to implement fill in the blank in Swift[ios, swift]
\n","

9968 rows × 6 columns

\n","
"],"text/plain":[" y ... multi_classifier_classes\n","origin_index ... \n","13 c++ ... []\n","24 swift ... [java]\n","25 c# ... [c#]\n","30 c++ ... [c++]\n","48 php ... []\n","... ... ... ...\n","44992 mysql ... [mysql]\n","44993 python ... [python]\n","44994 python,python-3.x ... [python]\n","44996 python ... [python]\n","44998 ios,swift ... [ios, swift]\n","\n","[9968 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":10}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"E7ah2LM6tIhG","outputId":"edaa6235-c8d2-474a-9cc1-331e0967086c"},"source":["from sklearn.preprocessing import MultiLabelBinarizer\n","from sklearn.metrics import classification_report\n","from sklearn.metrics import f1_score\n","from sklearn.metrics import roc_auc_score\n","mlb = MultiLabelBinarizer()\n","mlb = mlb.fit(preds.y.str.split(','))\n","y_true = mlb.transform(preds['y'].str.split(','))\n","y_pred = mlb.transform(preds.multi_classifier_classes.str.join(',').str.split(','))\n","print(\"Classification report: \\n\", (classification_report(y_true, y_pred)))\n","print(\"F1 micro averaging:\",(f1_score(y_true, y_pred, average='micro')))\n","print(\"ROC: \",(roc_auc_score(y_true, y_pred, average=\"micro\")))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Classification report: \n"," precision recall f1-score support\n","\n"," 0 0.96 0.67 0.79 738\n"," 1 0.95 0.71 0.82 228\n"," 2 0.70 0.53 0.60 440\n"," 3 0.91 0.63 0.75 508\n"," 4 0.95 0.57 0.71 733\n"," 5 0.91 0.58 0.71 621\n"," 6 0.88 0.70 0.78 736\n"," 7 0.81 0.65 0.72 1254\n"," 8 0.86 0.58 0.69 145\n"," 9 0.89 0.58 0.70 1288\n"," 10 0.87 0.73 0.80 2164\n"," 11 0.89 0.58 0.70 754\n"," 12 0.84 0.67 0.74 277\n"," 13 0.89 0.59 0.71 511\n"," 14 0.96 0.27 0.42 96\n"," 15 0.94 0.70 0.80 1193\n"," 16 0.93 0.70 0.80 1265\n"," 17 0.74 0.22 0.34 365\n"," 18 0.97 0.70 0.82 246\n"," 19 1.00 0.55 0.71 172\n"," 20 0.92 0.71 0.81 427\n"," 21 0.82 0.67 0.74 458\n"," 22 0.81 0.66 0.73 
319\n"," 23 0.83 0.23 0.36 211\n"," 24 0.97 0.64 0.77 242\n","\n"," micro avg 0.89 0.64 0.74 15391\n"," macro avg 0.89 0.59 0.70 15391\n","weighted avg 0.89 0.64 0.73 15391\n"," samples avg 0.70 0.64 0.65 15391\n","\n","F1 micro averaging: 0.7401884721644023\n","ROC: 0.8150061228796474\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","outputId":"bbf99f56-d4b1-4440-ecb7-fe9d61935c62"},"source":["stored_model_path = './models/multi_classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/multi_classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Tesla plans to invest 10M into the ML sector')\n","preds"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"M1LjAwJVJxun"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_token_label_text_classifier_stackoverflow_tags.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_label_text_classification/NLU_training_multi_token_label_text_classifier_stackoverflow_tags.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier for sentences with multiple classes at the same time \n","MultiClassifierDL is a Multi-label Text Classification. MultiClassifierDL uses a Bidirectional GRU with Convolution model that we have built inside TensorFlow and supports up to 100 classes. The input to MultiClassifierDL is Sentence Embeddings such as state-of-the-art UniversalSentenceEncoder, BertSentenceEmbeddings, or SentenceEmbeddings\n","\n","\n","\n","### Multi ClassifierDL (Multi-class Text Classification with multiple classes per sentence)\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#multiclassifierdl-multi-label-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! 
pip install nlu > /dev/null pyspark==2.4.7\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2 Download sample dataset 60k Stack Overflow Questions with Quality Rating\n","\n","\n","https://www.kaggle.com/imoore/60k-stack-overflow-questions-with-quality-rate"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"y4xSRWIhwT28","outputId":"f7ac934c-b18f-4ffd-d773-842c81b2a80a"},"source":["import pandas as pd\n","! wget -N https://ckl-it.de/wp-content/uploads/2020/11/60kstackoverflow.csv -P /tmp\n","test_path = '/tmp/60kstackoverflow.csv'\n","train_df = pd.read_csv(test_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-02 11:20:29-- https://ckl-it.de/wp-content/uploads/2020/11/60kstackoverflow.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:443... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 50356825 (48M) [text/csv]\n","Saving to: ‘/tmp/60kstackoverflow.csv’\n","\n","60kstackoverflow.cs 100%[===================>] 48.02M 2.57MB/s in 21s \n","\n","2021-01-02 11:20:51 (2.32 MB/s) - ‘/tmp/60kstackoverflow.csv’ saved [50356825/50356825]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"gBxgVIB787wd"},"source":["# Split labels and clean them.\n","import pandas as pd\n","\n","train_df = pd.read_csv(test_path)\n","\n","f = lambda x : x.replace('<','').replace('>','')\n","g = lambda l : list(map(f,l))\n","train_df['y'] = train_df.Tags.str.split('><').map(g).str.join(',')\n","train_df['text'] = train_df['Title']\n","\n"," \n","# train_df = train_df.iloc[:50]"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":430},"id":"OfMCrNk-L_pq","outputId":"6ce7798d-ff2f-4b02-a066-67497ba0bdfa"},"source":["counts = train_df.explode('y').y.value_counts()\n","counts.iloc[0:100].plot.bar(figsize=(40,8), title='Distribution of Label Tags in 
Dataset')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":4},{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAACOAAAAJhCAYAAADinV3wAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdf9SnZV0n8PcnJiAVQWEiGdCxZPuxdSQbSbfaSsrCsWDPUbPcJKKlHx77YZ6cytR+7C62FavbZkuyhZo/kPJA4boaarW7qQ1qWmk14iAgP0YEFNQS/ewf32vyYXyGeR6uZ3geptfrnO957vu6rvu6P/f3e/8zc97nuqq7AwAAAAAAAAAA3DNfsN4FAAAAAAAAAADAfZkADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAABgzVTVb1fVL6zRXA+tqtur6rBx/taq+qG1mHvM97+q6qy1mm8V9/2VqvpIVd2whnNuraquqk335rUH076/PwAAAMBGJoADAAAArEhV7a6qT1bVx6vq1qr6f1X1I1X1z/+/0N0/0t2/vMK5vu3uxnT3h7r7Ad39mTWo/QVV9Yp95j+9uy+anXuVdTw0yU8n+aru/pJl+r+lqq69N2u6OyMAs/fz2fH77z1/2sG898zvP77Hzy6p9dqquriqHr2KOT7vnTkY7q37AAAAAAeXAA4AAACwGt/V3UcleViS85I8J8mFa32TjbYayxp6aJKbu/um9S5kJUYA5gHd/YAkH8ri99/b9vvrXd8BfHjUfVSSxyR5f5I/r6rT1rcsAAAA4FAkgAMAAACsWnff1t2XJfmeJGdV1VcnSVX9XlX9yjg+rqr+eKyW89Gq+vOq+oKqenkWQZQ/GquT/MySbZDOqaoPJXnzfrZG+rKqekdVfayqLq2qB497fd7KMXtX2amq70zyc0m+Z9zvr0b/P29pNep6blVdXVU3VdXLquro0be3jrOq6kNj+6if3993U1VHj+v3jPmeO+b/tiRvSnLCqOP3VvOdV9X2qnrXePZrquoFywz7war6cFVdX1XPXnLtF1TVjqr6QFXdPFaDefBq7r9PLadW1V+M3/b6qvrNqjp8Sf/jq+rvquq2qvqtqvrTJd/1I8b5beO7fM1+7nGX33/8Xr9cVf93rML0xqo67kC19sK13f28JC9N8sIl93jR+C4/VlVXVtU3jfb9vTNnV9X7xv2vqqofXjLXsu/76Duhqv5gvBMfrKofv7v7AAAAAPc9AjgAAADAPdbd70hybZJvWqb7p0ff5iTHZxE06O7+/tx1NZVfXXLNNyf5yiTfsZ9bPj3JDyZ5SJI7k7x4BTW+Icl/SvKacb9HLjPsB8bnW5N8aZIHJPnNfcZ8Y5IvT3JakudV1Vfu55b/LcnRY55vHjWf3d1/kuT0jJVZuvsHDlT7Pu4Ycx2TZHuSH62qM/cZ861JTk7y+CTPqc9t8/XMJGeOek5IckuS/77K+y/1mSQ/leS4JI/N4jv5sWQRRElySZKfTXJskr9L8m+WXPvLSd6Y5EFJTszi+1qp70tydpIvTnJ4kmff/fDP84dJHlVV9x/nf5nklCQPTvLKJK+tqiPv5p25KckTkzxw1HF+VT1q9C37vo8Qzh8l+askW7L4rn6yqr5jhe8mAAAAcB8ggAMAAADM+nAWAYZ9fTqLoMzDuvvT3f3n3d0HmOsF3X1Hd39yP/0v7+6/7u47kvxCkqdU1WH3vPR/9rQk
v9HdV3X37VmER566z+o7v9jdn+zuv8oiTPF5YYlRy1OT/Gx3f7y7dyf59STfP1tgd7+1u9/b3Z/t7vckeVUWgZqlfnF8f+9N8rtJvne0/0iSnx8rwfxjkhckeVLdw62+uvvK7n5bd985nvF/LKnlCUn+prv/sLv3hqRuWHL5p7PYwuyE7v5Ud/+fVdz6d7v778f7cXEW4ZnV+HCSyiLElO5+RXffPJ7j15MckUXIalndfXl3f2CsqvOnWQSJ9obP9ve+PzrJ5u7+pe7+p+6+KsnvZPGeAAAAAIcIARwAAABg1pYkH12m/b8k2ZXkjWO7nh0rmOuaVfRfneQLs1iFZdYJY76lc2/KYiWTvZaGSD6RxSo5+zpu1LTvXFtmC6yqr6+qt4xtjG7LIlSz77Pv+/2cMI4fluR1Y3ukW5O8L4tVbI7PPVBV/2pst3RDVX0si1Vc9tZywtI6Rghl6fZgP5NFCOYdVfU3VfWDq7j1Sn6Du7MlSSe5dTzHs8eWUreN7+Xo3M37VFWnV9XbxhZTt2YRNto7fn/v+8Oy2Hbs1iXf/8/lHn73AAAAwMYkgAMAAADcY1X16CxCDZ+3islYAeanu/tLk3x3kmdV1Wl7u/cz5YFWyDlpyfFDs1h15CNZbM90vyV1HZbFVkArnffDWQQlls59Z5IbD3Ddvj6Sz63wsnSu61Y5z3JemeSyJCd199FJfjuLIMtS+34/Hx7H1yQ5vbuPWfI5srvvaV0vSfL+JCd39wOzCJTsreX6LLaWSpJUVS097+4buvs/dPcJSX44yW9V1SPuYR2r9e+SvLO776iqb8oiDPSUJA/q7mOS3LbkOe7yzlTVEUn+IMmvJTl+jH/93vF3875fk+SD+3z3R3X3E5a7DwAAAHDfJIADAAAArFpVPbCqnpjk1UleMbY82nfME6vqESOAcVsWK658dnTfmORL78Gt/31VfVVV3S/JLyW5pLs/k+TvkxxZVdur6guTPDeL7YT2ujHJ1qra3/+FvCrJT1XVw6vqAVms6PKasYXSio1aLk7yH6vqqKp6WJJnJXnFauapqiP3+VSSo5J8tLs/VVWnJvm+ZS79haq6X1X96yRnJ3nNaP/tUdPDxvybq+qM1dS0j6OSfCzJ7VX1FUl+dEnf5Um+pqrOHFtcPSPJlyx5tidX1d5Azi1ZBFA+m4OkFrZU1fOT/FAWYaG9z3Bnkj1JNlXV85I8cMml+74zh2fxTu1JcmdVnZ7k8Uvus7/3/R1JPl5Vz6mqL6qqw6rqq0d4bbn7AAAAAPdB/mEPAAAArMYfVdXHs1jV4+eT/EYWQY/lnJzkT5LcnuQvkvxWd79l9P3nJM8dW/I8exX3f3mS38tiK6Ijk/x4knT3bUl+LMlLs1ht5o7cdduj146/N1fVO5eZ93+Ouf8syQeTfCrJM1dR11LPHPe/KouVgV455l+pLUk+uc/ny7J4vl8a3//zsgj67OtPs9gG6Yokv9bdbxztL8pi9Zw3juvfluTrV/dYd/HsLAJAH0/yO/lc0Cfd/ZEkT07yq0luTvJVSXYm+ccx5NFJ3l5Vt4+afqK7r5qoZX9OGPe4PclfJvmaJN+y5Dv530nekEV46+osfvOlW3jd5Z3p7o9n8b5dnEVw6PtG/Xst+76PUNYTk5ySxbv1kSze06OXu89aPDgAAABw76vFNtwAAAAAsPbGyi7XJnnakgAWAAAAwCHFCjgAAAAArKmq+o6qOqaqjshiy6fKYtUdAAAAgEOSAA4AAAAAa+2xST6QxXZL35XkzO7+5PqWBAAAAHDw2IIKAAAAAAAAAAAmWAEHAAAAAAAAAAAmbFrvApLkuOOO661bt653GQAAAAAAAAAAsKwrr7zyI929ebm+DRHA2bp1a3bu3LneZQAAAAAAAAAAwLKq6ur99dmCCgAAAAAAAAAAJgjgAAAAAAAAAADABAEcAAAAAAAAAACYIIADAAAAAAAAAAATBHAAAAAAAAAAAGCCAA4AAAAAAAAA
AEwQwAEAAAAAAAAAgAkCOAAAAAAAAAAAMEEABwAAAAAAAAAAJgjgAAAAAAAAAADABAEcAAAAAAAAAACYIIADAAAAAAAAAAATBHAAAAAAAAAAAGCCAA4AAAAAAAAAAEwQwAEAAAAAAAAAgAkrCuBU1U9V1d9U1V9X1auq6siqenhVvb2qdlXVa6rq8DH2iHG+a/RvPZgPAAAAAAAAAAAA6+mAAZyq2pLkx5Ns6+6vTnJYkqcmeWGS87v7EUluSXLOuOScJLeM9vPHOAAAAAAAAAAAOCStdAuqTUm+qKo2JblfkuuTPC7JJaP/oiRnjuMzxnlG/2lVVWtTLgAAAAAAAAAAbCwHDOB093VJfi3Jh7II3tyW5Mokt3b3nWPYtUm2jOMtSa4Z1945xh+777xVdW5V7ayqnXv27Jl9DgAAAAAAAAAAWBebDjSgqh6Uxao2D09ya5LXJvnO2Rt39wVJLkiSbdu29XJjtu64fMXz7T5v+2xJAAAAAAAAAACwaivZgurbknywu/d096eT/GGSb0hyzNiSKklOTHLdOL4uyUlJMvqPTnLzmlYNAAAAAAAAAAAbxEoCOB9K8piqul9VVZLTkvxtkrckedIYc1aSS8fxZeM8o//N3b3sCjcAAAAAAAAAAHBfd8AATne/PcklSd6Z5L3jmguSPCfJs6pqV5Jjk1w4LrkwybGj/VlJdhyEugEAAAAAAAAAYEPYdOAhSXc/P8nz92m+Ksmpy4z9VJInz5cGAAAAAAAAAAAb30q2oAIAAAAAAAAAAPZDAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgwqb1LmA9bN1x+YrH7j5v+0GsBAAAAAAAAACA+zor4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgwgEDOFX15VX17iWfj1XVT1bVg6vqTVX1D+Pvg8b4qqoXV9WuqnpPVT3q4D8GAAAAAAAAAACsjwMGcLr777r7lO4+JcnXJflEktcl2ZHkiu4+OckV4zxJTk9y8vicm+QlB6NwAAAAAAAAAADYCFa7BdVpST7Q3VcnOSPJRaP9oiRnjuMzkrysF96W5JiqesiaVAsAAAAAAAAAABvMagM4T03yqnF8fHdfP45vSHL8ON6S5Jol11w72u6iqs6tqp1VtXPPnj2rLAMAAAAAAAAAADaGFQdwqurwJN+d5LX79nV3J+nV3Li7L+jubd29bfPmzau5FAAAAAAAAAAANozVrIBzepJ3dveN4/zGvVtLjb83jfbrkpy05LoTRxsAAAAAAAAAABxyVhPA+d58bvupJLksyVnj+Kwkly5pf3otPCbJbUu2qgIAAAAAAAAAgEPKppUMqqr7J/n2JD+8pPm8JBdX1TlJrk7ylNH++iRPSLIrySeSnL1m1QIAAAAAAAAAwAazogBOd9+R5Nh92m5OctoyYzvJM9ak
OgAAAAAAAAAA2OBWswUVAAAAAAAAAACwDwEcAAAAAAAAAACYIIADAAAAAAAAAAATBHAAAAAAAAAAAGCCAA4AAAAAAAAAAEwQwAEAAAAAAAAAgAkCOAAAAAAAAAAAMEEABwAAAAAAAAAAJgjgAAAAAAAAAADABAEcAAAAAAAAAACYIIADAAAAAAAAAAATBHAAAAAAAAAAAGCCAA4AAAAAAAAAAEwQwAEAAAAAAAAAgAkCOAAAAAAAAAAAMGHTehdwKNm64/IVj9193vaDWAkAAAAAAAAAAPcWK+AAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABNWFMCpqmOq6pKqen9Vva+qHltVD66qN1XVP4y/Dxpjq6peXFW7quo9VfWog/sIAAAAAAAAAACwfla6As6Lkryhu78iySOTvC/JjiRXdPfJSa4Y50lyepKTx+fcJC9Z04oBAAAAAAAAAGADOWAAp6qOTvJvk1yYJN39T919a5Izklw0hl2U5MxxfEaSl/XC25IcU1UPWfPKAQAAAAAAAABgA1jJCjgPT7Inye9W1buq6qVVdf8kx3f39WPMDUmOH8dbklyz5PprR9tdVNW5VbWzqnbu2bPnnj8BAAAAAAAAAACso5UEcDYleVSSl3T31ya5I5/bbipJ0t2dpFdz4+6+oLu3dfe2zZs3r+ZSAAAAAAAAAADYMFYSwLk2ybXd/fZxfkkWgZwb924tNf7eNPqvS3LSkutPHG0AAAAAAAAAAHDIOWAAp7tvSHJNVX35aDotyd8muSzJWaPtrCSXjuPLkjy9Fh6T5LYlW1UBAAAAAAAAAMAhZdMKxz0zye9X1eFJrkpydhbhnYur6pwkVyd5yhj7+iRPSLIrySfGWAAAAAAAAAAAOCStKIDT3e9Osm2ZrtOWGdtJnjFZFwAAAAAAAAAA3CcccAsqAAAAAAAAAABg/wRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACZsWu8CWJmtOy5f8djd520/iJUAAAAAAAAAALCUFXAAAAAAAAAAAGCCAA4AAAAAAAAAAEwQwAEAAAAAAAAAgAkCOAAAAAAAAAAAMGHTehfA+tq64/IVj9193vaDWAkAAAAAAAAAwH2TFXAAAAAAAAAAAGCCAA4AAAAAAAAAAEwQwAEAAAAAAAAAgAkCOAAAAAAAAAAAMEEABwAAAAAAAAAAJgjgAAAA
AAAAAADABAEcAAAAAAAAAACYsKIATlXtrqr3VtW7q2rnaHtwVb2pqv5h/H3QaK+qenFV7aqq91TVow7mAwAAAAAAAAAAwHpazQo439rdp3T3tnG+I8kV3X1ykivGeZKcnuTk8Tk3yUvWqlgAAAAAAAAAANhoZragOiPJReP4oiRnLml/WS+8LckxVfWQifsAAAAAAAAAAMCGtdIATid5Y1VdWVXnjrbju/v6cXxDkuPH8ZYk1yy59trRdhdVdW5V7ayqnXv27LkHpQMAAAAAAAAAwPrbtMJx39jd11XVFyd5U1W9f2lnd3dV9Wpu3N0XJLkgSbZt27aqawEAAAAAAAAAYKNY0Qo43X3d+HtTktclOTXJjXu3lhp/bxrDr0ty0pLLTxxtAAAAAAAAAABwyDlgAKeq7l9VR+09TvL4JH+d5LIkZ41hZyW5dBxfluTptfCYJLct2aoKAAAAAAAAAAAOKSvZgur4JK+rqr3jX9ndb6iqv0xycVWdk+TqJE8Z41+f5AlJdiX5RJKz17xqAAAAAAAAAADYIA4YwOnuq5I8cpn2m5Octkx7J3nGmlQHAAAAAAAAAAAb3AG3oAIAAAAAAAAAAPZPAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEwRwAAAAAAAAAABgggAOAAAAAAAAAABMEMABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwIRN610Ah6atOy5f8djd520/iJUAAAAAAAAAABxcVsABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAEzatdwGwWlt3XL7isbvP234QKwEAAAAAAAAAsAIOAAAAAAAAAABMEcABAAAAAAAAAIAJAjgAAAAAAAAAADBBAAcAAAAAAAAAACYI4AAAAAAAAAAAwAQBHAAAAAAAAAAAmCCAAwAAAAAAAAAAE1YcwKmqw6rqXVX1x+P84VX19qraVVWvqarDR/sR43zX6N96cEoHAAAAAAAAAID1t5oVcH4iyfuWnL8wyfnd/YgktyQ5Z7Sfk+SW0X7+GAcAAAAAAAAAAIekFQVwqurEJNuTvHScV5LHJblkDLkoyZnj+IxxntF/2hgPAAAAAAAAAACHnJWugPNfk/xMks+O82OT3Nrdd47za5NsGcdbklyTJKP/tjEeAAAAAAAAAAAOOQcM4FTVE5Pc1N1XruWNq+rcqtpZVTv37NmzllMDAAAAAAAAAMC9ZiUr4HxDku+uqt1JXp3F1lMvSnJMVW0aY05Mct04vi7JSUky+o9OcvO+k3b3Bd29rbu3bd68eeohAAAAAAAAAABgvRwwgNPdP9vdJ3b31iRPTfLm7n5akrckedIYdlaSS8fxZeM8o//N3d1rWjUAAAAAAAAAAGwQK1kBZ3+ek+RZVbUrybFJLhztFyY5drQ/K8mOuRIBAAAAAAAAAGDj2nTgIZ/T3W9N8tZxfFWSU5cZ86kkT16D2uBetXXH5Sseu/u87QexEgAAAAAAAADgvmRmBRwAAAAAAAAAAPgXTwAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAA
AAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACQI4AAAAAAAAAAAwQQAHAAAAAAAAAAAmCOAAAAAAAAAAAMCETQcaUFVHJvmzJEeM8Zd09/Or6uFJXp3k2CRXJvn+7v6nqjoiycuSfF2Sm5N8T3fvPkj1w4a3dcflKx67+7ztB7ESAAAAAAAAAOBgWMkKOP+Y5HHd/cgkpyT5zqp6TJIXJjm/ux+R5JYk54zx5yS5ZbSfP8YBAAAAAAAAAMAh6YABnF64fZx+4fh0kscluWS0X5TkzHF8xjjP6D+tqmrNKgYAAAAAAAAAgA1kJSvgpKoOq6p3J7kpyZuSfCDJrd195xhybZIt43hLkmuSZPTflsU2VfvOeW5V7ayqnXv27Jl7CgAAAAAAAAAAWCcrCuB092e6+5QkJyY5NclXzN64uy/o7m3dvW3z5s2z0wEAAAAAAAAAwLpYUQBnr+6+Nclbkjw2yTFVtWl0nZjkunF8XZKTkmT0H53k5jWpFgAAAAAAAAAANpgDBnCqanNVHTOOvyjJtyd5XxZBnCeNYWcluXQcXzbOM/rf3N29lkUDAAAAAAAAAMBGsenAQ/KQJBdV1WFZBHYu7u4/rqq/TfLqqvqVJO9KcuEYf2GSl1fVriQfTfLUg1A3AAAAAAAAAABsCAcM4HT3e5J87TLtVyU5dZn2TyV58ppUBwAAAAAAAAAAG9wBt6ACAAAAAAAAAAD2TwAHAAAAAAAAAAAmCOAAAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYIIADgAAAAAAAAAATBDAAQAAAAAAAACACZvWuwDgntu64/IVj9193vaDWAkAAAAAAAAA/MslgAN8HsEeAP3xZUAAACAASURBVAAAAAAAAFg5W1ABAAAAAAAAAMAEARwAAAAAAAAAAJgggAMAAAAAAAAAABMEcAAAAAAAAAAAYMKm9S4A+Jdj647LVzx293nbD2IlAAAAAAAAALB2rIADAAAAAAAAAAATBHAAAAAAAAAAAGCCAA4AAAAAAAAAAEwQwAEAAAAAAAAAgAkCOAAAAAAAAAAAMEEABwAAAAAAAAAAJgjgAAAAAAAAAADABAEcAAAAAAAAAACYIIADAPD/2bv3eNuref/j73fl0pWiEyqVFDqIlBNC5FYhpDpJOp3cb4UfyjW3UzouR7lVonShKEcqQnQTqp3aKVLouCsOFQcpn98fnzH3mnvtudbea44x9t6z/Xo+Huux95xrrc/87rm/8/sdl8/4DAAAAAAAAAAAAKACCTgAAAAAAAAAAAAAAABABRJwAAAAAAAAAAAAAAAAgAok4AAAAAAAAAAAAAAAAAAVSMABAAAAAAAAAAAAAAAAKpCAAwAAAAAAAAAAAAAAAFQgAQcAAAAAAAAAAAAAAACoQAIOAAAAAAAAAAAAAAAAUIEEHAAAAAAAAAAAAAAAAKACCTgAAAAAAAAAAAAAAABAhVWW9QEAQK2NDzxzTj9//aE7dzoSAAAAAAAAAAAAAMCKiAo4AAAAAAAAAAAAAAAAQAUScAAAAAAA
AAAAAAAAAIAKJOAAAAAAAAAAAAAAAAAAFUjAAQAAAAAAAAAAAAAAACqQgAMAAAAAAAAAAAAAAABUIAEHAAAAAAAAAAAAAAAAqEACDgAAAAAAAAAAAAAAAFCBBBwAAAAAAAAAAAAAAACgAgk4AAAAAAAAAAAAAAAAQIXFJuDY3tD2N21fbfsq2/uX59ex/TXb15Y/1y7P2/bhtq+zPd/2Vr3/EQAAAAAAAAAAAAAAAMCysiQVcG6T9LqI2ELStpJeYXsLSQdKOiciNpN0TnksSTtK2qx8vVjSx5ofNQAAAAAAAAAAAAAAALCcWGwCTkT8OiIuK3+/RdIPJK0vaRdJx5UfO07Ss8rfd5H06UjfkXR32/dufuQAAAAAAAAAAAAAAADAcmBJKuAsYHtjSQ+X9F1J60XEr8u3fiNpvfL39SX9fOjXflGemx7rxbYvtX3pjTfeOMfDBgAAAAAAAAAAAAAAAJYPqyzpD9peQ9Kpkg6IiJttL/heRITtmMsLR8RRko6SpK233npOvwsAS8vGB565xD97/aE7dzwSAAAAAAAAAAAAAMDyaokq4Ni+kzL55sSIOK08/dvB1lLlzxvK87+UtOHQr29QngMAAAAAAAAAAAAAAADucBabgOMsdXOMpB9ExAeGvnW6pH3K3/eR9MWh51/gtK2km4a2qgIAAAAAAAAAAAAAAADuUJZkC6rHSNpb0pW2Ly/PvUnSoZJOsb2fpP+RtHv53lmSdpJ0naT/k7Rv0yMGAAAAAAAAAAAAAAAAliOLTcCJiAsleYZv7zDi50PSKyqPCwDu0DY+8Mwl/tnrD92545EAAAAAAAAAAAAAAGotdgsqAAAAAAAAAAAAAAAAADMjAQcAAAAAAAAAAAAAAACoQAIOAAAAAAAAAAAAAAAAUIEEHAAAAAAAAAAAAAAAAKACCTgAAAAAAAAAAAAAAABAhVWW9QEAANrZ+MAz5/Tz1x+6c6cjAQAAAAAAAAAAAIAVBxVwAAAAAAAAAAAAAAAAgAok4AAAAAAAAAAAAAAAAAAV2IIKALBE5rK9FVtbAQAAAAAAAAAAAFiRUAEHAAAAAAAAAAAAAAAAqEACDgAAAAAAAAAAAAAAAFCBBBwAAAAAAAAAAAAAAACgAgk4AAAAAAAAAAAAAAAAQAUScAAAAAAAAAAAAAAAAIAKJOAAAAAAAAAAAAAAAAAAFUjAAQAAAAAAAAAAAAAAACqQgAMAAAAAAAAAAAAAAABUIAEHAAAAAAAAAAAAAAAAqEACDgAAAAAAAAAAAAAAAFCBBBwAAAAAAAAAAAAAAACgAgk4AAAAAAAAAAAAAAAAQAUScAAAAAAAAAAAAAAAAIAKJOAAAAAAAAAAAAAAAAAAFUjAAQAAAAAAAAAAAAAAACqQgAMAAAAAAAAAAAAAAABUIAEHAAAAAAAAAAAAAAAAqEACDgAAAAAAAAAAAAAAAFCBBBwAAAAAAAAAAAAAAACgAgk4AAAAAAAAAAAAAAAAQAUScAAAAAAAAAAAAAAAAIAKJOAAAAAAAAAAAAAAAAAAFUjAAQAAAAAAAAAAAAAAACqQgAMAAAAAAAAAAAAAAABUWGVZHwAAYMW28YFnLvHPXn/ozh2PBAAAAAAAAAAAAADGQwUcAAAAAAAAAAAAAAAAoAIJOAAAAAAAAAAAAAAAAEAFEnAAAAAAAAAAAAAAAACACiTgAAAAAAAAAAAAAAAAABVWWdYHAABADxsfeOacfv76Q3fudCQAAAAAAAAAAAAA7uiogAMAAAAAAAAAAAAAAABUIAEHAAAAAAAAAAAAAAAAqEACDgAAAAAAAAAAAAAAAFBhlWV9AAAATJqNDzxziX/2+kN37ngkAAAAAAAAAAAAAJYHi62AY/uTtm+w/f2h59ax/TXb15Y/1y7P2/bhtq+zPd/2Vj0PHgAAAAAAAAAAAAAAAFjWlmQLqmMlPW3acwdKOiciNpN0TnksSTtK2qx8vVjSx9ocJgAAAAAAAAAAAAAAALB8
WuwWVBFxvu2Npz29i6Tty9+Pk3SupDeW5z8dESHpO7bvbvveEfHrVgcMAMAdFVtbAQAAAAAAAAAAAJNpSSrgjLLeUFLNbyStV/6+vqSfD/3cL8pzi7D9YtuX2r70xhtvHPMwAAAAAAAAAAAAAAAAgGVr3AScBUq1mxjj946KiK0jYut111239jAAAAAAAAAAAAAAAACAZWKxW1DN4LeDraVs31vSDeX5X0racOjnNijPAQCAZWQuW1tJbG8FAAAAAAAAAAAAzNW4FXBOl7RP+fs+kr449PwLnLaVdNPQVlUAAAAAAAAAAAAAAADAHc5iK+DY/oyk7SXd0/YvJL1d0qGSTrG9n6T/kbR7+fGzJO0k6TpJ/ydp3w7HDAAAlhNzqa4zl8o6veICAAAAAAAAAAAAPSw2ASci9pzhWzuM+NmQ9IragwIAAOiBxB4AAAAAAAAAAAD0sNgEHAAAACweyT0AAAAAAAAAAAArLhJwAAAAlmOTuM0XyUgAAAAAAAAAAGBFQwIOAAAAJgLJSP3jAgAAAAAAAACA8ay0rA8AAAAAAAAAAAAAAAAAmGRUwAEAAAAgico6AAAAAAAAAACMiwQcAAAAAN2xHRcAAAAAAAAA4I6MBBwAAAAAmIbEHgAAAAAAAADAXKy0rA8AAAAAAAAAAAAAAAAAmGRUwAEAAACApWQulXUkqusAAAAAAAAAwKQgAQcAAAAA7gB6bZu1PMSda2wAAAAAAAAAWNpIwAEAAAAA3KH0ShoCAAAAAAAAgJmQgAMAAAAAwBIgsQcAAAAAAADATEjAAQAAAABgGeq5HdfysIUYyUgAAAAAAABYEZCAAwAAAAAAlgs9k5EAAAAAAACAnkjAAQAAAAAAd3hU7QEAAAAAAEBPJOAAAAAAAACMiW2+AAAAAAAAIJGAAwAAAAAAsEIhaQgAAAAAAKC9lZb1AQAAAAAAAAAAAAAAAACTjAo4AAAAAAAAWG71rKxD1R4AAAAAANAKCTgAAAAAAABAQyT2AAAAAACw4iEBBwAAAAAAAJgAk1gNiGQkAAAAAMCKggQcAAAAAAAAABNlEpORAAAAAAB3bCTgAAAAAAAAAEBnJPYAAAAAwB0bCTgAAAAAAAAAMKGWh+3DesYmGQkAAADApCABBwAAAAAAAAAw8UjsAQAAALAskYADAAAAAAAAAMAMJrEa0PIQt2fs5SEuAAAAMB0JOAAAAAAAAAAAAHNAMlL/uL1jAwAAtEYCDgAAAAAAAAAAAFYYJCMBAIAeSMABAAAAAAAAAAAAVkAkIwEA0A4JOAAAAAAAAAAAAABWaCQjrRhxe8aetLg9Y5P8hhUVCTgAAAAAAAAAAAAAAGCpIxlp+YrbM/byELc3EnAAAAAAAAAAAAAAAABwh9Y7sWelOf8GAAAAAAAAAAAAAAAAgAVIwAEAAAAAAAAAAAAAAAAqkIADAAAAAAAAAAAAAAAAVCABBwAAAAAAAAAAAAAAAKhAAg4AAAAAAAAAAAAAAABQgQQcAAAAAAAAAAAAAAAAoAIJOAAAAAAAAAAAAAAAAEAFEnAAAAAAAAAAAAAAAACACiTgAAAAAAAAAAAAAAAAABVIwAEAAAAAAAAAAAAAAAAqkIADAAAAAAAAAAAAAAAAVOiSgGP7abavsX2d7QN7vAYAAAAAAAAAAAAAAACwPGiegGN7ZUkfkbSjpC0k7Wl7i9avAwAAAAAAAAAAAAAAACwPelTAeaSk6yLiJxFxq6TPStqlw+sAAAAAAAAAAAAAAAAAy5wjom1A+7mSnhYRLyyP95b0LxHxymk/92JJLy4PHyDpmiV8iXtK+l2jw11asSctbs/YxO0fe9Li9ow9aXF7xp60uD1jT1rcnrEnLW7P2JMWt2fsSYvbMzZx+8eetLg9Y09a3J6xJy1uz9iTFrdn7EmL2zP2pMXtGXvS4vaMTdz+sSctbs/Ykxa3Z+xJi9sz9qTF7Rl70uL2jD1pcXvGnrS4PWMTt3/sSYvbM/akxe0Z
e9Li9ow9aXF7xp60uD1jzyXuRhGx7qhvrNLueOYmIo6SdNRcf8/2pRGxdYdD6hZ70uL2jE3c/rEnLW7P2JMWt2fsSYvbM/akxe0Ze9Li9ow9aXF7xp60uD1jE7d/7EmL2zP2pMXtGXvS4vaMPWlxe8aetLg9Y09a3J6xJy1uz9jE7R970uL2jD1pcXvGnrS4PWNPWtyesSctbs/Ykxa3Z+xJi9szNnH7x560uD1jT1rcnrEnLW7P2JMWt2fsSYvbM3aruD22oPqlpA2HHm9QngMAAAAAAAAAAAAAAADucHok4FwiaTPbm9i+s6R/lXR6h9cBAAAAAAAAAAAAAAAAlrnmW1BFxG22XynpbEkrS/pkRFzV8CXmvG3VchB70uL2jE3c/rEnLW7P2JMWt2fsSYvbM/akxe0Ze9Li9ow9aXF7xp60uD1jE7d/7EmL2zP2pMXtGXvS4vaMPWlxe8aetLg9Y09a3J6xJy1uz9jE7R970uL2jD1pcXvGnrS4PWNPWtyesSctbs/Ykxa3Z+xJi9szNnH7x560uD1jT1rcnrEnLW7P2JMWt2fsSYvbM3aTuI6IFnEAAAAAAAAAAAAAAACAFVKPLagAAAAAAAAAAAAAAACAFQYJOAAAAAAAAAAAAAAAAEAFEnAA3GE4bbisjwMAAAAAAAAAAAAAsGJZ4RNwbP+T7fsOvhrFfO+SPAe0YPsZtifis2x7q9m+auNHREg6q8GhTjzbK9nefVLiYumwvfrgemF7c9vPtH2nZX1cd1S217b90GV9HHdk5Zq01rI+DqBWy3PZ9rtsrzL0eC3bn2oRG1iaJqmfA+COyfY6I75W2P6T7XuVPuQzbN9rWR8P7rhs72Z7zfL3t9g+rcW4IXBHYft423cberyR7XMaxb7LkjyHFYvtlZf1MWD5UObS71r+btv72j7C9suGx6KAHmyvYXuNZX0cS2KFHcwqHcZrJf1U0nmSrpf05UbhnzziuR1bBLZ9zxZxZon/z43jHWf77kOP17b9yYbxm3X+ZxhYWfDV6Hh7TH7vIela24fZfmD9UU4Z3EinPVdzDr6/fH1E0nclHSXp6PL3j1TEHXaZ7W1aBLJ9pe35I76utD2/Qfxu51xE/EPSG2qPcWnEtf0l26fP9NXytVrrmXBpe/8yaWrbx9i+zPZTKsOeL+mutteX9FVJe0s6tvI4u147be+2JM+NGfshLeJMi3lu+X9bR9Jlko62/YEGcVe2fWL9EY6M+5rWcUvs45fkuTHinlTe49UlfV/S1bZfXxu3xD6sxL6T7XNs32j7+S1iTxrbd7P9QduXlq/3Dw/wLY9s7zft8cq2376sjmdxOp7Lq0j6ru2H2n6ypEskzWsQV1L7BJ/y//TNNkc3Mv6mLgPGtre3/erh/snyyPZ65d7/5fJ4i+nn95hx91+S55aXuOrYz2mt9OsW6Ts1fo1e58UikzWjnhsj7l1sP8/2m2y/bfBVG7fE/qcRzz2gQdxu7c6heMt98nDn9uFjluS55chlkm6U9CNJ15a/X1/6Zo+oCexOiyNa36eH4rxQ0sWSniPpuZK+Y/vfa+OW2E3v1T37qLZfO9tXTexZXnPfit/tPa71Y9sn2n6p244pvzUibrG9naQnSTpG0scaxu/C9ibD7QHbq9reuDJm8z6qOy+QHHqdLW2/snxt2TDuyrbv44aLqjteOx9oewdPmyy0/bTK0Bcq+3w72X6RpK9J+q/KmAPfXsLnxlbe3zVbxuyhx7265+fP9rql7X2U7U8OvmpiDrnW9n/a3qJRvG5jndNeYyPbTyp/X7XFeee+8wGr2X6r7aPL481sP71F7IbO0lRuwaGSdlbO722jnO9bIfVoA/TSs63ci+2H2P6epKuU46fzbD+4YfxX2V67VTxJchaMWL45O+MHS9pIOaBsZbGL+1XEvELSEyV9PSIebvsJkp4fEWMPYNl+maSXS7qfpB8PfWtNSd+KiLEbx7ZXioh/2L4sIrYqz+0fER8a
N+YMr7MgfqN434uIhy/uuTFjv1DS2yR9Q3lOPF7SOyNirEaF7Z9KihLrvpL+UP5+d0k/i4hNGhzzPEmPlbS2pG8pJ0RujYi9KuOuJWlPSfsq/w2fkvSZiLilMu6Vkl4UEd8pj3eVdEhEbF4Z9zRJb4+IK8vjB0s6OCKeWxO3xPqhpPtL+h9Jf9bU9WLOVShsbzTb9yPif8Y6yKn4w+fciPDjX+NK/EMl/U7Sycr3YhD4f5enuLYfP9v3I+K8ceIOxV9X0hslbSFpQSMoIp5YE7fEXuSaaXv+OOfbiNhXRMSWtp8q6SWS3irp+Jpr9OB4bb9K0qoRcZjtyyPiYRUxe5/Ho97jJvcq2xdIuosyCenEiLipQczvlXbFCyVtGBFvb3hOXCjpiRFxa22saXEvjohHtoxZ4i70/+RcLXNlRFR11gfnrO29JG0l6UBJ8xq9x4PYz5b0dEmvlXR+RIw9WGj7S8rPyEgR8cwx4x6xmLivHifuUPxTlUkhx5Wn9pa0ZUQ8pyLmlRp9zGPfq6fFP0nZbttP0jrKz/Z5EfH/xozX5f9uKH7Pc3kHSWco27OPi4jramMOxT5E0lOU7c71JH1Y0hER8eGKmOdIek6L6/CI2JdL2lrSxsqBoi9K+ueI2KkiZu9z+cvK9vybS1tgFUnfi4iqxNEZ7qnVfbNecUucXv2cwyS9W9JfJH1F0kMlvSYiThgz3l+U7eIvS/qMpLMj4vaaYxzxGk3PizI4uJqkb0raXlNtubUkfSUiqpKebH9F0k3KBMAF70VEvL8mbol9jXJy9pTy+HWS9mvQxujS7iz3p5cq34dLlO/xhyLiPyvjbiDpCEnbKT8fF0jaPyJ+URO3xF4q7cOZnhsj7ubKyfn1IuLBziqUz4yId1fGPVrS5yPi7PL4KZJ2VX4WPxQR/1IRu9f4UPP7dIl7jaRHR8Tvy+N7SLooIlokvzW9V/cc3/Nikrsj4h3jxp7lNX8WEWMlGCyFca27SPoX5bn8GEkPkDQ/Ip5dGXfQrz5E2Yc8qWHb4vDZvl/Tj7J9qfJzcmt5fGflnMDYCwU79VFnS36PRuNl+0t6kaTTylPPlnRURBxRGfdVkt4u6beS/lGebtH+7tHHebWkV0j6gaSHKe/RXyzfa3Hv207ZjvudpIdHxG8q491L0vqSTpD0PC3cNvx4bduwvMY2kj6pnC+zpD9K+veIGGvBiO1bNHu/rCrpuce9uufnz/ZFyvbg9Pb3qePGHIq9pqR/VX5GVlL+P342Im6ujNtlrLPEfpGkF0taJyI2tb2Z8lzeoTJuz/mAk5X/fy8obdrVlO2tscbubV8YEduN+KyM/RmxffWg31U+I9tELtpeMJ8xzrFOe42m/fVpsddV3p82VuYbSJIioiqxvEcboMRpfry95nN6jqOW69ubI+Kb5fH2kv4jIh49bsxp8d+tvMZdpry+nR1Rl0AzKeWgjpH0Gk27cVT6e0T83rn6aKWI+Kbt2izhk5QDbocoB9AHbonKCW9J59n+s6R7OTOkr5S0j6SmCTga/YGrsZLttSPiD5LkzJ5rdd69Xtm4XKjzr/xwzNmgA14GWL4QEWeVxztKelaTI5YcEf/nXKn40cHkd23QiLjZ9uclrSrpAGWn5vW2D6/s2DxP0idtnyvpPpLuoUxcq/WAKMk3khQR37f9oAZxJempjeIsMhBRJgCaXTdrBn2W0B7lz1cMv6wySXC5iRuVCTZL4ERlstDOyoHvfZQrF8fmqYTLTb3wqrE1lR2yFgbX452UiTdX2a69Rtv2oyTtpZyglqSqEqK9zuNy7d1J0vrTBsjWknRbi9eIiMeWzte/S5pn+2JJn4qIr1WEXcX2vSXtLunNLY5zyE8kfctZGWo4+a22ws63bH9YiybVXTZOMNsHSXqTpFVt36ypc/lWtVkJcSfniqNnSfpwRPzddquM8sE1fmdJn4uIm+o/dnpfbYAZXNop7sCmEbHr0ON3NGizdF21ExHPs72Hsp38
Z0nPi4iaa3Kv/7uBLuey7cdJOlzSOyU9RNIRtveLiF/VxpakiDjI9teVK5xaJfj8SdKVtr+mha9DVYlkxT8i4rYyaXFERBzhXDVTo/cKtHtGxCnleqpy/GP3g23vqWzXb+KFqwuuJWnsfuoscdesiTusYz/nKRHxhnJeXK+s6HC+csJhHD9U9pOeK+l1kj5l+wvKZKFWbd2m54UywfsAZT9v+J5/s3LSqdYGEVG7unsm20s6ylmdZj3lxNbYySJLod25RTmX91KOGR2oHN+qSsBRJoGcJGlQpef55blRVZnnqnX78FGSHi1pXS9cJWQtVfZHiqOVY0RHSlJEzHcmPlUl4EjaNiJeNHgQEV+1/b6IeInrt+PoNT7U4z4tSb+XNJz8eEt5roWm9+qe43sR8Q7nwoJXR8QHa2IN88zVaKy8zo2l97iWcpz+7+XPf0i6oXzV+qXtI5XXs/eWz1urSv53VS7QOrk83k3S1WpT4WOV4UnkiLi1TMBVxSx/NuujRsQTKo9pSewn6V8i4s+S5KwO8W1l4miN/ZXjyq2uP5K6XTtfJOkREfEnZxWEz9veOHJBddV/ou29lQv1XqCcmD7L9r4RcUVF2KdK+jdJG0gaHme6WTnG08Ixkl4eERdIC5KIPqX8N8xZRPSuotP8Xt3587daRLyxR+DIBRBHKyt9P17ZBv1g6au9q+Lz0musU8q5i0cqP9eKiGs9opLmkhqaD7hfx/mATSNij9LXVjn/xr5eRMR25c+Wn5Wf235iRHxD2ZfeUNL/lPnZVlr314d9UZmo9nW1yzeQ+rQBpA7H23Fesuc46uqD5BtJiohznRXFm4iIt9h+q6aScT9s+xRJx0TEj2f/7dEmJQHnpohotT3UwB+dpf/Ol3Si7Rs0dIEfR+SKzZsk7eksFzdYhfQtVQ5AlknCuysHabaR9EJJm9v+rHJl79hlOJ2rNwbZbut5qCx0RLyz5riV2w192/bnyuPdJL2nMuZAr87/9AGWLzszLltoPvltexdl4/j+kj4t6ZERcYMzO/ZqVXRsIuJK2++RdLzy/X1cNFhNJ2m+7U9o6oa5l6Tq0rdS/eqdUWy/RNI7JP1VUxmcLRJZBvFnXfEw7iBnrxtpx4SLp0t6lxatdlZbov0eEXGMs2rYecqExksqY86X9AxlmcXhjk2LhMuBeba/KmkTSQc5Vxr8YzG/szgHSDpIOQh5le37KVfLVOtwHv+vMsHgmVp465RblEm5TZTO11vKax0u6eGlY/OmiDht9t8e6Z2SzpZ0YURcUt7jaxsd7o/L10rKzl0rg1UUw/f80JgJlxFxiKRDbB8SEQfVHtwIRyo7X1dIOt+5qrNqxc2QM5yV1P4i6WXOVQZ/rQnYK8kwIo5b/E9V+Yvt7SLiQklyVqT8S03A3hMAJaFuf0mnSnqQpL2dK2X/b5x4w/93tleVdN+IuKbJwaZe5/L7JO0WEVdLku3nKCtGNtnCp1OCz2maWh3b2t/L4NU+ynu3JFWVDV8Kk1l/LoNXUeJvq+xnjusiSb+WdE9l/2zgFtW1wXvFldS3n6P2k1kRuehkMCh9L2Uy7qG2N4iIDWuCF03PizL58yHbr6pMZprJRbYfMrzwopWI+LWzws5ByvbxgRHxp4qQv1Lfdmev5OF1I2J4e4xjbR/QIK7UuH0o6c6S1lB+9obbsTcrE9dqrRYRF0/7HLdInvq17TdK+mx5vIek35YEjNq+WfPxoRK0VyLudcrtTr6oPBd2UY7tvFaqnihrfq8uuozvRcTt5XibJeAok2yeqpz4H2bl/bZKx3Gtm5XJ7x+QdHTDpIjdJT1N0vsi4o/ORS5Nth1WTvRvFxG3SZLtj0u6ICJe2iD2jbafGRGnl9i7KKuT1GjeRx0obarXKvs5Ly79qQdExBktwmvhicLb1Wbh789V1y4eqdO1c6VB+yQirneu1v986ffVvhe7Ks/jGyR9xpn4fZym7t9zVsYYjrO9azSomDKD2wfJN+U1
L7Rdfb/2YrZMqRir7TGXM2tV4THHIgfOsL3TIPG0pdL22Vk5Mb2xsv93orJC0FmSxt0xoddYpyT9rSRBSJKcFURr2uA9CzAM3FrGnwZ9vk0l/W3cYJ0+Gy+U9GnbByuvx5eXxLS7K+8pLfRYIDnQK1GtRxtA6nC8Hecjey60/0lJkDm+PH6+MoGvmYgI27+R9BtlX3Jt5X37axHxhrnGm5QtqA5V3thO09DFZtyToMRcXdlwXUl5A72bcvuJ6o5CXW5GxgAAIABJREFUOQl219Qg8rOUF4mxV944V4NepFxh+MiI+INzNcguyqSIsTP/bO8z9PCdym2dJLWZ3HHuCzkYpPnGYDKgQdxPKxvEC3X+y9fYnX/bZyszCoeTQx4XEdWVVZzZwa9Tlh57b5mYPSDqypseK+mTEXH+iO/tEBHnVMQ+RtKmyobV5sqKS0dExEfGjVni3lXSyyQ9rjx1vqSPRUSTDmRrtq+V9KiIaHHDHBX/O8qtJ+YrO2APVQ4A/1UVpSd7dqSd24ZN39Lp05Uxr1NmM18ZDW9Otr8TEduWz/bhygH2z0fEphUx50XEI9x4275pr7GSsuP8kzLgdA9J60dEk2S1En+NqCwVOhSv6Xnsqe2yToqI57U4xhGv8VDl9W1n5X7Zx0TEZbbvI+nbETFrue5lxfZq4yYULE0lkenZGtoWISL+u9NrrTIYQG0Qax1l8vftpb24ZlSWci5xu2xZ435bXG2pnPC+W3nqD5L2aXENmmkCIOq3rPuhpFdGxNfL+fdaZTnrf66M+wxlUsudI2IT2w9TbntauwXVyjG0RU055pVrz+Xpcctz92g1IeKsFvZv0xJ8/iPqt6vpkeQ06Iu8VHld/4ztTSTtHhHVe7R3PJcfoWyzPFi5Fdy6kp5b+/kb9IEjtzbeXJmU9eWI+HtN3BJ7I0mblc/fqsrVX7XbRB2rfv2cQ5X99L8oV0XeXdIZMeZ2Mp5lWwzbG01P2hrzNbZSJh01OS9cVi3ONAlQOfgv21crk6d+qhzHabJFW4n9dWWb/tXKFZfHKLfjGGvLwaG4d1IO9ja9Fjm3oHijMuFyZ+WWOCdExGMr456jsi1beWpPSftGZVn9ngafB2fiYtReJ4biflnSK5Vjb1vZfq5yW7IdK+PeU7nVyXblqW8pr/s3Kc+Tsasj9BgfKnF73ae7bb3U617deXzvg8okoVZVoo5RVmO9cMT3qvvEvca1yuTSdsp76a3Ksevzx71H214rsmLYqAnDkHTz9HbuGK9xjfK9+N/yeG1J34k226ltqpyQvo/yvvdzSXvHmCunh+IO91FXk7RWoz5q061OpsV+rTKp7gvlqWdJOjYixtqBwFPV0/5ZudXZmVp4nqiqWkaPa6ftb0h6bURcPvTcKsrq/XtFRIukywXjQ7bvHA228XEmkr9H0n0iYsdyjX5URBzTIPZ/KStbfkb5md5D2Zc6Qaq6hl6vbBMutOVg+fbY/TNnYtb/U9u5nE/N8u2Ium1lbpG0uvJ6POjjRdQvbpXtnygXcR4TERdN+97hDdovzcc6nUm3f1RWinqVsnrN1RFRVbHc9shtISPiZ6Oen2PsJ0t6i3Le5avK7R3/LSLOHTNel62GSuwHKecMV5H0C0mXRNmKqlbr/vq02O9W3uuaJqrZvr/yWnaf8tQv1KYN0Px43Xkryh5j4KW99g5N9csukHRwlN13ajm3znyBMmnqE5L+O3LhzEqSrh1nLnFSEnBGnQxVJ0FptJ0cEb8c/8hmjH2NpC2jJBOUAcjLaxrypQH8KOUH+FLlyoj7K6tFXBARTbYh6DmZ3Fqvzn/p1LxdCyeHvCPaZbEOXqd68tuZefz16FS60Ll67kNRLhS27ybpAxGx3+y/ecfiXF35nNaNwKH4p0l6e5TVoc7kloMjomolYK+OdPnsba9sCJ4laUdltY/a4/2mpB1aNdSG4j5deUPeUDlpsZbyM336rL84e8zvKBNNdtFU2eIFajsdQ6+z
tqTNtHCi0yKTUHOId5JyYPN25b7Fayk/47Xl75ufx7a/L+k/lPe5RVa61U4Kldc4Tzlh87mI+Mu07+0dEceP/s1ZY/bco/ZR5XjXiIj7lgSJl0TEyyvjrqd8r5sOrtj+qLKtMpgU2kPSjyPiFTP/1qzxnh8RJ3jhbQsGQlk16fSahrdz7+JjlFuGNGnAD8UerLYdnFeDPcM/Jo1fwc32hyTdS1MTC3tK+q2k/y5xx1p9MPQ+r1H+/JNysmne8ADimLF7TQCsNb1dZXvziPhRZdx5ymTyc6NMsNu+MiIeUhn3J5I+r5xs+UFNrGlxN1eeV+uV+/9DJT0zKhYDTIvfPMHHnZKcRrzO2pI2jHbJrN2StMvg/AOUAxTXRJskmXnKFYprKyeRL5F0a0TsNesvLj7uiyS9WNI6EbGpM+n741GRBNC7n1Neo1nCpe3txx0UnePrNDsvbL8jIt5eJgEGg7IL/qwZ/C/xRyYyj3u/mxb7WTGU1Fvel4Mi4l2VcZfKtai8VnXycHmPj1COFYVy4vvVjQb/e7UPt1YmDQ1WON+kTJadN/NvLVHc+ym3On20cvLtp5KeHxHX18SdRD3u00uDOyTi9hzf6zFW3dNSGNd6oHJs6ABJ/xQRq44Z54yIePqICcPB39dQVtoZezsc2/tKOlg5kWzl+XFwNKww6qy6r6iozubZE2UH/d8Lp3/e5/gal0bE1h5KJLZ9RURsOW7MafEHOwRIOX8x9tZyveYBhuL36ONsIOm2Ue1L24+Jiu2Se40PldhfVt6r3xwRW5Z21vdq+78ldpdJX8+w5WBEvGSceENxu1RznES216i5ps0St+e5vJKyctFTlNf7syV9YjDXVRF3kFxg5Zj9Jsq+WdXir6H495C0bYn/nR7jDa3Zfnq0qZ42HLNX8mmXRLXBfaRFG2Ba3MHx/k15vK12jeim1xh4T86KTp8adWy2HzTOuO1EJOD0UBptuysbqycrJ99+2yj2NyU9OyL+WB7fXdJpLTph0xrEVyozvh4fEa+qjT09PtrrMfntXPX2nMgt0JZrtk+JiN09QwZkNFgN2YPthys7Ht/VwqsrWiVZXDW9gTbquTHidulIl/+/LZWdry3L4OwJEfHkyrjbKJMtzlPDVSw9OFdCPknSezVUNWygxcCN7Rcqt1LZQNLlyob3t2vuJbYvj4iH2d5LWa3mQOVkeouVyE3PY+c+0Hsp79XTk6WqJ4WGXufOygoAoewsVa0WGnqPny3p6coKHOe3GMCy/V1lif7Thz7T34+IB1fG7TK44qxG8qBBx7Z0fK+KiAeNGe8lEXHkLANv91BWCdx2vCOWnKsV9lUmC12qfF++Wts5L7EXaWO5QeLz4Fq/uOfGiHuSpK2Vnz8rz+f5ylLDn4uIscv395oAGJosXD8intZwsnBQSW34fjq/9trp3FrwX5Xn3ErKlZCfjcrKZM7kwtdLOrLltWIofvMEH49OcmpyzLbPVW4rs4oyMfkG5QrG6tLIHc/l+crtTk6OytVS0+IOqsu9StKqEXHY4L5VGfdy5aq070bbJLVu/Rx3TLhsbYaJtwWivlLN67ToJOfYCZeevXpBzRYA3c1wLRr7XPZSSB7upWP7cL6kV0TZfqK0+T/aaizAmUy3UrSrrLO5cuX7xhraarDR+N7Wkt40InZt+6JLIm7n92KpJb9NohaTnr3GtWyfqhwf+rFy4dMFyvZAlwrXzgTd74/bpxyKcy9Jg1X0320xoTfiNc6IiKdX/P5wouwo91C258Yei7N9kaQdlG3jrZwVfD4TEY8cN+ZQ7FHtgFuiQVL50Gs0q+zc69pZYh8fEXsv7rk5xuwyPlTiXBIR20zr/1b3GXoa1V5r1B+5QNJdJB2r3D2jWb/E/ZKdn6mpxNNzo1FShDPZ+UPKxO9/SPq2csFh1RYwPc/laa+zjqQNotFinGmxt5L08oh4YWWMGUXFbjBLKX6Twg5eCsmnvdj+mXIx7snKnWCW++QLd9pB
o+UYuO3/iogDPEMF+Bb9hdK+vCoqK4ZO13Kf+m6cVTeGVyycp+yI1ex3/g5J7yiNqT0knWf7FxHxpOoDzsGqq5zbRoWkJ0u62Pbh5bVrOje7Dv39woj4vHLlbCvLbani6Vp3/pfGB1nSFmUwci/lfpEHKicCaqpP/EnSleV8Gy572yQ5ZDrbB0fEwWP++v7lz7E7oMvIkZK+odzXuml1lmK+7U9o4bLILRqDTfcMHTLYuuA2Z+nwG5TVZWq9R3k+31XSnRvEk9SngxCZef5Z2z+IiCvaHOki9pe0jTLT/QnOFWX/URnzTs7S+s+S9OHIMnqtGoNNz+PI0tsXlkSC6jK3o9jeSfn5/rFy0mmTkuTx5YqwPfeoVUT8fFq8Fh2Oe0bEKbYPKq9xm+0Wca9TbrEwyBzfsDw3log4svz16zFtpZjL6jHb7xw3fnmN6yS92bmd6NOVCRG3l0HPD1VOGtpDq9xsP1qZdFFrddv3G1zTyjVv9QZxN5C01WCw35n4dKayPT5P0tgJOJIOknRRGWhpmdh6rMpkYXn8I2XHt/YacpXt50lauXRGX62sMlClTA4eLelo5zYUJ0n6oO3PS3pXjL+txWoRcfG0a0WTLdqKo1USfCQpIuY7E7ZqBqf/PuJ62arNdbfS/n6hpE+XiYxWg269zuVnKPuop9j+h/I8PiXqq1rYucJwL+VKQCm3eq71t4i4dfD/VybqW7QvevZz9lAmv11iu2nC5XS2j4qIF1eEeMYs3wtNbX09rkdodMLlS22Pk3B5UokxT4uWOw9JVVu0zaTB+yyNvhbVnBOD+/GaM3x/E+XWzHNKHrb9hpJAd8So42v0GenVPrx9kHxT4l5oe+x7lEcnN2nwfxj1izk+J+njyjLkrQf6T1TeT1uPM/S4T0t934uDlYmc50pSRFxe2rRjKckVb1e+r29TbjvxHEk/lLR/RPy68ni7TZzO4Gplv6pGr3GtQ5TJeUtlIqy8TlXyTbGypBuV/ffNnVUzx642PIP1a365tFlXUm4Xesqon3FuXVbjYOVk4Ya2T1TZ6qQy5sBlWnRLoN/Y/q2kF8WYlc88YnGr7RaVnXtdO6XcNmuB0lZ+RG3QTuNDkvRnZwWOwXjytsr5riZs76x8T4arfVeN5Uj6le23aOExyV9VxlREPLbMQe0raZ5zq7JjI+KrtbHVYfzCuW3PNso2hiTtX8ahDqo4zoGTJH1EudW8lIuJPqOpZMax9TqXPWIxju2LIuI1LeIPRMRltmvfh/fP9hLKRQI1PqpcjDtfeU1+qHLh4WA77dr4rQa/H69sr8zUB76Hcouu2oXgPRLVHqjsB79C0jG2z1Ausltke9ElPMYHRsQPZ0qeqk2aKj6l/Gw8ujz+pbLNX/t+tBwDH1TReV/lMc0osnLRNbbv22DcbYGJSMBRTn58X7kKXpL2Vp4Ys64EW0I3SPqNpN9L+qcG8aTc2/QLQ4/PbRRXkt5ue/+I+GNEvMxZRv390agSQOXE0tLWuvPf/YOsPpPfp6l+0HUuxi4NPRjkiNzzfT1lg1CSLo6IG1ocXCd3igarpGexr3LgdZCgdL5KObZKB6tPR/pSZ2Wvo5Xnw5+USS217tM6u71o3kEYDHpLeuGoz3CjQe+/RsRfbcv2XUqDq3ZP8iMlXS/pCknnO8vWV68UKpqexy4Z75L+4BFZ79FgCypJH5D0hMEkd0lSO1OZIDmuM5yVX/4i6WW211V2Zlr4eWmwRrmX7C+pxbY1vQZX1pT0gzJAEcpB9Uttny5VJbYeoew0LvJcRCxSkWqunMnZ+0raSdKpyoGL7ZQdwJoVX/tJ+qQzsdzKgcgW7bcDJJ3r3M5IysTk2glIKdvFw0mbf1euBvyL7dpkzl4TAL0mC1+lHBT7m/KecrYaDMQ6V1jsrDzfNlYOuJyo3CLoLOVe2uP4XbmeDT7Tz5VUPdE0pEeCT5ckp2IV2/dW9iWr9nofocu5HFny9jBJh5X3463Kqnu1yTIH
KJOGvhARV5XJzdnKwC+p82y/SdKqzj3rXy7pSw3iduvndE64nO7Ixf/IzCJi31YHMoOmCZdRVvtHxCaNj3Nxqt7noum1qGPy8KD912Qr8hn0ah+eZ/tIZX8slMlw5w4GlscYSB4kNz1AOb4wqJz5DEkX1x+ubouIFn3zUW6Mim2RZ9ErEbfne9E6EfdY5XVsdeV97kRlm+tZynHEXSpiD79Gs4nTmZLJpAXbLtXqNa51haRX2B5eNPvxaFjlZMDtVte/V3ntuUpT51koxzBaGnu7pYGy+O0NkkYm4ETEfqOen0P8rzqrvw22Otk/2m118jVJn4+IsyXJ9lOUC4s/pZwIHndMrsfiVqnDtbP0Swdt5Js1NUF9q3LbxBq9xoekrIZwuqRNbX9L0rrKCiXVbH9c0mqSnqCc03mu2tyv91QmXg7m5M4rz1WLiB+V5J5LJR0u6eHOE+VNlWOTPcYvdpL0sIj4hyTZPk55LWqRgLNaRBw/9PgE269vELfnudxlMc60e/ZKyjHKqoSv6Lj1cvErZfLjlZJk+8HK7RfH/myX+YrB+OBLRjw3Z0sj+bRXolpkZeRTlAup1lYuCD9P44/jvE7SizQ6OatF0pQkbRoRe9jeU8p/g91kJXGzMfBBwm5EnNfguGaztnI84GItvABs7MIcE7EFlUeUuBv13Bxjvlw5CLuuMpHjlIi4uu5I+/Po0k3Nto2yfWpE7Lr4n1z2bM+LiOps8Rli31lTkx7XtOo42n61pDcqO6g7K1ewnBARj62M23yv7J5s767sGJ2rvAA/VtLrIys6LXds/4cyaeFLWnh1c/OENTcuh+jOe4ba3li5/2aLxuthysHpFqsIhuMusk2IK7fisv2MiPiS7X1GfT/abEH1BeWk7AHKBtUflINmO9XGHnoNS1o5IlpWRmhyHnv2csvRIvHUpazu0GMrEwK3meXXliTu8B61q0taM9rsUXtPZeP9ScrP9FeVA2Rj70le4m6lTGB5sDLheV1Ju0VldSdnRY8ZzbXh7Kza8GjlZ+KDQ99aS7n1Z4ttvuZJ+qNy4PzU4U6j7dMiojr5u3Q+FI1KF9veTZkQsolyZc+jldtF1JaPfasycfGL5alnKAfh3i/pqIjYa6bfXYLYXbY8da5u2lXS1yJLqG8r6b0RMeu5uKyUpKlvSjomIi6a9r3Dx03mLEkVRynPhT9I+qmk50fE9XVHvCD+lyW9Ulnla6uS4LNfROxYEXM15QTWU8pTZyurAFVX7ivH9zZl9dCXl/fnP1v0eXqdyyX2RsrJoT2Uiw1OjojZVsUtM2WAbD/l/5+V/3+fiAaDDT37OdMSLs/WVMLl3jVjDb2UPslhMbXV9dqSXhcRb6mM+0NJDxn0eW3fRdIVEfHAcc5xdy5v3tO0a9HgXH5XVG6lMmrSuNVEci8ztA+fW9vvc27ZPpOI8Ssany9p5yhbTzm3eTwzIh43+28uNu7ByoV7X1DjsQDbOygnB8+ZFrt2W7mm92lPbSPzavV7L45Rvg8HKttyr1b2fV86ZrzhbVN+FhH3Hfpek21U3HiLFtt/VY6TjeqbvyYi7l5xuN3GtZxVcO8kaTAGsrey0tXY23D0ZvsaSQ9t0c6cIX7TtkuZKPydMsFreEKoxWfvS8oFBqdHxJ8X9/NzjD1qS6D5EfHQys/KVcqFMScpF7eeVzvGV+I27+MMxT6kdmJ3RMwu40ND8VdRJrdabedGBufA4M81lBPtVXMj015jZUmrR5utyQb9hZ2VSWXHRFY7uY+kb0fERhWxz1Xj8YuSXLL94PpQ7uHnTh8XHzP2e5XjC5/VVBL12irJb+Nek3qey7avVLbrj1OOk10yap5gjLhvH3p4m/L+empNn8Gzb73Uon14VURMr8a1yHNzjNmtj+OsiL91bZwZYs/XwolqKyur+bX4nDxe+dl4mjJp7+SIOLU2bi/uuBVliV89Bl4+xzOOLbX4fyuvM/LaO9f5i4ViNhgT6872t5WT8xeW
x4+R9L6IeFRFzEOUJ/+c9zSfJeYpEbH7TCdEow/wFcqb6B/K43UknTe9QVsRv9sgcivTOv83KldFtuw8bq+8KV+vvOlvKGmfaF+KtMnktzvslV0GHl+pPI+PUFYMGZTrfWfU7z19haQnR6l646wQ8fUWk6c92P7piKcjIpqUT/eIcoiSqsshtu5I9x5Mt32LcoXa35SVFpxhY60x4w2uFW/UiA5Ci86v7U0j4se1cZbgdR4v6W6SvhIRt1bE6TJxU2Kdqw7ncQ9DnZknS9pImZ0eknaT9LOIeHlF7Fco94Uefo/3jIiP1h11Xisj4sbaOCPi3kU5ubtgcEXSSr0GJMdVPgfbK8tNf3zoW7dI+lJEXNvgNRZs5dSa7f2Vq/0G2w5tJenAqEw6HBq42k7Su5RtgrdFRHUZYNtbK6unSdkZa7LSvuMEQK/Jwq8pk9KGP9efjYinVsZdo7ZNtZj4qys/y7c0jts8wcf2bhHxucU9N2bs4yQdMNR/alZBtOO5/F3lZNbnlH3WquuSl85Wu8316OcMxW6acFkG7V6orCbzlRiqdmL7LRHRompWs33Up8VomnDZK8GixB61rcyuylWyTbaVacmdkodn+iwPNPqMbCLp51q4ffiwiLikNnYP0yfUS/t2fkRUVRDtORZg+wRlqfqFqnDU3p9a36fLezB9O7mBVu9F00Tc4cl42+8e7u+2mHgrcc5Vw4nTMgHyqhixLY/tn0dE1bbfvc7lUYkPLZIhSpyNJG0WEV93JrWs0qJdWxItduvRDu80RtvzOjSYKNxZuZ3TZyWdUTOJPBT7q8rEus+Wp/ZQjsE8TdIl47Zf3G9xa+9FDD22O+nGWZFkYw3toBERn24Q9+KIeKTt7yjnGn4v6aqIuH9l3EW2JlNWtayqjGT7PGWlns9HxF+mfW/vWLgizFxjNx+/cFaxOFS50MfKc+7AiDh53JhDsUddiwbGvib1GusssXdTVpP9VuRuIs0W45T4a0hSi/uJ7YMj4mB3Wohq+zPKJM7hbdrWiIg5V4oqfbL1S6znaaqNuJayCt4Da461vEbP5NMuiWq2r1dWnDpFDebjPEMy1kA02B3AWcH4LZK2UCa/PUbSv0XEuZVxm42Bl/aglFt7SVM72Txf+dk4sOZYe5qUBJwtJX1aOfloSf+rPAmqVmaX2P+khfebHHt/L9v3johfD50QC4ksJV7F9guUpQsHg9G7SXpP5c1+sBLEyhKtO5a/V70fvYzo/C90EjfoPM6T9LwoKxWc+3x+JhpU2+kx+V2O94nKm8Rg1c33o2IrH9unKAfbVlUOuP1AebN7pqR7RcTe48Yu8RdaBeFcNXtFNEoka832Xad3Pkc9VxH/exHxcGc5xA2jlENscNNv2pHuOZjew1IaKDxPOdFyiaQLJJ0fpZRjRcx1Zvt+TUOz18TNcOwO5/GPJX1H+f5eEBFXNTjWUZ2ZBaJiqwePrtrXJLnV9o+UE70nKycL/1gbs8RtumLB9oURsZ0zqW74Hl2VVDcUf6MWbapZ4vfYj3zBQLTtpyoHht4i6fgGE6eDz94hkq6MiJNanXO9dB5Ebr5Sb4ZrZ/V7bPuuyqoh08+32oGVUdsL3CRpXrRdfNAswaf1dWhanG4VRDtOZj0gGlZ8sf2IiJjnDit6SvzHKLc+3Ug5SD+43rfolzXt5wzFbppw6awAsJqyhP7eykUyry3fa3Uuz5e0TUwlGKwq6dKoWLU4FLt5wmWPPpTtr2hqW5nnKasWnaTcVuZJEVG1rUzp+/8/LTrhNG5Vli7Jw0Of5edIupemBtL3lPTbaJAAXz5/z4yIX5bHj5P0kXH77LafHxEnzHCPUkR8YPyjlWy/WVnl+gvKa9AuygTGQ2ri9mT7mqhMEFpM/C6JuD24cSKuc2u3w6ZPiNm+v6RDo2K7haFYg4nTf1YmUVVNnDq3m/59jKhabHu9iPht5fF2GdeyfZkymeXH5fH9lBPVtX2cFym31V0nIjZ1bgv4
8YjYoSZuiX2qpC21aPWp6q3EZ2i7LFIJZnnjTCR+onKri6fV9tlLzHsqk2a3K099S9I7lP2S+0bZCrwF26tEo8rOPa6dpa/+SE1td7KnMgnpTRUxDx/x9E3KtuEXR3xvLrGPl7SppMuVCS1Stu1bfEbeqrx27iDpI8oxo6OjcjvxwVicc2uyrVS2Jqsdk+yt0/jFvZXb60hZ5bu6IndPvcY6e3Ju33S8pMEY/u+UC/i/XxFz/4j4kO3tohSiaKmMP71MU4mA50v62DjtAOeOAP8maWstvCXuLZKOjTaJIT3HDbskqtleKxpU3hqKN9v8RdSOHZbXWEf5HizYQUNZyX+2hLslidt8DLznvFaJta3y/vQgSXdWbh3255o20SqL/5FlLzLRZkvba5XHLcrHPUPSByTdR7lKfyNlksHYg1eRyTcrKy8yXfbsi4hP275UU/u7PSfqt846TlOT1BuVx1a7feSairKXfBlwfLmyIR/KydmPz/KrS+pOwwPekft83qlBXEnacbhxHRF/sL2T8gI0rtZ7ZUvS5pHVnCzp18rBzLB9oXKFQa2v2D5bue+7lEkiZzWI28tFysb74p4b1yqlcby7pvYPr1YmU86b1pH+pDIbeZx4XfcitX3O9MGUUc8tqcG1oqeIeLxzy7ptlIPrZzorGsyaRLMY8zR1Tb6vcuWNJd1d0s8k1fy7VvbQXqzlOnqXinjDupzHygzsf1FuVfefZWByfkQ8e9yAUZFgswRWtu2IzHAun787twgcEZvbfqSyKtmbbV+trMJxwmJ+dSRPrVhY1fbDpYVWLKxWcZzblT/XHDfGYnyiDMg3rUZSYvXaj1yaen93Uu49fZXdZF/dX9o+Urma8L3OFd8rNYjb04NGTQDUBvWiq0M2t32TMjHphorQ/7B93yiJ6c5k+xarGI5XVhd8qqR3KlcgtdjrfOvy9aXy+OmS5kt6qe3PRcRhNcGnT56W03isBB/bOyo/E+tPG0ReS6O3YBjHSrbXjoUriLbqBzc9lwcT1JJ2diYDLmTcCerov2f2MZJeo2zD3L6Yn52LHv0cSVJE/MRtEy4fORjgt/1hSR+1fZpygqXFtV7KSZtzhgbi9tXUlh9VSsJNkypnQ3r0odaLiCMkyfbLI+K95fkjbO9XEXfgc8oxhU+owbk81B87NhomDw8+y7bfHwuXZv9SGS8wY8J4AAAgAElEQVRq4aWS/ruMnW0l6RDl9Xpcq5c/u7QPI+I9zqoWj1Xeo/eNiO+NG8+dtwEoLrK9RYMxvYW0vE9Pi3NXjRiDq03gKA7S1ELD2Z5bIoOJXNsrR8TtQ89fp2zft3C1MuHr/5QTTv8t6UfjBotZEm9rk2+KXuNar5f0TefWqoNx5Rb97VcoExa+K0kRca1zEW0Lp5evHka1XVpsyflg5djIcJuluhpJib2qsvrdHsrzoVXb4nfKSnWjVCXfjGrDKftTNTG7XDuLnbXwdifHKaskjJ2Ao/y3P1BT18ldlVV7trT9hIg4oCL21pK2GIxrteJcgHtOGcs51fYZku4abbbnvlOZv3mWcmuyv7cYbinJf4do0c9fk6r4yuvcxsr+6Va2qz7bzoURl0fE6bafL+kNtj/Uoh1axjd31qKJ6lVJ1K3HOofZ3kA5oT5YaHCBsmrmLypDHyXptRHxzfI622uqgta49lVuxXW42s05LVDaax+U9MEyJrLBuG24iDhO0nG2d41O2yv1nNeJiM84KxkOEtXe2ChR7R7O7Ws31sKfkbGq4A3mL2xvMj0hxlmttIUvKeeszyxxH6S8r9QueuoxBm7bj4lSbdhZqa3l+PeHldehzynvgy+QtHlNwIlIwCkTCbuqnLiD/6eKwTFJercyq+vrkauGn6AsWVQlIm63/Q/bd2vUeBj1GlcrO3qt4i2YVC9ZZMtd0s0MjpN0s/KmJOUKuOOUk7815jlXMA6XY2s1iNVj8vsq288rsTdTbs11UWVMSZlGafusQYO7PK5qfJcL
7eHKG9xgFcRREfGFuqNtr9fk9AjvVJZYvjByL9L7SareSkXq05F2VuJaxLgdhDKQt5qke5aJ9OH3ef2xDnLh+POVyV7VWziMiL2dcpD3scoEmTOUjfmxDSUZHi3pCxFxVnm8o7IjWaPbxI36nce3K7cku1056XZD+apW3odRW3HUZJB/RdLJJSFCkl5SnmsiIi6WdLGzotoHlP9/43ZKn6pcsbCBcpuJwWfvZtUNBA065ldFg7KjI9wzhlbElGTWVoOxj46p/cjfYfv9kr7cKPY8ZxnuTSQdZHtNtZlI3l1Zxvt9EfHHkgj3+gZxe+o1AbCfpEcpV7FImRg5T9Imtt8Z41eMfLOkC51Vz6y85r+k8lgl6f4RsZvtXSLiOGfZ7Kp7SLGBpK2irPp27k9+pnJVzzxJVQk4apvg8ytlO/uZ5dgGblEmdLTwfknftr1QBdFGsVufy7NNUI/dBnf/PbNviohW18ph3fo5bp9wuSDZNnIF9ottv03SNyStURF3gYh4b2nXDpLT3xURZ7eI3VLnPtTwANv0vkeLwbfbIuJjDeJM1yt5eHUPVXMqA7GrL+Z3lkhpz79aWYr8r/r/7J13mGRV9bXfNQTJQUSSgoIEkShZFEExK4KkH0lEREBEEMGEKIKKCHwKiIDkZAARFCQjOWeQZA6IiCLKIFnX98c+d+pWdXX3TJ1zerpl1vPM09Strn0v1feesPfaa0VTzsD2ALaPTevDJ2x/c9QPDIZmv2Dy11hvJp7f9/V5z4QVei7WBu5UdPg+C1PUw3LH5FpE3FOJ+fnI9Hprgky8+aAXqvpE3F8p1E5OKk10Ir6PJ4CvpdfZ30cbkj7nAgpOtfNati9Pc3Sj5vSgy1gZP2v7uaYGoFCJKEIGSIXDWii+dklr+fUJAsAFhHL9tQydBweJfSZBALiIKDxd1ZBECsQuqirXiluraaZqEwORM2wUrefNjAWwErBuQzKUdDSxl3wjkKXMTdghLUw05RaD7f9KOgpYNb1+lpYKVSaOJVRU7gKuVjTNlKjNnUQoOX2TuOd2oFDBV8MoDZH3bB9NkLBWBvYiGiVOJdY1uTiPWBPeQ6GmiAaFc51tnESoZTZz87bp2Nsy487ZkG8AbF+pUM/Kwf2SfgUsmvZ8DYqsDxPhZCNiPL4NeFTS9c5Tzrwyrd8acva1hO3iYznX2qAW+bQiUe1c4pk7j7LPyNkMzTH9CMh2bCHWsecpRCKWI8aLabKgHgY1cuA7AidKapySHgeyVYDasP1rdQj8J0m6g2gKGAgTgoBDeJH/ixgYSk3Kz9t+TNIkSZNsXyHpW4ViPwncI+lSuv3psmX6ZqALK9hevvX6isSQzcUuRKdF8/e6BvhOgbhQp/i9O1EYepYgGVwMHJgZ81aFiseT7UK0pKWIhMvAaJF6VqRM0qom2sXpNpt7MpnF6R783C1p5ZRAzfYirbiRXqP137MRRYDbGXyDsDOwJ6FIdhvdJIBvDxizjYaAdJak/xKSlme6jMXelcQ1HwRcYPu5AjEbrG17p+aF7QslZW34U+HmLmDDdKhk4abKfUzcB/cQz+BxpRbxCW3P7dmATYhicA4+Q0hl75peX0okhbKhUALchGBjL0V0XK45aLxWx8Kne5NJuUz6REh+UC3VkIKopUYCkUwAeErSooQf+SKFYu8IrAL81vZTkhagQHeo7adozae2/0LhRFkpjAGxdWZCkeSv6XwLEXPTWoS87kAEHNsXKWwG1k6H9nQfe4AB0MhL/zMlFh4BSpDJXk73nul5QjXiaUkl9lLFCD4OpdO7FNYKXWtihWf04bkX6woKoj33cjsRkqsg1pA3L2u6elrnXLfPR6YW78347NTgCkmHEGNR28rh9sy4NfY5DUoTLm+V9E7bU0i3tg+Q9DCRCC+CRHSqQXYqiZp7qJ+09qlTVGQVtjIDK0+0cJ6kjxFrrPa9PLAFbEIt8vAnicR3W3kiiyAq6Ty611VzEPm4ExRd2QN1cMKU
9eFWRBGrKNKcsRORoBZwuqTvOikmTSscdrqTgAttn1nwUoEpjUk7AzVsVWsRcWvk4GoTcVcm9k7Hp7/niQT5rYQ9QK2cZIPNiTxDLqqMyRpGHQp4TRorcvN9V0n6PLHWehuhvnTeKJ8ZEZLOdKh99yUmFyC/Qffa5XvE2uUrmTE3I+7lO2zvkPY4JQrTEIXCrdxSiiqIoqpyLdRqmqnZxHAQcIekLruTvMtlfoLk3ZBM5iQs2/5TYM/3MuA+STfTvR4aeA3QwuWSNgV+bBdV2DnP9hQip6Q/UqYwO3siGioV5/dXWM1lWWYl1FAaeiHVXt5PWIeeoDIqkRCKKcUtvUrnOnuwoO22jc/JknLUoRr8VmGn1uSYtgWymn5tb5XyDBcT66LSmNf2E5I+QiiSfKmH6DMIfkDk2pr8/zZE3WXDYT8xlahJPqUeUe2Z9jiUC0nLEWpv8/asu+ahWwFuYNj+mUI97FKiEWwT2yX21cVz4A5l55UTAQeXF0B5SuF0cWeqwf2FTMKlyo7vdaBCPu89MS8jlAS+DixAdNOvYTtHJqyJvX2/45UZ9kUg6UO2T57e1zE1kHQ6ISl4Y3q9FrCb7b7qHFMZs2bHfnOOd9KZhC4tWPweE6QFZ64KzinE3+6WQpdVFaoop5fi/4pgu59EJPhKyNNOIjZzB1faSLfPNR+RxHpnZpzdB02QTsM5lgb2A7axPVOBePMRMpbrEcSk/wI32N6vQOyLCQJgW41rPRew2amBGvdxivt+gkm/JvAc0T12te3LS8TvOdckQsEney1QA4ru2HMJAtkNBeMO8UuVdJvtLCa9pKuJzqab6SYkZ20k0zz6XaCtRvLREvOpKvmRD3OuRYB/uEyH6ISAKvtES7qvXQhJxa17bS+vPl7B0xD3QGB/d7oL5wEOd6adXUp8nA2sCJxMJFD3a5EwBo27H5HA+kk69D5CZv8wQnUwq5tF0gPAik7+9Aq10rtsLzfo9zzMODTw36w2eu7l9nq2uZezlB2H+T6KeVuXRiom9MIex8qqkm62vaakG4EPEITLe22/Zjpf2rBQBU/ymqi9h6qBtNbqhZ1pM5AKNZv0kIfPKfFMpzG4yV88kLuukDRi4tmZVnaSvgnMQiTn2+vDLMJeKiCsY/vf6fWcxL4st1v4VnfbfBWDpHscjUml4xafp1Oc4jm4VuxZCCL14h7BiinzHG8mCBHzEV3DBzosqQaNV+37SPGKzvulx2R1Ggv7wc5TlW325jsCbyf2fBcDx+fkGSQtYvsvaQweApexaHl97njWJ2azZrmNUOCYDNxfKnetegoD2TmFYeLeZHut0mu4WmNnK/4idBoab3am3UkiVXyBaAxsSD1fI0jr+9seWBV3uLVA7hogxZ5MkIVeIBqgGnWPrPVsxbzW9URO8keEMt6fga/bXnbED05d7LOATziaqIpAodp7EVHoXo+oed5VYr0h6WDCQuyS3Fg9cavkOlPsy4k89ffToa0Im9K3Dv+pqYo7P/Blum05v+xke10KJecUBfn07YQYwL4Otcu7c9bK/er2pda36Xob8unKSuRT27nqRVPGC4Vi7Z8TUS173aVQwFuaUBDNbkxKNZGNCUJW2zpzMlGLG1hhT9KRdJOR3wr8hlASKy4mUiIHnv5eQ+A8p6R2/CWAvxK5lk8SanXfydkvTBQFnOslrWg7Vz6vjY2ISX4PgqE4DzFolsCPCLZbk6SfiXyboapIhIg9GvJNmkQOy90wVcZqxL3RdNYvDjyYBmcPMnm4bsd+c46LKGhHkhLe/To3iia8Fd1jHy1UVF8L2EbSH4ikWymp5Vo4P02gr6JbNrXI4E54CW5IMPOPUCjXnOwMtqlD1nNz218b/bez8W8g23fS9pEK78ZX0f09l9j4L0Go4GxJdN58OjcmgMPu5bfAK4lumTcQCeUS2IqQOW2KeFenY8WgIKM+T3RFnD/a74+C4vcxgO2fEN3OyxFs9z2Jv9/smdfbD0tTRn1i
CtL8+hTxHf8iI85MRIfQpwpeW20mfTYRrR9cSY1Edf3I++E0YClJZ9veu9I5xhVc3yf6yvR3a9S4NkvH5gT+OfzHRsXMhBzyDsBChDpbFmE03W9PpATN1UApD3lsHyjpQjo+57vYbghPJaRkzwBuktQm+Hwvfc/T1PmtUELYGni1pHZCYW46Eu3jDq17eVtiHf4qOmuXFenM3dMESesQa4kFJe3VemsegmxRFKXWAW5ZGpdE5X3OeQoi9SGEkqOB4wrEnYJm/1QwZHFP8sq4XNL/I5L/EMTZA0rPq5LOt11E5cnJCrYC+lkZZt8bGmoNvLJCeWLg/VNTXFOoIf7F9jPp9ezEHJiLVdLP9l7adFTKBoXoVlj4TzqWi8sk7c1QwlCJOep2SWu4fGNSsXm6B8VzcC28EziUSHi/WtIqxHiRS9yfCXgPUYh8FUFGPoN4Bi8gbwwt/n2kIqSJe3cRddStsomAlM9r3Wn7cElvtH1t5rUNgUO9+TgKzs1NkbsE0WYEHKZQMPgRYYM+cA6ghVvTmuU4QonlSaBIkVp1FQZqqcqdn76Pb9BRziqhOlx87FS3UibAQ+nnopIWzSmsp4LxBXSUQj5vu1F0zrKkLkG0GSF2P6vdgTEGea09CDXATxBKnBsAfZvvpxbqqA3OTXmloS2J/fWOth+RtDix3ymBG4FzUi7jeQqQp2rkOnvwYSJ306gvXkcBJWpCDWgs3E6OJ9+qvcEBBJn12kS+WRL4VWbMSyT9H9CoRW6WzlECT6f61guKJrhHifpLCUyW9DmCF7BeuqdL1HNWBLYj9jWNC8XA+5xWTWSd0uQ0upsioVuJsgZK5MD/3frv2Qil5/uzryyhtT58hkJckXGtgKOOJOTMRFHst2T6Iku61vYbE9u2+Z9vNuX/JRK9h9ge2HJIwcDe0B3JwrmASzxOO+qhf4dpv2PjCRqmY6HBoBsqVerYH+Zc2UlvSW0m92yE5NsLtosQDFrnKdZ9M9zfrvImeGBIuoiODd6UpJ7twyqcawNC8WROwrf2s4NOsJIOJTblRWU91S1LPonYqJ9pO0s+VcN43+YuaCXdRCyizkzXmSUJ2RP7t8ADBNP9GqKLpaQNVVUoLHYWIeyujioYt+R9fDbBeP8NUaRuvudnRvzg6HGbJP2TrcOPAJ8rSQyQtAaRjF3T9mcyY91ge50yV1aXSZ/i9y3c2P59TtwUa35ibdju0ru6QNwxXfuk+3B52/eO1TnHAxTdhJtSmNiavs8PEF1IEMmVs0vMgZLeStjWPU6okQ3cAdGKWbOr/o3A0rZPkrQgMJftfsoOg8ZfnQ7B57oWwWda4yxBkHgPoluGfTJwt+0Xsi60MhRqdY8TBI7sNaKi23R9whL3mNZbkwlZ9dwEWe/5qqwDUuzsbr1a+5yUYFu7mefSmFSccFly/5Ti3Wp7dbU6Fcfznj2t4X5Bx3J5O2Bl28PZlgx6nqrfgaSFndmlnuK8jA55+MZC5OE2GXSKNbDtzQrEvpWw+XguvZ6VGO/XGPmT0weJtLg9HQLkxkQzQJbVvCqpIqXYDwCvIWyoijYmlZqne2JWycGl2LcRxYkrm+dZBTqo0379CuCE3r2NpCNycg01v48Uv+jYVjqvJelO26tUmOv62kM1yHk+emoBXW9RQIGjdZ6FgS2IQvg8BBEn14aqif0qYB7bubYhTbyaCgO1VOVmJyy/30RHeeLo3BxRil107FR/lcgGdgapPO19twGWdNieLg4sbPvmQWOOcr4ixHJJl7tHfaTfsWmIV1MhYiZCWb5ow5Qqqw3WQnqm3w/cU7jOUDTXORaQdA0huHAS8L3S+8jWeYqtBSQtYPuxErFaMRtFq4b4PolOPTWXnPUdwirz/4BPEfn7O52pRJ1iL0wQ1W6xfU0aP9d3ZhO4pF8TOd6itSFJryCIZM38dA0hpvHQ8J8a6DzzA68stcboE79oDjzlcS62vX6heOsC+xO2zu1c9cDrlvFOwKm6mRnm
nAsA1ztDRq7ZhIx2bDxB0l3EIPN4ev1S4Krcze5ExHCLoBqLn4rF75ttl/LKbGJe5HyLoZeO9L7LdJEVhyrY4PXEX4Bg3G5HyJydQGwaVgHO8oCdmD2LoKcplFToeUZeAP5QYsKXdD/lvW+RtKzryVhPcnRm1Yi9DLA3QwvU49LOoXUff5AgspS6jz8NHOPwqd2PIEgeaPuOAtdc9dkuDUlHA4sRne9tgmiubU8NJn21wo3CtmcPQnXqTqKYdUOJZ6MGcXGizn01UZPYmvYPS9u+TNIcwEy2J2fGXI/wiD6d6GiZn+goe3jED44e9+vA3yncVa/oZF0dWNb2Mmm9eZbtdUf56LScoyrBZ6Kg1jwiaYkae92xhKTjbO9UIW6Rfc5YEFdK7J964l1NqA0eT6y1/gJ8yPbKpc5REmOVF5F0oisq90r6me33FIhThTzcc44i1sApVr+/312595ukrwHfcCgONt/Lp2x/ISduivV6OiTcawrtF2brLer2OzZg7Jo2OBNqnpZ0o+2122OzMm0RUowh6iyS1rV9XU7csUAFAk7RNYuk7xPrzcWANjE9i0jWei52Sz9PSz+3TXGzGr/GEpJWJNR7t7Q96wCfH5HY5AK2JKpsb1UDCrXlyXTs2rcG5rW9RYHYE2bsTPmh/wJvsf3aNJ9ekptvGeF8q9keWCFB0myEkswVRMNB0xA/D3BR7j1XMa91o+21R//NgWLPSUflYxnCUvRCJxu0aYzVT3SggSkjOnA1UT8smgevletMsZcEDifyhibyfZ90gebc9DfbAdicaOQ/yfaluXF7zrGx7XMLxfoVkUM9ibjPxi8xoAelyae1IOlc4KO2Hy0c91LCRrW9JtrGZciyVxIExpmJHO2jRO5+r5E+N0K8McuBp3nvFheyEVc0R3ySobnqgYlr49qCqtl0SjrN9nbt9xRKCdv1/WDeOR+TtH5mmH+r1fGn6Nx7Ovvi6uIw4AaF9yTExPHV6Xg90w01iDYjnOth4GEyJL56BrVJhAzuvJmXNgSFkse30ZHTXZzoGBbhwf1HyLcxqoQaNnht3EBMoBv3EFlulXTMMJ8ZFS4s69mKW+sZ+QWwMFFMKIZe8o2iO+IR2zcVCP91SV8hxviLgJWIhfzpI39sqnAW0f1+PN1y6tMMDd9FVtL+rbmPN7L959bxrPsY2Nb2N1Ii5C2ENPkxhJVdLm5TIdl3dStDDYHLqKjNRvibt4kmBnI3pR+VNKRIWqCYNbNbrH/bzyUSTi72ILzTb7S9gUJyuJTd3s7AXsALkkr5kbfnvl6YgtZDEwivKFmUbpDu448CLyUU1RYjxossf29i3Nnc9n3pPB8g/N9zE9Nbpp+7tY6VuCc2IciKt0OsNyUVWxO0CT5E8mYWIgE+MMGnJ1E4a4r571zS8Big1hrxKUmHEHLq7WJ9FtFQQzt6StlaDEEJ8k3lfc7lkjalsFJkGxXGue2I7+HjRGLolYQq0HjF0+3id7r/iudFapJvUvwS5Ju+5GHybZd6UcQaOOFvkjay/VOYsofKVu0B3mX7880L249LejeQRcCRtDZwbysPN4+ktQrs+a5nqPR/v2PTDNt/kLQyoeIAQRq6KzdujXl6mPM0ku9H2f52Zrh7FfZIM0lamrD6yFLjTDiCoX+rI/scG48oTRIqumaxvZWig/xionBTBK1awNt6CEifkXQ73YqJ4w6SXkus8Tcl9u4/JDr3B8FIzQklrPugor0VgKQVCNXs9no2195qBdvLt15fISnHXg+oO3Ym8snHCJJoo9pzTCaZcy3br5d0B0yZT0vkW6ZAYfti25NzyDcJOxOW8osS91qTH3mCsFnNQi/5RmF/9hihiJujqnqHwiq5ODmEUPl+U0OeAm4hxo9pto22/cb0s+++X0l0ABiYgEO4k1ypsLtuW2b9v4yYUC/XCUFaOIrIkUCoqXyfAjll27+U9AXCyucIYFVJIuzgchslFyP27P9QNISVIO4vQzRzfBg4
IpEZT7b9ywGubznbDwxHFM0hiI5EPlWmyu4YENXmAx6QdAvlbOUAXm77pNbrkyXtmRmzwbyOpuePAKfa/pKkHKJTtRx4T31rJmBBwhqwFP5l+8KC8cY3AaeF17VfKOTfVhvmd7Ph5AmbgT2BsyQ9TNxoC9NJsI9L2D5V0aneTHQfaIoMLxYMM/BOQU4BoHLxuz2ovQD8DtgxI94UJCbvPgyV3RrUt/DVKe5xwDm2L0iv30XIRY4rqNsGbweFfHGWDd4wWHa4xL/tgwcNmhZ92wCvtn2gpFcCizhTjnSYZ+VfxILzU9PKIldd79t+WAtYUdLMtt+VGevttj8taRPg94T1ydV0OnFy8ILtowvEgfDErI1tCGnITSS1x4uVcu5jOuSj9wDH2f5ZIj2VwFrANpJKyL4fWuiahoULyGwOg7YF4mzExjRL2SOhVuHmGdvPSELSS9Kmb2DlwgYKS5J3unBHrAdUf/ofRy3Swm7AmsBNALZ/JenlBeKuY7vd/fBjSVlk1HS/fdb2D7Ovbiies21JTueas3D84gSfdqIwrV/eT8eqZdxhDNaIZxAFm/cSdlTbA3/LjAmhTjeko6cEEjGtKSpca/ucUT4yNai2z6EO4bL4/qkVdybga7a3oaAneWXsDpwgqSFNPQ4UsfkcSzJZIVQhD6u/NfBZw39imrALcIakRqn3T5Rpgpsprd+eBVBYibykQNyj6SZVPNnn2FQjEQsWA2aXtCrdnfpzZFxn+xx7ADvRKTCdrrD4OHKEj00NqhJxGzgUFxagzHy9O7AvMZd+nyB1DJxQl7QO8AZgQYU9WYN5iIT9uIftj5eIU3nN8jfgF66j2ie11IokvYEY52qcqCSZ7ERiDfcOZ6pl2t4g81pGRFpzH+RQJDtGoVJa0t7qS4TSyfLABcC7gGuBXALO7ZLWtn1jOs9aRD4yFzXHzlMJ1Z5mfN+aaGDbPCPm82l92Oz5FiQUcbKhsFI/kcjTStI/gQ/nkHBsHw4cLmn3AvPc1EDE3mQb8kiCNckhsv2UpB2B7ziaD7OJuP3gMqIDv0v/Zk3/iqBirhNgDtuntV6fLmmf3KCSViLUb94DXAq8z/btCvXhG8i4PyQdTNSS76OzZzdRbxgYqfZ0KXCppA2I2sXH0j332V4S2yjYi2h+60cUzSWIViOfjgFR7UsDfm40/F3StsQaGWArYlwqgZklLUJYZ+6bG6xyDrxd33oB+GsmwbIXVyga4X5Md21yYNLXuCbgSPocUcybXdITzWHgOeC70+3CRoHtW1JCpSkGPegBpOPGGolw86Ii3bTRDLySDiQUOE4j7rdtCJuoHFQrflce1BoFjuMom6Rf262uWNsXSvpGwfilMBakBYDVJO3L0ARybvHmOyQ5UiJ59STB+s6VI/0W8BDBIhfBHl+K2KSeSGywpwXViQttuNVxWQCzpJ/vIew9/hU5jCI4L3VsnEP3pD/NUn29CbHUxVJ6DXA6YZn1Cwpt+hP+LOlY4G3AwQp/z1KJt3cUijMm6mmpqHc0sJDtFdKGbyNn+snb7iqGKeTErx3m16cFTeHm28RY8SfCoiwXDym69M4lNo6PA9lJX4fs77eJpFtxjNTFkc6fLSE+3jEGpIVnHUpLzflmZgRy9TRgKYUsctezBwz87KX7bR8iQV8aZ6Zxcz6FKtCHibVcKVQl+KTE0LkpaT9eu5xrrxEXsH2CpD3S/HKVoosqF8U7egAU/uyvoZMQ2lnShrZ3G+Fjo6LWPqcW4TKhyv7J9n8kLSFpVhf2lK+I7xLy2M0c/R6iWakEkboamayNgkXZKuRhuvdRxayBAWz/Blhb0lzp9ZMl4hIEw8slNV2cOwCnFIirdlNLmmdz9jvvAD5EqBYdRoeAM5nIU5bAjoSCwb9hSsHlBjoF2kFRZZ6WdLDtz/Qc/nSfY9MM208RSf/sxH/CrMBcxJqzXWR5Atis0DmqQtLrbN9bIFTNfOR/JC1eaW7aETgxkThFkDirKJ6V
JJPZXqfAJXVBUt89tDOVZNI4cQFhsYvt3+fE64PNgJWBO2zvIGkhMprVWnvJWYiGjj+m10sADxS43pp7nBqqPUcQ+cKXS/oq8X1n2zkmnAB8zPY1QGPNdRKh+p2LRyTNbXuyQg4jWOsAACAASURBVD3k9cBXcvIhaW2/me0zm2O2jxrhI1MbdybgMdt758Ya/hRah6g9NU0GRYmGJQmGtqs0ANTKdSZcKOmzwA+I8WJL4AIlpdVBcuwJRxKK9Z+3PUXhMxH3cp/DjYlm7WdH/c1pQJrntiUI9X8lyM8/BVYh9rBTvfe2/dH03H2h9J66Nvl0lHNnEdXatQFJ77V9/ki/Pw34MHHPfZO4j68n9iklcABBfL828RqWBH6VG7RSDvwr7uOU1HssA40y1uqtY1mkL3kCWL1JOsj256b3dUwtJG1OeFcWW0jMwNhBfXzN+x3LPEdX8Ttjsm+6TYeFMyTvJN1mu7jalKSLCbnNZuO1DbCe7WLF8IkESQ8SnbL30CIu5HYRSbrdSY7UHR/17Ht5mGfkTturlH5WclDz2Wid4+vEwvhpQnVhPuB829lSlpL6+UzbGZ29knYmOqafoVOUzorZin1twyQvCUlzAO8E7nGoWSwCrGj7ktLnKgFJ7yUIb72EumwbFYXixj7Asa1n+he2V8iN3XOeZYGfuZyHaunCTTv2mwk7kotKJH0lHUrqVmkXcUpA0o3EmvBu4r5YiejUe4a4R0rbUIw7SFpipPcLzHvfAP5JEL12J2S+77OdVcyp9eylOeTvBAmnLWedsy4UUSxcDng7ca9d7EJe5Cn+foQqwNuAg4hkwPec0cnYM2dPIja8b65RxJgIkHSj7bXTmvkIQpXsR7aXyoz7daLzv1hHT4r7APDaZtxMybh7bb82M27Nfc6U9XFJ1No/pdinAq8lEqXtMSNX9r0KUvLuLKLLez0i2fs+2/8qEPumEuvtqTzXAkQDyc8yYpxDEE32JBJ4jwOz2H535rUNIUQMQ5IYJPa8RBfneunQVcABhf5+7yQk8AEutX1xgZg/Bq4kCjgQa4ANbGcp7Ura1D1k9VJIxeQ1nCxIFBYlt9heMTPu3sDSFJynU9zbbb++59jdLqAMnIpvewOvoqx62BK568vphX7f93hE7bkpjUWUGHtaMV9mu4Qyazvmmba30FAF9OxGA0ntZ3c2wl73dtvZZDJJpwDfdgFb7j6xb7a9pqTbgA0IAuP9tgey8R2DvWSVsTPFPp34ntuqPbvZHqhBKa211ybsUt5K3GeX275/xA9Offwh6+RSY1IzbyRSz1eAQ4Av5q7rJN1qe/XRf3Oa495Qa0+qsBbaG7jO9sFp7byn7U8UPk/2WjbFuYI+DU4F5upquc5hcusNiuTDS0Nh8bV56RyqpF8SggMnuYewL+kzHkC9vtaeOsWuQj4d5lwllfCamMXWcZLe5Z5GKkm72D6mRPwaqJED7/1OFc0Wd7ub4DquMK4VcFpYVuEJfZHtkl31tbCf7bPSQuKtRFfS0RTwFpyBMcG/JW1Dhxm7Fa1NZA6GK36T4X1HMKTfAPw8vd6AYEH+jXxJxGIKHD3YikjmNdL0V6djEwIVJuW/OVm0FEYtOdKnJG0B/Ci93oy4p6HPQnxQSLoMeJ74ngdhDL9vhPeKyIXa/mwq+P7L0f31FGGbkQ3X6frem+i8KZpsSviSpOOBy+keL7K+Z0c35I9br/9CqJSNV3yLsCK7pzSBg5BOvVndKkvZUovq2Mop/XwEKFG42YPokpoMHJfY758tQZ5Ka6ylbZ+UxrbFCCncXFSxJEl4GNjJyXZJ0grA/iUSpxMFY1AA+QzwEYLQujMhdX58gbhVnj06FrVtlZCsdaEdnaypeFeEdNMn/ubEc/IEofj5xQIEn/ac/QJh61hkPp2g+EoqOH2K6HSah1D7yEXxjp6EXwOL01E6eWU6loua+5zLJW1KecJlrf0TwG/Sv0l0KzqMS9j+raStCMW6PxJ2HE+P
8rGpRXF5aOhflLX9GJBVsLC9SfrP/VPxYl7gopyYCW9j6JrtXX2ODYITCWXLLdLr7Yh13YjEuNGgUBS4xPZFifS9rKRZnK8avQtBWPwCMT5cTkjj5+IVigaqyYSyVbH1LPF93pQIWhCNHSfkBExE2R8SRNwi87SkXQlC05KS2tY0cwOlup4b9bDjKaBqJelbtvcEvq2kZtGGy9tc10Axad2+wcvltarMTb0kwFSgzSIBSpqU6gqXkOzpFGqDhxe45D3Sz+KKQ7Z3b79WqMH+oFD4krbcvbg1XetxhGLdk0Szy0CouZesMXb2YDU6qj0Q6+YHG8LWtH7fDpW3o1Lhu4T6Ty+uUiiqfp+OasiVKaeTu95q28x/1+Vs5i9LJKpizS0Jd0r6KTFPteNm5TpTvn6j9nxk+7dAFvlmGCJ2EbU6IqfcYDZgU8rkRWrlW2oqqi5NEPWWJ76L5nw5TbNHEs/bU8R915tfzyVmLTvcvncQ8k1CrT01dDs4TCGfkm9jOAQua6vaoOQ6bj9Jz9r+OYBCTfstxNo5C6kBYEfgdXTfy7mqg8Vy4Bojp6QaDSgTRQFnQ6JbaG1isjvJ9oPT96qGR8P8k3QQUYD7Xk024AyUhaRXAYcD6xKT3nUE+/j3BWL/ClinZPFb0iXA9qkojUId4mQXUJMZhiU8LtnBY41S7PEU660EAakocSERybYkEgunkORIbZ+VGXdJ4hlZh3hGbiSKQn8GVrNdwroGhW/qIsT3nC0dOhaQtDrwsDO9vlvxVmDoYn7ghabC0/sDidRSFKmjZzngXjpELxdYsE0opKLKW2sQhlMXxMcJu7PXS9oM2NH2u0qfqwSUFLEkvYMoinwBOC23A0BhS7M6sXlcJo0VZ9leN/+q60HSvbZfN9qxFyNKFABSAuteD9hROUrsifbsVetkHYv4MzBxIOk8Yi04L5Eguzm9tSZws+31M+PX3OdMBuYkkrvFCJdjsX9SRWW5EtDQ7v+XA/8i7XNKFPXSeqsX9oAduE1Rtt1VV7Ao25yjlzw8l+2ByMMtQsRSdJPd5ia6qLctcL132l5ltGMDxL0NeBMwP2F5eith+7FNTtxaqLGelfTq5m+fipmNiug1tu8ocM33OFNFpyfevMTf6yC6rSEnFyIXosLqYZJWs32bQi1zCDwG9sGDIO1zmsaInWkVVmwfUOF8JfNaRecmSWcTJMDGom47YGXbA5MAJV1DFNBXItRN7gHOy92f9pyjmjJZK94swC9sZ1sZahhVmdJkl5Rrn8f23aP86nRD6bGzJ3Zx9R7VVe/tt85qMPB6K8U+n8gfv43IVz9N7Bty1dqrrMHVsczsjZud61RSPc2N0xOzmlrdMOe72faamTGq5VtSnug9DFXYy1Jpk3QtUaj/JtFMtAMwyfYXM2JuP9L7trMsW1PNYl+GKrbnKLRV2VMPc675gB/YfmeheAsTeQsTY9BfC8R8iZN1mKQ1E7FsyrGMuC8DzieUot5J1F+2chkl+LMIIufWhB3VNoRa3R4jfnD0uMVz4KrslFRl7TkRCDgN0qZvK2Kg+BPBoD7d+Z0yRVFrITEDEx81it+S7ndL3l2FJN9rQpXkhWuh9ga6JnFB0nJUkCOtAUnvI2xvihIXFD7TXwMWtf0uScsTRLis7sJhznUKkcz5pe0tR/v9UWJ9CVifIOBcQHSzXusMtQxJq5I6LSnLokfSgyUSQBMdktYgLKiuovs7zpbgTuS37xJqAI8Tii/bDkoQVR0/1nb8Rlr4cOBK2+eUICRLuhNYlZDdbuRpiyUUJM1PSE+3iW9XF4j7fSLh27ZfnMv2hFGAq4kSBQBJPwF2t/3HUX952uIWffZ6YhclWqaYDwCvIdRISney9osPDFZUl3TESO+XmJ8mIlJxfieGrpWz1oYq3NHTKmyuTqindUlZ5xY4J+I+pybSeHEa8NJ06O/AB23fO/2uaihqFJlqo3ZRtjR5eIwIETcA+zg1V0haFzjUmTYM6tgk
7w7MbvsbJYg9/c5RKFbx9WxDNpF0ue23lrjOnvg1LWUawpAJslfWfqEVd3/gUeqoh00Y9BTfDgCmFPJyC28pfo1iU5W5qSIJcD5CjeVkYGWikHU+cJXto0f46NTGL178VofwDKE0tDxwpu3PDv+paT7Hy+nejwy8n5K0nO0Hhsk3GPjHOF0L1Bw7X9rn8OScmtZYFr5LQhPMZr4mJB1NqDlnq+uopVZHqJI1KEnObt/HkwhlpyNyc8GV8y0XEM/HPbRcAWx/OTNus5abQtwrRSZWqEU+Y/s/6fVMwEty64mSHiQIHL3fxbgbj/uhMPn0I8Qa6+fE2PlmIi9yYmbcfmuAUvZ9LwcuI9YwHy5FvFRHTKTZ88xCNAVkkQNr5MDTfvRO2/+WtC3BvTi81D1cY+05USyomqT8dsC2wB3AGcSmb3uiQDmesAWxkDjU9j/TQmKf6XxNMzCVqJXwTvgcITlZsvh9uaSLCVlICMWTyzLiTUEacHelk6S/kvDjzCW9FZUXHgPUlPaG8H0vTlyQtBTwO9tHSVofeJukv9j+Z2bcWs/IlsC3Etv0RNulZFRPJkgn+6bXvyQkSYsTcGxvDyCphPTyZkRC6A7bOyQi0emjfGY0HEssLrsW24VwvaTlbd9XOO5Ew1cJaeXZgFlLBnbI0W6YNmOTbE/ODHnYSKcj35LkNoV6wauBz6XnosR995xtK8nJp++jCNImbA/gFcCdhPriDeR/FxAdMbvSkSe/mrAofVFimAJAbvft/MC9km6mO4GVZTNQ4dkDhidaki+pm60OMobxP0DMz/MTybYZCPwEuIZY05dcKxe1lGkINmmd+XngH8Qa66wSRT2G7nP+j0L7HKhDuKy4f4JITO9l+4p0rvWJpqQ3FIhdDGORxC1NJrP9plZRdg3CznAZST+gTFF2ExJ5OJ3v4Zz9Qvr//JekLwCP2H423Q8rSTo1d7+XsAtwavquIcboETtzpxKStA6RhN0xHZupQNyucxSMVWM9O0nS54l7bK/eNwsQ96tYykjaj5g/msLgSZLOsl3COqS5t9q501y79iZRvz9Du73Hpapzm2SjUOHKJt204vUWm46UlF1sot7c9LSkN/aQALNsDCVdSlhZ/pcgWjwu6Q7g03Tmk0FjT1Em01CrtutzYgOHtv77BeAPth8a7penBZI2IvICixIkuCWA+wkrikHxKSJnOFy+YQGFuth2GeeogZp2XLcTFq2Pp7jzAY9I+ith0XHbtAa0Xc2OVD124pS1Xzy2/be3/RdJ3yCs4QZGIvbsBSxu+6MKi6BlbZ+fGbeWPQsp3mN055sGtdn9HnAhFcnZxDq5UWl7gSDK7DjiJ6YCtfItCa8o9Az34llFc8ivJH2cEGOYq1Dsy4ENidwywOzE85E7r/7N9k8zY3RB0ibAz5s9WNpPrW/73AKx+5JPc+Mm7AOs6rAabrgH1xP5kmlGym8uRlgkrUpnLzIPMMegF5mIlm2izazE2ngzSaUIl02e4p+JVP0IoWCbixo58KOBlSWtTKw1jifyp30VLwdA+bVnIaJUVSj8kJclGPUn2X6k9d6ttlcf9sPTESXZ4zMwdpB0PZHwvo1Wwtv22QVi30wUVnqZprkSch8gZJwBrrZ9zki/Pw1xjwdmoVt26z+2P5IZt6i8cC2MBXs8neck4JDSxAWFSsTqBFHmZ8BPgdfZfndm3JrPyDyE0tkOxALjJOD7OYtvSbfYXkOtTsVc9mordjXmrZKMp0KmfQNi43u/M+xVVNEOUWEhsxSxAXuWwooLEwWSfmF7hUqxiyZBJG1h+0xJS6YNb1GkzegqwG8TIXkBYDFnyk4rfL2XJsiRBxGd6t+zfWSBa76HKLzdaHsVhYrY15whNznMeV5KJALGrQR3TVTsNhkTmwFJ7ycKnjdlxrmHDtFy5YZoafttJa5zIkDSfURy6UKCjNRVNC2YLJxQKLVOmZq4Jc8laSWCUL0p8JDtDQvE3ITufU52Mi/F7Uu4
dKYiZ639U4p9l3tUdfsdezFAheWhW0XZrYE1W0XZ9wPr2c4iwbfW9Y36y5zE/ZZLiGjv9y4gyHtZ+70eQoiI7nqIYqRzySFprv4UsZ8+WNH1vGdmU1LvOb5i+wuFYhVfz0paFtgY2JOWxVAD53dlV7GUUXROr2z7mfR6dmIvnNv5PgnY3PYPc+IME/sBwi67N3fxWOlzlUbpvXv6+72ht9hU4O9XZW5KxZVTCZtLESTfD9m+KyPmHISV+umE/d1ChKrjgUS3960Zsasrk9WApLuI4v9ljg74DQjlieyi+ijnvcT222ueY1pRa+xMsY8DfmT74vT67cR6+SQih7hWgXMsQqgLZVmdpFhV7MRT7C41CIW6xz22l8+M+0NirP+g7RXS83597j5HlexZakKV1OpqoSbhS9LBhCNAUYUlhfr5/QSZ7kBirvqG7RsLxK6lAPdWouZyOd3CAIMQvoa9rlLrl578Xmny6fUEUei59HpWQulyIJKTQsHwQ8S+rL2WeAI4Jec7ro2UFzkbWJFoYp8L2M/2sQXPUSQH3tpLfxH4s+0TeueUzPirEPmFYmvPiaKA812CZbousLrCY+9o28+MR/KNhrLHFycm6hz2+AyMHeZwQW/eHsxie0iHUy7SIF5jIF+jZ9P887Q5y8V5kj7G+JcXHgv2OETC/06FX21J4sJ/bb+QCFrftn1kSiTnotozYvsJST8i2N17Et2i+0g6IqO4/u+UXGrUMtYGBuqO7YOazNtbFczx44hN5JOEEkcOLpT0UeA8yj97RTxY/wdwgaS3l97cJXzY9uEpCdIoA57G4J1CnyW6B35EbHBLw0SHwnuJRMWctIjJg0CSCHWF5YiNzLLAF21fmnepU/CM7WckofDpfSAVSbIh6UpgI2L9fRvwqKTrbX+yRPwJhqLdJg1KE21GwFrAipJmdp4v+dO2/yvphURAfZTojCyKRJAEOMr2t0vHz8QxRAJoSeK5aCAKdL5PYJwv6d22Lygct3hHTw8eJTqmHiOja0rStbbfqE7HV0PM2knSf4lEyCG2v5NxrXvQIVxu0BAuM+I1qLV/AvitQoHitPR6W6A4gXaCYCnbm7ZefzmRUQbF+4mi7EcJ1ZeFCBWATYnGg4GR1i7nSzoWmE/STgR5+LicuAnt/d6RhfZ7TUf9ssQz8hPiGdwWuDkzdjNXX5UKY03XcxHyTUqeLwP8QNIsLqA85bBHbheuZgUezIz5IHCwQur9wpxYw8SvpUL1MLGWfya9fgnR8Z2FtBbah1jjl8a/anzHY4TS9mSPEcXNBpPTsVxUmZtSsWPltEbG9hMFYj5FqOs9Yvt9MIUQ/ydChWlgAo47ymSHEySIySn+PJLWyiHua2gHPERO61bgU85rpnne9mOSJkmaZPsKSd/KiNc0iQ4L2z8eb+QbqK7gt7btnVrnukTSobZ3lvSSQuc4jVBgOtv23pmxmrX3u4FTbd+b1jODB5Q+Ryhmzi7pidY5niNqf7lYyvaWkraCeN5zrznhNbY3l/R+26dI+h6Za8MGkpYh8soLJdLQSsBGzlCWU121OiS9gaGK+LkKvqVznW3cCJyTyL7PQxmrNnes6p4kmohL4t+SXt8QpyStRpk9+w5ELnUWOsIAgyouNZjU51gRzkHl/N6vgZsUNvYm9oJ3KzUieBobDhziCqdI2tQFGtRHQ0nCJfGsbUo8101zy0K5QSvlwCenuWRbYL30XM+Se60NbN9J4bXnRCHg7EAUWY5Ir7cmbozNp9sVjYwDiYJ6F3t8Ol/TDEw9aiW8oWDxu09iuhePkZ+Y/o+kpWz/Jp1zScrI4FeRFy6NZgMNbNXLHicS/6VQi7jwfNp4fBB4XzpWYlKq8owoFAU+RHQgnUp0nz6akrP3AYMScPYi1H+WknQdsCBh71QCL9h2uvZvJ+ZtkU4h2x9L/3mMpIuAeXKZwgTTHcIOb8qpKPDsVU5WTCTsCuwt6VkKbu4SSidBHlOS1Jc0RIbUmbY9
wHeITd1bCALOZIJVv8agAdPzdoHDY7kU6aaNhxLx7VzgUkmPA6Xu7XkTyfAjxN/vS+qWJ38xoVYBoAuSLiOew6OcKT3dhu3PFwpVg2g5BLZfm0hOWR7ONWD7COAISUfb3nV6X884wh7A59Nc8kI6VmIuaVvKTOnoyYxJItZvQayxziKk9AdWdrT9xvSzr7R+i7SXs8+pRbistX+CIG18mUiSmkj+l5C/n4goSiarXJS1pM2JPUlp8nDx/Z6T+oqkq4HXt4rI+5NvFYnCfuoEortycUUjw86tvc+gcdcnksa/J8a3V0ra3pm2cn1QssD5c0lbM7SQdUBm3C4UJOL+i7D6vJQYg94G3CzpCMi2Vr9MoXL5Q7ptRHPzLldIOoQYN9s5uHGrBiDp1cBfmv93hdLQQrZ/nxm6aLGphfbcBGExkD03JVJCUxCaudn2Fno+2gTOa23/iGhKKYGj6W5uebLPsWnFt4CHiCZBEZacSxHkwBMJFclB8U9JcxF/tzMkPUrrGRwQzXz0csIy5efp9QbE+m3cKgG0UXDsBPiLpM8AP0ivtwT+qlB/KWIPb3vDlB/KUpJJKG6/aPsg4CBJB9n+3KgfmHY8l8bLpvlyKVrjfgZq2bNA5AH2AY4FsH13IvjkkGW2pVut7uuE2mc2AUfSacTYcyed/Y3Jt9AuTvhq4f8RJPt77HwrGHXbIg1BgTwqREPyWZIeJr6bhYlxPxdrOFP1rg9ulfT/gKPS64/T3Vg1MCqTT39Dt9vFT9LPXGu/6ySdACxq+12SlgfWsX1CZtxelNyP/IT4Xm+jzJjZoEYOfEuCG7Kj7UckLQ4cknuhkra1fbp6rIFba8+BFWAnigXVfe6Roet3bLxAyRZL0em2aurmeFFKQ09EpMF9TmLAKVo8VSic9MKu4D+tAnKyCmm6k4juFRFdgDs4eTu/WNCHPb4xUIw9Xgtpkt+FkDf/fkrkbGH74My4VZ4RSScDJ/ZLkkp6q+3LM2LPTCS7BTzoAt2QKe5VwEVEgulNROf3XYkcUCL+B+gQv651pr2cpNmaTdhIx2ZgfEJhV7cYkQRZGZiJkMgcyNJP0SX8emLhPsQaI7fbQB1pyLb9Wwk58lMIwtsto/5y3nneTMhOXuQkS5oZ7x7g7URxaF/btyg6n19UNm0Akk4l5E27CgDpX9bmpuc8iwKLEB2HR432+6PEWoFIZrbtZXMTTe34r6IM0RJJB7tHqa7fsRl48UIFO3pSvIOAH6aOoTGBpEVs/yXj8+cQjT57EkTRxwm10lyr1hn7pzGAKliTpLhTbDlLEgNrrV1q7fdS7AeBlZw6K1Mx/O7cxL2km4hmiJ+21ofZFq4K296tHeoyTVf59wddJ49yLgHL2743M85FdBLebXukw/KusO+5FiDWQwOTqBTy+sPCGdbqtXJlkvqNvXam3WBNSLqVsIpq2yJcZ3vgJoYU50sjve9M67N0jpmAOUusL2o+H2lM3sP2P9Pr+YHDbJcgDvWz48ja8/XbQzfnyd1fKywRnyHm0m2IefUMF7BpSwSO7Zv1mqJj/2Tb78iNPVYoMXamOC8DvkTk9yAaO79M3OOL2/51RuyX071H/WPGpTYxq9iJt+JvBKyXXl7pAs0yCluvfYk9+yWEk8aHbF+ZGbeaPYukW2yv0ZMzy7IaSvPeJq3xbT7gxyXmvURKW74EkaUnbtFcZ0/sqwmroSJENw1je94gN4+azvESgvDWrLkfBCY5U+0kfc+HOKNRpk/MOYH9CFtxiEbJr9jOJXIi6UCGJ5/uanv93HOUhqQLiVzAvg4bv5kJy/kidaKec5Xaj2Tvw4aJWzUHLum9JeaOFGtn28cOs1a2M8jfE4WAczqRrLgxvV4L2M32B6fvlfWHott2Y8K25mVEQXYND+ghNwNjD4Uv3dJ0L2BLTKBjWvzOTUynGC+hNeHnTvatuFULWSWhSl7nI5yvuFWEWtKFheIVfUZSouYy2xsUuLze2LsRCYR2cmUr
56lDNbEXJpi3t9i+JjFv1y9xL0v6DqEG9P10aEvgN7Z3y4g5xBez37EZGBySLrf91tGODRi7ShJE0oK2/5Z7fX3i3kR0vN2SiDgLApc40wtY0gPEs/EHojuvlG1fVSi63/cjyHQfU6giHOJuC40XBcaiAFAS6XrXJ9YtFwDvIv6OpdTUmvMUkZEdZqx/UZK9/hcgaX/b+xeK1dVN3hzPSSj8L6AC4bLK/qnPeb5GFG6OL1Eom4ioQCarUpTts3YBoOS4XGG/ty/RhNI0AGxMEO0Oyox7k+21VJ6gPWSeK5zkrVHgrJXwfpntv5eOOwNjg2EIHOO2qVOh1rALQZK5BZgHONx2VkdyrecjxZ4y/ox0bMDYPwauJFRvAD4GbGB744yYNwDfpKPSsxmwl+21c4v1NSHpftuvbb2eBNzbPjbekHJ8axJNIjfb/ut0vqRhkUgshwGLErWnJYD7bb+uUPz5GZr3zVaVUxD31wTOSIe2InJG2Sqz6qi+irCZHddzYSrWf5xo8n29pM0IZYeBba4lnUuoTnep1RFEhiy1OklnAZ/IrTX1iVuN8KVo9l0SuJBuJbwiTV/pHKXX4FVy96netBTwO+K7KJpLLUnCTfFqkk+XAfZmaF4ki6hWmlSX6m/DwvlKkUj6LmFlfE9urJ64VXPgNepZkta1fd1ox6YFE8WCajXgeknNBndx4MHEohqPBZf3E/LHn6TDHn9RJzUnEhKzeQ/gFYSk3tqERGYJ/+XrGSo72u/YwKhA3liNzmS0iqRsosxwhSzyJQtroYrX+XBwHauI4yl0n9V4Rmz/R9J/Jc3rsP4qiZ3cUj6w/bikncizLWhiPSLpDGANSe8lNuil7uO3AK91YsqmgsBArOaURFiM8FpeFabIe84DzFHgWl/0kDQb8V2+LCUq2t/xYiXO4ejYaDyAm4JsdsGtl3xTsKB3BFG0WUjSV4lE4Rcy4jUYs465wnPqz22f1bxwdNi/6Mg3UI9go7Ag2Z9IPs5MJ6GQqzS4GdGJdYftHSQtBJyeGbMfsmRkJe1KJPmXVLe069xEl+UMTExsRNzXJVBLXqXGmwAAIABJREFUXnhCo0SjRQ+K75+Gwc1EEvWbhAXRiwa9ZDKVsyZZqSHfpHiPp7VzLsZi7VJsvwdg+6upMPSmdGgH23cUCP0nSW8ALGkWYl95/yifmRrcKul4OvPzNmRYhzUYrsAJlChwXi9pxVIJb0mT0n7hEtK9IGkP24dnxj3T9hZNDrb3/RI5WYXt9F6EEsRHJS0NLOvMzlZJX+x3fJwTT/8maSPbPwVQ2F1nF5FrFZuI7usnJG1DFDg/S6wzci0Bij4fPZgkaX7bj8OUAlepGskuxD74C8Tzcjnw0cyY2wCHEzksAzcC2yqaAz8+SED1t/YAilpoXy7pYrqbyi4rELcKUq7zi4RlloAjJR1g+8QCsfvZ1jRWKsd6sMbcA4l87GW2V5W0AWE/lI1h8r43EHnKXLwHWCXNV02u8w4gi4CTvuPvEQp72eobI5zn/cAjtm8qEG434LvAcpL+TBAjtsmMeQ4d8jQEIbAUXgbcJ+lmusksWbZLtXKdCb9L/2ZN/2qgyBp8DHL37ywQowv9SLiSskm4CU9J2oJu8mkzVuaqipwFHEP87UrZRQP8O9X0mlrO2sRYPyhuS7FEcCIeT/89H/BH4NWDBm6t62cGdpD0W8oSs2rnwEvZ1LVxJEOf5X7HphoThYBTfHCojJ2JzqA/ExJLMzCxsAfBFL7R9gaSlgO+lhOwZwJtP7DFi98lyRuq5+05VoWsUqjpdQ4M22WRJXHae4qCsYo/IwlPAvek77ndGZr7/c4kSS0iy0wUWnSnheAhxIam2aDv4/AQz8WvicXVH9LrV6Zjg+AdwIeIzXOb5T+ZzE3uDEzBzoSNxaLEArl55p4AiihZ9aBkQbYXRQp6ts9Q2AE05LyNbWcXWGz/YfTfKoPChMgbJd1JyJFe2IxJL0ZULACcQBDgu6Tq
C+Bph6XsCwq1hUeJMbkobG+oqCIParP7PaLwcRBR/GgwuURnzAxMN5Rcw73C9kTbW48JShEuK+6fhsD2uaVjTiDUIpNVKcqO0dqleBIydfMW6+hN2IUoJC9GNLpcTBSgcrFritPsHa+hQMMFFQuchBXJhxT2SyUS3ldJ+jewsKR3AvcA2xPfdw72SD/fmxlnJJxEPM+NcvifieJIrrR8uwg7G/H/UILwVRO7AGdIauaihyhDsqxVbJolkek2JtTrn5dUYp9T+vlo4zDgBoWaA8DmwFcLxMX2o4RNRjGkotX7hnn72gFjzj34FU31OT4uaRM6VkPfdaatemXsA6zaNCGlPMD1QDYBh7AmXZBuMtJkYBngOGC7AWI+b/sxSZMSAfMKSd8qcK1QL+/bYD7CPhSicb0EDiW+169LugX4AXD+gOSmkbAWsKKkmZ2hVANTnu0NFTY+k2xPzr04Z9hBTgX2rxi7QdFcZ9P8JWmu9PrJUrFbKLUGr5q7r7QfqUXChQrk0xZesH306L82zdgL+CnRWHcdMe4PrJxt+9UAko4DzrF9QXr9LmLdlYOa63qonwPfuVQgSesQe5AFJe3VemsewhJvYEwIAs5YFloKYW7gEkn/AH5IyMiNW8nCGRiCZ2w/IwlJL7H9gKRcq6H2BHpo6/hk4HM5gSUdbPszPYc/3efYIFidCt6ejFEhqyBqsserdlm0UFJtoMYzAvDj9K80LgJ+KKnx5905HSuBfQmLwUcBFBY7l9FhZ+dgbuD+1FlggqB1q6SfwrR1GKQN2CmSNrV9doFrm4EepM7SwyXtbvvIMThlsSKLeuQUbZ+bjh1WIPwcxGLVwOwF4g1BqaJpilVzTl2G8EX+MHCEpDOBk23/skDsiYZaBYB/2b6wYLwGtyq8048jkglPEh2ARaChthYDqZ05FOT+BWyVCN9vJJ696+gkOGdgAqBnXF6tz7FBUbObfEKjIOGy+P5J0pGM0OVXohlgAqIWmaxaUXYMMK7sG4eDww4it7u7X9xniWJFMUuBhJoFzqzCXS9svymtV24jCqcfAZaR9APgqkELDbb/kppYTnYFy+iEpWxvKWmrdM6nEik5C737GUmHEqSvcQvbvwHWrlAsrFVsOhb4PXAXcLWkJYgmlFwUfT7asH2qpFvpqHl8wPZ9JWKnRoOjgYVsryBpJWAj21/JiLkgsBNDmxey7BHHAolwM55JN208RuTpG0ymnArHG2yv0Xp9njpWJQPt/YB/pnHiaoK09yjdpMMc1Mr7QjSL3CHpCiKvtR7dzSMDwaFoeVWar95CPDMnEgXUYnABq6wGkn5DkAquSf8GvRfGRK3O5VVD+6EooVzSCoTS8EvT678DHxw05zIMiqzBxzp3XyiXWouEW4V8qo6l03mSdiPqUG01p6y8me3bFfbWyxL38oO2n8+JmbC27Z1a57lQ0jdyAo4B56JYDlzSB4Y5/goA27n1xFmBuYg1Vpug/AQZBCoAvYibb6sjLbK3JKSVHrK94XS+pBmYCkg6B9iBUDJ4CyHtNYvtdxeIvS2xCHoVnU2TnSF/q/6+kEW8zlXP2/M7BHP3/4BPEYWsO23vUPI8EwWSHiQ2Y11dFrYH2tyoW2VpCJzpS1r5GZmdkJ1+MDdWK+YkQvK3GYMvJax1sou+ku6xvWLPue5qH8uI/eaR3h9k46Meuf5WrPEswT3hoJDVfxXd33HRznd1JOZLxKrlL/xFonB1NrHx2JggJQ+cfBzhXAsQG5Is5bCac2pPzA0I5bc5iWT1Z20XI3SMd0i6zfZqFeJ+nSB89W6iS/pxvwqYx2X8yPvaWtjOsrWQtB+wBR1Sa7VnbwbqoPS4rG554aWJLtzivu8TDeqvQJkbs/j+SdL2I71fudN1XEKVvOpT7OXpFGV/XqooWxK193s1IWlJopt1beLZuwH4ZEqyDxKvasFJ0mXEPHoQYb3wKNGA8YYRPzhyzHkc3cIv7ff+oMl/hZLs9cDWwJoOC7U7
gPcD69nOUh2WdDlBVChtGY2kxtL6Otuvl7QU8H3baxY+z/zALbZfUzJuTSjT7qR1n32CuH/PoWCxaZhzzmz7hdJxJwIkXUUoqRxre9V07Be2V8iIeT1RnO9S+axRoC3Z3NIn9mXA8yl2rrpVcUg6FViRUNkzMXbenf5he2CCZ/pe32H7j+n14sDFiQB+R3OvTGPMOYGngUkEsXVe4Azn2Yg3savlfVP8RQiiKMQa/JFCcWcnCvZbEpYh59vevVDs1xUmbTQ52rUIu891iaL93bY3GSDWIokwu0S/92sU22s80yVznSne9cC+tq9Ir9cHvjboOm6s1uCS3kPYnbYbtYrn7nNzqZI+AXyGyGu+h1DyP932m0b84NTFLk4+VajqNZZO0LN3cL59fZWagMLO8Rq6rXbXsz0WNsfZyM2BSzop/efLCaWan6fXGxA11CJqPpKWKD1WziDgVERK6G1OkAzmfrEmNicyUgF8XuAi288ViHcxsWi9ne5N0zSrDEjaFfgYsCTwm9ZbcxNJi4FlkdXxpp0bWIWwIynm7dlzrldRqJBVGmPBHk/nuR5Yv7nHJM0KXJmxGLxihLftfIuP9rmKPSOS3kcoRM1q+9WSVgEOyL3fJC3fmziXtL7tK3PipjiHACvRLSN7t8uoZRSHpIvoyPVnjUEz0B8axnrCGd3pqtT5ro7E4p6E3VSDeYBNbK88SNxW/AeBlZ0kf1My5M5ByYWtuENUavodm8aY1ebU1jkWICwLPgg8Qtgl/ZSYZ89ykhb9X0ZPAeBvFO42GWb+KzLvpY6LRlHmWheQT5d0F5HQ7LK1sL1jZtwqz94M1EetcXm4RGyDGgnZ8Q4NVaB8M7HuHEiBciz3Ty9mzCCTBVrz3WyE6tJdxHewEnCr7XWm17WNBkk3AkfR2T/9H7C77bUGjFe14FSjwCnpfNvv7VMESJc8WPJf0hzAOkSC+1ZgIeA1hI3WNbZvHfSaU/yfAKsSDS0lLaOR9DbgC4QN5yVEIfJDuXv2nlzOTIQVwAGliQU1IelrBClgILuTYe6zBgPfb634Y9LkU5MYUhLqqJpMIVVIutP2Khkxsz4/wPmKNLf0ibsosEiKfVTJ2CUg6Usjve9kZTNg7HcTCrC/IZ7FVxP5hyuBnWxPs7Kawibjh7b/POh1TeV5itZGUsyN6FiTXWX7vAIxzySI9RcRjhRXFSZyZDep9Yk5M0FEejORa1iAyCkPZK2iUP+5zPXU6nrPl/VM18p19pzjrt49dL9j0xCv+hpc0jGEmvgGhGr0ZgRRLStPlGIXb0Dpc44iJNya5NOUI/sYnRzfNcAxtp/OjFu8JpDivhT4Ep1x82rgyzVI1KXQyoFvB/yVAjlwSZcA2zs1OyUy58mliEipmWFz2/9Mr+cHfpATf0JYUE00SPoY0XW6ICGxv1Nv8XcGJgZcXlpvsYLMxO8R3ooH0S3VOLnA4Hvo6L9SBrZ/L2kRhaTls6N/YkwxFl7nAL8GbkoJrSldFmkzNc1dFmO10E7nKvmM7E8sAq9Mse9UdEjm4szUyXIIsUD+BrFIzl4U295H0qZEghAqeloX6iyoJdc/Ax3UsO7LSpaPgGoSiwkPE89c47n9EqBEcuhtRIdFG+/qc2xaUHNObXADIX27UU+S7Na0wX4x4Da6CwCf6nk/a8yvNf8plPteQ6dYuLOkDW3vlhm6lq1FrWdvBuqjyrjcFKAlnWZ7u/Z7KUm0Xd8P/m9jH2BV9yhQElL1g6D6/qlF8umLFwnJp/a+bEKgme8k/Rh4vZMSkEJmf//peGlTgzlsn9Z6fbqkfQYN1iRgKxIJd6ZT4CyiNuXUpVmafG37KeBySY/Yfh9MIaD8Cdie/D1FLctobF8q6XZCGUnAHg67sly0x4wXgL+WKAqNJZxpdzIGJP+f0GnyqZbTczmryNr4u0LByQCSNgNyVfHOl/Ru2xdkX10fDFOQLUq+AbD9MLFPua107BLIIdhMRewLJC0NLJcOPdg0
SwCD7v/mBi6R9A+CcHJWjWJ66dqIQrV2DeCMdOgTktbJGesUauR3Alu5gNr5cKepEPMJ4B7CPvO4HHIvgO3/SPqvpHldQa2uz/lyn+lauc42fqtQCG7WntsSBP6BMEZr8DfYXkmhxv1lSYcR+cos9GlAOVLSwA0orbhD1HqAEiTcOVyvwfkU4vk7Ir3eOh3bIjNujZpA06y4x6i/OL7Q5MA3tv1Q63hODvyV7lYa/iuhulQKL2vINwAONdGX5wScoYBTAZIOIjbod07va5mB8QVVkspWyN81jM3rXE7urrjKwDDnuYxgh55te++SsXMxFuzxWl0WGsYfsRW3SvJsUEi60fbaPZ1C2dYvqWvxYGA1YoN6BnBwyU6IsUCJbqFaY9AMdKBK1n21kMa4M21vWiH2uURi5VJifnoboQjwEEx7N8tYqNSk89SaU9cg7BeXoLs79EXRrd9GxW6TeenuCrmK6HLOSj5JegB4bbOJTgm+e22/NjNucVuLFLfoszcDYw9VkL5Ncbs6N9MccI/t5Uufa7xDhRUoW3Gr7Z8kHQ4sTEd6eisi6XQuVGkeGbcYjkzWe+x/HZLudY9tYb9j4wmSDiZUgX9AzFFbAvMTzRLTrIYnaTIjE9PmGfhimbJX3wIoXuBMBMirCXWaB0rETHGXdLL0knS07V0LxZ0TeKYpbqY55CWJ+FMifnG1wYkMFbY7kTQb/dffz4z4wdHjZtkrjRB3TPKRpZGayL5LKBo+DvyOULj8fUbMyYRtw7NEU1aj+pY1vqXYRRUBe2KvSxSkm/1vc90lGu2KQ9IywN4MVXMqpiLeOtfqwMOJwJAbayViLt0UeMj2hrkx+5yjmAKVpLuBVZqcbJpL7iiQ+x3IymuUmF+i00C0M6FiBJRR+VJYDL6RIMA9RzQDXG378oyYNdXqJtQzDVMULL5M99z3ZduPZ8attgaXdJPttRSqkR8AHiPyT1n2mQql5Df0NqA4QylZddV6vpKurzj5VNJ9vTmQfscGiFu0JiDpW7b3HK4Rx+O4AUeSShORJH2bUMFtO1H82uWsBm8jlKcbu8glgHOcoX42g4BTEYkd1fbp++N0vJwZmI5QRansxOLdgk4n0sZEUugrWRfN0CR9OpZNiBjmXCIYokX9VEtAFb3Oa0LSz+jji0jYftgZnpk1IOkE4HJCfWJTwqJkFtu7ZMadFfgqUYCcC/iC7R9kxrzW9hv7JHyLJUJKouYYNAMBjYH1xHAL7txzSLrBFawKJG0/0vu2p6mLOJEr5qeiSk3lOfVBIqH3C2AKAbBGkX28QyEP/QSdrretgXltZ3WbSDqb+H6be2s7woppRELqVMQ9H9jNHRWRJYBvO3WXZ8QtbmuR4hZ99mZg7KHC0reSPkcQAGcHmkKpiETvd21/Lv+qJxYU6ogrEp37UxQo079pVqBsxa22f5J0q+3VRzv2YsAMMllA0veJ4kpDytoGmMv2VtPvqkaGwg5nOAxcxJF0IKEycRoxvm0DLGL7i4PE6xO/eIFTYT35pvRvKeAOovB2eGbcUwgFmfYcclju/j8Vgja0/WR6PRdwSS5xMcXqVRvcEviN89UGe88zIWyMoP98khnvTGAynfFia2A+25tnxq3VaDhm+cgaSOv8SbYnT+9rGQk1CrKt2A8An2SofUjWXqcWFPbAxzD0erMUe1K++xW2/9Q6dgphWfNL21tmxl8Y2JywdJy71jOiQtZkiYCzfpPDUVirXFmAgHMoobjw41JF35599QEEWQ0ou6eWtByh6rwn8HLbs2fE6psLKHG9tZ7pWrnOmqi5Bk95ySMJu/KmAfd42/tlxi3egNLMy62fcwEX2n5TzrWm2DXJp6cTOb0b0+u1iJzfBzPjXkHBmoCk1WzfprACHAKP4wacRDTdl6GEvdyx/gPE3gli31SMsC/pnQSJ+iriet8EfNT2xQPHnEHAKQ9J7yOk4xYlOlmXAO4vwYCcgYkJDeNH3iCn+JY2Sys7da0ousrv
zGSvNioDSxH2SA1KqwxMCJJaTfZ4il+ly0KVfRFLQ+FZvy/wdmKSuxg40PkdWXcRxZUDCGvAY4DnchNNtVGys6DmGDQDgeEWww1KLIpVqfNd0tHAYoRtZnuMy1LJSuuhn7mC2pTqqdQUn1Nbsa+1/cbcOP8LqNhtcqftVUY7Ng3xmkTQvISizM3prTWJrp71My4XhdVkY2sxAzMwBerTwdnv2ABxD3oxkm36QYUVKMdi/5QKx+9xR9liSWKezVLjmkiYQSbrhkLRYlc6ym9XA0fn7p8mIiTdZXvl0Y5lxK9S4EzksTWIZpldgKdtLzfyp0aNWWsOKbrO6olTRW1wmHMVKSLXRom/WU+8Wuvv+wjy1O8o0OSjMVI9LQ1J29o+Pa3ve2FCReunHlB1IRHplqY7h3r1QBfbHbeKImCKdZPttXLjjBUk3WZ7tUqx77G9Yp/jcw9K0pL0MaJ5aEEil3Om7fvyrnRK7Je5jA1gv9hbAV8HriDGi/WAz9r+YWbcplj/H6LJpWiDZGlSZIp5NrAyMdZdk/7dlLOOU0W1ulrPdK1cZ4pdtLGlFbfaGjzlIXcliv+Nak92bFVoQFEltZ5W/JcydO4rkV+/H1gWaGqRiwMPEnalOWuYakSZND8vk14+aPv53Jg1kfLr+xA2exOmCVXSy+jYnd6YOxfOPPqvzMAA+ArxR7rM9qqKrpZxuUGYgbFB5YHlYWISaibhlwC5hZzvEd6SVVQGJG0EHEYPSY3wixyPqOZ1nnAWQQo5nhaDvABq+yIWRdoM7Jv+lcROxKLq87YPkLQ7kMVoHiOcQJ/OgkEw3hc3/wsYI9b5uu7ucj9P0fn+ycy4sxGbpDbpz+SPe1sC30pJhRNdSFpfQ1VqTpJURKWGOnNqgy9JOp5Q+mp3QowrO8Axwu2S1u7pNinh//20pDfavjbFXZdIvg2KQ9PP1YkuiIdG+N1BMDdwiaQithaSzrS9hTqqZ10oVSycgTHB/2fvvcMsq6r0/8/byABKg4og+JOkoygKSEMTRlAQGDEBBkCCOhjG9EVQxzSmUVFUxATKDAYyCq0EBUVschxyEIFRcRwRAUVAQFHC+/tj79t1q6iqbu7Z+557qtbneeqpOud2rbu6qs45O6z1vg9LWsPjpW9LdM6sI+mlwGk1iiO7xKMtsFkCqs6fMvsCZ0u6KR+vBfxrodidwPb+wP5RTJbIC/Ffyh+dQEna+1vAsb3NkELcJ2kPxqytdqOvsHxQJtngfEvBDc4zSJuFF5E2V+bbvr1A6DmSntDb6M+bFyXWf++TNK9X+C5pI5qNs/r5JWmtojdvXZ3xxYwDMdkmslOn/kgW32i83cmTJfWrLTS1O6k1/n5JgRj9DON5WoPH5c9zp3h9bdKG6mZTvD4lSjZR+wBPBa7KMS5i/Px9UH4J/HduPFy0IdsrJBpkQ7aPsyQdQJq3989/izTPlCLfIyGtr7yTR+Zb4u/uCknzbV/af3LQ4pvM6sC+tq9qltoYkubkOcLpwLx8bh83VGbrx/Z3JJ1NKj418AHbtxaIO9W1VwpViLk/yX6r5F7AGcC2wL35eDnS77NxUR31rulaa50AT+ofb9q+MzeEN6LyGPwIkmLdV/Px7sCRpPFoE37F+MLWk/PnJtfOKZIeT7KSvYJ0TX+zQbxFTPHsuxDYpkD47QvEeAS19gYkbUX6u/hf0r1odUlvKFGIW5E/2P5BiUCq7EQh6Vm2b8jNvpD2BgDWyOtyA9/jQgGnAvkBsbGS6sKGth8u2XkTBP1IOok0aP0p6Qa0HalD+2ZoptIi6YskibsiC0x9ca8mTRTHFam5gD9kDWpWj+d4Vbos9EhfxNcCv3AhX8TSKMn0TbZh2FQJ6BBSpe2LbD87V7ufbnt+k7i1GUa3kDokwd1VJC0kSWV+zfYpDWN1rvNd0gqkDZC9SNf3YcB3miw0qa5KTc1n6tHAs4DrGKv+t0fMDnAYVOw22YC0MLEiaSL2J+BfbF/dMN+P
kxY7ihTKTBK/iK2FpNVs/15TqJ5FMWZ3UAXp2xx3W9L9eDPSRvJhtm9smG4nUT0Fyirzpxx7Z5JC5NrADqSF9A+P2kbWMJB0Amlxd1YWk3W54FLSP5LuQ7uSNv8PI83NGi1OSloL+ArwfLJCImlj8n8bxt2fpFZXbIOzL/aXgI1Im1gXkLqnL7LdqKhF0utJSlEL8qmdgU/bPqph3PmkAqdbSM+mVYFd3cCeRZOrDRrYlAZqg71N5H7FgtKbyDVQRbuTWuPvHHsL4Bm2D5O0MsmGYzq7uSWNW0X1tC0kfdID2OLle/18Uif285Tsaj7jhja7OXZRRcAJsc+aPGSzsVZplKwRe4VvMOG56gGtESe8xw0kpajfkIpDi9nBq6C6vKTzcn7rA28kKRf80OWVX17F2LV9vgtYiEjq2U+ubftTklYnWVFesphvXdL4TyxdBChpacarqJwD/KcbKFuorlpdlWt6krXOtYEflVjrzIXfr5zQ2HLioH/TwxiDq5JiXW0kLQMsa/vuQvGqPfuGiaRDbTdqmsl/x7v31m7yWsZ3auwnlkLSNqT9gJFvQpX0DdtvqXGPiwKcCuTNtp1IcnorkRQ+5ruAfGMQTERTeHv2aDJJz5Wme5EWpXubpo0fol0rUlMlr/O+Lot3AX+gQpeFpFfSJ4dYYlJTi9xB12NZ0kbkg7bf3zDuFbbnqU/CudTkoyaSPgssReVuIXVEgrurSHoKsBrpZ/y1xf37xcR6MfANYFznu+3TG8ZdFngTSYWsf+GmSGFI/ht7Halz/3rSwtNXbR80YLyzSBPonoTs40k+340X8io/U28sUSQ0E5iqQKRH00KRXPiF7T83iTNJ3CKFMpPELWZrkYuEF9reukRuQXuosPTthNgrkhZDPgz8lvRsObrJYm/XyPOQ/2SC0mCTjeQct8r8Kce+xvb6eaPzUySVro/VLtgeRWZ7MdlMKLhUshh6OXAI6Ro8DPhK6c2tUpTc4Jwk9lzgX0hFgavaXqZAzHUZU8c4s1RRYN4s7I1nG8vfq5KN77A2kWuiwnYntcbfuYBjY2Ad28/M898Ftp8/SLy+uBNVT3fKcUuonlYjN8l8hfR8Mkmp5t29jeUBY15qe76kq4BNbf9N0nW2R1VFvJPkxp53MFYYch6pGKKx0leN57WS5fcXmaAu3/TvIq+vXA4cTrJH2h44BTjH9iFNYuf4XyetCfWaRXcFfmX7nQ3jdq75UkkheWmSsgWkdbOHbL+5QcwLgL09Xq3uYNubN823Fn3NJzeRitPWpMBa54TYRRpbhjEGV2reO9jjFeveabuRmn+NBpS8/vSySWI2UU/rxZ4Rzz5JGxVYY7hm4jrhZOdGCVVqQlVqvNwyH55r+5om8WoTFlR12IFkXbAPyXpqBaC0xHUQAM27YBYT+5vANyWtQ1rgvCYP5L5he7KKwCXlrlzEci5wjKTbKSAPXZFle8U3ALbvlfTYAnEvZ3yXxXsnvN6oy0JJuecHtk/Mv8N1JC09qpsrkwxGLpBUolPhgTwg7PnJr8wkVeojSG8zpV+G0zSQGZb0OdsfmHD6/ZOcCwYkL4Kc6tyNbfsWUpdoo8F2ZgXguYzvfC+xIXsUcAPwYlKn5R6kQplGSNqRtJnwjyRVkk1s357vnz8HBirAAe4GrlPycl6kUiPpq9BMpabmMxW4UNK6pTZAukytjcHccfNq8uQ/NcEVkezvcTtwK8myrbFssSrYWth+SNLDklYstekftMZDpL+5ZYF1JeECEsN9RZF7AlcCx5A2Gt4AbNU0fod4sMQmwkQqzp9grFDoZTneqZJGeiOyFrYXAgv7iskWSpo1xWTONsO2fyPpyaTuUEiKISUsjKqSC1r3Al4KfJ+x+9CZwEBNEpIOY/JO5KaLvJNucFLAPlvS/yMtIG9EkpT/NmnDtzF5PFFayXhnkurUzyR9BJgnab8mDSKDFtgsQdwt+zaR5wNvBp4p6bsU2kQeAkXtTioW5r0S2JBk
PYHtW3JRWVMA5V0yAAAgAElEQVT2ZLzq6WdJFhSj/tw7Fvga6ecCqcD+O4yt7wzCzfnv+STgp5LuZMyurRE1NmT7Yq8IfJzx6h6fHOE5yhHAnxlv+3IEzW1fal1/+5EKvcapyzcJmNdYLiRtlh7sZNdzJfB+xn6PTXkR8GzbvTXaI0gbtE3ZtNd8CYushv6hQFwAJH3f9qtLxcvMn9CMfGZuEmjCvsACSePU6hrGBOpd07ZPk/QM0mY9wA22/zbd9zzK2PMYa2zZt0ljS80xuMZUdZYmrR/+Xz5ek7Rm25QFpAaUb9LXgNKQH5L2wa9lrMiiFNWefbXJTYG2fU/T4pvM5blg7+h8vAdlbERrMr90E6qkfYC3MFacfYySwtCg+wu9uNOqKrmBak8U4BRE2YsMuI2xiX9vwrSfpD8BB9j+eisJBjMKDUl2OhcuPCt//BG4GniPpLfafu2AYXckeYW/m/TAWJG06TuqVPE6t712jjdpl0XT+KQCpy1z1f9ppAfzrqSf+cihMUUggDmkhcgVC4T+KnAisIqkTwOvAT5SIG5VKqkXbAdMLLZ5ySTngsHZFfiypO8D37ZdYpLU46O2F+QFzReROt8PodliHsA/2t5Z0o62j5B0LGUW/3cHvtS/YdwrApPUxHLwxPzR4+wGsYChPVM3A65Skrj+G5STnQ4WcTKpQOty+pTDmlKjUCazOmkBqLStxb3AtXkBdVGBc5PitGC4aHK/84toUISb455IUi44Cni57VvzS8dJGvUFnCL0jTd/KOmd1FGgrDF/AvidpP8ijec+l4sO5zTNt6tEMRlI2gU4gDQWEnCQpPfZ/l6riU2DkoT6XcC3gA/2bbD8t6Qmihn9Vq/Lkja/b2kQr0fxDc4+liUV91xu+8FCMWvSm4tsAWxDubkIku5hbAz+D6TNp/tsrzBgvGFsItdmm5rBVc6O+u+2Lam3mf645tkB6fpdlrSxB7AM8LtCsWvyWI+3ezta0vuaBLTdK+b5DyU12BVJa3wlqLEh2+PbwM8YK2B5HUntbFTtQ57r8RYvZ0ka5eaZB2zfIWmOku3eWZK+3DDmjsDmwL8CR+YCgzVJTS5FCkSBX5Is8Hob6avnc02ZrPmyZEFAYyuySXhI0tNt/woWKWg1ug5tX6pk1VNMra6PKtd0X4Hv1bnA9+NNC3z7yQU3p0j6D9unLPYbloBKY/CXl8htGmo0oDy11ppm5WdfFZTsWr8NzE2Hugt4Y4EinLcB7yS5aEC6H496jUGNJtQ3kYot74O0x0BaJ2tUgAO8In9ehdTsfGY+3po0nxi4ACcsqIZIXiC6sHTlVzA70XAk775EugGdAXzLfb6pamClIek9JB/1Lkyeew/Pol7nE+IfT+qyOCaf2h1Y0XajLguNWS/tDSxn+/MaYesljfddfhD4NamS/vwCsZ9FWsAScIbtxuoetSnZWSDp7aQir6cBv+p7aS7JS73UInLAokr33UidvWbMguKehnGvzAv/+wPX2j5WfdZqDeJeYnsTSeeS/k5uJXVvNFXheoRsukZUInNIz9TO2kR0BUk/s/3cCnH3J41bShfK9OIXtbXQFFZqrqvwFBRElfzOJb2EpNrwfNKC9PnAIb3u8tnAhPEmTCi6LPDsqzJ/yt//WJINwLW2fyFpNWA9F5Bn7xoTiskO6ysmQ9nuuLXkhkjukt6u13GbN5sWekStnSFtLnmCFYuktW3/uvD7zAHOd3Pb6E7ZZ9ek1lxkkvcRaRN4M9sfHDDGY0mbyEeTGpGeTFLl/BRwnu1ZUXS6OFTAjlrSvwHPIBWH7k+y/Dq2QDfySaSx0DjVU+BmGL3C8r4C3w8Ad5LWEE1q0HmC7Q+1ldt0SLrc9kaL/5cDxX7E+uOIr0lWsX2phaSFJGu2zwIrkVTa5jd97uXYi+7teV7yCeCFtvduEPOHpGtiRdK13Rsjb0Jaf9qqYc57kK63eSTlotcAH7G9oEHMNXpfAqeSGhgFZawoJW1D
Wi/st5nfyw1UM/uKWe7JxSzzgCLFLLWuaQ3JZneyNcoGsTozBu97Pr0L+AMFG1ByAcQZs3E+OhmSriE9N87Lx1sAX2+yDp4LC6+z/azF/uMRIhd6P520v1ekCbW3TuYxdcRlgUttr1cgZSSdDrzBWekqr7ccbvvFg8YMBZwhkquSt2o7j2BmkDcKlyLdBGooZQBcQxqsTmYPtUmDuHOB05VUoY4j+Tjf1iBeVSpXj0O9LgtJ2pykeNNTm1iqQNwqOCsCVYp9A2XkGodJyc6CY4EfkxbE+hcx72ky0A4mx/afJX0PWI4k//pK4H2SvtpwIbJW5/uhSkpZHwF+ACwPfHTQYP0FX3ny0WMucEGDuNVUaobxTI1Cm6FwoaT1bF9bMmitBXPVs7X4HnC/7Yfy+yxF6hoOusP9tu+XhKRlbN+gZGnUlL14pLT+UcDOBWJ3AtdXoKw1f8L2X+jrvsoLQ79vErPDHMpYMdnGkhYVk82W4pvMHI+Xu7+D0VdF+h5pM2jiudIbwM+ggF0kY/bZ51HZPlvlFElqMRQVLqdO0ZMkfZzxc9dHE+MvwBmSbrX9Cli0aP9bkkrWyBfgqLDdierZUa9Muob/TFoz+xiwbcOYUEH1tDITLebf2veagWLziRL3Cg1BERD4q6Qtek11SipnjdXEK7IRY7YvkFRabuytP4xgI9EOJIWofUjKbCuQCmVK0H/vOd9J1aOput4XGn7/tNg+Rkllr9d8uVOB5ssjGLuu18zHyuca27SR1sf+i5TzXcBPSGoOTaimVke9a7qKzW4u6H2q7d/2TjWN2UeXxuATn0/vnfB6kwaUi4ETc+H7A4wVWQykYDgDeKhXfANg+3xJjVQunWzmb5S0RonCvyGyfYWYh5FUU3vjw51IqqqlWL1XfJO5jTQWGJhQwAmCjiPpDOBVruChmwcqr2RsYfp82ydO/12PKv76pOr0VwM32y4xSS9OzerxHL9Kl4WkF5C8nC+w/TklKct9R61TqIcq+i12kYqdBfMYu6YvKPV3HCQk7Qj8C6nD8kjgCNu35y7Mn9teq0Hsop3vSmpkjzidP9v2FweMuyLwBAoXfGk4KjXVnqlBPfqKsh5D2nC7iQ7YfOWuqRcxwdbCdhOLNiRdDGxr+958vDxweoluyGA45AWFvUhFnC8idVIvbfulDeP+fELR96TnZgOqp0BZdf4UJKb4/T3e9qwpJgOQdACwPvCdfGpX4JoCG+rFyQ0tzwE+D/TbsawAvM92o+JTjbcwgqTo+CHb328Y97GkDU4xtsF5TK0mhhKKJLUoPReZELt/LWAOsDFJbWHzhnEXKS5JOsT225vEGyYqrC40Wee/CqiT1oobTE/Te4UqKwLm99iAtCayYn6fPwH/YvvqprFrMNU6Q49RaaiRdL7tLSY893q/x4dJP+cDbA9sTyLpCGAf23fl4ycAB9p+Y4PUJ3ufl7ucJdDTSXsLf8sN8OsDR/b+DwXi11B8Kz6eVUW1ulrXtKRTSBaD25H2XP5KUkVqrCYj6VpndQwlq7YitmRdGoP3mKoBxfbARVT5WbIj6W9t1hcaKNkALkf6u+ip4N1PUmRk0D0YJcX6DUnKYf028zs0TLkz5CKvzUg/zy3y6fNsX1nwPQ4mrSn3X9e/dBP1t7gugqDbSDqZdAP+KeNvwI2LLCR9nbSJ3H/T+ZXtdzaNneOvSuq4fS0wd1Qn6KoshZi7V9YBxnVZkGyYRnbjsDSSTmVyn8U/kH4ORSd6o46ki0iL0f2dBV9osggp6aMkRZ1eMdNOJAWqxp0FQULS4cC3bZ87yWvb2D5j+FlNTu4qhXT/mU9Sv4FknXGJR9CaLCt5LHQllZqaz9SgHl1ZLJ2IKtla1CrgDNpB0gvJfue2/94wVqek9WtSqxip9vwpSEQx2VhXL2kM178IOZIFX7lIfSdSx/4P+l66B/iu7QtbSWwKhrTB+STbf2yY6tBRYevMHPMU0rwf0jrI/wL/Z/uohnGHsolc
ClWwO1ElO+qKcaupng4DSUsDb2fMSvxs4L/cQEVbk6gXTXZuwNjFN2QneY8VICkFl4oZTE0uzrrQzWxPH1GwUakApaQl0FWk4s21SPfPHwDPadrA0Be/xv+/+Hi2ZjFL33sUvaYrF/geQZr/Xto0Vl/MTo3Be9RoQMmFIVuVKmzqOpKms4+z7YGUs/Ka0GQBzxkkXhuojHpf8fvwJO/xSsbGcOc2va6jACcIOo6kN0x23vYRBWLfADy7V8GaKw2vs/3shnHfQSoCWBlYABxvu4TlUhVqVo/n+EPZOJT0GeBu4Ju27ygRsySq4LPYZWp0Fki6EdjAY16ZywFXNZmYB2PULg6pRZ4wvcz2Pfl4LnCq7RdM/53toLrKb9WeqUF9JB1l+3WLOzcqSFpI2oz8LLASyYZqvhsq1Ui6ANi7110jaSPSolOjLvJguOTC72fYPkzJU355279uGDOKvjO1ipFqzZ+C8UQxWaK/q7crSNrcdlN7hcninmF7m8WdK/h+A29w9jqw+zceJe1j+yvFEy2IpB2AAxmzzlwDuMEN1Yty7CtIawHX5uPdSOq9jZqehrWJXIq8cdNTJdkYuDR/3WTTppY6aWdVT2si6ZvA0iSrGkhW4g/ZfnODmNVUhmpsyPbFXoakeL4WSakUANufbBo7mJ7eddTg+68mbarfmY+fCJxTesxReG39CtvzJL0f+KvtgwrH/xfbh5eI1Rez+Hi2cjFL567pPDf7R+A3pCa7IirJHR2D1yj4OpxUiPtjxtsYDqSoHsxs1Fy97wskm74Teusto85jFv9PgiAYcb4H3G/7IVi0AbxModi/JC2q9Ca4q+dzTVmdtJhyVYFYw6Cq1/kQFxAuAZ4OfAkYxcXp4j6LXSYX2mxQuLPgFlK34v35eBlSZ0RQACdf1oclrVijOKQiTwb6VRX+ns+NKvcC10qqoVJT85ka1GfcBlD+/W3UUi5Lwg6k+/E+jNlafKJA3H2BBZJuIS0wrUpS4Qg6gpJC2cakYpnDSBs5RwPPbxi6hg93V9kIuFDSuGKkXsd9g0XZWvOnYDy1fn9d4wpJ80t29dZmYvFNbs65A/i+7QcfbTxJywKPBZ6U1U16KjUrAP9fw3SnxPYdShYXg3COpPuAVSVtD1wLvAEY6QIckhrwZkywziwU+zWkscvupK7T1wP/XCDuHElPmLCJPLJr4f2NHHnjeKCimwkx7yY1Yu2mCXbUpCafxnGb5jgh7u/zGP7wrjW2ZOZPUJo4MxczPGrUpzIk6Zq+l+aSfn8leO6EzdezJJVqkDyZ9DdyOX0bs0F9mhTfZA4ELpK0IB/vDHy6YczJmtbe2jRmHw/k4s3Xk1SdIc2hGqExJbXD83FJJbXi41nbfwFOkLSKxlTVbiiQKwzpmi6hlNFHrabezo3BSTlvNqHg67KGMX+dP/4hf8xqJO1DWr+5B/gGSYXqg4MWwOmRFrvjsL3CIHGHhZL7ySak/8Mltm8jKZQNyluB9wAPSupZBLvWzyE3TT5AuhcNZJc4spOOIAiWmDOAbUkbkpB8Bk8nWfk0ZS5wvaRLSDfKTYDLJP0ABvcZtP0hoIp0cSV2IW1YfMH2Xbl6/H2L+Z6Rw/ZJbeewGM6Q9BPGS/YvbDGfVpnYWSClteSGnQV3A9flwgWTisoukfTVHDtsdppTszikFkeS/g56soo7AYe3l85iOYExG7XS1HymBpWQ9CHg34HlJPWKFUUqJju0tcSmQNnWglRoOtHWYj9JjWwtbF8q6Vmk4g2AG91A9j5ohVeS7PCuALB9S1Yna8Sod40PmVrFSFXmT8EjiGKyxKbAHpKKdvUOGZEKAvYgFaY+Wt5KKjx9CmlDqPc8/TPQdNNmWgbd4LS9paTHk/KdD7wZeKak75IUBg4pmGZJHsiFR3Oyis9Zkr5cIrDtm/LG6UkklbZ/dhkLnCqbyF1Ej7SjPkzSSNpRd7ixBeAhSU+3/SsASU8DHhow1rEkVYGiKkMTqLEh2+Op
tuN53UFsHynpMqBXBPgqF1Cun3ht276kacw+9gLeBnza9q8lrQ00sjHMrO9sYwhg+05JpVTUil8fU6nVMaFZaUCGck3bfnZPKaNArN9oEmXZxkl2cwxeo+CrRAPZTOKNtr8i6cUklevXke5DAxXg2J4LIOlTwO9zLJHmTasVybgSkt4MfAw4k5TzQZI+afvbg8bs/TyGyOtJP+eB70VhQRUEHUfSVbaft7hzA8ae1F+whwf0GZT0CuCLjA0G1wSudwHp4pp0pWBI0kFMXx07soUAkl4FbJkPG/ssdhlJpzHWWbBowcb2gQ1iTmqv0xc7bHYaMtXPeNR/trkbsv/au7LNfKZD0uOYRKUmd/o0jV3tmRrUR9L+vSLfLqMGthb5+3cGTrN9j6SPkLpu9nO2pApGH0mX2N5EY1LqjwMuGvEFvYB686cgmAx1zKJFyZLtNbaPrxB7b9sHlY5bg1yofyHJ6mWTvJl3JbAj8ALbR7ea4BRozDpzf+BJFLDO7G369J1ahTQH/htAieeepHUZ20Q+s8Qm8jBQYbsTdcyOWtLJpGLkLjW2IGkbUvf7TaQNpzWBvWyf1TDuOPWiUuN6VbQnlXQocJCztVwQwHCubUnzCl4jQ7HjKkXO90VMUKuz/aYCsatc05KeZPuPJWP2xV6kLGv7mZKeAiyw3UhZtmtjcJg65x6D5J7HtDv3itSyQtR3bddSHhpplO0hJX0FONv2iSpghSfp6gnqepOeGyXyuPOfbN+Rj5vY986b7vVRXusMBZwg6D739Q8sJW0ElOgUqrlAvB/1pIuLU7l6vAalulWGju2ayhZdo3hnwagXgcwEbB+RFzPXsH1j2/ksKfkZMrID1gnUVKmp9kwNhsI6kl5KKj55uO1kBsXNbC0APmp7Qe702gb4AnAIqUsrGHGUJO9OUbI/fbyktwBvJEkYByNOFNgEwyBv/kCSN+8Mth+W9H6geAEOcKukuR0pPt0R2Bz4V+BISU8mbdK/GjivzcQWw46kcfG7SZ23KwJN1FkBXt40qcWRC246UXQDVe1OumZH3cm1IdtnSHoG45UoG1m1VFYvqqHA0Susewywl6SbSEV1XVCICOozjGv7m6RxQAm6pqRWXK2u1jWd83uYtJ43L5/bx3ZJS86iyrJdHYNDteKglSdRiFqlwvt0hcslnQ6sDXwo/62VWJu8T9IewHdJ1+Ju9BUwjih3MP46uSefG4ReM/qypIK6q0n3n/VJ+6CbDxh3HJKeD/wHaV72GMbucU8bNGYU4ARB99mX5JV9C+mmsCrJuqc4KuB7l6kmXVyJml7nxelakYWyFYem9rW8gwZWHB3mQknrlegskHS87V0m6TAEynQWBoms8PUFkvft2pKeB3zSYTlRkmVt94pvsH2vpMcWij20Z2pQha+TpKcPygtkh3WpEK4fD2hrkemppr0M+IbtUyWNnL1AMDm2nVWM3kOyUFkH+Jjtn7abWTAIBedPQdDP5aQxvUjNIXfmrx9PUjFYu73UFstCSf8GHMf4zvemdir9xafbAgcwosWnWbXxDEm32n4FLNrc+i3wBka3oeatwHG2fwcUWXMY5U7xFqlld9I1O+rvMYnqabspLRm54OYaAEkbS7rF9i0NQu7JePWizwJXkZobm+Za4xqsXlgXdJchNa1p8f9kyXAlO66K3CVpeeBc4BhJt9N8o77WNX2OpPuAVSVtD1xLGgeVLMD5e55fGxYpajehy2PwGjwkaQ1nl4issjObLXfeBDwPuMn2X7Lqy14F4u5Oui6+QlbCy+dGmV8C/51Vz0wq5L9G0nsAbH9xSQPZ3hpA0gnAvN5emaTnkgpmSvEtUqPBODeKJkQBThB0HNuXSnoW47srHqj0do197zK9weB5lBsM1qRrBUMASPoh01tRjURBgO0t8udJK9B7EnWkjdUZT6XOgn3y51gIqc9/AJsAZwPYvkrJ9z0oR03lt2E+U4PC2F5I2thbkdQRslDSb0nKIUfPot/l
77J6ynbA5yQtA8xpOafg0XEFcJft97WdSNCYUvOnIFiE7bUBJH0DONH2j/LxS0gWQaNMr7D5nX3nDDQdL/cXnx7akeLTV/d9fb7t75GKDkaVucDpkv5EKqBaYPu2lnOaicyR9IQJdicl1u9PzB89zi4QsyY1VU+Hyd7A+pL+x/agjR2dUi/qFfVIOsr26/pfk3QU8LpJvzGYFQypae0TBWN1TUmtuFpdrWva9paSHk/a7J4PvBl4pqTvkmy+DmmSd+b4SZRlvzlosI6PwWvwYeB8SeeQ9i22JCk8zkqy2uevSX/HyxaM+7+ka7tL/Cp/9Dg5fx5YgYpkJbeoUd32zyQ9u0G8idxt+8cF4yF7NhekBUH3yR2yp3VEahmArFRwP+nBvCewAnBMga63KqiC1/kwUPKbXBXoecjvBtwGnATdksiXtFpDNYDOoAqerDnuUiQVp60HSixYIiRdbHsz9Xm8KnvAtp3bTEHSfJLs5jiVGtuXF4jduWdqMJ5ctPk60vjiFuAYYAtgPdtbtZja0MjjrO2Ba23/QtJqpP//6S2nFiwhkm4A/hH4DeMVIuJZEgTBIiRda3u9xZ2bDUg6hbQhvR1p/PZX4BLbG7Sa2DRozGrornxcymqoKpLWJxVSvRq42fa2Lac0o5D0euDfgXF2J7aPai+r4SPpKtvPW9y5rqBskTfg955E2pwep14E3AwjqV4EgKQrbM/rO16KND9Zt8W0gpaRdDlJTebsvjWzn9l+7oDxprWamm1rOVldoqdWVzp20Ws6K7JdSFLy2CQrvl1JKjR4ge2jpw2wZO8hUjHnP5PWD38CnOvm1oAxBs9IehJjzSYX2/5jm/m0iaQ3k5qgn0pSqtsMuMj2i6b9xsXHXRl4C7AWfUXZoz5nKI2k75DWx3r3hj2A5W3vVij+Z4GlSDaJi+4RTZ4joYATBN2nX2p5G1IVeRGpZRX2vVO2GiIVgfSq/3qykPvlTqpRtBqq4XU+DJ5ve+O+4x9Kusz2u1vL6FEg6fr85ddsH9xqMkOkYmfBQ5IelrSi7bsLpBpMznWSdgeWUvJ+fxdpQhkUorJKTbVnalAfSSeS/i6OAl5u+9b80nFZNnpWkK0tTpC0iqQ18ukb2swpeNS8uO0EgsEoPX8KgsVwSy4Y7l+EbGJzMhSyXPi6JEUHIFk8NAy7C6n49Au278rFp6OuIlbLaqg2twO3kqyiV2k5lxlHabsTddeOuprqaU3yOOAq2/dJ2pNUEPiVhlZPnVIvkvQhUhHZcpL+3DsN/B04tLXEglHhAdt3p7qIRTzcIN6B07xmxu6ls4XianUVr+kdgc1JiilHSnoyaQ71apJrQgm+lYsUfgqQHRl+RFrva0Inx+ClyQVO2wNPs/1JSWtI2sT2JW3n1hL7kApmL7a9dV67/kyBuCeTromFFLJGqo2kZwL/xiOLhprck/cC3s6Y08O5pDX7UvTW/vv3Uxs9R0IBJwg6Tk9lQdL+pKrjY/uVFxrGvoFJfO9s39E09hTvtxJwoe11FvuPh0jN6vGa5AKWl9m+KR8/DTjVdklptqrkv4nNbJ/adi7Dpka3kJLv5oakiUd/R/1Idkx1kaw88WHGd1d8ytmvPWhOTZWams/UoD5Z9vc5wPNJi3jnA4fMtutP0g6khcinkDbJ1gBusP2cVhMLglnAsOdPwexGyZrm48AL8qlzgU+MqrIsgKSPA1uRCnB+BLyEZL/0moLv8a+2R36TV9LVwFYTrIbOGdXuaUnvIBU6rUxSZzm+SWFIMBx6asJTKe02LAypRk3V05pIugbYAFgfOJxkdbKL7Re2mVcbSNrf9ofaziMYLSR9i2Qx90FSocW7gKVtv63VxGYYNdTqal3TExTEryVZiL3Q9t4FYn8KWMn2O7LS4KnAN2wf1jBu58bgNZB0CGnt7UW2n51/xqfbnt9yaq0g6VLb8yVdBWxq+2+Srmu6FtdFBcA8z/lPHrkuMtLjuNKEAk4QdJ/fKXlZbgd8
TtIywJxCsYv73k2H7TskbTWs93sUdNXrfF/gbEk35eO1GGEfTkmfs/2BCaffP8m5GU3lbqET8kdQiaw88eH8EdShpkpNzWdqUJ+9gD8DX83Hu5PUcHZuLaN2+BRJ6nZhLijbmmTJFQRBfYY6fwpmN3mRf5/F/sPR4jWkzekrbe+Vu50bWwxM4G10Q2XhQOAiSeOshlrMZ3GsDuxr+6q2EwmWnFx8sxRwuDtkR11Z9bQmD9q2pB2Bg21/S9KbBgnUYfWiHutIeimpeaaJwkkws9ibtF72N+BYctNa06CSXjXd67Zn21poDbW6Wtf0q/u+Pt/294DvlQhs+6OSPi/pP4GNgM/a/n6BuF0cg9dgU9vzlKzDemqO/9B2Ui1ys6THAycBP5V0J8lWvCmnSHqp7R8ViDUsHrRdUp1mMrVhAEqpDUtakfGFdecAn2ziJBEKOEHQcbLawvakTv1fZKnl9WyfXiB2cd+7LlOjerwmWSXiJ8DawA7APwEfHtXf30TFl3zumg4sKlShRmeBpMcB99t+KB8vBSyTi0aCAkg6i8kXx2ab7G01Kiu/VXumBvWR9POJKmGTnZvpKNlNbpw7Tja0/bCkq21v0HZuQTDTiflTMEwqSXtXRdIltjeRdDmwNXAPcL3tZxV8j86oF0palzFZ8zO7oCgjaRXG24f9X4vpBEuIpDNIdladsKOuqXpaE0nnAKcBbwS2JG2CXz2IslVX1Yt6SNqW1CCxGUk16zDbN7abVdA2kna2vWBx5waIeypp3fvMfGprkh38H0h2sG9sEr8r1FSrq3VNSzoC2MfZljOrqBzY5Hc2oSBLwEeBS0j358YFWV0cg9dA0n+TrrtLcyHOyiQFnE6Mw2si6YXAiqSxzN8bxroHeBxpfeEBxmyuV2icaGGyOhQkdbM/8Mh1kYFVomqrDUv6PvAz4Ih86nXABranLYz/mFYAACAASURBVPCcjlDACYKOkzfOT5C0iqQ18ukbCoUv7nvXcbrmdd5TiZhL+p2VVIkohqS3A+8AnpblenvMBS5oJ6uRoEZnwRnAtsC9+Xg54HTSYDkow7/1fb0sqWDvwZZymalUU6mp/EwN6nOFpM1sXwwgaVPgspZzaoO7lLzNzwWOkXQ7fbaDQRBUJeZPwTBZQJL2/iZ9i5AjzmW5M/QbpMXTe4GLCr/HKwrHq0beFBv5ohsASa8AvsiYxeWawPUk+89g9LkXuFZSV+yoa6qe1mRXkgrnG23fmueUBwwSqKvqRT1sLwQW5o7y3fLXvyXd/4/uiKJRUJ4PkcYvizv3aFkaWNf27yEVsJGunb0axu0a1dTqKl7T6/eKb/L73CmpaQHHxLHglaS/kVeQ5mZNFZG6OAavwVeBE4FVJH2apHT5kXZTGg1sn1Mw1txc2PIM+orgR5TLSdeY8vF7J7zeRK2mttrw0233K3J9ItuJDUwo4ARBx5G0A0m6uLcIsgZwQ1NvwWCMmtXjNampElGSPHB/ArA/yQO4xz2zzTu1nxqdBZN5hnbRR7Rr9Dp9285jplBZ+S2eqR1G0vUkmfpeJ/YawI2kIjjPFkW1rHb2V1Jh2h6krptjSnWFBEEQBKOBpMttb9R2HoMiaS1gBdvXLOafThfjPdO9bvuLg8YOxpOV9V7EBItL2wPZ6wTDRdIbJjtv+4jJzrdNV9azJiNb683Ph5fYvr1hvE6pF/UjaSVSB/mewC3AMcAWpPn7Vi2mFgwZSS8BXkpaXz+u76UVSIUzjdbMJF1v+9l9x3OA6/rPzSZqqdXVuKbz+GIr23fm4ycC5wyiHDYsuj4GL4mSXeQ2pIKLM2xf33JKI0NeowT4mu2DG8R5M8ny7KnAVaS9ogttb9M8yzpIWo7UcL8FqSDnPOA/bf+1QcyqasOSLgLeZ/v8fPx84Au2Nx80ZijgBEH3+RTppjtuEaRE4Bq+dx2lq17n1VQiSpL/nu4GdpM0j7EH8wXArC3AqdRZcJ+keb2BiaSNSJu0QSH6pBYh
XW8bkTa/g0JUVqmp9kwNhsL2bScwIrwVOM727xiTTg2CYAjE/CkYMj/MzSInUkjauxZ5njflaw0WTucO+H3Bo+cB23dImiNpju2zJH257aSCJeZ7TGJH3W5K09KJ9ayJSNqFpHhzNmkz8iBJ77P9vQZhu6ZeBICkE0nNEUcBL7d9a37pOEmzUaV0tnMLSZ12B5JCQo97SJYiTTlD0k+A7+Tj1wILC8TtFDXV6ipe0wcCF0nqqSDtDHy6QbxF1LC3ynRmDF4b2zcQyuGTYvvZuWhts4ah9iEV9l5se+tc9PSZxgnW5QjgzySVJEjqgEeQijAHpbba8NuAI/Oajkj7kv/SJGAo4ARBx5F0me2Nc7XwhrYflnS17Q0KxC7ue9dlalWP16KmSkQNJH2U9BDuyUDuBCywvV97WbVL6c4CSfOB7+ZYAlYFdrV9+bTfGCwxkn7NmNTig8CvSRtv57ea2AyipkpNzWdqEAwLSR8nPU//ROouXGD7tnazCoLZQcyfgmGSx50Tse0m0t5VkHTWNC/bdti0jTiSFpLm6J8FViKNw+fbDjvjDiDpYmBb2/fm4+WB00f199e19aweeR65XU/1RtLKpOaOgeeTXVMv6pEVT54DPB94GDgfOMT2/a0mFrSKpPfb/vyEc/vY/kqB2K9krAj+XNsnNo3ZNWqq1dW8piWty9gm+pmlnAcmU04roabWpTH4MCml+NJVJD3J9h8rxL3U9vxsh7Sp7b9Jum6U1dol/dz2uos7N4pIWgHA9p8bx4oCnCDoNn2LIPsDT6LgIkjY1SSmqh4f5YdcF5F0I2mD4v58vBxwle112s2sHSZ0FhzW11mwqEhgwLhL57gANw6opBMErVF5QaHaMzUIho2k9YFdgVcDN9vetuWUgmDGE/OnIGgHScsCbyJtDPU3zTTtcA4yuSDiflKjwZ4k25BjZmO3dxfp6vOpg41w1/bbpmQbnKubWKko2cs+Qr0oK8OOLJKOJ3W/H5NP7Q483vbO7WUVtI2kK2zPm3CuREHEoutE0jqkdc8fz7Y1z8qN2p27ptVBe6uu01N8sX1q27kMi6wM+XD//a1UYWGOdSKwF7AvaT38TmBp2y8tEb8Gko4GDrZ9cT7eFHin7dc3jPsyHjnf+2STmH2xlyGtn65Fn3tUk/hhQRUE3WdHkoXMu4E9SFYnRW46wF8lbTHB92422tXsR1iSDINbSA/PXuX8MsDv2kundQ5lrLNgY0mLOgsaFN/sDJxm+2eSPgLMk7RfA8n3YAKSpu1wt33CdK8HS0RN+fuaz9QgGDa3A7cCdwCrtJxLEMwWYv4UDBVJzwXWZfwi5JHtZTQ9kiZddC2Q81Ek+fsXk8Zue5AsF4KGSDrf9hbAbSSlT0hFOAD7SfoTcIDtr7eSYLCkdMqOeirVUwrYqFTmtAk2OLsCP2oY8wxgW5IVFcBywOnAqDeJPHdCp/tZkoqoWgTdQ9JupIKNtSX9oO+luSTl1qacC2yZLYZOI9ld7UoaD8wm7soKZ+cBx0i6nT7ruoZ08Zrut7cS8BrK2Vt1agxeA0mfs/2BCaffP8m5mc45ku4DVpW0PXAt8AagSAGO7VfmL/8jq4quSLrPjTIbARdK6hVOrwHcKOlaklrU+o82oKT/BB4LbA18k3Q9X1IoX4CTgbtJNol/W8y/XSJCAScIOo6k9wDH2S5eqCBpA+BI0k19ke+d7atLv9coE5Ykw0HSSSQ/y5+SFva2Iz1Eb4bR97cuTY3OAknX2F5f0hbAp4AvAB+zvelivjVYQiSdSloIOzOf2hq4EPgDaYAZXbgNqaz8Vu2ZGgTDQsmLfBdgZWABcHwpCecgCKYn5k/BMFGyHNyKtPj/I+AlwPm2X9NmXtMh6aC+w2WBbYArmubc657vm+8sDZxne7MmcYPFkzudL5ytyrVdQR2zo66pelobSa8mNVJBug81ssHpsHpRle73oJtIWhNYm7SO88G+l+4BrrH9YMP4V9ieJ2lvYDnb
n+/CdVKammp1Xb2mNWZvZeCsEmsjXRyD12AKRatrBimu6DqSHk8q3Dgc2IBko3kKSXHpkBZTa4V8z58S278ZIGZvntf7vDxJ6WzLgRMdH/9ntp9bIlaPUMAJgu4zFzg9dx0dByywfVuJwHmheAMV9L3rKDWrx4MxTswfPc5uKY9RoUZnwUP588uAb9g+VdJ+DWMG41kaWNf27wGUvOoPt71Xu2nNKGqq1FR7pgbBEFkd2Nf2VW0nEgSzjZg/BUPmNaQF3itt7yXpycDRLec0Lbb37j/Oi9XfLRC6ZzFxV+5IvpVQfxsKWZlyq7bzCKbH9qWSnkV37Khrqp5Wxfb3ge8XDNkp9aI+ine/B90lb7b+Bti80ltI0uakNaJeod5Sld5r5BiSWl1Xr+mlGftZLF0oZufG4CWR9HbgHcDTJF3T99Jc4IJ2smoPST8lNd8+TCpSu1PSlcD7gRe0mlxLDFJgswT0xj5/kfQUktr3agXjXyhpPdvXlgoYBThB0HFsfwL4hKT1SdKK50i62fa2TWNP9L2T1HvP2WbHsQOpenwfxqrHP9FqRjMQ20e0ncOIcYWkzSZ0FlzWMObvJP0XSV3oc/kan9MwZjCe1XvFN5nbSJPSoBxvZUylpuh9o+YzNQiGhe0PAUhahfFyyP835TcFQVCEmD8FQ+avWZ31wVz0dTupCLNL3EfqiG/Kodl24qPAD4DlgY8ViBssARPmP8EIou7ZUfca4c6lA41wvc1vSfcwtvkNadPXtldoEH5fYIGkcepFDeINi+3bTiAYHSpfI5DW7D8EnGj7OklPA85qGLMz5OIbbM+d7PWeWh3QpACnc9e0pH2At5CKIgUcLelQ2wdN/52LZSaMwZtwLPBjJlG0KqG21EF2JBUX/itwZC7IWpO0LnBem4nNME7JzRsHAFeQniXfaBq0V0RIqpfZS9JNJAuq3vNp4OLCsKAKghmCpFWBnYHXAnNLVB1LOo0x37uecga2D2wauwtMMTnoVUw/TJKUD6/zhkg63vYufQ+7cYxwBX1VJF1P6k4b11kAPMjgXpmPJU2YrrX9i6zOsp7t0wulPeuRdDDwDMZ7vv9yYrdvMDhZ6nUX0j24ikpNjWdqEAwLSa8Avgg8hbQQtCZwve3ntJpYEMwCZvv8KRgukr4O/DtpvPJe4F7gqlFWXpT0Q8bmfHNI0v3H2/7g1N8VBEFT1DE7akmPI3U6z2FM9fQY23e0mlhLZFu9rqgXBUEwgkhabbYVzGZ1ls1t35ePHwdc1HSNr4tj8FpImgdsQRrfXzDChb3VUbbEzV9fS2rgf2HsCZQnNz4ta/vuArGK22Utih0FOEHQbSS9g7QRuTKwgLR41djLMscu7ns3k1B4nRehNwGY6mFXSbJu5Kn68A9VhKpIehXQ8x89t6nnezA5fSo1rwZKKb9Ve6YGwbCQdDXJ43yh7Q0lbQ3saftNi/nWIAgaEvOnoC0krQWsYPuaxfzTVpH0wr7DB4Hf2L65QNxJ1W5CfSoIEr1NIUn7kxpyju3fKBo1JL2HMdXTWU2fetE9PfUiYJTVi4JgSiR9CjiHVARRTdVK0mdIRfHfnK2Fe8GiIoj5tu/Px8sCl9per+B7rEUHxuA1kPRR0hrqCfnUTqQmyf3ay6o9JD3N9k3560Nsv73tnGYykjYGbrF9S6F4R9l+3eLOPRrCgioIus/qwL62r6oQu7jv3UwivM7LkItvlgIOt7112/mMCjUKjyTtABzImCrCGsANQKgiFMT2CYxNPoJ63A7cSvJ8XaVQzJrP1CAYFg/kMcocSXNsnyXpy20nFQSzhJg/Ba1g+38lrSZpGdt/azufqbB9TqXQ/Zt4ywIvB66v9F5B0EW6Zkc9FzhdUjXV0w7xUdsLsnrRNiT1okOAkVQvCoLFcBOwO3BQVpw/j9S4dnLh97kEeDrwJeD1hWMH3eEw4L8l9RojdwK+VfINujIGr8SewAZ9BU6fBa4CZmUBDvBxSfvYvsv227M9
7oG239h2YjOUvYH1Jf2P7RLWnOP2x/J+5UZNAoYCThDMEEoqWkzwvXsGaXBcxPcuCKZC0hnAq0pIxwWTE6oI9ZjGz7rHHYRlXRGGoVITKlFBl5G0kLSw9FlgJVKx2nzb/9RqYkEwg4n5UzAK5Pv/04Hv2/63tvOZjCnGyncDlwHv7XWNFnifZYCf2N6qRLwg6DpdtaOuoXraNbqmXhQES0K2/d4F+DfgCbbntpxSMEPps0gCOM/2lRXeY+TH4DWQdBbwStt35ePHAyfYflG7mbXDZM/meF7XR9Jc2/c0+P4PkSzllgP+0jsN/B041PaHBo4dBThB0G0kvQL4ImOKFmsC19seWNGipvVNEEyFpJOBDYGf0tfBaPtdrSU1w5B0me2NcyHOhrYflnS17Q3azm2mE5Z15cgLj8fVUKmp8UwNgmGTN1juJ00Y9wRWAI6x/adWEwuCGUzMn4JRQZKAdW1f13Yuk5GtJ24GjiU9p15L2rC4Anh7qYKZ3HF6qe1/LBEvCGYKXWs0yJv0O5PuFXNnY0GrpFOA35HUi+YBfwUuiXWcoItI+iawLnAbSf3mfOAK2w8OGO8gJm+CA2JNebYi6YnTvV5jbWTUx+A1kHQSMJ+0l2PSc+oS0lh/1l1/eb9lK9t35uMnAueUtDybjeQiuikpZckpaf8mxTaTERZUQdB99gM2Y4KiRZOAvQXiqXzvgIF974JgGsK2pz53SVoeOBc4RtLtjJdrDyoRlnXl6A2GKy0eF3+mBsGw6ClxkRYze4uQyp/3yxL+ocQVBBWI+VMwTJZgU2GUF/53mLBpfKikq2x/QNK/Dxq0T4UKYCmSUuInG+QZBDOKrtlRT6J6+pbSqqcdYheSetEXbN+V1Yve13JOQTAoK5Ge03cBfwL+OGjxTeayIlkFM43LSeNCkZ53d+avHw/8H7D2IEE7PgavwYn5o8fZLeUxKhwIXCRpQT7eGfh0i/nMFA7Mn5cFNgauJl3P65OeAZsXep91JL0UOM32wyUChgJOEHScmooWkq6wPa/veCmS3Om6TWMHwUQkPQ643/ZD+XgpYBnbf5n+O4MlJf+M/0ryet8DWJGkinBHq4nNYCRdn7/8mu2DW01mhlBTpSZUooKZTChxBUF9Yv4UDANJv2aaTQXbA20qDANJFwFfAr6XT70GeI/tzXIhzvMGjNuvQvUgcFvDDb0gmFF0zY66puppV+maelEQTIekZwMvBt4NLGX7qS2nFMxAJH0DONH2j/LxS4CdbL91wHidHYMHw0HSuqTxFsCZs7h4uDiSTgA+bvvafPxc4D9sv6ZQ/G2BvUiNuQuAw2zf2CRmKOAEQffpKVqcRyFFi37fO0l/7p0m+941iR0E03AGsC1wbz5eDjgd+KfWMpp5vJW0iPU74Ii2k5kN2H523vTerO1cZhA1VWqKP1ODYFQIJa4gqEfMn4Jh0lvcn2pToc3cloA9gK8AXydtYFwM7ClpOeD/NYj7GOBm23/Lz7pXSzrS9l1NEw6CGcIDeSw4R9Ic22dJ+nLbSU1FZdXTTtE19aIgmA5JLwe2BF5AKlo4k7T+0jTuD5neimqHpu8RdJLNbL+ld2D7x5I+P2iwjo/BiyHpeNu7TFCgXMRstIvskQtuouimDuv0im8AbP8sF3MWwfZCYKGkFYHd8te/Bb4BHG37gUcbMxRwgqDjSHoscD9pgXdPYAWSokVjL8savndBMBWTdTw26YIMHomkj5Pki/8EHAcssH1bu1nNLCR9zvYHFncuGJzKym/VnqlBEATBzCfmT8EwkXSt7fUWd242IOkqkiT5WsCPgJOB59h+aZt5BcGoIGkhaXNwf+BJpEKO+bZHsuGppupp1+iaelEQTIekg0kFN+fZvqVg3K8AqwJH51O7kayZTwKwfU6p9wq6g6SfkP7een8XewAvsP3ihnFn9Rhc0mq2fz9BgXIRPXvmICiJpO+QmmT7r+flbe9W8D1WItmH7wncAhwDbAGsZ3urRxtv
TqnEgiAYLpLOz1/eRvJNvRM4GPgM8GtJv86eyU1YR9JLJcW9IhgG90nql+zfiGSXFBTC9ifygtU7gdWAc/JCXFCO7SY595KhZzGzmahS8xWaK78N45kaBEEQzHxi/hQMk1skfUTSWvnjw6SFwpFF0sqS/l3SoZK+3fsoEPrhbDn1KuAg2+8jzXeCIEjsCPyFZPdyGvAr4BWtZjQ9PdXT/8mKA9uQFLNmIw9k2/BF6kWkgsMg6CL32T6uv/hG0ucKxH2+7V1t/zB/7A5safucKL6Z1ewGrAycCJyQvy6xWd+5MXhJcvHNUsDhtn8z8aPt/IIZy17AdcA++ePn+VwRJJ1I2mtYDni57R3y82pvYPmBYoYCThDMTHK13oW212kQo7jvXRBMhaT5wHdJA1aROhd2tX15q4nNQCStCuwMvBaYO5ulIUsh6e3AO4CnkRYze8wFLrBdyiJp1tOGSk2JZ2oQBEEw84n5UzBMJD0R+DjJxgHgXOATo6zcJ+lC0sLm5cBDvfO2v98w7n8DXwY+DLzC9q8l/cz2c5vEDYKZgqT3MGZHPfLUVD3tGl1TLwqC6ZB0he15E85d03RdUtL1wMts35SPnwacaruYPUkQ9OjiGLwGks4AXmX77rZzCYKmZCu55wDPBx4GzgcOsX3/wDGjACcIZi49ObgCcXq+dx8GGvneBcF0SFoa6G1w3xh/Y2XJCh67kCr+FwDHZ2/SoCH5PvkE0qLYB/teume2TcBqIel821tIuocxj2Hlzw+TrNUOsP31Su9f5JkaBEEQzHxi/hQEk1PLYljSusDbgItsf0fS2sAutkt01QdB5+maHXVf0clngZWYxUUnkh5HUoeeQ7JbWJHUgHJHq4kFwaOgdtOapBeTxts35VNrAf9q+/QmcYOZhaTPAHcD34x7aHMknQxsCPyUPmVy2+9qLalgxiHpeNu7SLqWsf2ARZRqLJd0PPBnku0UwO7A423vPHDMKMAJgmA6SvveBcFUSNoZOM32PZI+AswD9rN9RcupzRgk7U/qeruq7VxmMtlKbQvSoPCC+BseDqFSEwRBEIwCMX8KaiPpy7b3lfRDJl+E3KGFtJYISfuRxms/ajuXIJiNSFof2BV4NXCz7W1bTmlS2lA9HVW6pl4UBJNRu2ktryn/BFgb2AH4J+DDsR4X9CNpJ+DpwAa2Xz/A93d2DF4DSW+Y7LztI4adSzBz6TXESlpzstdL2Z5J+rntdRd37lHFjAKcIAimIvverQMcRZJPv7Xvtctsh+dwUIye5KikLYBPAV8APmZ705ZTm3FIWgVYtnds+/9aTGdGIemjpM7CE/KpnUjdhfu1l9XsIVRqgiAIgjaJ+VMwDCRtZPtySS+c7HXb5ww7pyUlKxk+Dvgb8ABpc922Vxgw3lQdkb24YbUbBH2Muh1126qno0jX1IuCoA1iTTmYiKSlgHfZ/lLBmJ0dg9cgK7Tdb/uhfLwUsIztv7SbWTBTkfRkYH4+vMT27QVjHw0cbPvifLwp8M5BivUWxYwCnCAIpqKG710QTIWkK21vmFVarrV9bO9c27nNFCS9Avgi8BSShPOawPW2n9NqYjMISTeSOinuz8fLAVeFKksQBEEQzHxi/hQMG0n/ADwzH3bCwlfSE4FnML4hYKANi76OyPcCFwM3979eqiMyCLrOTLGjns2qp11RLwqCR4Ok6/OXX7N9cIM4saYcPAJJl9jepFLszo3BSyPpYmBb2/fm4+WB02ejXWRQH0m7AAcAZ5OKs7cE3mf7e4XiX09qpuo1qq8B3Ag8yICNHY8pkVgQBDOWvUi+d1/Nx7uTujkH9r0Lgmn4naT/ArYDPidpGZLHdVCO/YDNgIV5Yro1Sc45KMctpM2E3kbbMkBIRQdBEATB7CDmT8HQkLQVcATwv6RFyNUlvcH2uW3mNR2S3gzsAzwVuIo0N7kQ2GaQeH3Kh8sDhxIKEUEwFasD+3bdjtr2HfneNxu5HbgVuANYpeVcgqAItp+d
C+s2axgq1pSDybhA0sGkseF9vZNNrcm6OAavxLK94hsA2/dmC8kgqMGHgfk91RtJKwMLgSIFOMD2heIsIhRwgiCYkhq+d0EwFXmAtj2pU+EXklYD1rN9esupzRh61geSrgY2tP2wpKttb9B2bjMFSSeRpBB/SpLM3g64hNyNa/td7WUXBEEQBEFNYv4UDBNJlwO7274xHz8T+I7tjdrNbGqyVdR84GLbz5P0LOAztl9VKH4oRATBNIQddfeYKepFQQCLLGv+mtcjnwk8C/hxU/WQWFMOJkPSWZOctu0XNYzbuTF4DSRdAOzdK2iStBHJwmfzdjMLZiKSrrW9Xt/xHODq/nOjRijgBEEwHVdI2myC791lLecUzFCyP+gJklaRtEY+fUObOc1A7spykOcBx0i6nb4OgKAIJ+aPHme3lEcQBEEQBMMn5k/BMFm6t/APYPt/JC3dZkJLwP2275eEpGVs3yCppJVMKEQEwSRMZUdNsk0MRpsZoV4UBJlzgS0lPQE4HbiUVDi7R5OgvTXlvuPfA7+f+juC2YDtrSuF7uIYvAb7Agsk3UJSAlqVdD0HQQ1Ok/QT4Dv5eFfgRy3ms1hCAScIgimp4XsXBFMhaQfgQMYWhNYAbrAdC0KFyB0h95MGxXsCKwDH2P5Tq4kFQRAEQRDMAGL+FAwTSYcBDwFH51N7AEvZfmN7WU2PpBNJVm37Ai8C7iRtYry0YdxQiAiCacgquC9igh217Te1nFqwhIR6UTATkHSF7XmS9gaWs/15SVfZfl7buQUzD0lPBj4DPMX2SyStC2xu+1sN43ZuDF6LXHjUK6a/samaVRBMhiSRLIznA1vk0+fZPnHq72qfKMAJgmBKJK053eu2fzOsXIKZTywI1UPS+ba3kHQPyRYJUhEOwMPAn4ADbH+9lQRnAJKOt71LltV/xOAqNtyCIAiCYOYT86dgmEhaBngnfYuQwNdt/629rJYcSS8EVgROs/33hrH2B44LhYggmJywo+4uU6kXRbNa0EUkXQm8A/gS8Cbb1020FQmCUkj6MXAY8GHbG0h6DHBl07+3ro/BSyFpZ9I4/h5JHwHmAfv1LKmCoCRdfFZEAU4QBEEwEsSCUHtIWgm40HZJ+fdZhaTVbP9+qo232HALgiAIgiAISiFpKeA6289qO5cgCEYfSQuBnYDPAiuRCjnm2/6nVhMLFks0qwUziVx8+17gAtufk/Q0ksXau1pOLZiBSLrU9nxJV9reMJ9rpLgUY/AxJF1je31JWwCfAr4AfMz2pi2nFsxAJB0BHGz70rZzWVIe03YCQRAEQZC5S9LyJD/gYyTdDtzXck6zAtt3SNqq7Ty6TC6+WQo4vKLHcBAEQRAEQRBg+yFJN0paI2xIgiBYAnYg2VHvw5gd9SdazShYUh7IazZzJM2xfZakL7edVBAMgu1zgHMAJM0B/hjFN0FF7stNpwaQtBlwd5OAMQYfx0P588uAb9g+VdJ+bSYUzGg2BfaQ9BvSnqEYcZvvKMAJgiAIRoUdgb8C7yZ5p64IfLLVjGYRtn/fdg5dJ0/CHpa0ou1GE7ogCIIgCIIgWAxPAK6TdAl9jQu2d2gvpSAIRomeHTVwG4+0o95PUthRjz69ZrXziGa1oONIOhZ4G2nj/lJgBUlfsX1Au5kFM5T3AD8Ani7pAmBl4DUF4sYYPPE7Sf8FbAd8LltzzWk5p2Dm8uK2E3i0hAVVEARBMBJIeg9wnO3ftZ1LEAyKpJOBDYGfMn4SFh09QRAEQRAEQTGyjcMjyN3lQRAEiyXsqEcfSY8lqReJMfWiY2z/qdXEgmAAevY/kvYA5gEfBC4fZQWDoLtIWhv4LbAO6R56I/C8phY2MQZP5OfT9sC1tn8haTVgPdunt5xaMIOQAX1QOQAADeFJREFU9MTpXh/l8VAU4ARBEAQjgaSPA7sAfwKOAxbYvq3drILg0SHpDZOdt33EsHMJgiAIgiAIgiAIgumQtFoo4o4ePfUiSffwSPWi
h///9u4+xtKyPuP49xppRV4WW8IKtAjYWFJRcCmkVJGK0LQWocY3NKyx1jYa/lBjTUxbW6IQTbUamxSssYmtuo2wMdTXIiUlvEitRQVRtJoURSiyugsNriygXP3jnCm76zLLMrN7z5z9fpKTc+77nHnmymQmc5/n/J7fzeTcmd2LtKIk+TrwTOCfgL9te3WSm9oePziaZlCSLwFnz1/sm+RU4KK2zxibbLYkWQ3sOz92ay4tpSS3MlkHBXgycPf08ROB29oePTDegizAkSQtK0mOA84BXgzc3vaMwZGkRy3J/sCWtj+djh8HPL7tj8cmkyRJ0izY7sPYn9F21R6MI0kawO5FWomSvB54C3ATcCaTD1M/2vY5Q4NpJiU5CbgYOItJx6V3Ai9o+73HeDzX4FtJcjbwHuBwYAOTv+dvtj12aDDNpCQfBC5r+9np+PnAC9u+dmyyR2YBjiRpWUlyKPBS4OXAgbYh1UqS5AvAGW1/NB0fAFzR9lljk0mSJGmWJLkAuBP4CJOrAM8FDmv7l0ODSZL2CLsXaaVLEuBxbX8yOotmU5LfBD7AZCu/M9v+YAmO6RocSHIT8DzgyrZrkpwGrG37msHRNIOS3Lx996odzS0nFuBIkpaFJOcx2YLqEGA9cGnbW8amknbN/H7WO5uTJEmSFmNHWza4jYMkSVqukrwDeFfbe6bjXwD+pO1bxybTLEnyKbbtVPM0JgUzdwO0PXuRx3cNDiS5oe2J00KcNW0f2ht/DtozknwOuBb46HTqXODUtr8zLtXC9hkdQJKkqSOAN7a9cXQQaRE2Jzmh7ZcBkvw6cN/gTJIkSZo9m5OcC3yMyYcMrwA2j40kSZL0iJ7f9s/mB23vTvJ7gAU4Wkp/vZuP7xp84p5p5/drgHVJNrB3/hy0Z7wCOB+4bDq+Zjq3bNkBR5K0rCRZDew7P25728A40i6Z7i/8MeB/mLQhPRQ4p+2XhgaTJEnSTElyFPA3wLOZnPz/PJMLGr4zLpUkSdKOJfkqcFLb+6fjJwA3tD12bDLNoiRHA3e23TIdPwF40mLXyq7BJ5Lsz+Si0zkm3UgOAta13Tg0mLRMWIAjSVoWkpwFvBc4HNgAHAl8wzdhWmmS/BxwzHT4X20fHJlHkiRJkiRJGinJW4CzgA9Np14NfLLtu8al0qxKcgPwrLYPTMc/D3y+7Uljk82GJG8CLml7x+gsmn1JfhV4M3AUW+3u1PZ5ozLtjFtQSZKWiwuBk4Er265JchqwdnAmaZckeSlweduvJXkrcEKSC+e3pJIkSZKWQpJDgD/mZ09C/uGoTJIkSY+k7V8luQk4Yzp1QdvPjcykmbbPfPENQNsHpkU4i+Ia/P8dCFyRZBNwCbC+7V2DM2l2rQf+Dvh74KeDszwqFuBIkpaLB9tuTDKXZK7tVUneNzqUtIv+ou36JKcApzPZd/j9wG+MjSVJkqQZ8wngWuBKVshJSEmStHdrezlw+egc2iv8IMnZbT8JkOT3gR8uwXFdgwNt3wa8LclxwDnA1Ulub3vGTr5Ueix+0vb9o0PsCgtwJEnLxT1JDmCygF2XZAOweXAmaVfNv/E6E/hg288kuXBkIEmSJM2k/dq+ZXQISZKkxyLJlcCDwEVtPz06j2bO65h8xnDRdPw94JVLcFzX4NvaAHwf2AisHpxFs+tTSc4DLgPun59su2lcpIWl7egMkiSRZD9gCxAmW0+tAtYt53+i0vaSfBq4A/ht4ATgPuCLbY8fGkySJEkzZVrkfX3bz47OIkmStKuSHA4cBpzc9qKdvV56LKYX/NL2R0t0PNfgwLQY4mXAIUy2B7q07S1jU2lWJbl1B9Nt+5Q9HuZRsgBHkjRUkuvanpLkXmD+n1Km9w8Bm4B3t714SEBpF0wLyX4XuLntt5McBjyj7RWDo0mSJGmGTN8/7c/kCsAHmbyHattVQ4NJkiRJgyU5CDgfOHU6dTXw9rb/u8jjugYHkrwTuKTtjaOzSMuRBTiSpGUt
ycFMqsqPGZ1FerSSrAb2nR+3vW1gHEmSJM2gJL8IPJVt151Xj0skSZK0rSQ38/BFl9s8xaRw4bg9HEl7gSQfB74G/ON06pXA8W1ftATHdg0+5Tlw7SlJng48jW1/3z48LtHCLMCRJC17SQ5re+foHNLOJDkbeA9wOJM9cJ8MfLPtsUODSZIkaaYk+SPgDcAvAzcCJzO5cOH0ocEkSZK2kuTIhZ5v+909lUV7jyQ3tn3mzuYew3FdgwNJzgLey8PnwI8EvuE5cO0OSc4HnsukAOezwPOB69q+ZGSuhcyNDiBJ0s5YfKMV5AImb7y+1fZo4AzgC2MjSZIkaQa9ATgJ+G7b04A1wKJa6kuSJC21tt/d+gbcDdy71U3aHe5Lcsr8IMmzgfuW4LiuwScuZNtz4KfjOXDtPi9h8jv2/bavBo4HDhobaWH7jA4gSZI0Qx5suzHJXJK5tlcled/oUJIkSZo5W9puSUKSx7f9ZhK37ZUkSctSktcCbwO28PCWVAWeMiyUZtnrgA8nmf+Q/m7gVUtwXNfgE54D1550X9uHkvwkySomXZeOGB1qIRbgSJIkLZ17khwAXAOsS7IB2Dw4kyRJkmbP7UmeCPwz8K9J7gbcwkGSJC1Xbwae3vaHo4NodiV501bDDwP7Tx9vZtKp/KuL/BauwSfmz4Ffi+fAtfvdMP27+yDwJeBHwL+PjbSwtN35qyRJkrRTSfZn0s50DjiXSSvEdW03Dg0mSZKkmZXkt5isOy9v+8DoPJIkSdtLcjnworY/Hp1FsyvJ+dOHxzDZKuoTQICzgC+2XbuE32uvXYMn2Y9JN6sAa4FVTM6BbxoaTDMvyVHAqraLLabbrSzAkSRJWiLTqywuaXvH6CySJEmSJEnScpBkDfAh4D+A++fn275+WCjNrCTXAGe2vXc6PhD4TNtTxyZb2ZJc1/aUJPfy8FZymd4/BGwC3t324iEBtVdIchiwqe39O33xIG5BJUmStHQOBK5Isgm4BFjf9q7BmSRJkiRJkqSRPgD8G3Azkw/qpd3pScDWXWkemM5pEdqeMr0/cEfPJzkYuB6wAEe700eAX0ny8bZvHh1mR+yAI0mStMSSHAecA7wYuL3tGYMjSZIkSZIkSUMk+UrbNaNzaO+Q5M+BlwGXTadeyKRr+TvHpdo7JDms7Z2jc2i2JQnwtLZfH51lRyzAkSRJWmJJDgVeCrwcOLDtcYMjSZIkSZIkSUMkeQfwHeBTbLsF1aZRmTTbkpwAPGc6vKbtV0bmkbQ4SVYD+86P2942MM6CLMCRJElaIknOY3J1xSHAeuDStreMTSVJkiRJkiSNk+TWHUy37VP2eBhJ0oqR5GzgPcDhwAbgSOAbbY8dGmwB+4wOIEmSNEOOAN7Y9sbRQSRJkiRJkqRl4tfabtl6Ism+j/RiSZKmLgBOBq5suybJacDawZkWNDc6gCRJ0qxo+6dtb0yyOsmT52+jc0mSJEmSJEkDXf8o5yRJ2tqDbTcCc0nm2l4FnDg61ELsgCNJkrREkpwFvJft2iECy7YdoiRJkiRJkrQ7JDkU+CXgCUlO2OqpVcB+Y1JJklaQe5IcAFwDrEuyAdg8ONOC0nZ0BkmSpJmQ5CbgeWzXDrHtawZHkyRJkiRJkvaoJK8C/oBJt4L/3Oqpe4F/aHvZiFySpJUhyf7AfUx2djoXOAhYN+2KsyxZgCNJkrREktzQ9sRpIc6atg8luant8aOzSZIkSZIkSSMkWQsUOIqHd+do27cPCyVJWvaSvAm4pO0do7M8Wm5BJUmStHTm2yFeywpphyhJkiRJkiTtZq8E7ga+DGwZnEWStHIcCFyRZBNwCbC+7V2DMy3IDjiSJElLJMl+TE4iBFjLZD/rdW03DQ0mSZIkSZIkDZLka22fPjqHJGllSnIccA7wYuD2tmcMjvSI5kYHkCRJWumSXDd9eBdwD5Mrev4WeAdwa5Jbk5w3Kp8kSZIkSZI00PVJnjE6hCRpxdoAfB/YCKwenGVB
dsCRJEnazZIcDFzf9pjRWSRJkiRJkqQ9IcnNQIF9gKcC/w3cz6R7dNseNzCeJGmZm17Y/DLgEGA9cGnbW8amWtg+owNIkiTNurYbkzx3dA5JkiRJkiRpD3rB6ACSpBXtCOCNbW8cHeTRsgOOJEmSJEmSJEmSJEmSlp0kq4F958dtbxsYZ0FzowNIkiRJkiRJkiRJkiRJ85KcleTbwK3A1cB3gH8ZGmonLMCRJEmSJEmSJEmSJEnScnIhcDLwrbZHA6cDXxgbaWEW4EiSJEmSJEmSJEmSJGk5ebDtRmAuyVzbq4ATR4dayD6jA0iSJEmSJEmSJEmSJElbuSfJAcC1wLokG4DNgzMtKG1HZ5AkSZIkSZIkSZIkSZIASLIfsAUIsBZYBaxru2losAVYgCNJkiRJkiRJkiRJkqThklzX9pQk9wLzBS2Z3j8EbALe3fbiIQEXYAGOJEmSJEmSJEmSJEmSlr0kBwPXtz1mdJbtWYAjSZIkSZIkSZIkSZKkFSHJYW3vHJ1jexbgSJIkSZIkSZIkSZIkSYswNzqAJEmSJEmSJEmSJEmStJJZgCNJkiRJkiRJkiRJkiQtggU4kiRJkiRJkiRJkiRJ0iJYgCNJkiRJkiRJkiRJkiQtwv8B8fLlZPf8gywAAAAASUVORK5CYII=\n","text/plain":["
"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":573},"id":"73UChGrePhr1","outputId":"af8b97e5-cec0-469e-c55d-433364ee31a5"},"source":["exp = train_df.y.str.split(',').explode().value_counts()\n","top_100_tags = list(exp[0:25].index)\n","# z = lambda r : True if r.split(',') in top_100_tags else False\n","z = lambda r : True if all(x in top_100_tags for x in r.split(',') ) else False\n","top_100_idx = train_df.y.map(z)\n","train_df = train_df[top_100_idx]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
IdTitleBodyTagsCreationDateYytext
1334556906output FILE ,is this a fault?\\r\\nmy code here\\r\\n\\r\\n #include <stdi...<c++>2016-01-01 14:20:01LQ_EDITc++output FILE ,is this a fault?
2434560768Can I throw from class init() in Swift with co...<p>I'd like my class <em>init()</em> in Swift ...<swift>2016-01-01 22:42:24HQswiftCan I throw from class init() in Swift with co...
2534560942C# - Count a specific word in richTextBox1 and...<p>I'm not sure, if this question is unique, b...<c#>2016-01-01 23:06:53LQ_CLOSEc#C# - Count a specific word in richTextBox1 and...
3034562551c++ vector type function implemetationclass City\\r\\n {\\r\\n private:\\r\\n...<c++>2016-01-02 04:17:27LQ_EDITc++c++ vector type function implemetation
4834566364japanese and portuguese language cannot supportMy site Japanese supported. But Portuguese la...<php>2016-01-02 13:20:49LQ_EDITphpjapanese and portuguese language cannot support
...........................
4499260458575MySQL how to query five tables in one SELECT<p>I have 5 tables as follows:</p>\\n\\n<ul>\\n<l...<mysql>2020-02-28 20:07:09LQ_CLOSEmysqlMySQL how to query five tables in one SELECT
4499360460748Copy value of list not reference<p>I have a list that i want to compare to aft...<python>2020-02-28 23:54:33LQ_CLOSEpythonCopy value of list not reference
4499460461193Weird question, but how do I make a python scr...<p>Before you get confused, I am going to comp...<python><python-3.x>2020-02-29 01:25:40LQ_CLOSEpython,python-3.xWeird question, but how do I make a python scr...
4499660461754Does Python execute code from the top or botto...<p>I am working on learning Python and was won...<python>2020-02-29 03:33:59LQ_CLOSEpythonDoes Python execute code from the top or botto...
4499860465318how to implement fill in the blank in Swift<p>\"I _____ any questions.\"</p>\\n\\n<p>I want t...<ios><swift>2020-02-29 12:50:43LQ_CLOSEios,swifthow to implement fill in the blank in Swift
\n","

9968 rows × 8 columns

\n","
"],"text/plain":[" Id ... text\n","13 34556906 ... output FILE ,is this a fault?\n","24 34560768 ... Can I throw from class init() in Swift with co...\n","25 34560942 ... C# - Count a specific word in richTextBox1 and...\n","30 34562551 ... c++ vector type function implemetation\n","48 34566364 ... japanese and portuguese language cannot support\n","... ... ... ...\n","44992 60458575 ... MySQL how to query five tables in one SELECT\n","44993 60460748 ... Copy value of list not reference\n","44994 60461193 ... Weird question, but how do I make a python scr...\n","44996 60461754 ... Does Python execute code from the top or botto...\n","44998 60465318 ... how to implement fill in the blank in Swift\n","\n","[9968 rows x 8 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":653},"id":"e_z1IU-XT0a0","outputId":"dc80c79e-11a0-4e63-bd40-8d933dbbb6aa"},"source":["import nlu\n","# load a trainable pipeline by specifying the train prefix \n","\n","unfitted_pipe = nlu.load('train.multi_classifier')\n","#configure epochs\n","unfitted_pipe['multi_classifier'].setMaxEpochs(100)\n","unfitted_pipe['multi_classifier'].setLr(0.005) \n","# fit it on a datset with label='y' and text columns. Labels seperated by ','\n","fitted_pipe = unfitted_pipe.fit(train_df[['y','text']], label_seperator=',')\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df[['y','text']])\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytextmulti_classifier_confidencessentencedefault_name_embeddingsmulti_classifier_classes
origin_index
13c++output FILE ,is this a fault?[]output FILE ,is this a fault?[0.04620636999607086, -0.04046135023236275, -0...[]
24swiftCan I throw from class init() in Swift with co...[0.86285734, 0.98327714]Can I throw from class init() in Swift with co...[0.053270746022462845, -0.00784565694630146, -...[swift, c]
25c#C# - Count a specific word in richTextBox1 and...[0.64955217]C# - Count a specific word in richTextBox1 and...[-0.005682709161192179, -0.023547030985355377,...[regex]
30c++c++ vector type function implemetation[0.9755105, 0.77180904, 0.9789763]c++ vector type function implemetation[0.024628309532999992, -0.015623562969267368, ...[c++, python-3.x, python]
48phpjapanese and portuguese language cannot support[0.55255216]japanese and portuguese language cannot support[0.038325726985931396, -0.005848723463714123, ...[php]
.....................
44992mysqlMySQL how to query five tables in one SELECT[0.6404308, 0.99544823]MySQL how to query five tables in one SELECT[0.006962132174521685, -0.03580842167139053, -...[sql, mysql]
44993pythonCopy value of list not reference[0.591653]Copy value of list not reference[0.025995030999183655, 0.001833591377362609, -...[javascript]
44994python,python-3.xWeird question, but how do I make a python scr...[0.7427199, 0.99999976, 0.70473063, 0.72811186...Weird question, but how do I make a python scr...[0.018493961542844772, -0.04660267382860184, -...[html, python, javascript, node.js, php]
44996pythonDoes Python execute code from the top or botto...[0.9977689, 0.794142]Does Python execute code from the top or botto...[0.01413149293512106, -0.02844131551682949, -0...[python, php]
44998ios,swifthow to implement fill in the blank in Swift[0.9999993]how to implement fill in the blank in Swift[0.019475314766168594, -0.022571099922060966, ...[swift]
\n","

10944 rows × 6 columns

\n","
"],"text/plain":[" y ... multi_classifier_classes\n","origin_index ... \n","13 c++ ... []\n","24 swift ... [swift, c]\n","25 c# ... [regex]\n","30 c++ ... [c++, python-3.x, python]\n","48 php ... [php]\n","... ... ... ...\n","44992 mysql ... [sql, mysql]\n","44993 python ... [javascript]\n","44994 python,python-3.x ... [html, python, javascript, node.js, php]\n","44996 python ... [python, php]\n","44998 ios,swift ... [swift]\n","\n","[10944 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"markdown","metadata":{"id":"DL_5aY9b3jSd"},"source":["# 4. Evaluate the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"0YDA2KunCeqQ","outputId":"8f72b51d-8e4c-49e8-884e-af5b0fdfa1ac"},"source":["from sklearn.preprocessing import MultiLabelBinarizer\n","from sklearn.metrics import classification_report\n","from sklearn.metrics import f1_score\n","from sklearn.metrics import roc_auc_score\n","mlb = MultiLabelBinarizer()\n","mlb = mlb.fit(preds.y.str.split(','))\n","y_true = mlb.transform(preds['y'].str.split(','))\n","y_pred = mlb.transform(preds.multi_classifier_classes.str.join(',').str.split(','))\n","print(\"Classification report: \\n\", (classification_report(y_true, y_pred)))\n","print(\"F1 micro averaging:\",(f1_score(y_true, y_pred, average='micro')))\n","print(\"ROC: \",(roc_auc_score(y_true, y_pred, average=\"micro\")))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Classification report: \n"," precision recall f1-score support\n","\n"," 0 0.67 0.80 0.73 840\n"," 1 0.22 0.62 0.32 237\n"," 2 0.37 0.47 0.41 467\n"," 3 0.38 0.67 0.49 561\n"," 4 0.48 0.54 0.51 831\n"," 5 0.54 0.58 0.56 697\n"," 6 0.49 0.73 0.59 792\n"," 7 0.58 0.39 0.47 1352\n"," 8 0.20 0.18 0.19 158\n"," 9 0.49 0.77 0.60 1431\n"," 10 0.57 0.75 0.65 2343\n"," 11 0.36 0.56 0.43 833\n"," 12 0.34 0.24 0.28 300\n"," 13 0.51 0.74 0.60 539\n"," 14 0.19 0.28 0.23 106\n"," 15 0.63 0.67 0.65 1283\n"," 16 0.61 0.74 0.67 
1402\n"," 17 0.21 0.25 0.23 411\n"," 18 0.38 0.47 0.42 261\n"," 19 0.90 0.10 0.19 183\n"," 20 0.56 0.75 0.64 451\n"," 21 0.56 0.73 0.63 485\n"," 22 0.45 0.60 0.51 340\n"," 23 0.34 0.13 0.19 220\n"," 24 0.53 0.73 0.61 268\n","\n"," micro avg 0.50 0.63 0.56 16791\n"," macro avg 0.46 0.54 0.47 16791\n","weighted avg 0.51 0.63 0.55 16791\n"," samples avg 0.54 0.65 0.55 16791\n","\n","F1 micro averaging: 0.5556585043017869\n","ROC: 0.7920968190895907\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"mhFKVN93o1ZO"},"source":["# 5. Lets try different Sentence Emebddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"CzJd8omao0gt","outputId":"c3903ffc-ee61-47c1-87cf-bb1876436e25"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model 
sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP 
model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following 
Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"0ofYHpu7sloS","outputId":"ea715585-daa2-433d-d281-02b9e61222a4"},"source":["pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.multi_classifier')\n","pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n","The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['en_embed_sentence_small_bert_L12_768'] has settable params:\n","pipe['en_embed_sentence_small_bert_L12_768'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['en_embed_sentence_small_bert_L12_768'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['en_embed_sentence_small_bert_L12_768'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['en_embed_sentence_small_bert_L12_768'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['en_embed_sentence_small_bert_L12_768'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['en_embed_sentence_small_bert_L12_768'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : 
False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['multi_classifier'] has settable params:\n","pipe['multi_classifier'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['multi_classifier'].setLr(0.001) | Info: Learning Rate | Currently set to : 0.001\n","pipe['multi_classifier'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['multi_classifier'].setValidationSplit(0.0) | Info: Choose the proportion of training dataset to be validated against the model on each Epoch. The value should be between 0.0 and 1.0 and by default it is 0.0 and off. | Currently set to : 0.0\n","pipe['multi_classifier'].setThreshold(0.5) | Info: The minimum threshold for each label to be accepted. Default is 0.5 | Currently set to : 0.5\n","pipe['multi_classifier'].setRandomSeed(44) | Info: Random seed | Currently set to : 44\n","pipe['multi_classifier'].setShufflePerEpoch(False) | Info: whether to shuffle the training data on each Epoch | Currently set to : False\n","pipe['multi_classifier'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. 
| Currently set to : True\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":570},"id":"ABHLgirmG1n9","outputId":"60e9995e-080c-4213-cf03-c7baba89bd6a"},"source":["# Load pipe with bert embeds\n","# using large embeddings can take a few hours..\n","pipe['multi_classifier'].setMaxEpochs(120) \n","pipe['multi_classifier'].setLr(0.0005) \n","fitted_pipe = pipe.fit(train_df[['y','text']],label_seperator=',')\n","preds = fitted_pipe.predict(train_df[['y','text']])\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytextmulti_classifier_confidencesen_embed_sentence_small_bert_L12_768_embeddingsdocumentmulti_classifier_classes
origin_index
13c++output FILE ,is this a fault?[][-0.0598912313580513, 0.429191917181015, -0.25...output FILE ,is this a fault?[]
24swiftCan I throw from class init() in Swift with co...[0.61310124][-0.45358699560165405, 0.1986018270254135, -0....Can I throw from class init() in Swift with co...[java]
25c#C# - Count a specific word in richTextBox1 and...[0.8172003][-0.592096209526062, 0.0025841565802693367, -0...C# - Count a specific word in richTextBox1 and...[c#]
30c++c++ vector type function implemetation[0.98100495][-0.6645137071609497, 0.34700289368629456, 0.1...c++ vector type function implemetation[c++]
48phpjapanese and portuguese language cannot support[][-0.30820634961128235, 0.5732622742652893, 0.5...japanese and portuguese language cannot support[]
.....................
44992mysqlMySQL how to query five tables in one SELECT[0.94582915][-0.6759300231933594, 0.1323285549879074, 0.56...MySQL how to query five tables in one SELECT[mysql]
44993pythonCopy value of list not reference[0.71518165][-0.7307966947555542, 0.3146328032016754, -0.5...Copy value of list not reference[python]
44994python,python-3.xWeird question, but how do I make a python scr...[0.9938545][-0.478365957736969, -0.015336859039962292, 0....Weird question, but how do I make a python scr...[python]
44996pythonDoes Python execute code from the top or botto...[0.998447][-0.7976136803627014, -0.17537403106689453, 0....Does Python execute code from the top or botto...[python]
44998ios,swifthow to implement fill in the blank in Swift[0.6266076, 0.9772264][-0.4111633598804474, 0.04349775239825249, 0.2...how to implement fill in the blank in Swift[ios, swift]
\n","

9968 rows × 6 columns

\n","
"],"text/plain":[" y ... multi_classifier_classes\n","origin_index ... \n","13 c++ ... []\n","24 swift ... [java]\n","25 c# ... [c#]\n","30 c++ ... [c++]\n","48 php ... []\n","... ... ... ...\n","44992 mysql ... [mysql]\n","44993 python ... [python]\n","44994 python,python-3.x ... [python]\n","44996 python ... [python]\n","44998 ios,swift ... [ios, swift]\n","\n","[9968 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":10}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"E7ah2LM6tIhG","outputId":"edaa6235-c8d2-474a-9cc1-331e0967086c"},"source":["from sklearn.preprocessing import MultiLabelBinarizer\n","from sklearn.metrics import classification_report\n","from sklearn.metrics import f1_score\n","from sklearn.metrics import roc_auc_score\n","mlb = MultiLabelBinarizer()\n","mlb = mlb.fit(preds.y.str.split(','))\n","y_true = mlb.transform(preds['y'].str.split(','))\n","y_pred = mlb.transform(preds.multi_classifier_classes.str.join(',').str.split(','))\n","print(\"Classification report: \\n\", (classification_report(y_true, y_pred)))\n","print(\"F1 micro averaging:\",(f1_score(y_true, y_pred, average='micro')))\n","print(\"ROC: \",(roc_auc_score(y_true, y_pred, average=\"micro\")))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Classification report: \n"," precision recall f1-score support\n","\n"," 0 0.96 0.67 0.79 738\n"," 1 0.95 0.71 0.82 228\n"," 2 0.70 0.53 0.60 440\n"," 3 0.91 0.63 0.75 508\n"," 4 0.95 0.57 0.71 733\n"," 5 0.91 0.58 0.71 621\n"," 6 0.88 0.70 0.78 736\n"," 7 0.81 0.65 0.72 1254\n"," 8 0.86 0.58 0.69 145\n"," 9 0.89 0.58 0.70 1288\n"," 10 0.87 0.73 0.80 2164\n"," 11 0.89 0.58 0.70 754\n"," 12 0.84 0.67 0.74 277\n"," 13 0.89 0.59 0.71 511\n"," 14 0.96 0.27 0.42 96\n"," 15 0.94 0.70 0.80 1193\n"," 16 0.93 0.70 0.80 1265\n"," 17 0.74 0.22 0.34 365\n"," 18 0.97 0.70 0.82 246\n"," 19 1.00 0.55 0.71 172\n"," 20 0.92 0.71 0.81 427\n"," 21 0.82 0.67 0.74 458\n"," 22 0.81 0.66 0.73 
319\n"," 23 0.83 0.23 0.36 211\n"," 24 0.97 0.64 0.77 242\n","\n"," micro avg 0.89 0.64 0.74 15391\n"," macro avg 0.89 0.59 0.70 15391\n","weighted avg 0.89 0.64 0.73 15391\n"," samples avg 0.70 0.64 0.65 15391\n","\n","F1 micro averaging: 0.7401884721644023\n","ROC: 0.8150061228796474\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Let's save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","outputId":"bbf99f56-d4b1-4440-ecb7-fe9d61935c62"},"source":["stored_model_path = './models/multi_classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/multi_classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Let's load the model from HDD.\n","This makes Offline NLU usage possible! \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Tesla plans to invest 10M into the ML sector')\n","preds"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"M1LjAwJVJxun"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file diff --git a/examples/colab/Training/named_entity_recognition/NLU_training_NER_demo.ipynb b/examples/colab/Training/named_entity_recognition/NLU_training_NER_demo.ipynb index d521e8d8..ddabd159 100644 --- a/examples/colab/Training/named_entity_recognition/NLU_training_NER_demo.ipynb +++ b/examples/colab/Training/named_entity_recognition/NLU_training_NER_demo.ipynb @@ -1 +1 @@ 
-{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_NER_demo.ipynb","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyOOijIjq9lJM7l/lkKS+Vv4"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/collab/Training/named_entity_recognition/NLU_training_demo.ipynb)\n","\n","\n","\n","# Training a Named Entity Recognition (NER) model with NLU \n","With the [NER_DL model](https://nlp.johnsnowlabs.com/docs/en/annotators#ner-dl-named-entity-recognition-deep-learning-annotator) from Spark NLP you can achieve State Of the Art results on any NER problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu > /dev/null\n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download conll2003 dataset"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1606719395651,"user_tz":-60,"elapsed":59783,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"29d77c28-a56f-4e18-afab-0b23b69a7b5c"},"source":["! wget https://github.com/patverga/torch-ner-nlp-from-scratch/raw/master/data/conll2003/eng.train"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2020-11-30 06:56:34-- https://github.com/patverga/torch-ner-nlp-from-scratch/raw/master/data/conll2003/eng.train\n","Resolving github.com (github.com)... 140.82.121.4\n","Connecting to github.com (github.com)|140.82.121.4|:443... connected.\n","HTTP request sent, awaiting response... 302 Found\n","Location: https://raw.githubusercontent.com/patverga/torch-ner-nlp-from-scratch/master/data/conll2003/eng.train [following]\n","--2020-11-30 06:56:34-- https://raw.githubusercontent.com/patverga/torch-ner-nlp-from-scratch/master/data/conll2003/eng.train\n","Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n","Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 3283420 (3.1M) [text/plain]\n","Saving to: ‘eng.train’\n","\n","eng.train 100%[===================>] 3.13M --.-KB/s in 0.06s \n","\n","2020-11-30 06:56:35 (55.8 MB/s) - ‘eng.train’ saved [3283420/3283420]\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. 
Train Deep Learning Classifier using nlu.load('train.ner')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":244},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1606720185735,"user_tz":-60,"elapsed":849845,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"785fa952-bce5-4ba6-9647-0dc4db57bab0"},"source":["import nlu\n","# load a trainable pipeline by specifying the train. prefix and fit it on a datset with label and text columns\n","# Since there are no\n","train_path = '/content/eng.train'\n","trainable_pipe = nlu.load('train.ner')\n","fitted_pipe = trainable_pipe.fit(dataset_path=train_path)\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict('Donald Trump and Angela Merkel dont share many oppinions')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["pos_anc download started this may take some time.\n","Approximate size to download 4.3 MB\n","[OK!]\n","glove_100d download started this may take some time.\n","Approximate size to download 145.3 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
posentitiesentities_confidencener_confidencedefault_name_embeddings
origin_index
0[NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS]Donald TrumpPER[0.9993000030517578, 0.9976000189781189, 0.999...[[-0.5496799945831299, -0.488319993019104, 0.5...
0[NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS]Angela MerkelPER[0.9993000030517578, 0.9976000189781189, 0.999...[[-0.5496799945831299, -0.488319993019104, 0.5...
\n","
"],"text/plain":[" pos ... default_name_embeddings\n","origin_index ... \n","0 [NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS] ... [[-0.5496799945831299, -0.488319993019104, 0.5...\n","0 [NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS] ... [[-0.5496799945831299, -0.488319993019104, 0.5...\n","\n","[2 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"owFhjKqzQiv5","executionInfo":{"status":"ok","timestamp":1606720185739,"user_tz":-60,"elapsed":849824,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"fcfc6b10-79c7-453c-f2af-b4d2622d4e91"},"source":["# Check out the Parameters of the NER model we can configure\n","trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['named_entity_recognizer_dl'] has settable params:\n","pipe['named_entity_recognizer_dl'].setMinEpochs(0) | Info: Minimum number of epochs to train | Currently set to : 0\n","pipe['named_entity_recognizer_dl'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['named_entity_recognizer_dl'].setLr(0.001) | Info: Learning Rate | Currently set to : 0.001\n","pipe['named_entity_recognizer_dl'].setPo(0.005) | Info: Learning rate decay coefficient. 
Real Learning Rage = lr / (1 + po * epoch) | Currently set to : 0.005\n","pipe['named_entity_recognizer_dl'].setBatchSize(8) | Info: Batch size | Currently set to : 8\n","pipe['named_entity_recognizer_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['named_entity_recognizer_dl'].setVerbose(0) | Info: Level of verbosity during training | Currently set to : 0\n","pipe['named_entity_recognizer_dl'].setUseContrib(True) | Info: whether to use contrib LSTM Cells. Not compatible with Windows. Might slightly improve accuracy. | Currently set to : True\n","pipe['named_entity_recognizer_dl'].setValidationSplit(0.0) | Info: Choose the proportion of training dataset to be validated against the model on each Epoch. The value should be between 0.0 and 1.0 and by default it is 0.0 and off. | Currently set to : 0.0\n","pipe['named_entity_recognizer_dl'].setEvaluationLogExtended(False) | Info: Choose the proportion of training dataset to be validated against the model on each Epoch. The value should be between 0.0 and 1.0 and by default it is 0.0 and off. | Currently set to : False\n","pipe['named_entity_recognizer_dl'].setIncludeConfidence(True) | Info: whether to include confidence scores in annotation metadata | Currently set to : True\n","pipe['named_entity_recognizer_dl'].setEnableOutputLogs(False) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : False\n","pipe['named_entity_recognizer_dl'].setEnableMemoryOptimizer(False) | Info: Whether to optimize for large datasets or not. Enabling this option can slow down training. 
| Currently set to : False\n",">>> pipe['pos'] has settable params:\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setIncludeStorage(True) | Info: whether to include indexed storage in trained model | Currently set to : True\n","pipe['default_name'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['default_name'].setDimension(100) | Info: Number of embedding dimensions | Currently set to : 100\n","pipe['default_name'].setStorageRef('glove_100d') | Info: unique reference name for identification | Currently set to : glove_100d\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['NerToChunkConverter'] has settable params:\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"25RTuUXMFyEA"},"source":["# 4. Lets use BERT embeddings instead of the default Glove_100d ones!"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"QMxPpeiDGNVi","executionInfo":{"status":"ok","timestamp":1606720185740,"user_tz":-60,"elapsed":849801,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"cb469930-5b2a-4706-b4f4-3c931be59799"},"source":["# We can use nlu.print_components(action='embed') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed') returns Spark NLP model glove_100d\n","nlu.load('en.embed.glove') returns Spark NLP model glove_100d\n","nlu.load('en.embed.glove.100d') returns Spark NLP model glove_100d\n","nlu.load('en.embed.bert') returns Spark NLP model bert_base_uncased\n","nlu.load('en.embed.bert.base_uncased') returns Spark NLP model bert_base_uncased\n","nlu.load('en.embed.bert.base_cased') returns Spark NLP model bert_base_cased\n","nlu.load('en.embed.bert.large_uncased') returns Spark NLP model bert_large_uncased\n","nlu.load('en.embed.bert.large_cased') returns Spark NLP model bert_large_cased\n","nlu.load('en.embed.biobert') returns Spark NLP model biobert_pubmed_base_cased\n","nlu.load('en.embed.biobert.pubmed_base_cased') returns Spark NLP model biobert_pubmed_base_cased\n","nlu.load('en.embed.biobert.pubmed_large_cased') returns Spark NLP model biobert_pubmed_large_cased\n","nlu.load('en.embed.biobert.pmc_base_cased') returns Spark NLP model biobert_pmc_base_cased\n","nlu.load('en.embed.biobert.pubmed_pmc_base_cased') returns Spark NLP model biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed.biobert.clinical_base_cased') returns Spark NLP model biobert_clinical_base_cased\n","nlu.load('en.embed.biobert.discharge_base_cased') returns Spark NLP model biobert_discharge_base_cased\n","nlu.load('en.embed.elmo') returns Spark NLP model elmo\n","nlu.load('en.embed.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed.albert.base_uncased') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed.albert.large_uncased') returns Spark NLP model albert_large_uncased\n","nlu.load('en.embed.albert.xlarge_uncased') returns Spark NLP model 
albert_xlarge_uncased\n","nlu.load('en.embed.albert.xxlarge_uncased') returns Spark NLP model albert_xxlarge_uncased\n","nlu.load('en.embed.xlnet') returns Spark NLP model xlnet_base_cased\n","nlu.load('en.embed.xlnet_base_cased') returns Spark NLP model xlnet_base_cased\n","nlu.load('en.embed.xlnet_large_cased') returns Spark NLP model xlnet_large_cased\n","nlu.load('en.embed.electra') returns Spark NLP model electra_small_uncased\n","nlu.load('en.embed.electra.small_uncased') returns Spark NLP model electra_small_uncased\n","nlu.load('en.embed.electra.base_uncased') returns Spark NLP model electra_base_uncased\n","nlu.load('en.embed.electra.large_uncased') returns Spark NLP model electra_large_uncased\n","nlu.load('en.embed.covidbert') returns Spark NLP model covidbert_large_uncased\n","nlu.load('en.embed.covidbert.large_uncased') returns Spark NLP model covidbert_large_uncased\n","nlu.load('en.embed.bert.small_L2_128') returns Spark NLP model small_bert_L2_128\n","nlu.load('en.embed.bert.small_L4_128') returns Spark NLP model small_bert_L4_128\n","nlu.load('en.embed.bert.small_L6_128') returns Spark NLP model small_bert_L6_128\n","nlu.load('en.embed.bert.small_L8_128') returns Spark NLP model small_bert_L8_128\n","nlu.load('en.embed.bert.small_L10_128') returns Spark NLP model small_bert_L10_128\n","nlu.load('en.embed.bert.small_L12_128') returns Spark NLP model small_bert_L12_128\n","nlu.load('en.embed.bert.small_L2_256') returns Spark NLP model small_bert_L2_256\n","nlu.load('en.embed.bert.small_L4_256') returns Spark NLP model small_bert_L4_256\n","nlu.load('en.embed.bert.small_L6_256') returns Spark NLP model small_bert_L6_256\n","nlu.load('en.embed.bert.small_L8_256') returns Spark NLP model small_bert_L8_256\n","nlu.load('en.embed.bert.small_L10_256') returns Spark NLP model small_bert_L10_256\n","nlu.load('en.embed.bert.small_L12_256') returns Spark NLP model small_bert_L12_256\n","nlu.load('en.embed.bert.small_L2_512') returns Spark NLP model 
small_bert_L2_512\n","nlu.load('en.embed.bert.small_L4_512') returns Spark NLP model small_bert_L4_512\n","nlu.load('en.embed.bert.small_L6_512') returns Spark NLP model small_bert_L6_512\n","nlu.load('en.embed.bert.small_L8_512') returns Spark NLP model small_bert_L8_512\n","nlu.load('en.embed.bert.small_L10_512') returns Spark NLP model small_bert_L10_512\n","nlu.load('en.embed.bert.small_L12_512') returns Spark NLP model small_bert_L12_512\n","nlu.load('en.embed.bert.small_L2_768') returns Spark NLP model small_bert_L2_768\n","nlu.load('en.embed.bert.small_L4_768') returns Spark NLP model small_bert_L4_768\n","nlu.load('en.embed.bert.small_L6_768') returns Spark NLP model small_bert_L6_768\n","nlu.load('en.embed.bert.small_L8_768') returns Spark NLP model small_bert_L8_768\n","nlu.load('en.embed.bert.small_L10_768') returns Spark NLP model small_bert_L10_768\n","nlu.load('en.embed.bert.small_L12_768') returns Spark NLP model small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed.bert.') returns Spark NLP model bert_finnish_cased\n","nlu.load('fi.embed.bert.cased.') returns Spark NLP model bert_finnish_cased\n","nlu.load('fi.embed.bert.uncased.') returns Spark NLP model bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed') returns Spark NLP model glove_840B_300\n","nlu.load('xx.embed.glove.840B_300') returns Spark NLP model glove_840B_300\n","nlu.load('xx.embed.glove.6B_300') returns Spark NLP model glove_6B_300\n","nlu.load('xx.embed.bert_multi_cased') returns Spark NLP model bert_multi_cased\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":244},"id":"Xz7xnvbCFxE3","executionInfo":{"status":"ok","timestamp":1606721019713,"user_tz":-60,"elapsed":1683756,"user":{"displayName":"Christian Kasim 
Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"b8b15ce7-149e-427a-9aad-37d76e074154"},"source":["# Add bert word embeddings to pipe \n","fitted_pipe = nlu.load('bert train.ner').fit(dataset_path=train_path)\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict('Donald Trump and Angela Merkel dont share many oppinions')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["small_bert_L2_128 download started this may take some time.\n","Approximate size to download 16.1 MB\n","[OK!]\n","pos_anc download started this may take some time.\n","Approximate size to download 4.3 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
bert_embeddingsposentities_confidencener_confidenceentities
origin_index
0[[-0.447601854801178, 1.0348625183105469, 0.51...[NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS]PER[0.7784000039100647, 0.9710999727249146, 0.997...Donald Trump
0[[-0.447601854801178, 1.0348625183105469, 0.51...[NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS]PER[0.7784000039100647, 0.9710999727249146, 0.997...Angela Merkel dont
\n","
"],"text/plain":[" bert_embeddings ... entities\n","origin_index ... \n","0 [[-0.447601854801178, 1.0348625183105469, 0.51... ... Donald Trump\n","0 [[-0.447601854801178, 1.0348625183105469, 0.51... ... Angela Merkel dont\n","\n","[2 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1606721039475,"user_tz":-60,"elapsed":1703498,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"b4b22c80-3318-4070-e516-36847a66e88e"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! 
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":137},"executionInfo":{"status":"ok","timestamp":1606721049691,"user_tz":-60,"elapsed":1713703,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"cdd160bf-462e-4dbe-b618-db259feb3987"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Donald Trump and Angela Merkel dont share many oppinions on laws about cheeseburgers')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
bert_embeddingsposentities_confidencener_confidenceentities
origin_index
0[[-0.6870577335357666, 1.1118954420089722, 0.5...[NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS, IN, ...PER[0.7975000143051147, 0.9343000054359436, 0.995...Donald Trump
0[[-0.6870577335357666, 1.1118954420089722, 0.5...[NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS, IN, ...PER[0.7975000143051147, 0.9343000054359436, 0.995...Angela Merkel dont
\n","
"],"text/plain":[" bert_embeddings ... entities\n","origin_index ... \n","0 [[-0.6870577335357666, 1.1118954420089722, 0.5... ... Donald Trump\n","0 [[-0.6870577335357666, 1.1118954420089722, 0.5... ... Angela Merkel dont\n","\n","[2 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1606721049695,"user_tz":-60,"elapsed":1713695,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"90c19529-41ab-4533-fba6-6107dac7c23e"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentiment_dl'] has settable params:\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(128) | Info: Number of embedding dimensions | Currently set to : 128\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setStorageRef('small_bert_L2_128') | Info: unique reference name for identification | Currently set to : small_bert_L2_128\n",">>> pipe['named_entity_recognizer_dl'] has settable params:\n","pipe['named_entity_recognizer_dl'].setIncludeConfidence(True) | Info: whether to include confidence scores in annotation metadata | Currently set to : True\n","pipe['named_entity_recognizer_dl'].setBatchSize(8) | Info: Size of every batch. 
| Currently set to : 8\n","pipe['named_entity_recognizer_dl'].setClasses(['O', 'B-ORG', 'I-ORG', 'I-MISC', 'I-PER', 'B-LOC', 'B-MISC', 'I-LOC']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['O', 'B-ORG', 'I-ORG', 'I-MISC', 'I-PER', 'B-LOC', 'B-MISC', 'I-LOC']\n","pipe['named_entity_recognizer_dl'].setStorageRef('small_bert_L2_128') | Info: unique reference name for identification | Currently set to : small_bert_L2_128\n",">>> pipe['NerToChunkConverter'] has settable params:\n","pipe['NerToChunkConverter'].setPreservePosition(True) | Info: Whether to preserve the original position of the tokens in the original document or use the modified tokens | Currently set to : True\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"o3jCHbIsMZrn","colab":{"base_uri":"https://localhost:8080/","height":299},"executionInfo":{"status":"error","timestamp":1607488227274,"user_tz":-60,"elapsed":919,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"f00d563f-c7ea-4f33-85d7-b9d45f35b1f0"},"source":["from varname import nameof\n"],"execution_count":null,"outputs":[{"output_type":"error","ename":"ModuleNotFoundError","evalue":"ignored","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mvarname\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnameof\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m","\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'varname'","","\u001b[0;31m---------------------------------------------------------------------------\u001b[0;32m\nNOTE: If your import is failing due to a missing package, you 
can\nmanually install dependencies using either !pip or !apt.\n\nTo view examples of installing some common dependencies, click the\n\"Open Examples\" button below.\n\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n"]}]},{"cell_type":"code","metadata":{"id":"USD6d66Sw6_P"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_NER_demo.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/named_entity_recognition/NLU_training_NER_demo.ipynb)\n","\n","\n","\n","# Training a Named Entity Recognition (NER) model with NLU \n","With the [NER_DL model](https://nlp.johnsnowlabs.com/docs/en/annotators#ner-dl-named-entity-recognition-deep-learning-annotator) from Spark NLP you can achieve State Of the Art results on any NER problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! 
pip install nlu > /dev/null\n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download conll2003 dataset"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1606719395651,"user_tz":-60,"elapsed":59783,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"29d77c28-a56f-4e18-afab-0b23b69a7b5c"},"source":["! wget https://github.com/patverga/torch-ner-nlp-from-scratch/raw/master/data/conll2003/eng.train"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2020-11-30 06:56:34-- https://github.com/patverga/torch-ner-nlp-from-scratch/raw/master/data/conll2003/eng.train\n","Resolving github.com (github.com)... 140.82.121.4\n","Connecting to github.com (github.com)|140.82.121.4|:443... connected.\n","HTTP request sent, awaiting response... 302 Found\n","Location: https://raw.githubusercontent.com/patverga/torch-ner-nlp-from-scratch/master/data/conll2003/eng.train [following]\n","--2020-11-30 06:56:34-- https://raw.githubusercontent.com/patverga/torch-ner-nlp-from-scratch/master/data/conll2003/eng.train\n","Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n","Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 3283420 (3.1M) [text/plain]\n","Saving to: ‘eng.train’\n","\n","eng.train 100%[===================>] 3.13M --.-KB/s in 0.06s \n","\n","2020-11-30 06:56:35 (55.8 MB/s) - ‘eng.train’ saved [3283420/3283420]\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. 
Train Deep Learning Classifier using nlu.load('train.ner')\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":244},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1606720185735,"user_tz":-60,"elapsed":849845,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"785fa952-bce5-4ba6-9647-0dc4db57bab0"},"source":["import nlu\n","# load a trainable pipeline by specifying the train. prefix and fit it on a dataset with label and text columns\n","# Since there are no\n","train_path = '/content/eng.train'\n","trainable_pipe = nlu.load('train.ner')\n","fitted_pipe = trainable_pipe.fit(dataset_path=train_path)\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict('Donald Trump and Angela Merkel dont share many oppinions')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["pos_anc download started this may take some time.\n","Approximate size to download 4.3 MB\n","[OK!]\n","glove_100d download started this may take some time.\n","Approximate size to download 145.3 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
posentitiesentities_confidencener_confidencedefault_name_embeddings
origin_index
0[NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS]Donald TrumpPER[0.9993000030517578, 0.9976000189781189, 0.999...[[-0.5496799945831299, -0.488319993019104, 0.5...
0[NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS]Angela MerkelPER[0.9993000030517578, 0.9976000189781189, 0.999...[[-0.5496799945831299, -0.488319993019104, 0.5...
\n","
"],"text/plain":[" pos ... default_name_embeddings\n","origin_index ... \n","0 [NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS] ... [[-0.5496799945831299, -0.488319993019104, 0.5...\n","0 [NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS] ... [[-0.5496799945831299, -0.488319993019104, 0.5...\n","\n","[2 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"owFhjKqzQiv5","executionInfo":{"status":"ok","timestamp":1606720185739,"user_tz":-60,"elapsed":849824,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"fcfc6b10-79c7-453c-f2af-b4d2622d4e91"},"source":["# Check out the Parameters of the NER model we can configure\n","trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['named_entity_recognizer_dl'] has settable params:\n","pipe['named_entity_recognizer_dl'].setMinEpochs(0) | Info: Minimum number of epochs to train | Currently set to : 0\n","pipe['named_entity_recognizer_dl'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['named_entity_recognizer_dl'].setLr(0.001) | Info: Learning Rate | Currently set to : 0.001\n","pipe['named_entity_recognizer_dl'].setPo(0.005) | Info: Learning rate decay coefficient. 
Real Learning Rage = lr / (1 + po * epoch) | Currently set to : 0.005\n","pipe['named_entity_recognizer_dl'].setBatchSize(8) | Info: Batch size | Currently set to : 8\n","pipe['named_entity_recognizer_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['named_entity_recognizer_dl'].setVerbose(0) | Info: Level of verbosity during training | Currently set to : 0\n","pipe['named_entity_recognizer_dl'].setUseContrib(True) | Info: whether to use contrib LSTM Cells. Not compatible with Windows. Might slightly improve accuracy. | Currently set to : True\n","pipe['named_entity_recognizer_dl'].setValidationSplit(0.0) | Info: Choose the proportion of training dataset to be validated against the model on each Epoch. The value should be between 0.0 and 1.0 and by default it is 0.0 and off. | Currently set to : 0.0\n","pipe['named_entity_recognizer_dl'].setEvaluationLogExtended(False) | Info: Choose the proportion of training dataset to be validated against the model on each Epoch. The value should be between 0.0 and 1.0 and by default it is 0.0 and off. | Currently set to : False\n","pipe['named_entity_recognizer_dl'].setIncludeConfidence(True) | Info: whether to include confidence scores in annotation metadata | Currently set to : True\n","pipe['named_entity_recognizer_dl'].setEnableOutputLogs(False) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : False\n","pipe['named_entity_recognizer_dl'].setEnableMemoryOptimizer(False) | Info: Whether to optimize for large datasets or not. Enabling this option can slow down training. 
| Currently set to : False\n",">>> pipe['pos'] has settable params:\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setIncludeStorage(True) | Info: whether to include indexed storage in trained model | Currently set to : True\n","pipe['default_name'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['default_name'].setDimension(100) | Info: Number of embedding dimensions | Currently set to : 100\n","pipe['default_name'].setStorageRef('glove_100d') | Info: unique reference name for identification | Currently set to : glove_100d\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['NerToChunkConverter'] has settable params:\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"25RTuUXMFyEA"},"source":["# 4. Let's use BERT embeddings instead of the default Glove_100d ones!"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"QMxPpeiDGNVi","executionInfo":{"status":"ok","timestamp":1606720185740,"user_tz":-60,"elapsed":849801,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"cb469930-5b2a-4706-b4f4-3c931be59799"},"source":["# We can use nlu.print_components(action='embed') to see every possible sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed') returns Spark NLP model glove_100d\n","nlu.load('en.embed.glove') returns Spark NLP model glove_100d\n","nlu.load('en.embed.glove.100d') returns Spark NLP model glove_100d\n","nlu.load('en.embed.bert') returns Spark NLP model bert_base_uncased\n","nlu.load('en.embed.bert.base_uncased') returns Spark NLP model bert_base_uncased\n","nlu.load('en.embed.bert.base_cased') returns Spark NLP model bert_base_cased\n","nlu.load('en.embed.bert.large_uncased') returns Spark NLP model bert_large_uncased\n","nlu.load('en.embed.bert.large_cased') returns Spark NLP model bert_large_cased\n","nlu.load('en.embed.biobert') returns Spark NLP model biobert_pubmed_base_cased\n","nlu.load('en.embed.biobert.pubmed_base_cased') returns Spark NLP model biobert_pubmed_base_cased\n","nlu.load('en.embed.biobert.pubmed_large_cased') returns Spark NLP model biobert_pubmed_large_cased\n","nlu.load('en.embed.biobert.pmc_base_cased') returns Spark NLP model biobert_pmc_base_cased\n","nlu.load('en.embed.biobert.pubmed_pmc_base_cased') returns Spark NLP model biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed.biobert.clinical_base_cased') returns Spark NLP model biobert_clinical_base_cased\n","nlu.load('en.embed.biobert.discharge_base_cased') returns Spark NLP model biobert_discharge_base_cased\n","nlu.load('en.embed.elmo') returns Spark NLP model elmo\n","nlu.load('en.embed.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed.albert.base_uncased') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed.albert.large_uncased') returns Spark NLP model albert_large_uncased\n","nlu.load('en.embed.albert.xlarge_uncased') returns Spark NLP model 
albert_xlarge_uncased\n","nlu.load('en.embed.albert.xxlarge_uncased') returns Spark NLP model albert_xxlarge_uncased\n","nlu.load('en.embed.xlnet') returns Spark NLP model xlnet_base_cased\n","nlu.load('en.embed.xlnet_base_cased') returns Spark NLP model xlnet_base_cased\n","nlu.load('en.embed.xlnet_large_cased') returns Spark NLP model xlnet_large_cased\n","nlu.load('en.embed.electra') returns Spark NLP model electra_small_uncased\n","nlu.load('en.embed.electra.small_uncased') returns Spark NLP model electra_small_uncased\n","nlu.load('en.embed.electra.base_uncased') returns Spark NLP model electra_base_uncased\n","nlu.load('en.embed.electra.large_uncased') returns Spark NLP model electra_large_uncased\n","nlu.load('en.embed.covidbert') returns Spark NLP model covidbert_large_uncased\n","nlu.load('en.embed.covidbert.large_uncased') returns Spark NLP model covidbert_large_uncased\n","nlu.load('en.embed.bert.small_L2_128') returns Spark NLP model small_bert_L2_128\n","nlu.load('en.embed.bert.small_L4_128') returns Spark NLP model small_bert_L4_128\n","nlu.load('en.embed.bert.small_L6_128') returns Spark NLP model small_bert_L6_128\n","nlu.load('en.embed.bert.small_L8_128') returns Spark NLP model small_bert_L8_128\n","nlu.load('en.embed.bert.small_L10_128') returns Spark NLP model small_bert_L10_128\n","nlu.load('en.embed.bert.small_L12_128') returns Spark NLP model small_bert_L12_128\n","nlu.load('en.embed.bert.small_L2_256') returns Spark NLP model small_bert_L2_256\n","nlu.load('en.embed.bert.small_L4_256') returns Spark NLP model small_bert_L4_256\n","nlu.load('en.embed.bert.small_L6_256') returns Spark NLP model small_bert_L6_256\n","nlu.load('en.embed.bert.small_L8_256') returns Spark NLP model small_bert_L8_256\n","nlu.load('en.embed.bert.small_L10_256') returns Spark NLP model small_bert_L10_256\n","nlu.load('en.embed.bert.small_L12_256') returns Spark NLP model small_bert_L12_256\n","nlu.load('en.embed.bert.small_L2_512') returns Spark NLP model 
small_bert_L2_512\n","nlu.load('en.embed.bert.small_L4_512') returns Spark NLP model small_bert_L4_512\n","nlu.load('en.embed.bert.small_L6_512') returns Spark NLP model small_bert_L6_512\n","nlu.load('en.embed.bert.small_L8_512') returns Spark NLP model small_bert_L8_512\n","nlu.load('en.embed.bert.small_L10_512') returns Spark NLP model small_bert_L10_512\n","nlu.load('en.embed.bert.small_L12_512') returns Spark NLP model small_bert_L12_512\n","nlu.load('en.embed.bert.small_L2_768') returns Spark NLP model small_bert_L2_768\n","nlu.load('en.embed.bert.small_L4_768') returns Spark NLP model small_bert_L4_768\n","nlu.load('en.embed.bert.small_L6_768') returns Spark NLP model small_bert_L6_768\n","nlu.load('en.embed.bert.small_L8_768') returns Spark NLP model small_bert_L8_768\n","nlu.load('en.embed.bert.small_L10_768') returns Spark NLP model small_bert_L10_768\n","nlu.load('en.embed.bert.small_L12_768') returns Spark NLP model small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed.bert.') returns Spark NLP model bert_finnish_cased\n","nlu.load('fi.embed.bert.cased.') returns Spark NLP model bert_finnish_cased\n","nlu.load('fi.embed.bert.uncased.') returns Spark NLP model bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed') returns Spark NLP model glove_840B_300\n","nlu.load('xx.embed.glove.840B_300') returns Spark NLP model glove_840B_300\n","nlu.load('xx.embed.glove.6B_300') returns Spark NLP model glove_6B_300\n","nlu.load('xx.embed.bert_multi_cased') returns Spark NLP model bert_multi_cased\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":244},"id":"Xz7xnvbCFxE3","executionInfo":{"status":"ok","timestamp":1606721019713,"user_tz":-60,"elapsed":1683756,"user":{"displayName":"Christian Kasim 
Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"b8b15ce7-149e-427a-9aad-37d76e074154"},"source":["# Add bert word embeddings to pipe \n","fitted_pipe = nlu.load('bert train.ner').fit(dataset_path=train_path)\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict('Donald Trump and Angela Merkel dont share many oppinions')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["small_bert_L2_128 download started this may take some time.\n","Approximate size to download 16.1 MB\n","[OK!]\n","pos_anc download started this may take some time.\n","Approximate size to download 4.3 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
bert_embeddingsposentities_confidencener_confidenceentities
origin_index
0[[-0.447601854801178, 1.0348625183105469, 0.51...[NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS]PER[0.7784000039100647, 0.9710999727249146, 0.997...Donald Trump
0[[-0.447601854801178, 1.0348625183105469, 0.51...[NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS]PER[0.7784000039100647, 0.9710999727249146, 0.997...Angela Merkel dont
\n","
"],"text/plain":[" bert_embeddings ... entities\n","origin_index ... \n","0 [[-0.447601854801178, 1.0348625183105469, 0.51... ... Donald Trump\n","0 [[-0.447601854801178, 1.0348625183105469, 0.51... ... Angela Merkel dont\n","\n","[2 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Let's save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1606721039475,"user_tz":-60,"elapsed":1703498,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"b4b22c80-3318-4070-e516-36847a66e88e"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Let's load the model from HDD.\n","This makes offline NLU usage possible! 
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":137},"executionInfo":{"status":"ok","timestamp":1606721049691,"user_tz":-60,"elapsed":1713703,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"cdd160bf-462e-4dbe-b618-db259feb3987"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Donald Trump and Angela Merkel dont share many oppinions on laws about cheeseburgers')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
bert_embeddingsposentities_confidencener_confidenceentities
origin_index
0[[-0.6870577335357666, 1.1118954420089722, 0.5...[NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS, IN, ...PER[0.7975000143051147, 0.9343000054359436, 0.995...Donald Trump
0[[-0.6870577335357666, 1.1118954420089722, 0.5...[NNP, NNP, CC, NNP, NNP, NN, NN, JJ, NNS, IN, ...PER[0.7975000143051147, 0.9343000054359436, 0.995...Angela Merkel dont
\n","
"],"text/plain":[" bert_embeddings ... entities\n","origin_index ... \n","0 [[-0.6870577335357666, 1.1118954420089722, 0.5... ... Donald Trump\n","0 [[-0.6870577335357666, 1.1118954420089722, 0.5... ... Angela Merkel dont\n","\n","[2 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1606721049695,"user_tz":-60,"elapsed":1713695,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"90c19529-41ab-4533-fba6-6107dac7c23e"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentiment_dl'] has settable params:\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(128) | Info: Number of embedding dimensions | Currently set to : 128\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setStorageRef('small_bert_L2_128') | Info: unique reference name for identification | Currently set to : small_bert_L2_128\n",">>> pipe['named_entity_recognizer_dl'] has settable params:\n","pipe['named_entity_recognizer_dl'].setIncludeConfidence(True) | Info: whether to include confidence scores in annotation metadata | Currently set to : True\n","pipe['named_entity_recognizer_dl'].setBatchSize(8) | Info: Size of every batch. 
| Currently set to : 8\n","pipe['named_entity_recognizer_dl'].setClasses(['O', 'B-ORG', 'I-ORG', 'I-MISC', 'I-PER', 'B-LOC', 'B-MISC', 'I-LOC']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['O', 'B-ORG', 'I-ORG', 'I-MISC', 'I-PER', 'B-LOC', 'B-MISC', 'I-LOC']\n","pipe['named_entity_recognizer_dl'].setStorageRef('small_bert_L2_128') | Info: unique reference name for identification | Currently set to : small_bert_L2_128\n",">>> pipe['NerToChunkConverter'] has settable params:\n","pipe['NerToChunkConverter'].setPreservePosition(True) | Info: Whether to preserve the original position of the tokens in the original document or use the modified tokens | Currently set to : True\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"o3jCHbIsMZrn","colab":{"base_uri":"https://localhost:8080/","height":299},"executionInfo":{"status":"error","timestamp":1607488227274,"user_tz":-60,"elapsed":919,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"f00d563f-c7ea-4f33-85d7-b9d45f35b1f0"},"source":["from varname import nameof\n"],"execution_count":null,"outputs":[{"output_type":"error","ename":"ModuleNotFoundError","evalue":"ignored","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mvarname\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnameof\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m","\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'varname'","","\u001b[0;31m---------------------------------------------------------------------------\u001b[0;32m\nNOTE: If your import is failing due to a missing package, you 
can\nmanually install dependencies using either !pip or !apt.\n\nTo view examples of installing some common dependencies, click the\n\"Open Examples\" button below.\n\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n"]}]},{"cell_type":"code","metadata":{"id":"USD6d66Sw6_P"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file diff --git a/examples/colab/Training/part_of_speech/NLU_training_POS_demo.ipynb b/examples/colab/Training/part_of_speech/NLU_training_POS_demo.ipynb index 8a736050..a6a23e1a 100644 --- a/examples/colab/Training/part_of_speech/NLU_training_POS_demo.ipynb +++ b/examples/colab/Training/part_of_speech/NLU_training_POS_demo.ipynb @@ -1 +1 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_POS_demo.ipynb","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyNu3YIWBC0UgJMtKu9Hxgqm"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/https://github.com/JohnSnowLabs/nlu/blob/master/examples/collab/Training/part_of_speech/NLU_training_demo.ipynb)\n","\n","\n","\n","# Training a Named Entity Recognition (POS) model with NLU \n","With the [POS tagger](https://nlp.johnsnowlabs.com/docs/en/annotators#postagger-part-of-speech-tagger) from Spark NLP you can achieve State Of the Art results on any POS problem.\n","It uses an Averaged Percetron Model approach under the hood.\n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning POS classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. 
Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu pyspark==2.4.7 > /dev/null \n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"IWp5LbydCkqC"},"source":[""]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download French POS dataset"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1607932039873,"user_tz":-60,"elapsed":80981,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"76f3b769-a646-444b-fdfc-d764d4b74e45"},"source":["! wget https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/fr/pos/UD_French/UD_French-GSD_2.3.txt"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2020-12-14 07:47:19-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/fr/pos/UD_French/UD_French-GSD_2.3.txt\n","Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.143.238\n","Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.143.238|:443... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 3565213 (3.4M) [text/plain]\n","Saving to: ‘UD_French-GSD_2.3.txt’\n","\n","UD_French-GSD_2.3.t 100%[===================>] 3.40M 15.8MB/s in 0.2s \n","\n","2020-12-14 07:47:19 (15.8 MB/s) - ‘UD_French-GSD_2.3.txt’ saved [3565213/3565213]\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. 
Train Deep Learning Classifier using nlu.load('train.pos')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1607932112061,"user_tz":-60,"elapsed":153158,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"c6032381-0446-484a-8c4e-0ad9fc500c48"},"source":["import nlu\n","# load a trainable pipeline by specifying the train. prefix and fit it on a datset with label and text columns\n","# Since there are no\n","train_path = '/content/UD_French-GSD_2.3.txt'\n","trainable_pipe = nlu.load('train.pos')\n","fitted_pipe = trainable_pipe.fit(dataset_path=train_path)\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict('Donald Trump and Angela Merkel dont share many oppinions')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
tokenpos
origin_index
0DonaldPROPN
0TrumpPROPN
0andCCONJ
0AngelaPROPN
0MerkelPROPN
0dontPRON
0shareVERB
0manyADJ
0oppinionsNOUN
\n","
"],"text/plain":[" token pos\n","origin_index \n","0 Donald PROPN\n","0 Trump PROPN\n","0 and CCONJ\n","0 Angela PROPN\n","0 Merkel PROPN\n","0 dont PRON\n","0 share VERB\n","0 many ADJ\n","0 oppinions NOUN"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 4. Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607932114637,"user_tz":-60,"elapsed":155726,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"24d34ea2-dcc1-42b2-a5c6-10d345b76a3c"},"source":["stored_model_path = './models/pos_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/pos_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 5. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! 
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":485},"executionInfo":{"status":"ok","timestamp":1607932120301,"user_tz":-60,"elapsed":161383,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"db790b35-a51d-4226-8a0b-bb3e9e39e368"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Donald Trump and Angela Merkel dont share many oppinions on laws about cheeseburgers')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
tokenpos
origin_index
0DonaldPROPN
0TrumpPROPN
0andCCONJ
0AngelaPROPN
0MerkelPROPN
0dontPRON
0shareVERB
0manyADJ
0oppinionsNOUN
0onPRON
0lawsVERB
0aboutADV
0cheeseburgersNOUN
\n","
"],"text/plain":[" token pos\n","origin_index \n","0 Donald PROPN\n","0 Trump PROPN\n","0 and CCONJ\n","0 Angela PROPN\n","0 Merkel PROPN\n","0 dont PRON\n","0 share VERB\n","0 many ADJ\n","0 oppinions NOUN\n","0 on PRON\n","0 laws VERB\n","0 about ADV\n","0 cheeseburgers NOUN"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607932120301,"user_tz":-60,"elapsed":161374,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"6bb7769e-f545-40b8-f0ef-90fd9f32c149"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentiment_dl'] has settable params:\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"o3jCHbIsMZrn"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_POS_demo.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/part_of_speech/NLU_training_POS_demo.ipynb)\n","\n","\n","\n","# Training a Part of Speech (POS) model with NLU \n","With the [POS tagger](https://nlp.johnsnowlabs.com/docs/en/annotators#postagger-part-of-speech-tagger) from Spark NLP you can achieve State Of the Art results on any POS problem.\n","It uses an Averaged Perceptron Model approach under the 
hood.\n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning POS classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu pyspark==2.4.7 > /dev/null \n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"IWp5LbydCkqC"},"source":[""]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download French POS dataset"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1607932039873,"user_tz":-60,"elapsed":80981,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"76f3b769-a646-444b-fdfc-d764d4b74e45"},"source":["! wget https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/fr/pos/UD_French/UD_French-GSD_2.3.txt"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2020-12-14 07:47:19-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/fr/pos/UD_French/UD_French-GSD_2.3.txt\n","Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.143.238\n","Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.143.238|:443... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 3565213 (3.4M) [text/plain]\n","Saving to: ‘UD_French-GSD_2.3.txt’\n","\n","UD_French-GSD_2.3.t 100%[===================>] 3.40M 15.8MB/s in 0.2s \n","\n","2020-12-14 07:47:19 (15.8 MB/s) - ‘UD_French-GSD_2.3.txt’ saved [3565213/3565213]\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.pos')\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1607932112061,"user_tz":-60,"elapsed":153158,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"c6032381-0446-484a-8c4e-0ad9fc500c48"},"source":["import nlu\n","# load a trainable pipeline by specifying the train. prefix and fit it on a dataset with label and text columns\n","# Since there are no\n","train_path = '/content/UD_French-GSD_2.3.txt'\n","trainable_pipe = nlu.load('train.pos')\n","fitted_pipe = trainable_pipe.fit(dataset_path=train_path)\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict('Donald Trump and Angela Merkel dont share many oppinions')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
tokenpos
origin_index
0DonaldPROPN
0TrumpPROPN
0andCCONJ
0AngelaPROPN
0MerkelPROPN
0dontPRON
0shareVERB
0manyADJ
0oppinionsNOUN
\n","
"],"text/plain":[" token pos\n","origin_index \n","0 Donald PROPN\n","0 Trump PROPN\n","0 and CCONJ\n","0 Angela PROPN\n","0 Merkel PROPN\n","0 dont PRON\n","0 share VERB\n","0 many ADJ\n","0 oppinions NOUN"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 4. Let's save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607932114637,"user_tz":-60,"elapsed":155726,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"24d34ea2-dcc1-42b2-a5c6-10d345b76a3c"},"source":["stored_model_path = './models/pos_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/pos_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 5. Let's load the model from HDD.\n","This makes Offline NLU usage possible! 
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":485},"executionInfo":{"status":"ok","timestamp":1607932120301,"user_tz":-60,"elapsed":161383,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"db790b35-a51d-4226-8a0b-bb3e9e39e368"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Donald Trump and Angela Merkel dont share many oppinions on laws about cheeseburgers')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
tokenpos
origin_index
0DonaldPROPN
0TrumpPROPN
0andCCONJ
0AngelaPROPN
0MerkelPROPN
0dontPRON
0shareVERB
0manyADJ
0oppinionsNOUN
0onPRON
0lawsVERB
0aboutADV
0cheeseburgersNOUN
\n","
"],"text/plain":[" token pos\n","origin_index \n","0 Donald PROPN\n","0 Trump PROPN\n","0 and CCONJ\n","0 Angela PROPN\n","0 Merkel PROPN\n","0 dont PRON\n","0 share VERB\n","0 many ADJ\n","0 oppinions NOUN\n","0 on PRON\n","0 laws VERB\n","0 about ADV\n","0 cheeseburgers NOUN"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607932120301,"user_tz":-60,"elapsed":161374,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"6bb7769e-f545-40b8-f0ef-90fd9f32c149"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentiment_dl'] has settable params:\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"o3jCHbIsMZrn"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file